我需要抓取下面这段JavaScript代码中的数据。我认为可以通过js2xml将JavaScript代码转换为XML数据,但问题是它会转换整段JavaScript;例如,如果只想抓取“phone”(电话)字段该怎么办? 还有其他解决方案吗?比如根据脚本中的某些关键字来提取。 使用环境: -Python 3.7.0 -Scrapy 1.5 我该如何解决? 谢谢
<script type="text/javascript">
// Page-view tracking: reports an 'Ad detail viewed' event through pulse().
// `pulse` and `email` are not defined in this snippet; they must be provided
// elsewhere on the page.
// NOTE(review): price starts as an empty string here, so Number(price) below
// evaluates to 0 — presumably the value is filled in server-side when an ad
// has a price; confirm.
var price = '';
// Strip every space character from the price string before it is passed to Number().
price = price.replace(/ /g ,'');
pulse('track', 'trackerEvent', {
name: 'Ad detail viewed',
type: 'View',
// Description of the ad being viewed.
object: {
type: 'ClassifiedAd',
id: '32088977',
contentId: '32088977',
url: 'https://www.example.com/fr/namecity/accessoires_informatique_et_gadgets/Toner_Heidelberg_Ricoh_C751_C651_32088977.htm?icl=1',
name: 'Orgacom sarl',
currency:'DH',
adType: 'sell',
price: Number(price),
category: 'INFORMATIQUE ET MULTIMEDIA > Accessoires informatique et Gadgets ',
AccountType : 'Pro',
location: {
type: 'PostalAddress',
addressCountry: 'Maroc',
City : "Rabat" ,
Region : 'autre_secteur',
},
},
// Additional custom fields sent along with the event.
customs: {
type: 'Contact',
AdID: '40695389',
list_id:'32088977',
lang:'fr',
region: 'Rabat',
appl:'vi',
categoryID: '5060',
Page: 'view_ad ',
D2d_offered: 'False'
},
actor: email,
provider: { productType: 'Web'}
});
/**
 * Sends an 'Ad phone number displayed' tracker event through pulse().
 *
 * Takes no parameters and has no explicit return value. It reads `price`,
 * `email` and `pulse` from the enclosing scope; `email` and `pulse` are not
 * defined in this snippet.
 * NOTE(review): the name suggests this is wired to the "show phone" click
 * handler — confirm against the page markup.
 */
function ClickPhoneShow() {
pulse('track', 'trackerEvent', {
name: 'Ad phone number displayed',
type: 'Show',
action: 'Click',
object: {
type: 'PhoneContact',
id: '32088977',
// The ad this contact event refers to; the advertiser's contact details
// and the ad text are embedded here as string literals.
inReplyTo: {
id: '32088977',
type: 'ClassifiedAd',
category: 'INFORMATIQUE ET MULTIMEDIA > Accessoires informatique et Gadgets ',
AccountType : 'Pro',
name: 'example sarl',
phone: '066666666',
subject:"Toner Heidelberg Ricoh C751/C651",
body:"C751/C651 on met en vente Toner ricoh c751/c651 origine importé d'allemand avec un bon prix <br> (toner + photocopie + ricoh + Heidelberg)<br>contactez nous merci",
price: Number(price),
location: {
type: 'PostalAddress',
addressCountry: 'Maroc',
City : "Rabat" ,
Region : 'autre_secteur',
},
},
},
// Context fields describing where the event was triggered.
target: {
type: 'Contact',
AdID: '40695389',
list_id:'32088977',
AdPrice: '',
url:'fr/namecitey/accessoires_informatique_et_gadgets/Toner_Heidelberg_Ricoh_C751_C651_32088977.htm ',
appl :'vi ',
category :'Accessoires informatique et gadgets ',
region: 'Rabat',
PageType: 'TransactionPage',
eventPrefix: 'Phone_Desktop',
D2d_offered: 'False'
},
actor: email,
provider: { productType: 'Web'}
});
}
</script>
答案 0 :(得分:0)
我建议您坚持使用js2xml,因为它提供了一种遍历JavaScript代码的方法,不过这当然需要额外花一些功夫。
另一种解决方案是使用正则表达式:
# Requires `import re`; script_text is the text content of the <script> element.
# re.findall returns a list with the captured group of every match. "." does not
# match newlines by default, so each match stays within a single line of the script.
phones = re.findall("phone: '(.+)',", script_text)