我正在学习网络爬虫/网页抓取,需要一些帮助。我目前正在抓取以下网站:http://books.toscrape.com/。但是,我很难抓取到该网站上每本书的价格、评分和封面网址。有人能帮我吗?下面列出的是我尝试使用的代码。
# Walk every product card on the page and print the fields extracted from it.
# `data` is the parsed document supplied by the surrounding script.
for i in data.xpath("//article[@class='product_pod']"):
    title = i.xpath("h3/a/@title")
    # A leading "//" is evaluated from the document root and would return the
    # matches of *every* card on each iteration; ".//" keeps the search
    # inside the current card.
    price = i.xpath(".//p[@class='price_color']/text()")
    # NOTE(review): the class attribute presumably holds more than one token
    # (e.g. "star-rating Three"), so an exact comparison matches nothing;
    # contains() tolerates the extra token. Verify against the page markup.
    rating = i.xpath(".//p[contains(@class, 'star-rating')]/@class")
    # Descendant search, so the image is found even when its <a> is wrapped
    # in another element rather than being a direct child of the card.
    coverurl = i.xpath(".//img/@src")
    moreinfo = i.xpath("h3/a/@href")
    print(title, price, rating, coverurl, moreinfo)
答案 0 :(得分:1)
尝试以下代码
// Required pictures grouped by product name; each group lists the picture
// slots for that product and whether a picture has been supplied.
const RequiredPictures = {
  "Additional product": [
    {
      "required_picture_id": "001",
      "label": "MRI",
      "has_picture": true,
      "url": "https:bbymakeitright.png"
    },
    {
      "required_picture_id": "002",
      "label": "MR",
      "has_picture": true,
      "url": "https:bbymakeitright.png"
    }
  ],
  "Additional product two": [
    {
      "required_picture_id": "003",
      "label": "IMR",
      "has_picture": true,
      "url": "https:bbymakeitright.png"
    },
    {
      "required_picture_id": "004",
      "label": "IR",
      "has_picture": false,
      "url": ""
    }
  ]
};

// Reshape the grouped object into a list of { title, data } sections.
// Object.entries walks every group in insertion order, so the result is not
// limited to the first two keys and the key list is computed only once.
const newData = Object.entries(RequiredPictures).map(([title, data]) => ({
  title,
  data,
}));

console.log(newData);