我正在抓取一个电子商务网站,希望从中获取一些产品数据。我需要的数据都拿到了,但图片链接的格式很奇怪:得到的是“data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAAAAABAAEAAAIBRAA7”,而不是“https://tn.jumia.is/D8RjVA7gYqsaxxwKgqOa_6582_k=/fit-in/220x220/filters:fill(white):sharpen(1,0,false):quality(100)/product/48/9141/1.jpg?5043”。
我在其他网站上试过同一个抓取脚本,运行正常。
// URL of the listing page that is downloaded and parsed.
const link = "https://www.jumia.com.tn/smartphones/";

// Selectors evaluated once against the whole document.
const categorieTag = ".osh-cat-header .title";
const siteTag = ".logo a img";

// Selector matching one element per product; every selector below is looked
// up inside each matched element rather than against the whole document.
const productEnvelopeTag = ".sku";
const imageLinkTag = ".image-wrapper img ";
const productNameTag = ".name";
const priceTag = ".price";
// NOTE(review): this is a class selector. If the page exposes the currency as
// a `data-currency-iso` attribute instead, it would need to be
// "[data-currency-iso]" - confirm against the page markup.
const currencyTag = ".data-currency-iso";
const availabilityTag = "#stock_availability";
const productRefTag = ".product-reference";
const productLink = ".product-title a";
/**
 * Downloads the page at `link` and resolves with its body as text.
 *
 * @returns {Promise<string>} The response body.
 * @throws {Error} (as a rejection) when the server answers with a non-2xx
 *   status, so that an error page is never parsed as if it were the listing.
 */
function getHtmlSite() {
  return fetch(link).then(response => {
    if (!response.ok) {
      throw new Error(
        `Request to ${link} failed: ${response.status} ${response.statusText}`
      );
    }
    return response.text();
  });
}
// Downloads the listing page, builds one record per product element, appends
// the records to `allProducts` and writes that array to JumiaProducts.json.
// NOTE(review): `allProducts`, `cheerio` and `fs` are not defined in this
// section; they are presumably declared earlier in the file - confirm.
getHtmlSite()
  .then(body => {
    const $ = cheerio.load(body);

    // Drop the heading's child elements first so that only the text sitting
    // directly inside the heading is read.
    const categorie = $(categorieTag)
      .children()
      .remove()
      .end()
      .text();
    // The site name is the same for every product, so read it once.
    const siteName = $(siteTag).attr("alt");

    $(productEnvelopeTag).each((_index, element) => {
      const $card = $(element);
      const $image = $card.find(imageLinkTag);

      const product = {
        // On this page `src` holds a placeholder `data:image/gif;base64,...`
        // URI, while the real image URL is in `data-src`. Prefer `data-src`
        // and fall back to `src` for pages that set the URL directly.
        image: $image.attr("data-src") || $image.attr("src"),
        productName: $card.find(productNameTag).text(),
        price: $card.find(priceTag).text().trim(),
        currencyTag: $card.find(currencyTag).text().trim(),
        categorie: categorie,
        availability: $card.find(availabilityTag).text().trim(),
        site: siteName,
        refProduct: $card.find(productRefTag).text(),
        // Key spelling is kept as-is so the shape of the JSON output does not change.
        prodcutLink: $card.find(productLink).attr("href")
      };
      allProducts.push(product);
    });

    fs.writeFileSync("JumiaProducts.json", JSON.stringify(allProducts, null, 2));
  })
  .catch(error => {
    // Report the failure and keep a non-zero exit status instead of leaving
    // the rejection unhandled.
    console.error("Scraping failed:", error);
    process.exitCode = 1;
  });
答案 0 :(得分:0)
您需要读取这些图片的 data-src 属性:
image: $image.attr("data-src")
有时候,您需要查看“查看网页源代码”中的原始 HTML,而不是元素检查器中的内容:检查器显示的是 JavaScript 运行之后的 DOM,而抓取脚本拿到的是未经脚本处理的原始 HTML。