错误:找不到与选择器 <img data-src="url"> 匹配的元素

时间:2019-04-28 06:12:38

标签: javascript node.js xpath puppeteer

Running on Puppeteer, all updated.

The intended process is to go to the website, where the url is url/{search item}, and run through the list of search names. Then, for each search item --> search page, get the name, price and image url for each listing. Now there's an error: it cannot find the selector. Appreciate any help on this, many thanks!

Layout of the data of the website is as follows:

<div class=" items-box-content">
  <section class="items-box">
    <a href="https://listingurl">
      <figure class="items-box-photo">
        <img data-src="https://imageurl.jpg" class=" lazyloaded" src="https://imageurl.jpg">
      </figure>
      <div class="items-box-main">
        <h3 class="items-box-name">listing name</h3>
        <div class="items-box-figure">
          <div class="items-price font-4">$29.95</div> <!-- item's price -->
        </div>
      </div>
    </a>
  </section>
</div>

我现在所拥有的(引发错误):

// Question's script (reported as failing): for every search term, open the
// results page and try to collect image URL, title, price and link of each
// listing into `results`.
const puppeteer = require('puppeteer');
const searches = ["a", "b", "c"]; //appended to url
(async () => {
   const browser = await puppeteer.launch({ headless: false });
   let results =[];
   for (const search of searches) {
         try {
            // NOTE(review): `page` is assigned without let/const, so it is an implicit global.
            page = await browser.newPage();
            await page.goto(`https://weburl/?keyword=${search}`);
            // `[class*="items-box"]` is a substring match on the class attribute of <div> elements.
            await page.evaluate(() => { document.querySelector('div[class*="items-box"]').scrollIntoView();});
            let elements = await page.$$('div[class*="items-box"]');
            for (let element of elements) {
               // NOTE(review): the attribute value is missing its closing quote (`"items-box-photo]`);
               // this is the selector named in the reported error message.
               let listImg = await element.$eval(('img[class="items-box-photo]'), img => img.getAttribute('src'));
               // NOTE(review): `d` and `h` are type selectors for elements literally named d / h,
               // not abbreviations of div / h3.
               let listTitle = await element.$eval(('d[class="items-box-main"] > h[class="items-box-name"]'), node => node.innerText.trim());
               let listPrice = await element.$eval(('d[class="items-box-figure"] > d[class="items-price"]'), node => node.innerText.trim());
               let listUrl = await element.$eval(('d[class="items-box-content"] > a[class*="items-box"]'), node => node.getAttribute('href'));

               results.push({ 
                  listImg, 
                  listTitle, 
                  listPrice, 
                  listUrl 
               })
               // NOTE(review): this return is inside the inner loop, so the async function
               // exits after the first element has been pushed.
               return results;
            }
         } finally {
            // NOTE(review): `page.close` is referenced but not called (no parentheses),
            // so this statement does not close the page.
            await page.close
         }
   }
})();

引发的错误是

  

(node:5168) UnhandledPromiseRejectionWarning: Error: Error: failed to find element matching selector "img[class="items-box-photo]"

2 个答案:

答案 0 :(得分:1)

我通过测试/调试更新了您的代码。

const puppeteer = require('puppeteer');

const searches = ["a"];

/**
 * Opens one page per search term, waits for the listings to load and collects
 * the image URL, title, price and link of every listing section.
 *
 * The IIFE resolves to the array of collected listings, which is also logged.
 * A failure while processing one search term is logged and does not stop the
 * remaining terms. Each page is closed after its term is processed and the
 * browser is closed once all terms are done, even when an error occurred.
 */
(async () => {

    const browser = await puppeteer.launch({ headless: false });

    /**
     * Resolves after the given number of milliseconds.
     * @param {number} timeout - Delay in milliseconds.
     * @returns {Promise<void>}
     */
    function delay(timeout) {
        return new Promise((resolve) => {
            setTimeout(resolve, timeout);
        });
    }

    const results = [];

    try {
        for (const search of searches) {
            // Declared per iteration so no implicit global is created and the
            // `finally` below can tell whether a page was actually opened.
            let page;

            try {
                page = await browser.newPage();
                // NOTE: placeholder URL kept from the original answer; `search` is not interpolated here.
                await page.goto(`https:url/`);

                await page.evaluate(() => { document.querySelector('section[class*="items-box"]').scrollIntoView(); });

                const elements = await page.$$('section[class*="items-box"]');
                console.log(elements.length)
                console.log('wait 6  seconds')
                // Give the page time to finish loading before reading the listings.
                await delay(6000);

                for (const element of elements) {
                    const listImg = await element.$eval(('img'), img => img.getAttribute('src'));
                    const listTitle = await element.$eval(('h3[class="items-box-name font-2"]'), node => node.innerText.trim());
                    const listPrice = await element.$eval(('div[class="items-box-price font-5"]'), node => node.innerText.trim());
                    const listUrl = await element.$eval(('div[class="items-box-content clearfix"] a'), node => node.getAttribute('href'));

                    results.push({
                        listImg,
                        listTitle,
                        listPrice,
                        listUrl
                    });
                }

            } catch (error) {
                // Log and move on so one failing search does not abort the others.
                console.log(error)
            } finally {
                // Call close() (with parentheses); a bare `page.close` does nothing.
                if (page) {
                    await page.close();
                }
            }
        }
    } finally {
        // Close the browser only after every search has run; closing it inside
        // the loop would break all iterations after the first.
        await browser.close();
    }
    console.log(results)
    return results;
})();

更新的内容:
1. 把 for 循环中的 return results 移到了循环之外

for(){
   return result;
}

=>

for(){

}
return result;
  1. 更新了querySelector
section[class*="items-box"]

img  // There is only one img tags in "element"

h3[class="items-box-name font-2"]  // removed outer 'element'

div[class="items-box-figure"] > div[class="items-price font-4"]

div[class="items-box-price font-5"]  // updated class name? on my side

items-box-price

div[class="items-box-content clearfix"] a 
  1. 增加了 6 秒的等待时间,具体时长取决于网络速度(页面加载耗时)。

  2. try catch finally
    即使某一步出错,catch 也能捕获错误,让程序继续处理下一步。

答案 1 :(得分:1)

问题就出现在错误消息(Error: failed to find element matching selector ...)中。

以下行中的选择器错误:

// Quoted from the question: the four lookups whose selectors are wrong.
// The attribute value below is missing its closing quote after items-box-photo.
let listImg = await element.$eval(('img[class="items-box-photo]'), img => img.getAttribute('src'));
// `d` and `h` are type selectors for elements literally named d / h, not div / h3.
let listTitle = await element.$eval(('d[class="items-box-main"] > h[class="items-box-name"]'), node => node.innerText.trim());
let listPrice = await element.$eval(('d[class="items-box-figure"] > d[class="items-price"]'), node => node.innerText.trim());
let listUrl = await element.$eval(('d[class="items-box-content"] > a[class*="items-box"]'), node => node.getAttribute('href'));

根据您提供的HTML代码,这些代码应为:

// Extractors run in the page context against the first matching node.
const readSrc = (img) => img.getAttribute('src');
const readTrimmedText = (node) => node.innerText.trim();
const readHref = (node) => node.getAttribute('href');

// Look up each field of one listing with class selectors, relative to `element`.
let listImg = await element.$eval('img.lazyloaded', readSrc);
let listTitle = await element.$eval('h3.items-box-name', readTrimmedText);
let listPrice = await element.$eval('div.items-price', readTrimmedText);
let listUrl = await element.$eval('div.items-box-content a', readHref);

请注意,查询类的正确方法不是使用 [class=...],而是使用类选择器(class selector)。