从Puppeteer的ElementHandle获取完整的CSS选择器

时间:2018-04-01 09:25:09

标签: headless-browser puppeteer google-chrome-headless

我目前正在使用Puppeteer。我注意到在Chrome开发者工具中，可以通过“复制 / 复制选择器”（Copy / Copy selector）获取任何元素的选择器，我想知道是否可以从Puppeteer中的ElementHandle获得完整的CSS选择器。

例如,给出以下html:

<body>
<h1>Main Title</h1>
</body>

然后,在Puppeteer中我得到了h1元素:

// Query the page with the selector "h1" and keep the resulting handle.
const myElement=await page.$("h1");

我想获得myElement的完整css选择器（body > h1）。

2 个答案:

答案 0 :(得分:0)

使用我的一个用户脚本（userscript）中的这一部分：

/**
 * Namespace resolver handed to document.evaluate().
 *
 * The prefix-to-URI entries (`svg`, `mathml`) are kept as plain properties so
 * existing code that reads them keeps working. document.evaluate() does not
 * read such properties itself: it expects either a function or an object that
 * exposes lookupNamespaceURI(prefix), so that method is provided here.
 */
var xpathNamespaceResolver = {
    svg: 'http://www.w3.org/2000/svg',
    mathml: 'http://www.w3.org/1998/Math/MathML',

    /**
     * Maps a namespace prefix used in an XPath expression to its URI.
     *
     * @param {string} prefix - Prefix such as 'svg' or 'mathml'.
     * @returns {string|null} The namespace URI, or null for an unknown prefix.
     */
    lookupNamespaceURI: function lookupNamespaceURI(prefix) {
        // Only own string-valued entries count as prefixes; this keeps the
        // method itself and inherited Object members from being returned.
        var isOwn = Object.prototype.hasOwnProperty.call(xpathNamespaceResolver, prefix);
        var uri = isOwn ? xpathNamespaceResolver[prefix] : null;
        return typeof uri === 'string' ? uri : null;
    }
};

/**
 * Returns the first node under document.body matched by an XPath expression.
 *
 * Declared as a function instead of being assigned to an undeclared
 * identifier: such an assignment throws a ReferenceError in strict mode and
 * in ES modules.
 *
 * @param {string} expression - XPath expression, evaluated with document.body
 *     as the context node and xpathNamespaceResolver as the resolver.
 * @returns {Node|undefined} The first matching node in document order, or
 *     undefined when nothing matches.
 */
function getElementByXPath(expression) {
    const matches = document.evaluate(
        expression,
        document.body,
        xpathNamespaceResolver,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
    );
    if (matches.snapshotLength === 0) {
        return undefined;
    }
    return matches.snapshotItem(0);
}

/**
 * Builds a CSS selector or an XPath expression that spells out the chain of
 * elements from <body> (when the node lives under it) down to a node, listing
 * every attribute of every element on the way.
 *
 * Differences from the previous implementation:
 *  - the function and its selector-type variable are properly declared (the
 *    old implicit globals throw in strict mode / ES modules);
 *  - the ancestor walk skips non-element nodes, so starting outside <body>
 *    (e.g. on <html>) no longer ends in a TypeError on the Document node;
 *  - class names are split on any whitespace, so no stray blanks end up in
 *    the selector;
 *  - ids and classes that are not plain identifiers use attribute selectors;
 *  - quotes inside attribute values and inside the searched text are escaped.
 *
 * @param {string|Node} arg - Either a DOM node, or a text fragment; for a text
 *     fragment the first element with a text node containing it is looked up
 *     through getElementByXPath().
 * @param {string} type - 'css' or 'xpath' (case-insensitive).
 * @returns {string|undefined} The selector / expression. An empty string when
 *     the type is unknown or no element could be resolved; undefined when
 *     `arg` is neither a string nor a node (an error is logged in each case).
 */
function retrieveCssOrXpathSelectorFromTextOrNode(arg, type) {
    const ELEMENT_NODE = 1;
    // Conservative subset of CSS identifiers that need no escaping.
    const SIMPLE_IDENTIFIER = /^-?[A-Za-z_][\w-]*$/;
    const selectorType = String(type).toLowerCase();

    // Quotes a string as an XPath 1.0 literal. XPath has no escape character,
    // so a value holding both quote kinds has to be assembled with concat().
    function toXPathLiteral(value) {
        const text = String(value);
        if (!text.includes("'")) {
            return "'" + text + "'";
        }
        if (!text.includes('"')) {
            return '"' + text + '"';
        }
        return "concat('" + text.split("'").join("', \"'\", '") + "')";
    }

    // Quotes a string as a single-quoted CSS string.
    function toCssString(value) {
        const escaped = String(value)
            .replace(/\\/g, '\\\\')
            .replace(/'/g, "\\'")
            .replace(/\n/g, '\\a ');
        return "'" + escaped + "'";
    }

    // Renders one attribute as a CSS simple selector.
    function describeCssAttribute(name, value) {
        if (!value) {
            return '[' + name + ']';
        }
        if (name === 'id') {
            return SIMPLE_IDENTIFIER.test(value)
                ? '#' + value
                : '[id=' + toCssString(value) + ']'; // e.g. [id='foo:bar']
        }
        if (name === 'class') {
            const tokens = value.split(/\s+/).filter(Boolean);
            if (tokens.length === 0) {
                return '[class]'; // whitespace-only class attribute
            }
            return tokens
                .map((token) => (SIMPLE_IDENTIFIER.test(token)
                    ? '.' + token
                    : '[class~=' + toCssString(token) + ']'))
                .join('');
        }
        return '[' + name + '=' + toCssString(value) + ']';
    }

    // Renders one attribute as an XPath predicate.
    function describeXPathAttribute(name, value) {
        return value
            ? '[@' + name + '=' + toXPathLiteral(value) + ']'
            : '[@' + name + ']';
    }

    // Collects the tag name, attribute part and (XPath links only) text
    // predicate of a single element.
    function describeElement(element) {
        const name = element.nodeName.toLowerCase();
        const attributes = element.attributes || [];
        const describeAttribute = selectorType === 'css'
            ? describeCssAttribute
            : describeXPathAttribute;
        let attrs = '';
        for (let i = 0; i < attributes.length; i++) {
            attrs += describeAttribute(attributes[i].name, attributes[i].value);
        }
        const txt = (name === 'a' && selectorType === 'xpath')
            ? '[text()=' + toXPathLiteral(element.innerText) + ']'
            : '';
        return { name, attrs, txt };
    }

    if (selectorType !== 'css' && selectorType !== 'xpath') {
        console.error('ERROR unknown type ' + type);
        return '';
    }

    let node;
    if (typeof arg === 'string') { // text from within the page
        node = getElementByXPath('//*[text()[contains(.,' + toXPathLiteral(arg) + ')]]');
    } else if (typeof arg === 'object') { // node argument, duck-typed on nodeType
        if (arg && arg.nodeType) {
            node = arg;
        } else {
            console.error("ERROR expected node, get object");
            return;
        }
    } else {
        console.error("ERROR expected node or string argumument");
        return;
    }

    // Walk up to <body>, or to the top of the tree when the node is not below
    // <body>. Text nodes, the Document and shadow roots are skipped.
    const segments = [];
    for (let current = node; current; current = current.parentNode) {
        if (current.nodeType !== ELEMENT_NODE) {
            continue;
        }
        const segment = describeElement(current);
        segments.push(segment);
        if (segment.name === 'body') {
            break;
        }
    }

    if (segments.length === 0) {
        console.error('ERROR no matching node');
        return '';
    }

    segments.reverse(); // collected leaf-first, emitted root-first

    if (selectorType === 'css') {
        return segments.map((elt) => elt.name + elt.attrs).join(' > ');
    }
    return '//' + segments.map((elt) => elt.name + elt.txt + elt.attrs).join('/');
}


/**
 * Logs both the CSS selector and the XPath expression computed for a node
 * (or for a text fragment found in the page).
 *
 * Declared as a function instead of being assigned to an undeclared
 * identifier: such an assignment throws a ReferenceError in strict mode and
 * in ES modules.
 *
 * @param {string|Node} arg - Forwarded unchanged to
 *     retrieveCssOrXpathSelectorFromTextOrNode().
 * @returns {undefined} Nothing is returned; the result is only logged.
 */
function x(arg) {
    console.log("CSS\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'css'));
    console.log("XPath\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'xpath'));
}

用法:

console.log(x(node));

Chrome开发工具中此页面上所选textArea节点的输出:

CSS
body.question-page.new-topbar[style] > div.container._full  > div#content.snippet-hidden > div[itemscope][itemtype='http://schema.org/Question'] > div.inner-content.clearfix > div#mainbar[role='main'][aria-label='question and answers'] > div#answers > div#answer-49596712.answer[data-answerid='49596712'][itemscope][itemtype='http://schema.org/Answer'] > div.post-layout > div.answercell.post-layout--right > div.inline-editor[style] > form.inline-post[action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][method='post'][data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}'] > div#post-editor-49596712.post-editor.js-post-editor > div[style='position: relative;'] > div.wmd-container > textarea#wmd-input-49596712.wmd-input.processed[name='post-text'][cols='92'][rows='15'][tabindex='81'][data-min-length]

XPath
//body[@class='question-page new-topbar'][@style]/div[@class='container _full ']/div[@id='content'][@class='snippet-hidden']/div[@itemscope][@itemtype='http://schema.org/Question']/div[@class='inner-content clearfix']/div[@id='mainbar'][@role='main'][@aria-label='question and answers']/div[@id='answers']/div[@id='answer-49596712'][@class='answer'][@data-answerid='49596712'][@itemscope][@itemtype='http://schema.org/Answer']/div[@class='post-layout']/div[@class='answercell post-layout--right']/div[@class='inline-editor'][@style]/form[@class='inline-post'][@action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][@method='post'][@data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}']/div[@id='post-editor-49596712'][@class='post-editor js-post-editor']/div[@style='position: relative;']/div[@class='wmd-container']/textarea[@id='wmd-input-49596712'][@class='wmd-input processed'][@name='post-text'][@cols='92'][@rows='15'][@tabindex='81'][@data-min-length]

答案 1 :(得分:0)

刚刚做了一些测试: https://www.npmjs.com/package/puppeteer-element2selector

到目前为止效果很好。我为 ElementHandles 找到了非常漂亮的唯一选择器：
.product-headline
.has-zoom .description-container:nth-child(1)

它在表面下使用 https://github.com/antonmedv/finder