抓取网站:在抓取过程中无法自动模拟用户点击

时间:2016-01-28 19:23:52

标签: javascript jquery html reactjs phantomjs

我正在尝试抓取一个网站。为此,我需要自动点击页面上的一个按钮,但无论用什么方法,似乎都无法让这个按钮产生任何反应。

链接: http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416

网站堆栈:ReactJS,JQueryJS

按钮选择器:#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img

尝试

JQuery click,mousedown,touchstart和native click ...在Chrome开发工具控制台中。

// Selector for the image inside the second colour swatch.
// The first attempt originally had invisible bidi control characters
// (U+202A, U+200E, U+202C) pasted around "#product", so that selector could
// never match any element; all attempts now share one clean selector string.
var swatchSelector = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

// Attempt 1: trigger a click through jQuery.
$(swatchSelector).click();

// Attempt 2: call the native DOM click() on the underlying element.
$(swatchSelector)[0].click();

// Attempt 3: trigger a mousedown through jQuery.
$(swatchSelector).mousedown();

// Attempt 4: trigger a touchstart through jQuery.
$(swatchSelector).trigger('touchstart');

PhantomJS sendEvent函数......通过PhantomJS无头浏览器。

// PhantomJS script: load the product page, click the centre of the second
// colour swatch with a real (native) mouse event, then render a screenshot.
var webpage = require('webpage');
var page = webpage.create();
var href = "http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416";
page.open(href, function (status) {
    // Nothing to click if the page did not load.
    if (status !== 'success') {
        console.log('Failed to load ' + href + ' (status: ' + status + ')');
        page.close();
        return;
    }

    var elem = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

    // Runs inside the page context. The rectangle is copied into a plain
    // object because values returned from page.evaluate must be
    // JSON-serialisable; null signals that the selector matched nothing.
    var rect = page.evaluate(function (selector) {
        var node = $(selector)[0];
        if (!node) {
            return null;
        }
        var box = node.getBoundingClientRect();
        return { left: box.left, top: box.top, width: box.width, height: box.height };
    }, elem);

    if (!rect) {
        console.log('Element not found: ' + elem);
        page.close();
        return;
    }

    /**
     * Computes the centre point of a bounding rectangle.
     * @param {{left: number, top: number, width: number, height: number}} bounds
     * @returns {{x: number, y: number}} centre coordinates rounded to integers
     */
    function computeCenter(bounds) {
        // Returned as an object: the caller reads `.x` / `.y`. The previous
        // array return value made both coordinates undefined.
        return {
            x: Math.round(bounds.left + bounds.width / 2),
            y: Math.round(bounds.top + bounds.height / 2)
        };
    }

    var cor = computeCenter(rect);
    page.sendEvent('click', cor.x, cor.y, 'left');

    // Give the page a second to react to the click before capturing it.
    setTimeout(function () {
        page.render('websiteAfterClick.png');
        page.close();
    }, 1000);
});

以及 HTML 事件(DOM MouseEvent)……在 Chrome 开发者工具控制台中。

// Build a synthetic "click" MouseEvent aimed at the centre of the second
// colour swatch image and dispatch it on that element.
var elem = $("#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img")[0];
var evt = document.createEvent("MouseEvents");

// Fallback coordinates, used when the element's rectangle cannot be read.
var center_x = 1;
var center_y = 1;
try {
    var pos = elem.getBoundingClientRect();
    center_x = Math.floor((pos.left + pos.right) / 2);
    center_y = Math.floor((pos.top + pos.bottom) / 2);
} catch (e) {
    // Best effort: keep the fallback coordinates, but say why.
    console.warn('Could not compute element centre, using (1, 1):', e);
}

// Arguments: type, canBubble, cancelable, view, detail, screenX, screenY,
// clientX, clientY, ctrlKey, altKey, shiftKey, metaKey, button, relatedTarget.
evt.initMouseEvent('click', true, false, window, 1, 1, 1, center_x, center_y, false, false, false, false, 0, elem);

// The event was previously created and initialised but never dispatched, so
// no listener could ever see it.
if (elem) {
    elem.dispatchEvent(evt);
} else {
    console.warn('Target element not found; click event was not dispatched.');
}

React Test Utils ...通过PhantomJS无头浏览器。

// PhantomJS script: load the product page, inject React (with addons) and
// simulate a click on the second colour swatch through React's TestUtils,
// then render a screenshot.
var webpage = require('webpage');
var page = webpage.create();
var href = "http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416";
page.open(href, function (status) {
    // Nothing to click if the page did not load.
    if (status !== 'success') {
        console.log('Failed to load ' + href + ' (status: ' + status + ')');
        page.close();
        return;
    }

    page.includeJs("https://cdnjs.cloudflare.com/ajax/libs/react/0.14.6/react-with-addons.js", function () {
        var elem = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

        // Runs inside the page context; reports whether the target was found
        // so a missing element does not throw inside the page.
        var clicked = page.evaluate(function (selector) {
            var node = $(selector)[0];
            if (!node) {
                return false;
            }
            React.addons.TestUtils.Simulate.click(node);
            return true;
        }, elem);

        if (!clicked) {
            console.log('Element not found: ' + elem);
        }

        // Give the page a second to react before capturing it.
        setTimeout(function () {
            page.render('websiteAfterClick.png');
            page.close();
        }, 1000);
    });
});

取巧的尝试:页面上有一个 select 下拉框,其中的选项与我想点击的按钮一一对应……在 Chrome 开发者工具控制台中。

// The colour <select> offers the same choices as the swatch buttons; try to
// pick a colour through it instead of clicking a swatch.
var colorSelectPath = '#product-selection-2849416 > section.color-filter > div > select';
var colorName = 'Black Royal Oxford';

// Variant 1: set the value, then fire "change" via the .change() shorthand.
$(colorSelectPath).val(colorName).change();

// Variant 2: set the value, then fire "change" via .trigger().
$(colorSelectPath).val(colorName).trigger('change');

想办法从 React 组件中提取 props(属性)。它们同样包含我需要的数据。还不知道该怎么做……

使用 WebDriver 和 Selenium 来模拟点击。不确定如何与 PhantomJS 集成。

找到与点击事件处理程序关联的函数,并尝试直接调用它。正在努力中……

使用XPath Clicker。不知道该怎么做。在网上找不到很多资源。

结论

这里有人可以帮助我吗?不知道还有什么可以尝试。

1 个答案:

答案 0 :(得分:2)

我稍微调试了一下他们的代码,看起来他们监听的是 mousedown / mouseup 事件,而不是 click 事件。下面的代码应该可以工作:

    // Target: the image inside the second colour swatch.
    var el = jQuery("#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img")[0];

    // Creates a MouseEvent of the given type using the init dictionary shared
    // by both events below.
    function buildMouseEvent(type) {
        return new MouseEvent(type, {
            bubbles: true,
            cancelable: true,
            cancelBubble: false,
            defaultPrevented: false,
            isTrusted: true,
            button: 0,
            buttons: 1,
            which: 1,
            view: window
        });
    }

    // Both events are created up front, then dispatched as press followed by
    // release.
    var evtMouseDown = buildMouseEvent("mousedown");
    var evtMouseUp = buildMouseEvent("mouseup");

    el.dispatchEvent(evtMouseDown);
    el.dispatchEvent(evtMouseUp);