使用CasperJS自动删除Facebook群组帖子

时间:2017-05-17 03:44:46

标签: javascript phantomjs casperjs

我正在编写一个脚本来删除Facebook群组中的帖子,因为Facebook Graph API不允许开发人员这样做,除非这些帖子来自开发者的帐户。

到目前为止,我已经能够登录Facebook,然后导航到所需的组页面。从那里我可以获得页面上可见的每个帖子的XPath(使用选择器a[data-testid='post_chevron_button'])。尝试在每个XPath选择器上调用this.click()时,我的脚本失败。

我目前的脚本如下:

// NOTE(review): presumably set so that this.test (used in the start step
// below) is available when the script is not run via `casperjs test` — confirm.
phantom.casperTest = true;
// Helper that wraps an XPath expression into a selector object for CasperJS.
var x = require('casper').selectXPath;
var casper = require('casper').create({   
    verbose: true,
    pageSettings: {
         loadImages:  false,
         loadPlugins: false,
         userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
    }
});

// print out all the messages in the headless browser context
casper.on('remote.message', function(msg) {
    this.echo('remote message caught: ' + msg);
});

// print out every error reported through the "page.error" event, styled as ERROR
// (the trace argument is received but not used)
casper.on("page.error", function(msg, trace) {
    this.echo("Page Error: " + msg, "ERROR");
});

var url = 'http://www.facebook.com/';

// Step 1: open the login page, fill in the credentials and capture a screenshot.
casper.start(url, function() {
    console.log("page loaded");
    this.test.assertExists('form#login_form', 'form is found');
    // '{email}' and '{password}' are placeholders to be replaced with real credentials.
    // The third argument (true) asks fill() to submit the form.
    this.fill('form#login_form', { 
        email: '{email}',
        pass: '{password}'
    }, true);
    // NOTE(review): '#u_0_q' looks like an auto-generated id (presumably the
    // login button); the form was already submitted by fill() above — confirm
    // this extra click is needed and that the id is stable.
    this.click('#u_0_q');
    this.wait(1000, function() {
        this.echo("Capturing image of page after login.");
        this.capture('loggedin.png');
    });
});

// Step 2: open the group page and try to click every post's chevron (menu) link.
// '{group-id}' is a placeholder for the real group id.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
    this.echo(this.getTitle());
    // NOTE(review): wait() only schedules its callback; the statements below
    // it in this function run immediately, before the 1000 ms have elapsed.
    this.wait(1000, function() {
        this.capture('group.png');
    });

    var elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");

    var index = 1;
    // NOTE(review): forEach is called without a thisArg and the callback is a
    // plain function, so `this` inside it is not the casper instance. That is
    // the likely cause of the reported
    // "TypeError: undefined is not a constructor (evaluating 'this.click(x(xpath))')".
    elements.forEach(function(element){
        // Build an XPath that matches the element by its id attribute.
        var xpath = '//*[@id="' + element.attributes["id"] + '"]';
        console.log(xpath);
        this.click(x(xpath));
        // NOTE(review): the wait callback runs later, while index++ below runs
        // right away, so the file name may not use the index of this iteration.
        this.wait(100, function() {
            this.capture('chevronlink' + index + '.png');
        });
        index++;
    });
});

// Start executing the queued steps.
casper.run();

当脚本到达this.click(x(xpath));时,我收到错误消息TypeError: undefined is not a constructor (evaluating 'this.click(x(xpath))')。如果我只是替换创建数组的最后一段代码并使用this.click("a[data-testid='post_chevron_button']");进行迭代,那么我的脚本没有问题。

有谁知道为什么CasperJS不接受用XPath选择器调用click()?根据CasperJS文档,XPath应该是有效的选择器。

更新

我已更新问题的标题,以便更准确地描述所需的结果。

根据dasmelch的建议,我稍微重写了脚本并将此位合并到脚本中( casper.thenOpen部分之后):

// Queue, for every post-menu (chevron) link on the current page, three steps:
// click the link, capture a screenshot, and navigate back.
casper.then(function() {
  // Collect the id attribute of every chevron link. The selector must stay on
  // one line: a string literal broken across lines is a syntax error.
  var elements = casper.getElementsAttribute("a[data-testid='post_chevron_button']", 'id');
  // NOTE(review): the ids are collected once, before any click or back()
  // navigation; if the page regenerates ids, later lookups can fail with
  // "nonexistent selector" — confirm against the live page.
  while (elements.length > 0) {
    // get always the last element with target id
    // (declared with var so it does not leak as an implicit global)
    var element = elements.pop();
    // The IIFE gives each iteration its own `element`/`xpath` binding, because
    // the queued steps below run only after this loop has finished.
    (function(element) {
      var xpath = '//*[@id="' + element + '"]';
      console.log(xpath);
      // do it step by step
      casper.then(function() {
        this.click(x(xpath));
      });
      casper.then(function() {
        this.capture('chevronlink' + element + '.png');
      });
      // go back to the page with the links (if necessary)
      casper.then(function() {
        casper.back();
      });
    })(element);
  }
});

我现在收到此错误:Cannot dispatch mousedown event on nonexistent selector: xpath selector: //*[@id="u_0_47"]

昨晚,我决定采取一些不同的方式。我更接近所需的最终结果,但现在CasperJS和/或PhantomJS在单击post_chevron_button后找不到下拉列表中的元素时遇到问题。这是我最终得到的结果(casper.thenOpen之前的所有内容在最初显示的脚本中保持不变):

// Open the group page, open a post's chevron menu and click any menu link whose
// ajaxify attribute points at the group's delete.php endpoint.
// '{group-id}' is a placeholder for the real group id.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
    this.echo(this.getTitle());
    // NOTE(review): wait() only schedules its callback; the code below it in
    // this function runs immediately, before the wait has elapsed.
    this.wait(1000, function() {
        this.capture('group.png');
    });

    // The selector must stay on one line: a string literal broken across
    // lines is a syntax error.
    var elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");
    while (elements.length > 0) {
        this.click("a[data-testid='post_chevron_button']");
        // NOTE(review): this callback is queued, not run here, so the
        // screenshot and the "saved" log happen after the synchronous search
        // below; the drop-down may not be rendered yet when getElementsInfo
        // runs — likely why the delete link is not found. Confirm.
        this.wait(1000, function() {
            this.capture('chevron_click.png');
            console.log("chevron_click.png saved");
        });
        var chevronLinks = casper.getElementsInfo("a[ajaxify]");
        console.log("Found " + chevronLinks.length + " elements with ajaxify attribute.");
        var chevronLinksIndex = 1;
        chevronLinks.forEach(function(element){
            var ajaxifyValue = element.attributes["ajaxify"];
            console.log(ajaxifyValue);
            if (ajaxifyValue.indexOf("delete.php?group_id={group-id}") !== -1) {
                // Use `casper` explicitly: inside a forEach callback `this` is
                // not the casper instance.
                casper.click("a[ajaxify='"+ajaxifyValue+"']");
                casper.wait(100, function(){
                    this.capture('deletePost' + chevronLinksIndex);
                });
                chevronLinksIndex++;
            }
        });
        // No delete link was clicked in this pass: stop instead of looping forever.
        if (chevronLinksIndex === 1) {
            break;
        }
        elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");
    }
});

我知道应该有一个包含ajaxify属性的元素,其值正是我要搜索的值(因为在浏览器中点击a[data-testid='post_chevron_button']之后就会显示该元素),但CasperJS找不到它。不仅如此,我的chevron_click.png图片文件应该在每次运行此脚本时更新,但事实并非如此。

有些代码没有按我预期的顺序执行。例如,在查看ajaxify属性值之前,控制台中就已经记录了chevron_click.png saved。这可能是预期的行为,但不幸的是我没有太多JS经验。这个执行顺序问题或许可以解释为什么我对所需元素的搜索没有返回我期望的结果。

以下是需要点击删除帖子的元素的示例:

<a class="_54nc" href="#" rel="async-post" 
ajaxify="/ajax/groups/mall/delete.php?group_id={group-id}&amp;message_id=806608486110204&amp;story_dom_id=mall_post_806608486110204%3A6%3A0&amp;entstory_context=%7B%22last_view_time%22%3A1495072771%2C%22fbfeed_context%22%3Atrue%2C%22location_type%22%3A2%2C%22outer_object_element_id%22%3A%22mall_post_806608486110204%3A6%3A0%22%2C%22object_element_id%22%3A%22mall_post_806608486110204%3A6%3A0%22%2C%22is_ad_preview%22%3Afalse%2C%22is_editable%22%3Afalse%2C%22mall_how_many_post_comments%22%3A2%2C%22bump_reason%22%3A0%2C%22story_width%22%3A502%2C%22shimparams%22%3A%7B%22page_type%22%3A16%2C%22actor_id%22%3A664025626%2C%22story_id%22%3A806608486110204%2C%22ad_id%22%3A0%2C%22_ft_%22%3A%22%22%2C%22location%22%3A%22group%22%7D%2C%22story_id%22%3A%22u_0_21%22%2C%22caret_id%22%3A%22u_0_22%22%7D&amp;surface=group_post_chevron"
role="menuitem"><span><span class="_54nh"><div class="_41t5"><i
class="_41t7 img sp_gJvT8CoKHU- sx_0f12ae"></i><i class="_41t8 img
sp_s36yWP_7MD_ sx_7e9f7d"></i>Delete Post</div></span></span></a>

2 个答案:

答案 0 :(得分:1)

我改用Selenium 2 API for .NET,完成了我想做的事情。

解决方案代码如下:

/// <summary>
/// Logs into Facebook with Selenium/ChromeDriver and repeatedly deletes the
/// first post of a group through its chevron menu until no delete link is found.
/// "{group-id}", "username" and "password" are placeholders to be replaced.
/// </summary>
class Program
{
    // URL of the group whose posts are deleted.
    private const string GroupUrl = "https://www.facebook.com/groups/{group-id}/";

    // Chevron (menu) link shown on each post.
    private static readonly By ChevronLocator =
        By.XPath("//a[@data-testid='post_chevron_button']");

    // "Delete Post" menu entries for this group.
    private static readonly By DeleteLocator =
        By.XPath("//a[contains(@ajaxify,'delete.php?group_id={group-id}')]");

    static void Main(string[] args)
    {
        var options = new ChromeOptions();
        // Value 2 is passed for the notifications content setting
        // (presumably "block", so the permission prompt does not cover the page — confirm).
        options.AddUserProfilePreference("profile.default_content_setting_values.notifications", 2);

        using (IWebDriver driver = new ChromeDriver(options))
        {
            // Maximize window
            driver.Manage().Window.Maximize();

            // Log into Facebook
            driver.Navigate().GoToUrl("http://www.facebook.com/");
            driver.FindElement(By.Id("email")).SendKeys("username");
            driver.FindElement(By.Id("pass")).SendKeys("password");
            driver.FindElement(By.Id("pass")).SendKeys(Keys.Enter);

            driver.Navigate().GoToUrl(GroupUrl);
            // Nothing to do when the group shows no posts; the original code
            // dereferenced a null element here.
            if (!OpenFirstPostMenu(driver))
            {
                return;
            }
            Thread.Sleep(1000);

            var deletePostElements = driver.FindElements(DeleteLocator);
            while (deletePostElements.Count > 0)
            {
                Thread.Sleep(1000);
                // Only the entry belonging to the currently open menu is displayed.
                var visibleDelete = deletePostElements.FirstOrDefault(e => e.Displayed);
                if (visibleDelete == null)
                {
                    break;
                }
                visibleDelete.Click();
                Thread.Sleep(1000);
                driver.FindElement(By.ClassName("layerConfirm")).Click();

                Thread.Sleep(2000);
                if (!OpenFirstPostMenu(driver))
                {
                    // No chevron left on the current page: reload the group and retry once.
                    driver.Navigate().GoToUrl(GroupUrl);
                    if (!OpenFirstPostMenu(driver))
                    {
                        break;
                    }
                }
                Thread.Sleep(1000);
                deletePostElements = driver.FindElements(DeleteLocator);
            }
        }
    }

    /// <summary>
    /// Clicks the first post chevron on the current page.
    /// </summary>
    /// <returns>true if a chevron was found and clicked; false if the page has none.</returns>
    private static bool OpenFirstPostMenu(IWebDriver driver)
    {
        var firstChevron = driver.FindElements(ChevronLocator).FirstOrDefault();
        if (firstChevron == null)
        {
            return false;
        }
        firstChevron.Click();
        return true;
    }
}

我想做一些改进,比如使用Selenium来等待元素显示而不是使用Thread.Sleep(),但它可以正常工作。

答案 1 :(得分:0)

你构造的XPath是正确的,但forEach这种方式似乎不适用于这里。你可以使用casper.getElementsAttribute直接获取所有这些元素的id,然后用while循环逐个处理,这样更简单:

...
// Open the group page ('{group-id}' is a placeholder) and capture a screenshot.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
  this.echo(this.getTitle());
  this.wait(1000, function() {
    this.capture('group.png');
  });
});
// do a while loop with where you can use  every single element and jump back
casper.then(function() {
  // Collect the id attribute of every post-menu (chevron) link on the page.
  var elements = casper.getElementsAttribute("a[data-testid='post_chevron_button']", 'id');
  while (elements.length > 0) {
    // get always the last element with target id
    // (declared with var so it does not leak as an implicit global)
    var element = elements.pop();
    // The IIFE gives each iteration its own `element`/`xpath` binding, because
    // the queued steps below run only after this loop has finished.
    (function(element) {
      var xpath = '//*[@id="' + element + '"]';
      console.log(xpath);
      // do it step by step
      casper.then(function() {
        this.click(x(xpath));
      });
      casper.then(function() {
        this.capture('chevronlink' + element + '.png');
      });
      // go back to the page with the links (if necessary)
      casper.then(function() {
        casper.back();
      });
    })(element);
  }
});
...

我没有查看FB的实际页面,但我猜每次点击之后,你都必须返回(casper.back)到包含这些链接(元素)的页面。