创建一个Facebook爬虫(scraper)

时间:2017-03-10 12:06:21

标签: vba web-scraping

我正在尝试编写一个Facebook机器人来解析个人资料链接。但是,它登录时好时坏,并且解析的是左侧栏的内容,我无法再进一步。有人能给我指出正确的方向吗?这样我就可以纠正代码中已有的错误,并解析出个人资料链接。代码如下:

' Excerpt: logs in with a form POST and writes the link texts found in the
' first "_li" element to column 1 of the active sheet.
' NOTE(review): http, html and x are not declared or initialised in this
' excerpt - presumably an XMLHTTP request object, an HTMLDocument and a row
' counter set up earlier; confirm against the full procedure.

' Form-encoded request body carrying placeholder credentials.
strdata = "email=sth.com&pass=xxx"
' Third argument False makes the request synchronous.
http.Open "POST", "https://www.facebook.com/login.php?login_attempt=1&lwv=110", False
http.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
http.send strdata

' Load the raw response markup into the document so it can be queried.
html.body.innerHTML = http.responseText

' All anchors inside the first element whose class is "_li".
Set topics = html.getElementsByClassName("_li")(0).getElementsByTagName("a")

' One anchor text per row, starting at whatever row x currently holds.
' NOTE(review): if x is still 0 here, Cells(0, 1) is an invalid address - verify x starts at 1.
For Each topic In topics
    Cells(x, 1) = topic.innerText
    x = x + 1
Next topic

2 个答案:

答案 0 :(得分:0)

这有帮助吗?

Sub DumpData()
    ' Opens the Facebook login page in a visible Internet Explorer window and
    ' dumps every element of the loaded document to worksheet "Sheet1":
    '   column A = tag name, B = id, C = class name,
    '   column D = first 1024 characters of the element's inner text.
    ' The sheet is cleared before writing. The browser window is left open.

    ' Numeric value of READYSTATE_COMPLETE; late binding gives no access to the enum.
    Const READYSTATE_COMPLETE As Long = 4

    Dim IE As Object
    Dim URL As String
    Dim RowCount As Long
    Dim itm As Object

    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = True

    URL = "https://www.facebook.com/login.php?login_attempt=1&lwv=110"

    IE.Navigate2 URL

    ' Wait for the site to fully load. Busy alone is not sufficient: it can be
    ' False before the document is complete, so readyState is checked as well.
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    With Sheets("Sheet1")
        .Cells.ClearContents
        RowCount = 1
        For Each itm In IE.document.all
            .Range("A" & RowCount) = itm.tagname
            .Range("B" & RowCount) = itm.ID
            .Range("C" & RowCount) = itm.classname
            ' Truncate the text written to the cell to 1024 characters.
            .Range("D" & RowCount) = Left(itm.innertext, 1024)

            RowCount = RowCount + 1
        Next itm
    End With
End Sub

答案 1 :(得分:0)

要获取Facebook的内容,要么使用API,要么使用Selenium——后者绝对是最糟糕的方式。不过,我下面的脚本可以登录页面并解析一些标题:

Sub Grabbing_item()
    ' Logs in to Facebook through a Selenium-driven Chrome session, copies the
    ' rendered page body into an MSHTML document, and writes the inner text of
    ' every element with class "profileLink" to column 1 of the active sheet,
    ' starting at row 1.
    ' Requires references to the Selenium Type Library (ChromeDriver) and the
    ' Microsoft HTML Object Library (HTMLDocument).
    Dim driver As New ChromeDriver, html As New HTMLDocument
    Dim post As Object
    Dim x As Long   ' output row counter; incremented before each write

    With driver
        .get "https://www.facebook.com/"
        ' "email_id" and "Pass_word" are placeholders for real credentials.
        .FindElementById("email").SendKeys ("email_id")
        .FindElementById("pass").SendKeys ("Pass_word")
        ' NOTE(review): "u_0_2" looks like an auto-generated id and may change
        ' between page versions - confirm it still targets the login button.
        .FindElementById("u_0_2").Click
        ' Wait takes milliseconds; pause 5 seconds so the post-login page can render.
        .Wait 5000
        ' Snapshot the rendered DOM so it can be parsed after the browser closes.
        html.body.innerHTML = .ExecuteScript("return document.body.innerHTML;")
        .Quit
    End With

    For Each post In html.getElementsByClassName("profileLink")
        x = x + 1
        Cells(x, 1) = post.innerText
    Next post
End Sub