来自Google搜索结果的特定信息

时间:2015-08-17 12:51:13

标签: excel vba excel-vba

我使用以下代码从Google搜索中收集第一个网址。有没有办法编辑代码,以便它只选取位于Google搜索结果中绿色网址后面的文字?

每个搜索结果包含4行信息:

header
URL in green
text1
text2

我想收集绿色网址后面显示的单行文字。

这可能吗?

Sub XMLHTTP()

Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
Dim start_time As Date
Dim end_time As Date

lastRow = Range("A" & Rows.Count).End(xlUp).Row

Dim cookie As String
Dim result_cookie As String

start_time = Time
Debug.Print "start_time:" & start_time
On Error Resume Next
For i = 2 To lastRow

    url = "https://www.google.co.in/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)

    Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
    XMLHTTP.Open "GET", url, False
    XMLHTTP.setRequestHeader "Content-Type", "text/xml"
    XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
    XMLHTTP.send

        Set html = CreateObject("htmlfile")
    html.body.innerHTML = XMLHTTP.ResponseText
    Set objResultDiv = html.getelementbyid("rso")
    Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
    Set link = objH3.getelementsbytagname("a")(0)


    str_text = Replace(link.innerHTML, "<EM>", "")
    str_text = Replace(str_text, "</EM>", "")

    Cells(i, 2) = str_text
    Cells(i, 3) = link.href
    DoEvents
Next

end_time = Time
Debug.Print "end_time:" & end_time

Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub

1 个答案:

答案 0 :(得分:1)

我认为文本在<span class="st">范围内,所以这应该可以解决问题:

Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText

Dim e
For Each e In HTML.getElementsByTagName("span")
    If e.className = "st" Then
        Debug.Print e.innerText
        Exit For
    End If
Next

修改:显示完整的脚本:

Dim XMLHTTP
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", "https://www.google.co.in/search?q=test", False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send

Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText

Dim e
For Each e In HTML.getElementsByTagName("span")
    If e.className = "st" Then
        Debug.Print e.innerText
        Exit For
    End If
Next

输出

Test your Internet connection bandwidth to locations around the world with this interactive broadband speed test from Ookla.