我使用以下代码从Google搜索中收集第一个网址。有没有办法编辑代码,以便它只选取位于Google搜索结果中绿色网址后面的文字?
每个搜索结果包含4行信息:
header
URL in green
text1
text2
我想收集绿色网址后面显示的单行文字。
这可能吗?
Sub XMLHTTP()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("A" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
On Error Resume Next
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set objResultDiv = html.getelementbyid("rso")
Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
Set link = objH3.getelementsbytagname("a")(0)
str_text = Replace(link.innerHTML, "<EM>", "")
str_text = Replace(str_text, "</EM>", "")
Cells(i, 2) = str_text
Cells(i, 3) = link.href
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
答案 0 :(得分:1)
我认为文本在<span class="st">
范围内,所以这应该可以解决问题:
Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText
Dim e
For Each e In HTML.getElementsByTagName("span")
If e.className = "st" Then
Debug.Print e.innerText
Exit For
End If
Next
修改:显示完整的脚本:
Dim XMLHTTP
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", "https://www.google.co.in/search?q=test", False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText
Dim e
For Each e In HTML.getElementsByTagName("span")
If e.className = "st" Then
Debug.Print e.innerText
Exit For
End If
Next
输出
Test your Internet connection bandwidth to locations around the world with this interactive broadband speed test from Ookla.