我发现此代码会返回Google搜索页中第一个结果的地址。
我需要一段类似的或全新的代码,用来获取指向某个特定网站的搜索结果的地址。例如,如果第三个结果指向 "www.example.com/address",我需要把这个值("www.example.com/address")返回到 Excel 中。
' Looks up every search term found in column A (row 2 down to the last used
' row) on Google and writes the first result's title to column B and its
' link to column C of the same row.
'
' Rows for which no result could be parsed (missing "rso" container, no H3
' heading, or no anchor inside the heading) are skipped and left untouched
' instead of aborting the whole run with a runtime error.
Sub XMLHTTP()
    Dim url As String, lastRow As Long, i As Long
    Dim http As Object, html As Object
    Dim objResultDiv As Object, headings As Object
    Dim objH3 As Object, link As Object
    Dim str_text As String
    Dim start_time As Date, end_time As Date

    lastRow = Range("A" & Rows.Count).End(xlUp).Row

    ' Now (date + time) rather than Time, so the elapsed minutes stay correct
    ' when the run crosses midnight.
    start_time = Now
    Debug.Print "start_time:" & start_time

    ' One request object is enough; it is re-opened for every row.
    Set http = CreateObject("MSXML2.serverXMLHTTP")

    For i = 2 To lastRow
        ' URL-encode the search term so characters such as "&", "#", "+" or
        ' spaces do not corrupt the query string (EncodeURL needs Excel 2013+).
        ' The random "rnd" parameter makes every request URL unique.
        url = "https://www.google.co.in/search?q=" & _
              WorksheetFunction.EncodeURL(CStr(Cells(i, 1).Value)) & _
              "&rnd=" & WorksheetFunction.RandBetween(1, 10000)

        http.Open "GET", url, False
        http.setRequestHeader "Content-Type", "text/xml"
        http.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
        http.send

        ' Parse the response with a late-bound HTML document.
        Set html = CreateObject("htmlfile")
        html.body.innerHTML = http.ResponseText

        ' Walk down rso -> first H3 -> first anchor, stopping as soon as any
        ' level is missing so a changed or blocked page cannot raise error 91.
        Set link = Nothing
        Set objResultDiv = html.getelementbyid("rso")
        If Not objResultDiv Is Nothing Then
            Set headings = objResultDiv.getelementsbytagname("H3")
            If headings.Length > 0 Then
                Set objH3 = headings(0)
                Set link = objH3.getelementsbytagname("a")(0)
            End If
        End If

        If Not link Is Nothing Then
            ' Strip the <EM> highlighting Google puts around matched words.
            str_text = Replace(link.innerHTML, "<EM>", "")
            str_text = Replace(str_text, "</EM>", "")
            Cells(i, 2) = str_text
            Cells(i, 3) = link.href
        End If

        DoEvents
    Next i

    end_time = Now
    Debug.Print "end_time:" & end_time
    Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
    MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
谢谢!
答案 0 :(得分:0)
将您的搜索限制为仅限该网站,然后您仍然可以获得第一个结果。
' Appends "+site%3Aexample.com" (URL-encoded "site:example.com") to the query taken from column A, followed by a random "rnd" parameter.
url = "https://www.google.com/search?q=" & Cells(i,1) & "+site%3Aexample.com" & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
如果您还想返回列表中链接所在的位置,则需要替换这两行
'Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
'Set link = objH3.getelementsbytagname("a")(0)
替换为:
' Scan every H3 heading inside the result container for the first link whose
' address starts with the wanted domain (w3schools in this example).
' Afterwards: link is the matching anchor and lCnt its 1-based position among
' the H3 headings, or link Is Nothing and lCnt = 0 when nothing matched.
Set hHeadElems = objResultDiv.getelementsbytagname("H3")
Set link = Nothing   ' reset so a previous row's match cannot leak into this one
lCnt = 0
' The collection is 0-based, so the last valid index is Length - 1.
For j = 0 To hHeadElems.Length - 1
    Set objH3 = hHeadElems(j)
    Set candidate = objH3.getelementsbytagname("a")(0)
    ' A heading without an anchor yields Nothing; skip it instead of failing on .href.
    If Not candidate Is Nothing Then
        If candidate.href Like "http://www.w3schools.com*" Then
            Set link = candidate
            lCnt = j + 1
            Exit For
        End If
    End If
Next j
这段代码会循环遍历所有 h3 元素,直到找到与某个域名匹配的链接(本例中为 w3schools)。lCnt 变量就是该链接在结果列表中的位置,您可以像这样把它写回工作表:
Cells(i, 4) = lCnt
您还需要检查 link Is Nothing,以确认是否确实找到了匹配的内容。