我的目标是将第1页的所有Google搜索结果与VBA一起删除到Excel。直到目前为止,我设法取消了第一个结果。头部,链接和日期存储在单元格4,5,6中。我现在必须为其他人制作一个循环,但我无法直接完成。我认为存储日期的功能也不是最佳编码。有谁知道答案?
Sub XMLHTTP()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object, Objdatum As Object, Ddatum As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("A" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 3) & "Skipr" & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set objResultDiv = html.getelementbyid("rso")
Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
Set link = objH3.getelementsbytagname("a")(0)
Set Objdatum = objResultDiv.getelementsbytagname("span")(2)
str_text = Replace(link.innerHTML, "<EM>", "")
str_text = Replace(str_text, "</EM>", "")
dat_text = Objdatum.innerHTML
Cells(i, 4) = str_text
Cells(i, 5) = link.href
Cells(i, 6) = dat_text
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
答案 0 :(得分:0)
您需要通过getelementsbytagname
调用迭代返回给您的集合,而不是只返回带有数组索引(0)
的第一个元素
我有一个类似的项目,下面是一些提示&amp;我的方法供你参考,它可以帮助你工作和维护未来的代码:
首先,我更喜欢引用公开COM对象的对象库,而不是使用CreateObject
,这使我能够浏览每个对象F2
的函数和属性,并让我完成代码(VBA编辑器中的{speed&amp; less bugs)(F7
会将您带回代码视图)。
给我文档和代码完成:
另外,为清晰起见,请使用这些const
'see ready state : https://msdn.microsoft.com/en-us/library/ie/ms534361(v=vs.85).aspx
Const READYSTATE_UNINITIALIZED = 0
Const READYSTATE_LOADING = 1
Const READYSTATE_LOADED = 2
Const READYSTATE_INTERACTIVE = 3
Const READYSTATE_COMPLETE = 4
最后,使用DOMDocument60
将XML解析为内存中的文档对象模型。
和MSHTML.HTMLDocument
解析HTML文档并迭代表行。
下面是代码,其中迭代嵌入在Web服务器返回的初始XML文档中的html文档中的表中的所有返回行:
Dim xmlDoc As DOMDocument60
Set xmlDoc = GetXMLDocument("http://www.nbg.ge/rss.php")
'extract publication date
Debug.Print xmlDoc.getElementsByTagName("pubDate")(0).Text
'unwrap html document from CDATA in "//item/description" element
Dim htmlDoc As New MSHTML.HTMLDocument
htmlDoc.body.innerHTML = xmlDoc.SelectNodes("//item/description")(0).Text
'extract table data from html document
Dim tr As IHTMLElement, td As IHTMLElement
For Each tr In htmlDoc.getElementsByTagName("tr")
For Each td In tr.Children
'each cell in current row
Debug.Print " " & td.innerHTML
Next td
'next row
Debug.Print "-----"
Next tr
我正在调用的webservice返回的示例数据:
<rss version="2.0">
<channel>
<title>RSS NBG Currency Rates</title>
<link>https://www.nbg.gov.ge/index.php?m=236&lang=geo</link>
<description>Currency Rates</description>
<language>geo</language>
<copyright>Copyright 2015, NBG</copyright>
<pubDate>Wed, 29 Apr 2015 12:39:50 +0400</pubDate>
<lastBuildDate>Wed, 29 Apr 2015 12:39:50 +0400</lastBuildDate>
<managingEditor>alex@proservice.ge</managingEditor>
<webMaster>alex@proservice.ge</webMaster>
<item>
<title>Currency Rates 2015-04-29</title>
<link>https://www.nbg.gov.ge/index.php?m=236&lang=geo</link>
<description>
<![CDATA[
<table border="0">
<tr>
<td>AED</td>
<td>10 არაბეთის გაერთიანებული საამიროების დირჰამი</td>
<td>6.2858</td>
<td><img src="https://www.nbg.gov.ge/images/green.gif"></td>
<td>0.0640</td> </tr><tr> <td>AMD</td> <td>1000 სომხური დრამი</td>
<td>4.8676</td>
<td><img src="https://www.nbg.gov.ge/images/green.gif"></td>
<td>0.0414</td>
</tr>
</table>
]]>
</description>
<pubDate>Wed, 29 Apr 2015 12:39:50 +0400</pubDate>
<guid>
https://www.nbg.gov.ge/index.php?m=236&lang=geo&date=2015-04-29
</guid>
</item>
</channel>
</rss>
以及从网络服务器实际获取文档的功能(仅当你添加了如上图所示的引用时才有效)
Function GetXMLDocument(url As String) As MSXML2.DOMDocument60
Dim xhr As New XMLHTTP60
Dim doc As New DOMDocument60
Dim msg As String
With xhr
.Open bstrMethod:="GET", bstrUrl:=url, varAsync:=False
On Error GoTo SendError
.send
On Error GoTo 0
'http status codes - http://en.wikipedia.org/wiki/List_of_HTTP_status_codes
'200 = SUCCESS - OK
If .readyState = READYSTATE_COMPLETE And .Status = 200 Then
'Debug.Print .responseText
doc.LoadXML (.responseText)
Else
msg = "Error" & vbNewLine & "Ready state: " & .readyState & _
vbNewLine & "HTTP request status: " & .Status
GoTo Error
End If
End With
Set GetXMLDocument = doc
Exit Function
SendError:
'by default access to data source accross internet dissabled
'go to internet options & under security>custom level>Misc>access data sources accross domains> enable
'see: http://stackoverflow.com/a/17402920
MsgBox "Make sure access data sources accross domains is enabled under internet options>security>custom", vbOKOnly, "Could not send request to server"
Error:
MsgBox msg, vbOKOnly, "Unexpected Error"
End Function