在很多情况下,我先用IE对象进行测试,然后在生产环境中切换到MSXML。我原以为这两种方式会得到相同的响应,但是由于某些原因,以下代码却产生了两个不同的结果。
Sub testHTTP()
    ' Fetches the same URL twice - once with MSXML2.XMLHTTP60 and once with a
    ' hidden Internet Explorer instance - and prints to the Immediate window how
    ' many <img> elements each resulting HTMLDocument reports.
    ' Requires references to "Microsoft HTML Object Library" and
    ' "Microsoft XML, v6.0" (early-bound HTMLDocument / XMLHTTP60 types).
    Dim link As String
    link = "https://www.govtrack.us/congress/members/ralph_abraham/412630"

    'THIS IS THE XML SECTION
    Dim xmlHTMLDoc As HTMLDocument
    Dim xmlWeb As msxml2.XMLHTTP60
    Set xmlHTMLDoc = New HTMLDocument
    Set xmlWeb = New msxml2.XMLHTTP60

    ' Third argument False = synchronous request.
    xmlWeb.Open "GET", link, False
    xmlWeb.send
    ' Kept as a safety net; for a synchronous request readyState should already be 4 here.
    While xmlWeb.readyState <> 4
        DoEvents
    Wend

    Debug.Print " "
    Debug.Print link
    Debug.Print xmlWeb.Status; "XMLHTTP status "; xmlWeb.statusText; " at "; Time

    ' Load the raw response text into the detached document so it can be queried via the DOM.
    xmlHTMLDoc.body.innerHTML = xmlWeb.responseText
    ' Note: this is a plain substring search for "img", not a tag match.
    Debug.Print "MSXML response finds image tag at position: " & InStr(xmlWeb.responseText, "img")
    Debug.Print "MSXML response getElementsByTagName(img).Length is: " & xmlHTMLDoc.getElementsByTagName("img").Length

    'THIS IS THE IE SECTION
    Dim ieHTMLDoc As HTMLDocument
    Dim objIE As Object
    Set objIE = CreateObject("InternetExplorer.Application")
    With objIE
        .Top = 0
        .Left = 600
        .Width = 800
        .Height = 600
        .Visible = False
    End With

    objIE.navigate link
    ' Wait until the browser is neither busy nor still loading (4 = READYSTATE_COMPLETE).
    While objIE.Busy Or objIE.readyState <> 4
        DoEvents
    Wend

    ' No need to create a New HTMLDocument first: the browser's own document is used.
    Set ieHTMLDoc = objIE.document
    Debug.Print "IE response getElementsByTagName(img).Length is: " & ieHTMLDoc.getElementsByTagName("img").Length

    ' The IE window is invisible, so it must be closed explicitly; otherwise the
    ' iexplore.exe process stays alive after the macro ends.
    Set ieHTMLDoc = Nothing
    objIE.Quit
    Set objIE = Nothing
End Sub
以下是即时窗口中的结果:
https://www.govtrack.us/congress/members/ralph_abraham/412630
200 XMLHTTP status OK at 7:50:44 PM
MSXML response finds image tag at position: 8936
MSXML response getElementsByTagName(img).Length is: 0
IE response getElementsByTagName(img).Length is: 10
这是另一个示例,这次尝试查找锚链接:
Sub testHTTP()
    ' Fetches the same URL twice - once with MSXML2.XMLHTTP60 and once with a
    ' hidden Internet Explorer instance - and prints to the Immediate window how
    ' many <a> elements each resulting HTMLDocument reports.
    ' Requires references to "Microsoft HTML Object Library" and
    ' "Microsoft XML, v6.0" (early-bound HTMLDocument / XMLHTTP60 types).
    Dim link As String
    link = "https://www.govtrack.us/congress/members/ralph_abraham/412630"

    'THIS IS THE XML SECTION
    Dim xmlHTMLDoc As HTMLDocument
    Dim xmlWeb As msxml2.XMLHTTP60
    Set xmlHTMLDoc = New HTMLDocument
    Set xmlWeb = New msxml2.XMLHTTP60

    ' Third argument False = synchronous request.
    xmlWeb.Open "GET", link, False
    xmlWeb.send
    ' Kept as a safety net; for a synchronous request readyState should already be 4 here.
    While xmlWeb.readyState <> 4
        DoEvents
    Wend

    Debug.Print " "
    Debug.Print link
    Debug.Print xmlWeb.Status; "XMLHTTP status "; xmlWeb.statusText; " at "; Time

    ' Load the raw response text into the detached document so it can be queried via the DOM.
    xmlHTMLDoc.body.innerHTML = xmlWeb.responseText
    Debug.Print "MSXML response finds anchor tag at position: " & InStr(xmlWeb.responseText, "<a ")
    Debug.Print "MSXML response getElementsByTagName(<a ).Length is: " & xmlHTMLDoc.getElementsByTagName("a").Length

    'THIS IS THE IE SECTION
    Dim ieHTMLDoc As HTMLDocument
    Dim objIE As Object
    Set objIE = CreateObject("InternetExplorer.Application")
    With objIE
        .Top = 0
        .Left = 600
        .Width = 800
        .Height = 600
        .Visible = False
    End With

    objIE.navigate link
    ' Wait until the browser is neither busy nor still loading (4 = READYSTATE_COMPLETE).
    While objIE.Busy Or objIE.readyState <> 4
        DoEvents
    Wend

    ' No need to create a New HTMLDocument first: the browser's own document is used.
    Set ieHTMLDoc = objIE.document
    Debug.Print "IE response getElementsByTagName(<a ).Length is: " & ieHTMLDoc.getElementsByTagName("a").Length

    ' The IE window is invisible, so it must be closed explicitly; otherwise the
    ' iexplore.exe process stays alive after the macro ends.
    Set ieHTMLDoc = Nothing
    objIE.Quit
    Set objIE = Nothing
End Sub
以下是即时窗口中的结果:
https://www.govtrack.us/congress/members/ralph_abraham/412630
200 XMLHTTP status OK at 12:21:08 PM
MSXML response finds anchor tag at position: 3774
MSXML response getElementsByTagName(<a ).Length is: 0
IE response getElementsByTagName(<a ).Length is: 131
以下是导致代码出错的语句:
getElementsByClassName("photo")(0).getElementsByTagName("img")(0).src
针对XML请求的响应(而不是IE的响应)运行时,这会产生“对象变量或 With 块变量未设置”错误。看起来XML响应中包含了所有内容,但没有被正确解析为HTMLDocument对象。也许我可以尝试去掉响应文本开头的某些内容,然后再将其载入HTMLDocument。
我需要知道如何在VBA中用XML或其他HTTP方法来替代IE。
答案 0 :(得分:1)
我认为将XML与IE进行比较,就像将苹果与桔子榨汁机进行比较。
Internet Explorer 是Web Browser,旨在简化从远程服务器请求和接收软件包信息的过程,并根据人类设备的需要动态呈现。
XML 是一种机器可读的标记语言(machine-readable markup language),专门用于Internet上的文档。它是一种元语言(metalanguage),也就是说,它可以用来描述其他语言。
XML定义了文档的逻辑结构以及文档的访问和操作方式。一种组织信息的纯文本方法。
*XMLHttpRequest
** XMLHttpRequest
答案 1 :(得分:1)
简短的答案可能是MSXML取消了图像(和其他信息),否则这些图像将在使用浏览器时呈现。您正在处理不同的响应文本,尽管它们都是HTML。 MSXML不必将页面访问的所有其他呈现信息通知浏览器。
如果成功,则xmlWeb.responseText返回一个DOMString,其中包含对请求的响应作为文本。
不太理想,但是您可以从responseText中使用regex获得可用的src属性字符串。您可以对其进行调整,使其仅适用于图片扩展名,例如jpeg。
Option Explicit
Public Sub PrintSrcs()
    ' Downloads the page with a late-bound MSXML2.XMLHTTP request and prints every
    ' src attribute value found in the raw response text to the Immediate window.
    Dim sResponse As String, startPos As Long

    With CreateObject("MSXML2.XMLHTTP")
        ' Third argument False = synchronous request.
        .Open "GET", "https://www.govtrack.us/congress/members/ralph_abraham/412630", False
        .send
        ' Convert the raw response bytes to a VBA string using the system default code page.
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    ' Drop anything preceding the doctype declaration. InStr returns 0 when the
    ' marker is absent, and Mid$ with a start of 0 raises run-time error 5, so the
    ' text is only trimmed when the marker was actually found.
    startPos = InStr(1, sResponse, "<!DOCTYPE ")
    If startPos > 0 Then sResponse = Mid$(sResponse, startPos)

    Dim links(), i As Long
    ' A lookbehind pattern such as (?<=<img src=")[^"]* is not supported by the
    ' VBScript RegExp engine, so the "src=" prefix is matched here and removed in GetLinks.
    links = GetLinks(sResponse, "src=""[^""]*")

    For i = LBound(links) To UBound(links)
        Debug.Print links(i)
    Next i
End Sub
Public Function GetLinks(ByVal inputString As String, ByVal sPattern As String) As Variant
    ' Returns a zero-based Variant array holding every match of sPattern in
    ' inputString, with the literal prefix src=" removed from each match.
    '
    ' Parameters:
    '   inputString - text to search (e.g. the HTML of a page).
    '   sPattern    - VBScript RegExp pattern; matched globally, multi-line, case-sensitive.
    '
    ' Returns:
    '   Variant array of strings. When nothing matches, prints "Failed" to the
    '   Immediate window and returns an empty array (LBound 0, UBound -1) so that
    '   callers can loop LBound..UBound without hitting "Subscript out of range".
    Dim Matches As Object, arrMatches() As Variant, i As Long

    With CreateObject("vbscript.regexp")
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = sPattern
        ' A single Execute call replaces the former Test + Execute double scan.
        Set Matches = .Execute(inputString)
    End With

    If Matches.Count = 0 Then
        Debug.Print "Failed"
        ' VBA.Array() is always zero-based, regardless of any Option Base setting.
        GetLinks = VBA.Array()
        Exit Function
    End If

    ' Size the result once instead of ReDim Preserve on every iteration.
    ReDim arrMatches(0 To Matches.Count - 1)
    For i = 0 To Matches.Count - 1
        arrMatches(i) = Replace$(Matches.Item(i).Value, "src=""", vbNullString)
    Next i

    GetLinks = arrMatches
End Function