MSXML2.XMLHTTP无法正常工作

时间:2012-08-17 20:42:50

标签: excel-vba msxml shdocvw vba excel

我不清楚这些对象在幕后是如何工作的,但我一直在使用它们,而且效果很好。

' Approach 1: drive Internet Explorer and read the text of the loaded page.
' Declarations come first: oDoc must be declared before it is assigned below.
Dim ie As New SHDocVw.InternetExplorer
Dim oDoc As HTMLDocument
ie.navigate url
' Navigate returns before the page has loaded, so wait until the browser
' reports it is idle and the document is complete before touching it.
Do While ie.Busy Or ie.readyState <> 4 ' 4 = READYSTATE_COMPLETE
    DoEvents
Loop
Set oDoc = ie.Document
iT = oDoc.body.innerText

' Approach 2: issue a synchronous HTTP GET and read the raw response text.
Dim oX As New MSXML2.XMLHTTP
oX.Open "GET", url, False ' False = synchronous; send blocks until the response arrives
oX.send
rT = oX.responseText

但是对于某个特定的网页,IE 对象会挂起并报错,让我的硬盘疯狂读写,令我非常沮丧。

我喜欢MSXML2对象,因为它总是又快又不出故障。但在这个特定的网页上,问题是它没有返回正确的内容:看起来像是返回了错误的页面,或者没有获取到完整的页面。

我的最低要求是获取页面的整个html - 我可以从那里解析。

以下是我想要获取的网页示例: http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay

我的目标是把所有比赛的页面都抓取下来,一直追溯到2001年左右他们开始保存逐回合(play-by-play)记录的时候。他们使用的某种技术似乎让msxml2无法正常工作。

有什么建议让我朝着正确的方向前进吗?

1 个答案:

答案 0 :(得分:1)

这不是最优雅的解决方案,但可以用:

Option Explicit

Sub fantasyFootball_egghead()
    ''Loads a web page in Internet Explorer, saves the body's outer HTML to a
    ''temporary file, and imports that file into the "Data" worksheet of this
    ''workbook through a web query. The "Data" sheet is created if it is missing
    ''and cleared before the import.
    ''
    ''The temporary file is deleted and the Internet Explorer instance is closed
    ''on both the success path and the error path; any error is re-raised to the
    ''caller after cleanup.
    Const READYSTATE_COMPLETE = 4
    Const tempDir As String = "C:\Windows\Temp\"
    Const tempFileName As String = "tempFile.htm"
    Const dataSheetName As String = "Data"

    Dim URL$, s_outerhtml$, tempPath$ ''These are strings
    Dim IE As Object
    Dim i_file% ''This is an integer
    Dim fileIsOpen As Boolean
    Dim ws As Excel.Worksheet, wsData As Excel.Worksheet
    Dim errNum As Long, errSrc As String, errDesc As String

    On Error GoTo ErrHandler

    ''Enter your address to navigate to here
    URL = "http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay"

    ''tempDir already ends with a backslash, so build the path once and reuse it
    ''for writing, querying and deleting the file
    tempPath = tempDir & tempFileName

    Set IE = CreateObject("InternetExplorer.Application")

    ''Make the window visible with true, hidden with false
    IE.Visible = True
    ''navigate to the website
    IE.Navigate URL

    ''use this loop to wait until the webpage has loaded
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    ''The next line helps ensure that the html has been fully loaded
    Application.Wait Now() + TimeValue("0:00:02")
    s_outerhtml = IE.document.body.OuterHtml

    ''Save a temporary copy of the webpage
    i_file = FreeFile
    Open tempPath For Output As #i_file
    fileIsOpen = True
    Print #i_file, s_outerhtml
    Close #i_file
    fileIsOpen = False

    ''Find the "Data" sheet in this workbook, creating it if it doesn't exist
    For Each ws In ThisWorkbook.Worksheets
        If ws.Name = dataSheetName Then
            Set wsData = ws
            Exit For
        End If
    Next ws

    If wsData Is Nothing Then
        Set wsData = ThisWorkbook.Worksheets.Add
        wsData.Name = dataSheetName
    End If

    wsData.Cells.Clear

    ''Here is your webquery, using the temporary file as its source
    ''this is untested in 2003, if it errors out, record a macro
    ''and replace the property that throws the error with your recorded property
    ''The destination range is qualified with the data sheet: an unqualified
    ''Range would point at whatever sheet happens to be active.
    With wsData.QueryTables.Add( _
            Connection:="URL;" & tempPath, _
            Destination:=wsData.Range("$A$1"))
        .Name = "Data"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingAll
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With

CleanUp:
    ''Best-effort cleanup: a failure here must not hide the original error
    On Error Resume Next
    If fileIsOpen Then Close #i_file
    ''delete the temporary file
    If Len(Dir$(tempPath)) > 0 Then Kill tempPath
    If Not IE Is Nothing Then IE.Quit
    Set IE = Nothing
    On Error GoTo 0

    ''Re-raise the error captured by the handler, if any
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

ErrHandler:
    ''Remember the error, then leave handler mode so the cleanup code can run
    ''under its own On Error Resume Next
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUp
End Sub

如果你把它放在循环中运行,请务必在每次运行后删除查询表,否则连接过多时 Excel 会停止响应。

Sub delete_qryTables()
    ''Deletes every query table on the "Data" worksheet of this workbook.
    ''Raises a run-time error if this workbook has no worksheet named "Data".
    Dim qts As QueryTables
    Dim i As Long

    Set qts = ThisWorkbook.Worksheets("Data").QueryTables

    ''Walk the collection backwards by index: deleting members while a
    ''For Each enumeration over the same collection is running can cause
    ''items to be skipped.
    For i = qts.Count To 1 Step -1
        qts(i).Delete
    Next i

End Sub