我不知道这些对象在幕后究竟做了什么,但我一直在使用它们,而且效果非常好。
' Approach 1: read a page's text through an automated Internet Explorer instance.
' NOTE(review): SHDocVw presumably comes from the "Microsoft Internet Controls"
' reference - confirm it is enabled in the project.
dim ie As New SHDocVw.InternetExplorer
' Start loading the address held in url (url is not declared in this snippet).
ie.navigate url
' The document is read on the very next statement; this snippet contains no
' wait for the page to finish loading between navigate and this line.
Set oDoc = ie.Document
' Keep the innerText of the document body in iT.
iT = oDoc.body.innerText
和
' Approach 2: request the page with MSXML2.XMLHTTP and keep the response text.
dim oX As New MSXML2.XMLHTTP
' Declared here but not used anywhere in this snippet.
dim oDoc As HTMLDocument
' Prepare a GET request for url (url is not declared in this snippet).
' NOTE(review): the third argument (False) presumably selects a synchronous
' request - confirm against the MSXML documentation.
oX.Open "GET", url, False
oX.send
' Keep the response body, as text, in rT.
rT = oX.responseText
但是对于某个特定的网页,ie对象会挂起、报错,让我的硬盘狂转不止,完全指望不上。
我喜欢MSXML2对象,因为它总是快速且无故障。这个特定网页上的问题是它没有返回正确的信息。看起来好像是错误的页面,或者它没有得到整个东西。
我的最低要求是获取页面的整个html - 我可以从那里解析。
我的目标是把它们全部抓取下来,一直追溯到2001年左右——他们从那时开始保存逐回合(play-by-play)记录。他们使用的某种技术似乎妨碍了msxml2正常工作。
有什么建议让我朝着正确的方向前进吗?
答案 0 :(得分:1)
这不是最优雅的解决方案,但可以用:
Option Explicit

' Loads one web page through Internet Explorer and imports it into the "Data"
' worksheet of the workbook that contains this code.
'
' Steps:
'   1. Start an InternetExplorer.Application instance, navigate to URL and wait
'      until the browser reports the page as loaded.
'   2. Write the outer HTML of the document body to a temporary file.
'   3. Import that file into the "Data" sheet through a web QueryTable
'      (the sheet is created when missing and cleared before the import).
'   4. Delete the temporary file and close the browser. This clean-up also runs
'      when an earlier step fails.
'
' Takes no arguments and returns nothing. A runtime error raised by any step is
' re-raised to the caller after the clean-up has run.
Sub fantasyFootball_egghead()
    Const READYSTATE_COMPLETE = 4
    Const tempDir As String = "C:\Windows\Temp\"
    Const DATA_SHEET As String = "Data"

    Dim URL$, s_outerhtml$, s_tempFile$     ''These are strings
    Dim errSrc$, errDesc$
    Dim IE As Object
    Dim i_file%                             ''This is an integer
    Dim errNum As Long
    Dim blnFileOpen As Boolean
    Dim ws As Excel.Worksheet, wsData As Excel.Worksheet

    ''tempDir already ends with a backslash, so the file name is appended
    ''directly. The same path is used for writing, importing and deleting.
    s_tempFile = tempDir & "tempFile.htm"

    On Error GoTo Fail

    ''Enter your address to navigate to here
    URL = "http://www.nfl.com/gamecenter/2011090800/2011/REG1/saints@packers?icampaign=GC_schedule_rr#menu=highlights&tab=analyze&analyze=playbyplay"

    Set IE = CreateObject("InternetExplorer.Application")
    ''Make the window visible with true, hidden with false
    IE.Visible = True
    IE.Navigate URL

    ''Wait until the webpage has loaded
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
    ''The extra pause helps ensure that the html has been fully loaded
    Application.Wait Now() + TimeValue("0:00:02")

    s_outerhtml = IE.document.body.OuterHtml

    ''Save a temporary copy of the page so the web query can read it
    i_file = FreeFile
    Open s_tempFile For Output As #i_file
    blnFileOpen = True
    Print #i_file, s_outerhtml
    Close #i_file
    blnFileOpen = False

    ''Find the "Data" sheet in this workbook, creating it if it doesn't exist.
    ''Excel sheet names are not case-sensitive, so compare as text.
    For Each ws In ThisWorkbook.Worksheets
        If StrComp(ws.Name, DATA_SHEET, vbTextCompare) = 0 Then
            Set wsData = ws
            Exit For
        End If
    Next ws
    If wsData Is Nothing Then
        Set wsData = ThisWorkbook.Worksheets.Add
        wsData.Name = DATA_SHEET
    End If
    wsData.Cells.Clear

    ''Here is your webquery, using the temporary file as its source.
    ''The destination range is qualified with the same sheet that owns the
    ''QueryTables collection; an unqualified Range would point at whichever
    ''sheet happens to be active.
    ''This is untested in 2003; if it errors out, record a macro and replace
    ''the property that throws the error with your recorded property.
    With wsData.QueryTables.Add( _
            Connection:="URL;" & s_tempFile, _
            Destination:=wsData.Range("$A$1"))
        .Name = "Data"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingAll
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With

CleanUp:
    ''Best-effort clean-up: a failure here must not hide the original error
    On Error Resume Next
    If blnFileOpen Then Close #i_file
    Kill s_tempFile                         ''delete the temporary file
    If Not IE Is Nothing Then IE.Quit
    Set IE = Nothing
    On Error GoTo 0

    ''Hand the original error (if any) back to the caller
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Fail:
    ''Remember the error, then leave the handler so that the clean-up code
    ''runs with normal error trapping available again
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUp
End Sub
如果你把它放在循环中,只需确保删除查询表,否则当连接太多时,excel将停止运行。
' Deletes every QueryTable on the "Data" worksheet of the workbook that
' contains this code.
'
' Takes no arguments and returns nothing. Raises the usual runtime error if
' the workbook has no worksheet named "Data".
Sub delete_qryTables()
    Dim qts As QueryTables
    Dim idx As Long

    Set qts = ThisWorkbook.Worksheets("Data").QueryTables

    ' Walk the collection from the last item to the first: deleting by index
    ' in reverse never shifts the items that are still to be visited, unlike
    ' removing members while a For Each enumeration is in progress.
    For idx = qts.Count To 1 Step -1
        qts.Item(idx).Delete
    Next idx
End Sub