使用 MSHTML.HTMLDocument 从表格中解析/抓取指定列

时间:2015-11-24 21:18:02

标签: html xml vba parsing

我编写了下面这段代码,它会从变量 URL 所指向的网页上抓取整个表格。但我只想按列名"外推Vol"抓取/解析其中的一列。我对 html / xml 不太熟悉,所以如果能给出解决方案并附上解释,将不胜感激!

https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm

谢谢

Sub ExtractAlbertaAIL()
    ' Downloads the gas day summary page and copies every <td> cell of the
    ' third <table> on that page (index 2, zero-based) into the
    ' "Gas Day Summary" worksheet of the active workbook: one worksheet row
    ' per <tr>, one column per <td>, starting at cell A1.
    '
    ' The early-bound types below need project references to the Microsoft XML
    ' and Microsoft HTML Object libraries.
    '
    ' Any runtime error (network failure, missing worksheet, unexpected page
    ' layout) is reported with a message box, and screen updating / events are
    ' always switched back on before the procedure exits.

    Const SHEET_NAME As String = "Gas Day Summary"
    Const TABLE_INDEX As Long = 2

    Dim URL As String
    Dim Request As MSXML2.XMLHTTP
    Dim doc As MSHTML.HTMLDocument
    Dim tbl As Object                      ' late bound, like the original .Item(2) call chain
    Dim tr As MSHTML.HTMLGenericElement
    Dim td As MSHTML.HTMLGenericElement
    Dim ws As Worksheet

    ' Long instead of Integer: Integer overflows past 32767.
    Dim RowNumber As Long
    Dim ColNumber As Long

    ' Install the handler before touching Application state so that every
    ' failure path runs through Cleanup.
    On Error GoTo Fail

    Application.ScreenUpdating = False
    Application.EnableEvents = False

    Set ws = ActiveWorkbook.Worksheets(SHEET_NAME)
    ws.Range("A5:H10000") = ""

    ' CreateObject raises a runtime error on failure (it never returns
    ' Nothing), so a failure here is handled by Fail.
    Set Request = CreateObject("msxml2.xmlhttp")

    URL = "https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm"
    With Request
        .Open "GET", URL, False            ' False = synchronous request
        .send

        ' Do not try to parse an HTTP error page as if it were the report.
        If .Status <> 200 Then
            Err.Raise vbObjectError + 513, "ExtractAlbertaAIL", _
                      "HTTP " & .Status & " " & .statusText & " returned for " & URL
        End If

        Set doc = New MSHTML.HTMLDocument
        doc.body.innerHTML = .responseText
    End With

    ' Item() yields Nothing when the page has fewer tables than expected.
    Set tbl = doc.getElementsByTagName("table").Item(TABLE_INDEX)
    If tbl Is Nothing Then
        Err.Raise vbObjectError + 514, "ExtractAlbertaAIL", _
                  "The page does not contain a table at index " & TABLE_INDEX
    End If

    RowNumber = 1
    For Each tr In tbl.getElementsByTagName("tr")
        ColNumber = 1
        For Each td In tr.getElementsByTagName("td")
            ws.Cells(RowNumber, ColNumber) = td.innerText
            ColNumber = ColNumber + 1
        Next td
        RowNumber = RowNumber + 1
    Next tr

Cleanup:
    Application.ScreenUpdating = True
    Application.EnableEvents = True
    Exit Sub

Fail:
    MsgBox "ExtractAlbertaAIL failed: " & Err.Description, vbExclamation
    Resume Cleanup
End Sub

1 个答案:

答案 0 :(得分:1)

试试这个



Sub ExtractAlbertaAIL()
    ' Downloads the gas day summary page and, from the third <table> on that
    ' page (index 2, zero-based), copies two cells of every row from the
    ' "VOLUME SUMMARY" row onwards into the "Gas Day Summary" worksheet of the
    ' active workbook:
    '   column A <- first cell of the row  (cell index 0)
    '   column B <- third cell of the row  (cell index 2)
    ' Output starts at worksheet row 1. Rows before the "VOLUME SUMMARY" row
    ' are ignored.
    '
    ' The early-bound types below need project references to the Microsoft XML
    ' and Microsoft HTML Object libraries.
    '
    ' Any runtime error (network failure, missing worksheet, unexpected page
    ' layout) is reported with a message box, and screen updating / events are
    ' always switched back on before the procedure exits.

    Const SHEET_NAME As String = "Gas Day Summary"
    Const TABLE_INDEX As Long = 2
    Const SECTION_HEADING As String = "VOLUME SUMMARY"

    Dim URL As String
    Dim Request As MSXML2.XMLHTTP60
    Dim doc As MSHTML.HTMLDocument
    Dim tbl As Object                      ' late bound, like the original .Item(2) call chain
    ' HTMLTableRow is the element type that exposes the cells collection.
    Dim tr As MSHTML.HTMLTableRow
    Dim rowCells As MSHTML.IHTMLElementCollection
    Dim ws As Worksheet
    Dim VOLUME_SUMMARY_FOUND As Boolean

    ' Long instead of Integer: Integer overflows past 32767.
    Dim RowNumber As Long

    ' Install the handler before touching Application state so that every
    ' failure path runs through Cleanup.
    On Error GoTo Fail

    Application.ScreenUpdating = False
    Application.EnableEvents = False

    Set ws = ActiveWorkbook.Worksheets(SHEET_NAME)
    ws.Range("A5:H10000") = ""

    ' Instantiate the same class the variable is declared as. A failure here
    ' raises a runtime error, which is handled by Fail.
    Set Request = New MSXML2.XMLHTTP60

    URL = "https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm"
    With Request
        .Open "GET", URL, False            ' False = synchronous request
        .send

        ' Do not try to parse an HTTP error page as if it were the report.
        If .Status <> 200 Then
            Err.Raise vbObjectError + 513, "ExtractAlbertaAIL", _
                      "HTTP " & .Status & " " & .statusText & " returned for " & URL
        End If

        Set doc = New MSHTML.HTMLDocument
        doc.body.innerHTML = .responseText
    End With

    ' Item() yields Nothing when the page has fewer tables than expected.
    Set tbl = doc.getElementsByTagName("table").Item(TABLE_INDEX)
    If tbl Is Nothing Then
        Err.Raise vbObjectError + 514, "ExtractAlbertaAIL", _
                  "The page does not contain a table at index " & TABLE_INDEX
    End If

    VOLUME_SUMMARY_FOUND = False
    RowNumber = 1
    For Each tr In tbl.getElementsByTagName("tr")
        Set rowCells = tr.cells

        ' Rows without any cell can neither start the section nor contribute data.
        If rowCells.Length > 0 Then
            If rowCells.Item(0).innerText = SECTION_HEADING Then
                VOLUME_SUMMARY_FOUND = True
            End If

            If VOLUME_SUMMARY_FOUND Then
                ws.Cells(RowNumber, 1) = rowCells.Item(0).innerText
                ' Heading rows may span columns and have fewer than three
                ' cells; leave column B untouched for those rows.
                If rowCells.Length > 2 Then
                    ws.Cells(RowNumber, 2) = rowCells.Item(2).innerText
                End If
                RowNumber = RowNumber + 1
            End If
        End If
    Next tr

Cleanup:
    Application.ScreenUpdating = True
    Application.EnableEvents = True
    Exit Sub

Fail:
    MsgBox "ExtractAlbertaAIL failed: " & Err.Description, vbExclamation
    Resume Cleanup
End Sub
​