VBA HTML表刮 - 复杂表中的特定原始

时间:2017-03-01 11:03:48

标签: excel vba excel-vba web-scraping

我创建了以下模块,从下面的表格html(需要登录)中的特定列(总销列)中搜索前20名游戏: https://www.appannie.com/apps/google-play/top-chart/united-states/game/?device=&date=2017-03-01&feed=All&rank_sorting_type=rank&page_number=0&page_size=100

特定值嵌套在以下代码中:

table html

以下VBA代码是我尝试抓取的。 有没有人知道循环我的代码直到原始表的数量是20,需要你们的建议。

Sub TopChartGoogle()

Dim IE As New InternetExplorer
Dim tickername As String
Dim doc As HTMLDocument
Dim Nof As String

Dim i As Integer

For i = 8 To 27
tickername = Sheet4.Range("A" & i).Value


IE.Visible = True
IE.navigate "https://www.appannie.com/apps/google-play/top-chart/united-states/game/" & tickername

Do
    DoEvents
    Loop Until IE.readyState = READYSTATE_COMPLETE
Application.Wait (Now + TimeValue("00:00:5"))
Set doc = IE.document


Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("main-row table-row")(0).getElementsByTagName("td")(3).getElementsByTagName("a")(1).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("C" & i).Value = "Wrong Elements"
Else
Sheet4.Range("C" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:1"))

Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("main-row table-row")(0).getElementsByTagName("td")(3).getElementsByTagName("a")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("E" & i).Value = "Wrong Elements"
Else
Sheet4.Range("E" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:1"))


doc.getElementsByClassName("app-name")(2).Click '<<----click the game name

'-----------------------------Genre Game-------------------------------
Application.Wait (Now + TimeValue("00:00:5"))
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("app-box-content")(5).getElementsByTagName("p")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("D" & i).Value = "Wrong Elements"
Else
Sheet4.Range("D" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'------------------------Average Star-------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("rating-brief")(0).getElementsByTagName("strong")(1).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("F" & i).Value = "Wrong Elements"
Else
Sheet4.Range("F" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'------------------------Star 5------------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("table-wrapper")(0).getElementsByTagName("tr")(0).getElementsByTagName("td")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("G" & i).Value = "Wrong Elements"
Else
Sheet4.Range("G" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'-----------------------------Star 4------------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("table-wrapper")(0).getElementsByTagName("tr")(1).getElementsByTagName("td")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("H" & i).Value = "Wrong Elements"
Else
Sheet4.Range("H" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'--------------------------Star 3-------------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("table-wrapper")(0).getElementsByTagName("tr")(2).getElementsByTagName("td")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("I" & i).Value = "Wrong Elements"
Else
Sheet4.Range("I" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'----------------------------Star 2-------------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("table-wrapper")(0).getElementsByTagName("tr")(3).getElementsByTagName("td")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("J" & i).Value = "Wrong Elements"
Else
Sheet4.Range("J" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))
'----------------------------Star 1-------------------------------------------
Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("table-wrapper")(0).getElementsByTagName("tr")(4).getElementsByTagName("td")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("K" & i).Value = "Wrong Elements"
Else
Sheet4.Range("K" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:2"))

IE.Quit

Next
End Sub

1 个答案:

答案 0 :(得分:0)

如果我尝试循环,例如在下面的第一个块中,结果提取是在Sheet4.Range(&#34; C&#34;&amp; i)中的堆栈直到最后一行,并且excel表中的19行保持为空

Dim j As Integer, k As Integer
For j = 0 To 19
    For k = 2 To 97 Step 5

Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("main-row table-row")(j).getElementsByTagName("td")(3).getElementsByTagName("a")(1).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("C" & i).Value = "Wrong Elements"
Else
Sheet4.Range("C" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:1"))

Nof = "ERRORHERE"
On Error Resume Next
Nof = Trim(doc.getElementsByClassName("main-row table-row")(j).getElementsByTagName("td")(3).getElementsByTagName("a")(2).innerText)
On Error GoTo 0
If Nof = "ERRORHERE" Then
Sheet4.Range("E" & i).Value = "Wrong Elements"
Else
Sheet4.Range("E" & i).Value = Nof
End If
Application.Wait (Now + TimeValue("00:00:1"))

doc.getElementsByClassName("app-name")(k).Click '<<----click the target game in current row

    Next k
Next j