代码为什么不从网页接收数据?

时间:2018-10-20 16:57:03

标签: html excel vba excel-vba web-scraping

Sub Yarislar()
    Dim Asays(), ws As Worksheet, Asay As Long, html As HTMLDocument
    Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
    headers = Array("Asay", "Tarih", "Sehir", "Cins", "Grup", "Msf/Pist", "Derece", "Sira", "Jokey", "Kilo", "GC", "Hnd", "Gny", "Taki")
    Set http = New clsHTTP
    Set ws = ThisWorkbook.Worksheets("X")
    Set html = New HTMLDocument
    Asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
    Const numTableRows As Long = 11
    Const numTableColumns As Long = 15
    Const BASE_URL As String = "https://yenibeygir.com/at/"
    numberOfRequests = UBound(Asays)
    Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
    Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
    ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
    Application.ScreenUpdating = False
    For Asay = 1 To numberOfRequests
        headerRow = True
        url = BASE_URL & Asays(Asay)
        html.body.innerHTML = http.GetString(url)
        Set hTable = html.querySelector(".at_Yarislar")
        Set tRows = hTable.getElementsByTagName("tr")
        For Each tRow In tRows
            If Not headerRow Then
                c = 2: r = r + 1
                results(r, 1) = Asays(Asay)
                Set tCells = tRow.getElementsByTagName("td")
                For Each tCell In tCells
                    results(r, c) = tCell.innerText
                    c = c + 1
                Next
            End If
            headerRow = False
        Next
    Next
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
    Application.ScreenUpdating = True
end sub

为什么我修改的@Qharr代码无法从同一网页上检索其他数据? @QHarr为“ Galoplar”数据编写的代码可以完美地工作,但是当我修改相同的代码时,它对“ Yarislar”不起作用。我在选择表格时会犯错误吗?

Link

1 个答案:

答案 0 :(得分:4)

您缺少类定义。我认为您还需要其他URL构造。您尚未提供答案,但根据您之前的问题,并且有一点想象力,您需要添加以下网址的构造:

BASE_URL & asay & /name

例如

https://yenibeygir.com/at/10221/dorukhatun

因此,源工作表中的A列必须具有与asay ID对应的名称,即,必须包含诸如10221/dorukhatun之类的字符串。

然后,您还必须调整控制表中行号和列号的常数。

您需要适当调整源列A的范围。

我使用了我记得的两个ID,并在A1:A2中添加了以下内容(请注意,某些结果现在已经在试运行中显示在工作表中)

Sheet1:

enter image description here

VBA:

clsHTTP类:

Option Explicit
Private http As Object

Private Sub Class_Initialize()
    Set http = CreateObject("MSXML2.XMLHTTP")
End Sub

Public Function GetString(ByVal url As String) As String
    Dim sResponse As String
    With http
        .Open "GET", url, False
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
        GetString = sResponse
    End With
End Function

标准模块1:

Option Explicit
Public Sub DYarislar()
    Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
    Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long

    headers = Array("Asay", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
    Set http = New clsHTTP
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument
    asays = Application.Transpose(ws.Range("A1:A2").Value) 'Load asay values from sheet 1

    Const numTableRows As Long = 44
    Const numTableColumns As Long = 14
    Const BASE_URL As String = "https://yenibeygir.com/at/"

    numberOfRequests = UBound(asays)

    Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
    Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
    ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)

    Application.ScreenUpdating = False

    For asay = 1 To numberOfRequests
        headerRow = True
        url = BASE_URL & asays(asay)
        html.body.innerHTML = http.GetString(url)

        Set hTable = html.querySelector(".at_Yarislar")

        Set tRows = hTable.getElementsByTagName("tr")

        For Each tRow In tRows
            If Not headerRow Then
                c = 2: r = r + 1
                results(r, 1) = asays(asay)
                Set tCells = tRow.getElementsByTagName("td")
                For Each tCell In tCells
                    results(r, c) = tCell.innerText
                    c = c + 1
                Next
            End If
            headerRow = False
        Next
    Next

    With ws
        .Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
    Application.ScreenUpdating = True
End Sub