我想使用Excel宏从网上绘制数据

时间:2018-11-21 17:56:12

标签: html excel vba web-scraping

我想从http://arsiv.sahadan.com/genis_ekran_iddaa_programi/开始使用旧程序。为此,我使用名为@QHarr的模块修改了宏,但无法处理表。宏不起作用。

    Public Sub Deneme()
    Application.ScreenUpdating = False
    Sheets("X").Select
    Cells.Delete Shift:=xlUp
    Range("A1").Select
    Dim url As String, ws As Worksheet, html As HTMLDocument, http As clsHTTP, hTable As HTMLTable
    Dim headerRow As Boolean, trow As Object, tRows As Object, tCell As Object, tCells As Object
    Dim iRow As Long, R As Long, C As Long, Hsay As Long, numberOfRequests As Long
    Dim hafta(), results(), headers()
    headers = Array("Hsay", "Saat", "Lig", "Kod", "MBS", "Ev Sahibi", "Misafir", "IY", "MS", "MS1", "MSX", "MS2", "IY1", "IYX", "IY2", "he", "H1", "HX", "H2", "hm", "KGV", "GVY", "CS1/X", "CS1/2", "X/2", "IY1,5A", "IY1,5U", "1,5A", "1,5U", "2,5A", "2,5U", "3,5A", "3,5U", "TG01", "TG23", "TG46", "7+")
    Set http = New clsHTTP
    Set ws = ThisWorkbook.Worksheets("X")
    Set html = New HTMLDocument
    hafta = Application.Transpose(Sheets("Y").Range("A1:A" & Sheets("Y").Range("A1048576").End(xlUp).Row).Value)
    Const numTableRows As Long = 500
    Const numTableColumns As Long = 37
    Const BASE_URL As String = "http://arsiv.sahadan.com/LargeProgram.aspx?"
    numberOfRequests = UBound(hafta)
    ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
    For Hsay = 1 To numberOfRequests
        headerRow = True
        url = BASE_URL & "id=weekId&value=" & hafta(Hsay)
        html.body.innerHTML = http.GetString(url)
        Set hTable = html.querySelector("dvLargeHead")
        Set tRows = hTable.getElementsByTagName("tr")
        For Each trow In tRows
            If Not headerRow Then
                C = 2: R = R + 1
                results(R, 1) = hafta(Hsay)
                Set tCells = trow.getElementsByTagName("td")
                For Each tCell In tCells
                    results(R, C) = tCell.innerText
                    C = C + 1
                Next
            End If
            headerRow = False
        Next
    Next
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

1 个答案:

答案 0 :(得分:2)

我无法成功重现用于更新页面的ajax请求。我收到“拒绝访问”信息,这使我认为除了简单的查询字符串部分之外,我还必须缺少一些协议/身份验证。

以下是使用硒基础的示例。由于要复制所有格式,因此速度很慢,因为布局有点挑剔。

我写的东西没有使用剪贴板,如果我满意的话,以后可以添加。它要快得多。

Option Explicit

Public Sub GetInfo()
    Dim d As WebDriver, clipboard As Object
    Dim ele As Object, ws As Worksheet, t As Date, weeks As Object, i As Long
    Const MAX_WAIT_SEC As Long = 15
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    Set d = New ChromeDriver
    Const URL = "http://arsiv.sahadan.com/genis_ekran_iddaa_programi/"

    With d
        .Start "Chrome"
        .get URL, timeout:=90000

        Set weeks = .FindElementsByCss("#weekId option")
        .FindElementByCss("[value='-1']").Click
        For i = 1 To weeks.Count
            If i > 1 Then
                .FindElementsByCss("#weekId option")(i).Click
            End If
            Dim html As HTMLDocument
            Set html = New HTMLDocument
            t = Timer
            Do
                DoEvents
                On Error Resume Next
                Set ele = .FindElementByCss("#dvLarge #resultsList")
                On Error GoTo 0
                If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While ele Is Nothing

            If Not ele Is Nothing Then
                clipboard.SetText ele.Attribute("outerHTML")
                clipboard.PutInClipboard
                ws.Cells.UnMerge
                Application.Wait Now + TimeSerial(0, 0, 1)
                ws.Cells(GetLastRow(ws, 1) + 1, 1).PasteSpecial
                Application.Wait Now + TimeSerial(0, 0, 3)
            End If

            Set ele = Nothing
        Next
        .Quit
    End With

End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function