文件下载并通过VBA打开

时间:2018-11-06 17:29:41

标签: html excel vba web-scraping

我想通过Excel vba下载附加到html的Excel文件,并将其输出到Excel工作表中。该主页列出了在韩国电影院中颇受欢迎的票房。

http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd=

附加的文件是Excel。我了解可以通过Internet搜索使用单击方法来完成下载。但是,在文件下载过程中会出现一个警告窗口,并且日期将被插入要下载的Excel文件的名称中。作为Excel VBA的初学者,这非常困难。因此,我离开了这个问题,实施将这个文件传播到Excel工作表上的逻辑有什么用?我是Excel VBA的初学者,因此,如果您给我详细的答案,将会非常有帮助。

<p class = "btn_regi">
<a href="#none" class="btn_type01" onclick="chkform('excel'); return false ;"> 
<strong> Excel </ strong> </a>
</ p>

以下代码是我自己编写的逻辑,直到黎明。但是,逻辑效率太低,结果没有用,所以我寻求帮助。

Sub program_()

        Application.ScreenUpdating = False
        Application.Calculation = xlCalculationManual

        Dim bridge As String

        Dim WinHttp As New WinHttpRequest
        Dim sResponse As String, html As New HTMLDocument, hStructure As Object, hTable As HTMLTable

        Dim ws As Worksheet
        Set ws = ThisWorkbook.Worksheets("Sheet1")

        Set WinHttp = CreateObject("WinHttp.WinHttpRequest.5.1")

        Dim Url As String
        Url = "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"

        Dim p1 As String 'parameter
        Dim v1 As String
        Dim p2 As String
        Dim v2 As String
        Dim p3 As String
        Dim v3 As String
        Dim p4 As String
        Dim v4 As String
        Dim p5 As String
        Dim v5 As String
        Dim v As Integer
        Dim g As Integer

        bridge = "&"
        p1 = "loadEnd="
        v1 = 0
        p2 = "searchType="
        v2 = "search"
        p3 = "sMultiMovieYn="
        v3 = ""
        p4 = "sRepNationCd="
        v4 = ""
        p5 = "sWideAreaCd="
        v5 = ""


            With WinHttp

                .Open "get", "" & Url & p1 & v1 & bridge & p2 & v2 & bridge & p3 & v3 & bridge & p4 & v4 & bridge & p5 & v5 & ""
                .SetRequestHeader "Referer", "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"
                .SetRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .Send
                .WaitForResponse ': DoEvents

                sResponse = StrConv(.responseBody, vbUnicode)

            Dim hforms As HTMLFormElement

            With html
                .body.innerHTML = sResponse
                sResponse = ""


                Set hTable = .getElementsByClassName("boardList03")(0)
            End With

            Dim Arr0() As Variant
            Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
            r = 0
            With ws
                Set tRow = hTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")

                ReDim Arr0(tRow.Length - 1, 10)
                For Each tr In tRow
                    r = r + 1
                    Set tCell = tr.getElementsByTagName("td")

                Dim j As Integer

                    c = 1
                    For Each td In tCell

                        If td.ID = "td_rank" Then
                        Arr0(r - 1, 0) = td.innerText
                        End If

                        If td.ID = "td_movie" Then
                        Arr0(r - 1, 1) = td.getElementsByTagName("a")(0).innerText
                        End If

                        If td.ID = "td_openDt" Then
                        Arr0(r - 1, 2) = td.innerText
                        End If

                        If td.ID = "td_salesAcc" Then
                        Arr0(r - 1, 3) = td.innerText
                        End If

                        If td.ID = "td_audiAcc" Then
                        Arr0(r - 1, 4) = td.innerText
                        End If

                        If td.ID = "td_scrnCnt" Then
                        Arr0(r - 1, 5) = td.innerText
                        End If

                        If td.ID = "td_showCnt" Then
                        Arr0(r - 1, 6) = td.innerText
                        End If

                        c = c + 1
                    Next td

                Next tr

                Dim k As Integer
                Dim i As Integer

                k = 0
                For i = LBound(Arr0, 1) To UBound(Arr0, 1)

                                           .Cells(2 + k + g, 2) = Arr0(i, 0)
                                           .Cells(2 + k + g, 3) = Arr0(i, 1)

                                           .Cells(2 + k + g, 4) = Arr0(i, 2)
                                           .Cells(2 + k + g, 5) = Arr0(i, 3)
                                           .Cells(2 + k + g, 6) = Arr0(i, 4)
                                           .Cells(2 + k + g, 7) = Arr0(i, 5)
                                           .Cells(2 + k + g, 8) = Arr0(i, 6)
                        k = k + 1
                Next i
            End With

        Erase Arr0

        Set tRow = Nothing: Set tCell = Nothing: Set tr = Nothing: Set td = Nothing
        Set hforms = Nothing
        Set hTable = Nothing


        Application.Calculation = xlCalculationAutomatic
        Application.ScreenUpdating = True

        End Sub

1 个答案:

答案 0 :(得分:0)

您只需按表的ID抓住表,然后循环表行和行内的表单元格即可。

Option Explicit
Public Sub GetInfo()
    Dim IE As New InternetExplorer, hTable As HTMLTable
    Const MAX_WAIT_SEC As Long = 5
    Application.ScreenUpdating = False
    With IE
        .Visible = True
        .navigate "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd="

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set hTable = .document.getElementById("table_former")

        WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")

        .Quit
        Application.ScreenUpdating = True
    End With

End Sub


Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)

    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, tBody As Object
    r = startRow
    With ws
        Dim headers As Object, header As Object, columnCounter As Long
        Set headers = hTable.getElementsByTagName("th")
        For Each header In headers
            columnCounter = columnCounter + 1
            .Cells(startRow, columnCounter) = header.innerText
        Next header
        startRow = startRow + 1
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody               'HTMLTableSection
            Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
            For Each tr In tRow
                r = r + 1
                Set tCell = tr.getElementsByTagName("td")
                c = 1
                For Each td In tCell             'DispHTMLElementCollection
                    .Cells(r, c).Value = td.innerText 'HTMLTableCell
                    c = c + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub

参考(VBE>工具>参考):

  1. Microsoft HTML对象库
  2. Microsoft Internet控件