跟随链接的宏并将表下载到新工作表中

时间:2014-12-29 14:09:54

标签: excel vba excel-vba web-scraping

我是路易斯安那州一家小型石油公司的地质学家。我构成了我们的技术部门,不幸的是我的编码经验非常有限。我过去使用过非常基本的vba编码,但是在日常工作中我没有那么多代码,所以我已经忘记了大部分内容。

louisiana dnr为该州钻探的每一口油井保留了惊人的记录,所有这些记录都位于www.Sonris.com。这些记录的一部分是每口井的生产记录。我想创建一个跟随给定URL的宏并下载在URL上找到的表(也就是生产记录)。下载文件后,我希望将表格放在新表格中,然后根据井名命名该表格。

我愚弄了来自web功能的检索数据,但是我无法使该功能足够动态。我需要代码来复制单元格中的超链接数据。目前,代码只是遵循我在录制宏时复制和粘贴的超链接。

任何帮助将不胜感激

此致 约西亚

以下是生成的代码;

    Sub Macro2()
'
'     Macro2 Macro
' attempt with multiple well to look at code instead of 1 well
'

'
    Range("E27").Select
    ActiveWorkbook.Worksheets.Add
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellinfo2?p_WSN=159392" _
        , Destination:=Range("$A$1"))
        .Name = "cart_con_wellinfo2?p_WSN=159392"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlSpecifiedTables
        .WebFormatting = xlWebFormattingNone
        .WebTables = "1,11"
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
    Sheets("Sheet1").Select
End Sub

3 个答案:

答案 0 :(得分:5)

使用所有可用于清理外部数据的方法,许多用户忘记了您可以打开一个网页,其中包含的表格只有一个有效的URL和文件►打开。我在这里发布代码,但我还提供了一个工作样本工作簿的链接,花了大约2分钟从14个连续编号的WSN( web序列号)页面收集完整的网页数据。您自己的结果可能会有所不同。

Option Explicit

Public Const csURL As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellinfo2?p_WSN=×WSN×"

Sub Gather_Well_Data()
    Dim rw As Long, lr As Long, w As Long, wsn As String, wb As Workbook
    On Error GoTo Fìn
    Application.ScreenUpdating = False
    Application.DisplayAlerts = False
    With ThisWorkbook.Sheets("WSNs")
        lr = .Cells(Rows.Count, 1).End(xlUp).Row
        For rw = 2 To lr
            .Cells(rw, 2) = 0
            For w = 1 To .Parent.Sheets.Count
                If .Parent.Sheets(w).Name = CStr(.Cells(rw, 1).Value) Then
                    .Parent.Sheets(w).Delete
                    Exit For
                End If
            Next w
            wsn = Replace(csURL, "×WSN×", .Cells(rw, 1).Value)
            Set wb = Workbooks.Open(Filename:=wsn, ReadOnly:=True, addtomru:=False)
            wb.Sheets(1).Range("A1:A3").Font.Size = 12
            wb.Sheets(1).Copy After:=.Parent.Sheets(.Parent.Sheets.Count)
            .Parent.Sheets(.Parent.Sheets.Count).Name = .Cells(rw, 1).Value
            wb.Close savechanges:=False
            Set wb = Nothing
            .Cells(rw, 2) = 1
            Application.ScreenUpdating = True
            Application.ScreenUpdating = False
            .Parent.Save
        Next rw
        .Activate
    End With
Fìn:
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
End Sub

WSN标识符列表位于 WSNs 工作表中,从第2列开始。通过点击 Alt + F8 运行宏来打开对话框和运行 Gather_Well_Data 宏。完成后,您将获得一个工作簿,其中填写了与下面类似的WSN标识的工作表。

LA Well data

示例工作簿位于我的公共DropBox上:

LA_WSN_Data.xlsb

答案 1 :(得分:3)

只是为了捎带@Jeeped真棒解决方案,我在格式化中添加了删除,只留下了LeaseUnit / Well / Production信息。假设Casing表始终遵循生产表

Option Explicit

Public Const csURL As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellinfo2?p_WSN=×WSN×"

Sub Gather_Well_Data()
    Dim rw As Long, lr As Long, w As Long, wsn As String, wb As Workbook, frow As String, lrow As String
    On Error GoTo Fìn
    Application.ScreenUpdating = False
    Application.DisplayAlerts = False



    With ThisWorkbook.Sheets("WSNs")
        lr = .Cells(Rows.Count, 1).End(xlUp).Row
        For rw = 2 To lr
            .Cells(rw, 2) = 0
            For w = 1 To .Parent.Sheets.Count
                If .Parent.Sheets(w).Name = CStr(.Cells(rw, 1).Value) Then
                    .Parent.Sheets(w).Delete
                    Exit For
                End If
            Next w
            wsn = Replace(csURL, "×WSN×", .Cells(rw, 1).Value)
            Set wb = Workbooks.Open(Filename:=wsn, ReadOnly:=True, addtomru:=False)

            frow = Application.WorksheetFunction.Match("LEASE\UNIT\WELL PRODUCTION", Range("A:A"), 0)
            lrow = Application.WorksheetFunction.Match("Casing", Range("A:A"), 0)
            lrow = lrow - 1
            frow = "A" & frow
            lrow = "K" & lrow
            Range(frow, lrow).Cut Range("Q1")
            Columns("A:P").Select
            Selection.Delete Shift:=xlToLeft
            Cells.EntireColumn.AutoFit

            wb.Sheets(1).Range("A1:A3").Font.Size = 12
            wb.Sheets(1).Copy After:=.Parent.Sheets(.Parent.Sheets.Count)
            .Parent.Sheets(.Parent.Sheets.Count).Name = .Cells(rw, 1).Value
            wb.Close savechanges:=False
            Set wb = Nothing
            .Cells(rw, 2) = 1
            Application.ScreenUpdating = True
            Application.ScreenUpdating = False
            .Parent.Save
        Next rw
        .Activate
    End With
Fìn:
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
End Sub

答案 2 :(得分:1)

吉普的方法令人震惊。+ 1

您还可以针对API发出POST个请求,并按如下所示写出所有表格。

注意:我在每个井信息的下面写一个,但是很容易放置一个Sheets。在下一个API调用之前添加一行,并简单地确保每个数据写出都使用活动表。

Option Explicit
Public Sub GetWellInfo()
    Dim ws As Worksheet, page As HTMLDocument, targetTable As HTMLTable, apiNumbers(), currNumber As Long
    Const PARAM1 As String = "p_apinum"
    Const BASESTRING As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/"
    apiNumbers = Array(1708300502, 1708300503)

    Application.ScreenUpdating = False
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With ws
        .Cells.ClearContents
        For currNumber = LBound(apiNumbers) To UBound(apiNumbers)
            Set page = GetPage(BASESTRING & "cart_con_wellapi2", apiNumbers(currNumber), PARAM1)
            Set page = GetPage(BASESTRING & GetNextURL(page.body.innerHTML))
            Dim allTables As Object
            Set allTables = page.getElementsByTagName("table")

            For Each targetTable In allTables
                AddHeaders targetTable, GetLastRow(ws, 1) + 2, ws
                WriteTables targetTable, GetLastRow(ws, 1), ws
            Next targetTable

        Next currNumber
    End With
    Application.ScreenUpdating = True
End Sub

Public Function GetPage(ByVal url As String, Optional ByVal apiNumber As Long, Optional ByVal paramN As String = vbNullString) As HTMLDocument
    Dim objHTTP As Object, html As New HTMLDocument

    Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")

    Dim sBody As String
    If Not paramN = vbNullString Then sBody = paramN & "=" & apiNumber
    With objHTTP
        .SetTimeouts 10000, 10000, 10000, 10000
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        On Error Resume Next
        .send (sBody)
        If Err.Number = 0 Then
            If .Status = "200" Then
                html.body.innerHTML = .responseText
                Set GetPage = html
            Else
                Debug.Print "HTTP " & .Status & " " & .statusText
                Exit Function
            End If
        Else
            Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
            Exit Function
        End If
        On Error GoTo 0
    End With

End Function

Public Function GetNextURL(ByVal inputString As String)
    GetNextURL = Replace$(Replace$(Split(Split(inputString, "href=")(1), ">")(0), Chr$(34), vbNullString), "about:", vbNullString)
End Function

Public Sub AddHeaders(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
    Dim headers As Object, header As Object, columnCounter As Long
    Set headers = hTable.getElementsByTagName("th")
    For Each header In headers
        columnCounter = columnCounter + 1
        ws.Cells(startRow, columnCounter) = header.innerText
    Next header
End Sub

Public Sub WriteTables(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByRef ws As Worksheet)
    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
    r = startRow
    With ActiveSheet
        Set tRow = hTable.getElementsByTagName("tr")
        For Each tr In tRow
            Set tCell = tr.getElementsByTagName("td")
            For Each td In tCell
                .Cells(r, c).Value = td.innerText
                c = c + 1
            Next td
            r = r + 1:  c = 1
        Next tr
    End With
End Sub