使用Excel VBA从网站上刮取数据

时间:2015-09-03 16:00:43

标签: excel vba excel-vba

我要访问以下网站:

https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1=1642+Harmon+Street&address2=&city=Berkeley&state=CA&urbanCode=&postalCode=&zip=94703

我正在尝试提取出现的第一个拉链+4(94703-2636)。

Dim doc As HTMLDocument
Set doc = IE.document
On Error Resume Next
output = doc.getElementsByClassName("zip4")(0).innerText
'Sheet1.Range("E2").Value = output
MsgBox output

'IE.Quit
End Sub

这就是我尝试这样做的方法,但是文本框或将数据添加到范围会给出一个空白答案。这不是完整的代码,但之前的所有内容似乎都正常。

有关如何解决此问题的任何想法?非常感谢你!

编辑:这是我的完整代码:

它引用的单元格是具有完整地址的单元格。

Sub USPS()

Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True

IE.Navigate "https://tools.usps.com/go/ZipLookupAction!input.action?mode=1&refresh=true"
Do
DoEvents
Loop Until IE.READYSTATE = 4

Dim Address As String
Address = Sheet1.Range("A2").Value

Dim City As String
City = Sheet1.Range("B2").Value

Dim State As String
State = Sheet1.Range("C2").Value

Dim Zipcode As String
Zipcode = Sheet1.Range("D2").Value


Call IE.document.getElementbyID("tAddress").SetAttribute("value", Address)
Call IE.document.getElementbyID("tCity").SetAttribute("value", City)
With IE.document.getElementbyID("sState")
    For i = 0 To .Length - 1
        If .Item(i).Value = State Then
            .Item(i).Selected = True
            Exit For
        End If
    Next

End With

Call IE.document.getElementbyID("Zzip").SetAttribute("value", Zipcode)

Set ElementCol = IE.document.getElementbyID("lookupZipFindBtn")
ElementCol.Click


''''' Hard Part

Dim doc As HTMLDocument
Set doc = IE.document
On Error Resume Next
output = Trim(doc.getElementsByClassName("zip4")(0).innerText)
'Sheet1.Range("E2").Value = output
MsgBox output

'IE.Quit
End Sub

编辑2:带动态网址的XML?

Sub ZipLookUp()
Dim URL As String, xmlHTTP As Object, html As Object, htmlResponse As String
Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
Dim Zip4Digit As String

Dim number As String
Dim address As String
Dim city As String
Dim state As String
Dim zipcode As String
Dim abc As String

number = Sheet1.Range("A2")
address = Sheet1.Range("B2")
city = Sheet1.Range("C2")
state = Sheet1.Range("D2")
zipcode = Sheet1.Range("E2")

    URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1="
    URL = URL & number & "+" & address & "&address2=&city=" & city & "&state=" & state & "&urbanCode=&postalCode=&zip=" & zipcode
    Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
    xmlHTTP.Open "GET", URL, False
    On Error GoTo NoConnect
    xmlHTTP.send
    On Error GoTo 0
    Set html = CreateObject("htmlfile")
    htmlResponse = xmlHTTP.responseText
    If htmlResponse = Null Then
        MsgBox ("Aborted - HTML response was null")
        GoTo End_Prog
    End If

    SStr = "<span class=""zip4"">": EStr = "</span><br />" 'Searches for a string within 2 strings
    StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
    EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
    Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)

    Sheet1.Range("F2").Value = Zip4Digit

GoTo End_Prog
NoConnect:
    If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub

2 个答案:

答案 0 :(得分:1)

这对我有用,而且速度更快。打开IE的实际实例比使用XMLHTTP要慢得多。

Public Sub ZipLookUp()
    Dim URL As String, xmlHTTP As Object, html As Object, document As Object, htmlResponse As String
    Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
    Dim Zip4Digit As String
    Dim number As String
    Dim address As String
    Dim city As String
    Dim state As String
    Dim zipcode As String
    Dim ws As Worksheet

    ' it is good practice to define sheets (and cells) instead of simply referencing them multiple times
    ' that way, you can change them much more easily it if you *ever* need to.
    Set ws = Sheets("Sheet1") ' instead of 'Sheet1', the correct syntax is Sheets("Sheet1").Range("A1")

    number = ws.Range("A2")
    address = ws.Range("B2")
    city = ws.Range("C2")
    state = ws.Range("D2")
    zipcode = ws.Range("E2")


    URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1="
    URL = URL & number & "+" & address & "&address2=&city=" & city & "&state=" & state & "&urbanCode=&postalCode=&zip=" & zipcode
    Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
    xmlHTTP.Open "GET", URL, False
    On Error GoTo NoConnect
    xmlHTTP.send
    Do Until xmlHTTP.ReadyState = 4 And xmlHTTP.Status = 200: DoEvents: Loop
    On Error GoTo 0
    Set html = CreateObject("htmlfile")
    htmlResponse = xmlHTTP.ResponseText
    If htmlResponse = Null Then
        MsgBox ("Aborted - HTML response was null")
        GoTo End_Prog
    End If

    SStr = "<span class=""zip4"">": EStr = "</span><br />" 'Searches for a string within 2 strings
    StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
    EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
    Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)

    ws.Range("F2").Value = Zip4Digit

GoTo End_Prog
NoConnect:
    If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub

答案 1 :(得分:0)

只是一个想法,您是否考虑过使用正则表达式而不是简单的字符串搜索?如果没有,VBA中有一些有用的模块。例如,如果要确定文件名是否为Excel文件(存储在TestStr中),则可以执行以下操作:

Dim oRe As VBScript_RegExp_10.regexp, TestStrIsExcel as Boolean

Dim oMatches As VBScript_RegExp_10.MatchCollection

Dim oMatch As VBScript_RegExp_10.Match

oRe.Pattern = "\.(xlm|xlsm|xls|xlsx)$"

oRe.IgnoreCase = True

' Find all occurrences

oRe.Global = False

Set oMatches = oRe.Execute(TestStr)

If oMatches.Count <> 0 Then TestStrIsExcel = true