物品搜索宏

时间:2014-05-24 03:49:49

标签: excel vba excel-vba internet-explorer

我有一个Excel宏,可以搜索网站上的商品编号,并从网页上删除一些特定信息,如可用性,价格,页面网址。但在刮掉一些页面后,它给了我这个错误:

  

运行时错误91,对象变量或未设置块变量

我不知道为什么会一次又一次地发生这种情况。这个宏也很慢。是否可以与所有Internet Explorer(IE9,IE10,IE11等)一起使用?任何人都可以解决这个问题吗?

我有Office 2007和IE9。

Sub xtremeExcel()
Dim HTMLDoc As HTMLDocument
Dim oBrowser As InternetExplorer
Dim oHTML_Element As IHTMLElement
Set oBrowser = New InternetExplorer
oBrowser.Visible = True
oBrowser.navigate "http://cpc.farnell.com/"
Do
Loop Until oBrowser.readyState = READYSTATE_COMPLETE

For i = 3 To Sheet1.Range("A:A").Find(what:="*", searchdirection:=xlPrevious).Row
    Set HTMLDoc = oBrowser.document
    xc = 0
    Do While (HTMLDoc.getElementById("searchTerms") Is Nothing)
        Application.Wait (Now() + TimeValue("00:00:01"))
        xc = xc + 1
        If xc > 15 Then
            Exit Do
        End If
    Loop
    Set HTMLDoc = oBrowser.document
    HTMLDoc.getElementById("searchTerms").Value = Cells(i, 1).Value
    HTMLDoc.getElementById("go").Click

    xc = 0
    flag = 0
    Do While (HTMLDoc.getElementsByClassName("prodDetailAvailability")(0) Is Nothing)
        Application.Wait (Now() + TimeValue("00:00:01"))
        xc = xc + 1
        If xc > 15 Then
            Exit Do
        End If
    Loop

    If HTMLDoc.getElementsByClassName("prodDetailAvailability")(0) Is Nothing Then
         xc = 0
        Do While (HTMLDoc.getElementById("totalNoResultsSlotAtTop") Is Nothing)
            Application.Wait (Now() + TimeValue("00:00:01"))
            xc = xc + 1
            If xc > 10 Then
                Exit Do
            End If
        Loop
        flag = 2
    End If


    If flag <> 2 Then
        Sheet1.Cells(i, 2).Value = Replace(HTMLDoc.getElementsByClassName("prodDetailAvailability")(0).innerText, "Availability: ", "")
        unitprice = HTMLDoc.getElementsByClassName("unitprice")(0).innerText
        If InStr(1, unitprice, "(") > 0 Then
            Sheet1.Cells(i, 3).Value = Replace(Left(unitprice, InStr(1, unitprice, "(") - 1), "Unit Price: ", "")
            Sheet1.Cells(i, 4).Value = Mid(unitprice, InStr(1, unitprice, "(") + 1, InStr(1, unitprice, ")") - 1 - (InStr(1, unitprice, "(")))
        Else
            Sheet1.Cells(i, 3).Value = unitprice
        End If

        Sheet1.Cells(i, 5).Value = oBrowser.LocationURL
    Else
        Sheet1.Cells(i, 2).Value = "Not Found"
    End If
 oBrowser.GoBack
Next
End Sub

1 个答案:

答案 0 :(得分:0)

tl; dr;

您可以按照以下所示的方式循环播放;等待设置项目。

函数WaitUntilReady循环运行,直到可以设置.document.getElementById("sProdList")。该表保存要返回的信息。您可能希望添加一些错误处理和超时,以使循环不会冒无限循环的风险。

我在searchTerms数组中列出了2个搜索词。您可以扩展它。它会将搜索字词包含在URL中,因此可以直接搜索而不是总是着陆并重新定向。

注意:

如果计划进行大量搜索或循环搜索结果页面,我将调查是否有可用的API。下面仅显示每个页面的第一页,但已经为您提供了大量可使用的代码。如果尝试XHR,您的IP将被阻止,因此下面的方法是一个折衷方案。我很乐意放弃任何解决方案中的IE。


示例页面结果:

sample results

示例代码输出:

Sample code output


VBA:

Option Explicit

Public Sub GetInfo()
    Dim IE As InternetExplorer, a As HTMLTable, wsTarget As Worksheet, currSearch As Long, searchTerms
    Application.ScreenUpdating = False

    Set IE = New InternetExplorer
    Set wsTarget = ThisWorkbook.Worksheets("Sheet2")
    wsTarget.UsedRange.ClearContents
    searchTerms = Array("CCTV", "FANS")

    With IE
        .Visible = True
        ApplyHeaders wsTarget
        For currSearch = LBound(searchTerms) To UBound(searchTerms)
            .navigate "http://cpc.farnell.com/search?st=" & searchTerms(currSearch) & "&aka_re=1"

            Set a = WaitUntilReady(IE)

            Dim allRows As Object,classNames()
            Set allRows = a.getElementsByClassName("altRow")
            classNames = Array("productImage mftrPart", "sku", "description", "availability", "priceFor", "priceBreak")

            Dim i As Long, y As Long, r As Long, tempString As String, j As Long, k As Long
            r = GetLastRow(wsTarget, 1) + 1

            For i = 0 To allRows.Length - 1
                For j = LBound(classNames) To UBound(classNames)
                    For k = 0 To allRows(i).getElementsByClassName(classNames(j)).Length - 1
                        tempString = tempString & vbNewLine & TidyString(allRows(i).getElementsByClassName(classNames(j))(k).innerText)
                    Next k
                    With wsTarget
                        .Cells(r, j + 1).Value = tempString
                    End With
                    tempString = vbNullString
                Next j
                r = r + 1
            Next i
            r = r + 1
        Next currSearch
        .Quit
    End With
    TidySheet wsTarget
    Application.ScreenUpdating = True
End Sub

Public Function GetLastRow(ByRef ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

Public Function TidyString(ByVal inputString As String) As String
    TidyString = Application.WorksheetFunction.Clean(inputString)
    TidyString = Trim$(Replace$(Replace$(TidyString, Chr$(10), vbNullString), vbCrLf, vbNullString))
End Function

Public Sub ApplyHeaders(ByVal wsTarget As Worksheet)
    Dim headers(), i As Long
    headers = Array("Manufacturer Part No", "Order Code", "Manufacturer / Description", "Avail", "Price For", "Price Ex. VAT (Inc. VAT)")
    For i = LBound(headers) To UBound(headers)
        wsTarget.Cells(1, i + 1).Value = headers(i)
    Next i
End Sub

Public Function WaitUntilReady(ByVal IE As InternetExplorer) As HTMLTable
    With IE
        While .Busy Or .readyState < 4: DoEvents: Wend
        Do
            DoEvents
            On Error Resume Next
            Set WaitUntilReady = .document.getElementById("sProdList")
            On Error GoTo 0
        Loop While WaitUntilReady Is Nothing
    End With
End Function

Public Sub TidySheet(ByVal wsTarget As Worksheet)
    With wsTarget
        .Rows("2:" & .Cells.Find(What:="*", After:=.Range("A1"), SearchOrder:=xlByRows, SearchDirection:=xlPrevious).Row).RowHeight = 25
        .UsedRange.Columns.AutoFit
    End With
End Sub