如何使用vba从img中获取alt值

时间:2014-02-25 08:01:42

标签: excel vba excel-vba web-scraping

我试图从this page

获取价格表

为此,我有以下代码:

一切正常,只有最后一列的img alt标签没有显示在。这段代码非常精细,只有最后一列的类没有被提取。

Sub TableExample()

Dim IE As Object
Dim doc As Object
Dim strURL As String

    If Range("B2").Value <> "NA" Then
        strURL = "http://www.idealo.co.uk/compare/351072/canon-500d-77mm-close-up-lens.html"
        Set IE = CreateObject("InternetExplorer.Application")
        With IE
            '.Visible = True
            .navigate strURL
            Do Until .readyState = 4: DoEvents: Loop
            Do While .Busy: DoEvents: Loop
            Set doc = IE.document
            GetAllTables doc
            .Quit
        End With
    End If

End Sub

Sub GetAllTables(doc As Object)

    Dim ws As Worksheet
    Dim rng As Range
    Dim tbl As Object
    Dim rw As Object
    Dim cl As Object
    Dim tabno As Long
    Dim nextrow As Long
    Dim i As Long

    Set ws = Sheets("Sheet1")

    For Each tbl In doc.getElementsByTagName("TABLE")
        tabno = tabno + 1
        nextrow = nextrow + 1
        Set rng = ws.Range("B" & nextrow)
        rng.Offset(, -1) = "Table " & tabno
        On Error GoTo Err1:
        If tabno = 10 Then
            For Each rw In tbl.Rows
                colno = 6
                For Each cl In rw.Cells
                    If colno = 6 And nextrow > 10 Then
                        Set classColl = doc.getElementsByClassName("cellborder")
                        Set imgTgt = classColl(nextrow - 11).getElementsByTagName("img")
                        rng.Value = imgTgt(0).getAttribute("alt")
                    Else
                        rng.Value = cl.innerText
                    End If
                    Set rng = rng.Offset(, 1)
                    i = i + 1
                    colno = colno + 1
                Next cl
                nextrow = nextrow + 1
                Set rng = rng.Offset(1, -i)
                '     Call trim1
                i = 0
            Next rw
            Exit Sub
        End If
    Next tbl

Err1:
'Call comp
'    ws.Cells.ClearFormats
End Sub

2 个答案:

答案 0 :(得分:0)

您需要做的就是指定要查找的ClassColl图像。

试试这个:

Set classColl = doc.getElementsByClassName("cellborder")
Set imgTgt = classColl(0).getElementsByTagName("img")
Rng.Value = imgTgt(0).getAttribute("alt")

答案 1 :(得分:0)

GetAllTables子例程尝试此(非常脏)变体:

Sub GetAllTables(doc As Object)

    Dim ws As Worksheet
    Dim rng As Range
    Dim tbl As Object
    Dim rw As Object
    Dim cl As Object
    Dim tabno As Long
    Dim nextrow As Long
    Dim i As Long

    Set ws = Sheets("Sheet1")

    'Improvised way of getting images.
    Dim imagesColl As New Collection
    Set imgColl = doc.getElementsByClassName("noborder")
    For Each imgElem In imgColl
        If imgElem.getAttribute("height") = 30 And imgElem.getAttribute("width") = 80 Then
            imagesColl.Add imgElem.getAttribute("alt")
        End If
    Next imgElem

    For Each tbl In doc.getElementsByTagName("table")
        tabno = tabno + 1
        If tabno = 10 Then
            nextrow = 1
            imgIter = 1
            For Each rw In tbl.Rows
                colno = 1
                For Each cl In rw.Cells
                    Set rng = ws.Cells(nextrow, colno)
                    If colno = 5 Then
                        rng.Value = imagesColl.Item(imgIter)
                        imgIter = imgIter + 1
                    Else
                        rng.Value = cl.innerText
                    End If
                    colno = colno + 1
                Next cl
                nextrow = nextrow + 1
            Next rw
            Exit Sub
        End If
    Next tbl

End Sub

事情是,你真的不必做桌子风格。如果你知道要定位哪些元素,那么为DOM之外的数据创建一个集合(即使用普通 VBA集合)会更好。恕我直言。

无论如何,上面是 经过试用和 。如果这有帮助,请告诉我们。