如何使用VBA在Internet Explorer中从图/图中提取信息

时间:2018-09-06 15:05:58

标签: excel vba excel-vba web-scraping

<body id="ctl00_Body1" class="pBack"><div class="tipsy tipsy-w" style="top: 3279.47px; left: 1096.63px; visibility: visible; display: block; opacity: 0.8; z-index: 100000;"><div class="tipsy-arrow tipsy-arrow-w"></div><div class="tipsy-inner">Mon Sep 03 2018<br>357,938 Units</div></div>

我对VBA编程和Web剪贴非常陌生,所以请原谅我的编码有点破旧。

项目详细信息

我正在一个项目中,这个项目涉及剪贴网站以获取大量数据。

我到目前为止所做的事情

当前,我已经设法使用用户名和密码登录该网站,输入搜索参数,单击按钮以显示结果。在结果上,我能够提取表中的数据。我在下面给出了到目前为止已开发的代码。

问题陈述

现在我需要一个在图表中显示的信息,当我单击绘图点时就会显示该信息(我已经将图表的屏幕截图/链接与html代码一起封装了)。我想在单位文本之前获取数字。该链接在公司内部,所以我无法共享

enter image description here

这是我到目前为止开发的代码:

Sub MultiCpn_Div_Class()

Application.ScreenUpdating = False
Sheets("Sheet1").Activate
'Active sheet where the search parameter will be
Dim rc As Long
'loop variable
Dim CPN As String
'search parameter from active sheet

Dim MyHTML_Element As IHTMLElement
Dim MyURL As String
Dim HTMLTABL As MSHTML.IHTMLElement
Dim HTMLTABLC As MSHTML.IHTMLElementCollection
Dim HTMLDivc As MSHTML.IHTMLElementCollection
Dim HTMLDIV As MSHTML.IHTMLElement
Dim HTMLROW As MSHTML.IHTMLElement
Dim HTMLCELL As MSHTML.IHTMLElement
Dim Rownum, Colnum, Sheetnum As Long


MyURL = "XXXXXXXXX"

Set ie = New InternetExplorerMedium
'Had to declare ie as Medium because it kept on failing if i didnt
'ie.Silent = True
ie.Visible = False
ie.navigate MyURL
Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Set HTMLDOC = ie.document
HTMLDOC.all.ctl00_PageContent_UserName.Value = "username"
HTMLDOC.all.ctl00_PageContent_Password.Value = "password"
HTMLDOC.all.ctl00_PageContent_OKButton__Button.Click
'Providing user name, password and then signing in
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
rc = 2
'starting from second row of data
For rc = 2 To 5
Sheets("Sheet1").Activate

CPN = Range("A" & rc).Value
'assigning cell value to variable
ie.navigate "YYYY"
'After logging in i had to navigate to a specific link-YYY otherwise it kept showing an error

Do
Loop Until ie.readyState = READYSTATE_COMPLETE

Set IEDOC = ie.document
'reassigned the HTML document to the newly loaded page
IEDOC.all.cpnval.Value = CPN
IEDOC.all.run_Button.Click
'entering the search parameter and then clicking the search button

Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Application.Wait (Now + TimeValue("0:00:02"))
'had to include a wait for 2 seconds to cater to network delay issues

Set HTMLDOC = ie.document
 'again reassigned the HTML document to the newly loaded page which had all necessary data
Set HTMLDivc = HTMLDOC.getElementsByTagName("div")
'the page has multiple div classes inside which multiple tables are located, so getting a
'div collection and then looping over all tables data to search for particular tables that i want

For Each HTMLDIV In HTMLDivc

    If HTMLDIV.ID = "attributes_table" Then
        Set HTMLTABLC = HTMLDIV.Children

        For Each HTMLTABL In HTMLTABLC
            If HTMLTABL.className = "table table-striped temp" Then
                Sheets.Add.Name = "T1" & CPN
                Range("A1").Value = HTMLTABL.className
                Range("B1").Value = Now
                Rownum = 2
                'Once required table is found i'm copying that table data and pasting it into new sheet as required for each CPN

                For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                Colnum = 1

                    For Each HTMLCELL In HTMLROW.Children
                    Cells(Rownum, Colnum) = HTMLCELL.innerText
                    Colnum = Colnum + 1
                    Next HTMLCELL

                Rownum = Rownum + 1
                Next HTMLROW

            Sheets("Sheet1").Range("F" & rc).Value = Sheets("T1" & CPN).Range("B4")
            Sheets("Sheet1").Range("G" & rc).Value = Sheets("T1" & CPN).Range("B16")
            Sheets("Sheet1").Range("M" & rc).Value = Sheets("T1" & CPN).Range("B5")
            Sheets("Sheet1").Range("J" & rc).Value = Sheets("T1" & CPN).Range("B25")
            'After extracting the data to new sheet for each search parameter i'm copying that data and pasting into my active sheet
            End If

        Next HTMLTABL

    ElseIf HTMLDIV.ID = "award_table" Then
    'same as above creating new sheet for another table information
        Set HTMLTABLC = HTMLDIV.Children

        For Each HTMLTABL In HTMLTABLC
                If HTMLTABL.className = "table table-striped temp" Then
                    Sheets.Add.Name = "T2" & CPN
                    Range("A1").Value = HTMLTABL.className
                    Range("B1").Value = Now
                    Rownum = 2

                     For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                     Colnum = 1

                         For Each HTMLCELL In HTMLROW.Children
                         Cells(Rownum, Colnum) = HTMLCELL.innerText
                         Colnum = Colnum + 1

                         Next HTMLCELL
                         Rownum = Rownum + 1

                     Next HTMLROW
                     Dim i As Long
                     Dim sht As Worksheet
                     Dim LastRow As Long
                     Dim Moq, Moq2, Ven, Ven2 As Variant
                     Moq = ""
                     Ven = ""
                     Set sht = Sheets("T2" & CPN)
                     LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row

                            For i = 3 To LastRow
                                Moq2 = Sheets("T2" & CPN).Cells(i, 9).Value & vbCrLf
                                Moq = Moq + Moq2
                                Ven2 = Sheets("T2" & CPN).Cells(i, 1).Value & vbCrLf
                                Ven = Ven + Ven2
                            Next i

                    Sheets("Sheet1").Range("E" & rc).Value = Moq
                    Sheets("Sheet1").Range("K" & rc).Value = Ven

                End If
        Next HTMLTABL

        Else
            If HTMLDIV.ID = "full_avl_table" Then
             'same as above creating new sheet for another table information
                Set HTMLTABLC = HTMLDIV.Children
                For Each HTMLTABL In HTMLTABLC
                    If HTMLTABL.className = "table table-striped temp" Then
                        Sheets.Add.Name = "T3" & CPN
                        Range("A1").Value = HTMLTABL.className
                        Range("B1").Value = Now
                        Rownum = 2

                         For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                         Colnum = 1

                             For Each HTMLCELL In HTMLROW.Children
                             Cells(Rownum, Colnum) = HTMLCELL.innerText
                             Colnum = Colnum + 1

                             Next HTMLCELL
                             Rownum = Rownum + 1

                         Next HTMLROW
                         Dim i2 As Long
                         Dim sht2 As Worksheet
                         Dim LR As Long
                         Dim AVL, AVL2 As Variant
                         AVL = ""
                         Set sht2 = Sheets("T3" & CPN)
                         LR = sht2.Cells(sht2.Rows.Count, "A").End(xlUp).Row
                         For i2 = 3 To LR
                            Cells(i2, 7).Value = Cells(i2, 1) & " - " & Cells(i2, 2) & " - " & Cells(i2, 4) & " - " & Cells(i2, 6)
                            AVL2 = Sheets("T3" & CPN).Cells(i2, 7).Value & vbCrLf
                            AVL = AVL2 + AVL
                         Next i2

                        Sheets("Sheet1").Range("N" & rc).Value = AVL
                        Columns("N").AutoFit

                       End If
                Next HTMLTABL
            End If
    End If
    Next HTMLDIV

    Next
   ie.Quit
   Sheets("Sheet1").Activate
   Application.ScreenUpdating = True
End Sub

Close up Image of HTML Code 更新Debug page snippet

1 个答案:

答案 0 :(得分:0)

从那个小图像很难分辨出来。看到我的评论重新插入HTML。

根据我所看到的,您可以尝试以下操作:

Debug.Print Split(Split(HMLDoc.querySelector(".tipsy-inner").innerText, Chr(10))(1), Chr$(32))(0)

此位querySelector(".tipsy-inner")按类别指定页面上第一个元素,类别名称为tipsy-inner。然后,我解析.innerText上存在的定界符,以尝试对返回的字符串的数字部分进行拆分。

如果找不到,您可以尝试定时循环查找:

Dim t As Date, ele As Object
Const WAIT_TIME_SECS As Long = 10

t = Timer
Do
    DoEvents
    If Timer - t > WAIT_TIME_SECS Then Exit Do
    On Error Resume Next
    Set ele = HMLDoc.querySelector(".tipsy-inner")
    On Error GoTo 0
Loop While ele Is Nothing

If ele Is Nothing Then Exit Sub

Debug.Print Split(Split(ele.innerText, Chr(10))(1), Chr$(32))(0)