Excel Web Scraper-受密码保护的网站

时间:2018-07-11 00:27:14

标签: vba excel-vba passwords

我想访问一个名为Valueline的网站, 链接:

  

https://jump.valueline.com/login.aspx

,我想登录,因为某些研究受到密码保护。我尝试将密码保存到Internet Explorer上,每次使用上述链接时,该密码似乎都会登录。但是,当我要访问其他页面时,它会将我注销。我想从此页面抓取数据,例如链接:

  

https://research.valueline.com/research#sec=company&sym=AAPL

我似乎无法正常使用登录名,但是网络抓取部分工作正常。您可以注册一个试用帐户进行测试。

到目前为止,这是我的代码。感谢您的帮助/咨询。

Sub Macro1()
Dim ie As Object
Set Rng = Range("A5:A5")


Set Row = Range(Rng.Offset(1, 0), Rng.Offset(1, 0).End(xlDown))

    Set ie = CreateObject("InternetExplorer.Application")
    With ie
    .Visible = True

    For Each Row In Rng
    'Login Website
    .navigate "https://jump.valueline.com/login.aspx?"
    Application.Wait (Now + TimeValue("0:00:05"))

    'Research Page
    .navigate "https://research.valueline.com/research#sec=company&sym=" & Range("A" & Row.Row).Value

    Do
    DoEvents
    Loop Until ie.readyState = READYSTATE_COMPLETE
    Dim doc As HTMLDocument
    Set doc = ie.document
    While ie.readyState <> 4
    Wend

    'Application.Wait (Now + TimeValue("0:00:25"))

    Dim tblName As Object
    Dim span As Object
    Dim price As String


    On Error Resume Next
    'Inserts the Name

    'Last Price

    Range("B" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(9).innerText

    'Dividend yield
    Range("C" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(13).innerText
    Range("D" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(14).innerText

    Range("E" & Row.Row).Value = doc.getElementsByClassName("rank-text")(0).innerText
    Range("F" & Row.Row).Value = doc.getElementsByClassName("rank-text")(1).innerText
    Range("G" & Row.Row).Value = doc.getElementsByClassName("rank-text")(2).innerText


    Next Row


   End With
   ie.Quit

结束子

1 个答案:

答案 0 :(得分:0)

尝试使用此代码,但是如果网站发生更改(例如,第9号前面的额外alignLeft类元素),则该代码将无法正常工作。如果站点已更改其代码,则应添加其他检查。 因此,我建议您使用独立于网站代码的API。

此代码仍然有些丑陋(例如,使用On Error Resume Next完成的文档检查),但已得到改进(阅读注释)并且可以正常工作。

尚不能正常工作!在制品

Sub Macro1()
Dim ie As Object
Dim rng As Excel.Range  'Always declare all vars
Dim row As Excel.Range
Dim wb As Excel.Workbook 'Don't rely on implicit ActiveWorkbook or Sheet, declare!
Dim sh As Excel.Worksheet
Set wb = ThisWorkbook
Set sh = wb.Worksheets(1)

Set rng = sh.Range("A5:A5")


Set row = sh.Range(rng.Offset(1, 0), rng.Offset(1, 0).End(xlDown))

Set ie = CreateObject("InternetExplorer.Application")
With ie
    .Visible = True
    'Login Website
    .navigate "https://jump.valueline.com/login.aspx?"
    'Application.Wait (Now + TimeValue("0:00:05")) 'Don't use Application.Wait
    Do
        DoEvents
    Loop Until Not ie.Busy And ie.readyState = 4 'Check for busy too, or better use InternetExploreres Withevent DocumentComplete and check for sth. like pDisp.object = ie.object, I will evaluate this soon.
    Dim doc As HTMLDocument

    Set doc = .Document

    If ie.LocationURL <> "https://jump.valueline.com/Loggedon.aspx" Then 'if loggen in ie gets redirected to loggedon.aspx
        doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_txtUserID").Value = "valuelinetester@gmail.com" 'fill login form an submit
        doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_txtUserPw").Value = "Valueline1"
        doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_btnLogin").Click ''click submit, because the forms code uses some strange javascript I don't know what it does. Usuallay you refer to the form direct and submit it or use a Get/Post request.
    Else
        'Already logged in
    End If
    Dim FirstSearchDone As Boolean
    FirstSearchDone = False

    For Each row In rng
    'Research Page
        If FirstSearchDone Then
            .navigate "https://research.valueline.com/secure/research#sec=company&sym=" & row.Value
        Else
            If MsgBox("First search has to be done manually. Please type " & row.Value & _
                " in searchbox and click on result. After Site is loaded click OK.", vbOKCancel) = vbOK Then 'First search has to be done manually
                FirstSearch = True
            Else
                Exit Sub
            End If
        End If


        Do
            DoEvents
        Loop Until Not ie.Busy And ie.readyState = 4

        On Error Resume Next ' If IE not ready error occurs and loop starts again
        Do
            Err.Clear 'clear error to detect ie.document set

           'Inserts the Name

            'Last Price
            With sh 'uses explicit sheet instead of former implicit activesheet
                .Range("B" & row.row).Value = doc.getElementsByClassName("alignLeft")(9).innerText

                'Dividend yield
                .Range("C" & row.row).Value = doc.getElementsByClassName("alignLeft")(13).innerText
                .Range("D" & row.row).Value = doc.getElementsByClassName("alignLeft")(14).innerText

                .Range("E" & row.row).Value = doc.getElementsByClassName("rank-text")(0).innerText
                .Range("F" & row.row).Value = doc.getElementsByClassName("rank-text")(1).innerText
                .Range("G" & row.row).Value = doc.getElementsByClassName("rank-text")(2).innerText
            End With
        Loop While Err.Number = 91 'error if ie.document not set



        On Error GoTo 0
        Err.Clear

    Next row

    .Quit
End With
Set ie = Nothing
End Sub