我想访问一个名为Valueline的网站, 链接:
,我想登录,因为某些研究受到密码保护。我尝试将密码保存到Internet Explorer上,每次使用上述链接时,该密码似乎都会登录。但是,当我要访问其他页面时,它会将我注销。我想从此页面抓取数据,例如链接:
https://research.valueline.com/research#sec=company&sym=AAPL
我似乎无法正常使用登录名,但是网络抓取部分工作正常。您可以注册一个试用帐户进行测试。
到目前为止,这是我的代码。感谢您的帮助/咨询。
Sub Macro1()
Dim ie As Object
Set Rng = Range("A5:A5")
Set Row = Range(Rng.Offset(1, 0), Rng.Offset(1, 0).End(xlDown))
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
For Each Row In Rng
'Login Website
.navigate "https://jump.valueline.com/login.aspx?"
Application.Wait (Now + TimeValue("0:00:05"))
'Research Page
.navigate "https://research.valueline.com/research#sec=company&sym=" & Range("A" & Row.Row).Value
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
While ie.readyState <> 4
Wend
'Application.Wait (Now + TimeValue("0:00:25"))
Dim tblName As Object
Dim span As Object
Dim price As String
On Error Resume Next
'Inserts the Name
'Last Price
Range("B" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(9).innerText
'Dividend yield
Range("C" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(13).innerText
Range("D" & Row.Row).Value = doc.getElementsByClassName("alignLeft")(14).innerText
Range("E" & Row.Row).Value = doc.getElementsByClassName("rank-text")(0).innerText
Range("F" & Row.Row).Value = doc.getElementsByClassName("rank-text")(1).innerText
Range("G" & Row.Row).Value = doc.getElementsByClassName("rank-text")(2).innerText
Next Row
End With
ie.Quit
结束子
答案 0 :(得分:0)
尝试使用此代码,但是如果网站发生更改(例如,第9号前面的额外alignLeft类元素),则该代码将无法正常工作。如果站点已更改其代码,则应添加其他检查。 因此,我建议您使用独立于网站代码的API。
此代码仍然有些丑陋(例如,使用On Error Resume Next
完成的文档检查),但已得到改进(阅读注释)并且可以正常工作。
尚不能正常工作!在制品
Sub Macro1()
Dim ie As Object
Dim rng As Excel.Range 'Always declare all vars
Dim row As Excel.Range
Dim wb As Excel.Workbook 'Don't rely on implicit ActiveWorkbook or Sheet, declare!
Dim sh As Excel.Worksheet
Set wb = ThisWorkbook
Set sh = wb.Worksheets(1)
Set rng = sh.Range("A5:A5")
Set row = sh.Range(rng.Offset(1, 0), rng.Offset(1, 0).End(xlDown))
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
'Login Website
.navigate "https://jump.valueline.com/login.aspx?"
'Application.Wait (Now + TimeValue("0:00:05")) 'Don't use Application.Wait
Do
DoEvents
Loop Until Not ie.Busy And ie.readyState = 4 'Check for busy too, or better use InternetExploreres Withevent DocumentComplete and check for sth. like pDisp.object = ie.object, I will evaluate this soon.
Dim doc As HTMLDocument
Set doc = .Document
If ie.LocationURL <> "https://jump.valueline.com/Loggedon.aspx" Then 'if loggen in ie gets redirected to loggedon.aspx
doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_txtUserID").Value = "valuelinetester@gmail.com" 'fill login form an submit
doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_txtUserPw").Value = "Valueline1"
doc.getelementbyid("ctl00_ContentPlaceHolder_LoginControl_btnLogin").Click ''click submit, because the forms code uses some strange javascript I don't know what it does. Usuallay you refer to the form direct and submit it or use a Get/Post request.
Else
'Already logged in
End If
Dim FirstSearchDone As Boolean
FirstSearchDone = False
For Each row In rng
'Research Page
If FirstSearchDone Then
.navigate "https://research.valueline.com/secure/research#sec=company&sym=" & row.Value
Else
If MsgBox("First search has to be done manually. Please type " & row.Value & _
" in searchbox and click on result. After Site is loaded click OK.", vbOKCancel) = vbOK Then 'First search has to be done manually
FirstSearch = True
Else
Exit Sub
End If
End If
Do
DoEvents
Loop Until Not ie.Busy And ie.readyState = 4
On Error Resume Next ' If IE not ready error occurs and loop starts again
Do
Err.Clear 'clear error to detect ie.document set
'Inserts the Name
'Last Price
With sh 'uses explicit sheet instead of former implicit activesheet
.Range("B" & row.row).Value = doc.getElementsByClassName("alignLeft")(9).innerText
'Dividend yield
.Range("C" & row.row).Value = doc.getElementsByClassName("alignLeft")(13).innerText
.Range("D" & row.row).Value = doc.getElementsByClassName("alignLeft")(14).innerText
.Range("E" & row.row).Value = doc.getElementsByClassName("rank-text")(0).innerText
.Range("F" & row.row).Value = doc.getElementsByClassName("rank-text")(1).innerText
.Range("G" & row.row).Value = doc.getElementsByClassName("rank-text")(2).innerText
End With
Loop While Err.Number = 91 'error if ie.document not set
On Error GoTo 0
Err.Clear
Next row
.Quit
End With
Set ie = Nothing
End Sub