使用excel VBA从URL获取股票清单

时间:2019-02-20 18:54:56

标签: excel vba web-scraping

我想从以下站点将文件下载到本地工作表:https://www.bseindia.com/corporates/List_Scrips.aspx# 以下是经过研究后尝试的代码。 挑战在于,如何避免页面重新提交(重试,取消警告窗口) 其次,我需要下载5mb文件并解压缩到当前的本地工作表中。

Sub bsecode()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
ie.Visible = True
'To open the website
.navigate "https://www.bseindia.com/corporates/List_Scrips.aspx#"
 Do While ie.readyState <> 4
       Sleep 1000
   Loop
   ie.document.getElementsByName("ctl00$ContentPlaceHolder1$btnSubmit")(0).Click
   Sleep 1000
    Do While ie.readyState <> 4
    Sleep 1000
   Loop
  'To download the file
ie.navigate "javascript:__doPostBack('ctl00$ContentPlaceHolder1$lnkDownload','')"
'Do While .Busy: DoEvents: Loop
End With
End Sub

1 个答案:

答案 0 :(得分:0)

我不是sendkey的粉丝,但以下作品

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    Dim ie As New InternetExplorer
    With ie
        .Visible = True
        .Navigate2 "https://www.bseindia.com/corporates/List_Scrips.aspx#"

        While .Busy Or .readyState < 4: DoEvents: Wend

        With .document

            'status
            .querySelector("[value='Active']").Selected = True 'Suspended,Delisted,Select

            'group
            .querySelector("[value='Select']").Selected = True ' "B ", "C " etc

            'industry
            .querySelector("[value='Advertising & Media']").Selected = True 'Agrochemicals etc

            'segment

            .querySelector("#ContentPlaceHolder1_ddSegment  [value='Equity']").Selected = True

            'Submit
            .querySelector("#ContentPlaceHolder1_btnSubmit").Click
            Const MAX_WAIT_SEC As Long = 5
            Dim t As Date
            While ie.Busy Or ie.readyState < 4: DoEvents: Wend

            Dim download As Object
            t = Timer
            Do
            On Error Resume Next
            Set download = .querySelector("#ContentPlaceHolder1_lnkDownload")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While download Is Nothing
            If Not download Is Nothing Then
                download.Click
            End If

            Application.Wait Now + TimeSerial(0, 0, 10)
            Application.SendKeys "%N", True
            Application.SendKeys "%S", True
            Application.Wait Now + TimeSerial(0, 0, 10)
            Application.SendKeys "%O", True
        End With
        Stop
        .Quit
    End With
End Sub

硒的使用要容易得多,尽管您需要将下载文件的扩展名从.tmp更改为.csv。安装selenium basic后,请确保进入VBE>工具>引用,并添加对Microsoft脚本运行时的引用。

Option Explicit   
Public Sub MakeSelections()
    Dim d As WebDriver
    Set d = New ChromeDriver
    Const URL = "https://www.bseindia.com/corporates/List_Scrips.aspx#"
    With d
        .Start "Chrome"
        .get URL
               'status
            .FindElementByCss("#ContentPlaceHolder1_ddlStatus").AsSelect.SelectByText "Suspended"
            'group
            .FindElementByCss("#ContentPlaceHolder1_ddlGroup").AsSelect.SelectByText "Select"   ' "B ", "C " etc
            'industry
           .FindElementByCss("#ContentPlaceHolder1_ddlIndustry").AsSelect.SelectByText "Agrochemicals" 'Agrochemicals etc
            'segment
            .FindElementByCss("#ContentPlaceHolder1_ddSegment").AsSelect.SelectByText "Equity"

            .FindElementByCss("#ContentPlaceHolder1_btnSubmit").Click
            .FindElementByCss("#ContentPlaceHolder1_lnkDownload").Click
        .Quit
    End With
End Sub