VBA从网页抓取数据到变量

时间:2013-11-30 23:01:47

标签: html excel excel-vba getelementsbyname vba

我可以通过 VBA 将 Excel 中的值填入网站表单,并点击提交按钮。但提交后会打开另一个标题为“Results - Research Randomizer”的页面,我不知道如何获取该页面中“Set #1”里的值。谁能给我一些思路,把这些值读取到变量中?我的代码如下:

'Fills the Research Randomizer form with the values in B3:E3 of the active
'sheet and submits it. Reuses an already open IE window whose title contains
'myPageTitle, otherwise starts a new IE instance and loads myPageURL.
Sub OpenPage()
    Const myPageTitle As String = "Research Randomizer Form v4.0"
    Const myPageURL As String = "http://www.randomizer.org/form.htm"

    'In VBA every variable needs its own "As" clause; in "Dim a, b As String"
    'only b is a String and a silently becomes a Variant.
    Dim NoofSet As String, NoPSet As String
    Dim RangeBeg As String, RangeEnd As String

    Dim myIE As SHDocVw.InternetExplorer
    Dim doc As HTMLDocument
    Dim PageForm As HTMLFormElement
    Dim NumSetsBox As HTMLInputElement
    Dim NumPerSetBox As HTMLInputElement
    Dim HrangeBeg As HTMLInputElement, HrangeEnd As HTMLInputElement

    'read the form inputs from the worksheet
    NoofSet = Range("b3").Value
    NoPSet = Range("c3").Value
    RangeBeg = Range("d3").Value
    RangeEnd = Range("e3").Value

    'check if page is already open
    Set myIE = GetOpenIEByTitle(myPageTitle, False)
    If myIE Is Nothing Then
        'page isn't open yet: create a new IE instance, show it and load the page
        Set myIE = GetNewIE
        myIE.Visible = True
        If LoadWebPage(myIE, myPageURL) = False Then
            'page wasn't loaded
            MsgBox "Couldn't open page"
            Exit Sub
        End If
    End If

    'wait until the browser is idle; readyState alone can still report
    'COMPLETE for the previous page, so Busy is checked as well
    Do
        DoEvents
    Loop Until Not myIE.Busy And myIE.readyState = READYSTATE_COMPLETE

    Set doc = myIE.document
    Set PageForm = doc.forms(0)

    'number of sets
    Set NumSetsBox = PageForm.elements("numofsets")
    NumSetsBox.Value = NoofSet

    'numbers per set
    Set NumPerSetBox = PageForm.elements("numperset")
    NumPerSetBox.Value = NoPSet

    'lower and upper bound of the number range
    Set HrangeBeg = PageForm.elements("rangebeg")
    HrangeBeg.Value = RangeBeg
    Set HrangeEnd = PageForm.elements("rangeend")
    HrangeEnd.Value = RangeEnd

    'Submit the form (like clicking the 'Submit' button) to navigate to next page
    PageForm.Button.Click

    'Wait for the new page to load
    Do
        DoEvents
    Loop Until Not myIE.Busy And myIE.readyState = READYSTATE_COMPLETE
    myIE.Visible = True

    'Working fine till here
    'TODO: pull the data from the results page (2nd website)
End Sub

'Creates a fresh Internet Explorer instance, points it at a blank page
'and hands it back to the caller.
Function GetNewIE() As SHDocVw.InternetExplorer
  Dim browser As SHDocVw.InternetExplorer

  Set browser = New SHDocVw.InternetExplorer
  'begin on an empty document
  browser.Navigate2 "about:Blank"

  Set GetNewIE = browser
End Function

'Navigates i_IE to i_URL and waits until the browser has finished loading.
'Returns True only when the loaded document's URL equals i_URL exactly.
'Returns False otherwise, including when loading has not finished within
'LOAD_TIMEOUT_SECONDS (the wait is polled once per second).
Function LoadWebPage(i_IE As SHDocVw.InternetExplorer, _
                     i_URL As String) As Boolean
  Const LOAD_TIMEOUT_SECONDS As Long = 60
  Dim deadline As Date

  deadline = DateAdd("s", LOAD_TIMEOUT_SECONDS, Now)

  With i_IE
    'open page
    .navigate i_URL
    'wait until IE finished loading the page; Busy is checked too because
    'readyState can still report COMPLETE for the previous page right after
    'navigation has been requested
    Do While .Busy Or .readyState <> READYSTATE_COMPLETE
      'give up instead of blocking Excel forever on a page that never loads
      If Now > deadline Then Exit Function
      Application.Wait Now + TimeValue("0:00:01")
    Loop
    'check if page could be loaded
    LoadWebPage = (.document.URL = i_URL)
  End With
End Function

'Finds an open IE window whose document URL equals i_URL.
'Returns the first matching window, or Nothing when no window matches.
Function GetOpenIEByURL(ByVal i_URL As String) As SHDocVw.InternetExplorer
  Dim objShellWindows As New SHDocVw.ShellWindows
  Dim candidate As Object
  Dim docType As String
  Dim docUrl As String
  Dim isMatch As Boolean

  'loop over all Shell-Windows
  For Each candidate In objShellWindows
    isMatch = False

    'Errors are expected here (empty entries, windows whose document has no
    'URL). Each access is a plain assignment guarded by Err.Number, because
    'an error raised inside a block-If condition under Resume Next would
    'continue INSIDE the If body and could wrongly report a match.
    On Error Resume Next
    docType = TypeName(candidate.document)
    If Err.Number = 0 Then docUrl = candidate.document.URL
    'if the document is of type HTMLDocument, it is an IE window
    If Err.Number = 0 Then isMatch = (docType = "HTMLDocument") And (docUrl = i_URL)
    On Error GoTo 0

    If isMatch Then
      'leave, we found the right window
      Set GetOpenIEByURL = candidate
      Exit Function
    End If
  Next

  'no matching window
  Set GetOpenIEByURL = Nothing
End Function

'Finds an open IE window by its document title.
'  i_Title      - title to look for; it is compared with the Like operator,
'                 so Like wildcards in it are honoured
'  i_ExactMatch - when False, i_Title may appear anywhere in the window title
'Returns the first matching window, or Nothing when no window matches.
Function GetOpenIEByTitle(i_Title As String, _
                          Optional ByVal i_ExactMatch As Boolean = True) As SHDocVw.InternetExplorer
  Dim objShellWindows As New SHDocVw.ShellWindows
  Dim candidate As Object
  Dim titlePattern As String
  Dim docType As String
  Dim docTitle As String
  Dim isMatch As Boolean

  'build the pattern in a local variable: i_Title is passed ByRef, so
  'changing it would silently alter the caller's variable
  titlePattern = i_Title
  If Not i_ExactMatch Then titlePattern = "*" & i_Title & "*"

  'loop over all Shell-Windows
  For Each candidate In objShellWindows
    isMatch = False

    'Errors are expected here (empty entries, windows whose document has no
    'Title). Each access is a plain assignment guarded by Err.Number, because
    'an error raised inside a block-If condition under Resume Next would
    'continue INSIDE the If body and could wrongly report a match.
    On Error Resume Next
    docType = TypeName(candidate.document)
    If Err.Number = 0 Then docTitle = candidate.document.Title
    'if the document is of type HTMLDocument, it is an IE window
    If Err.Number = 0 Then isMatch = (docType = "HTMLDocument") And (docTitle Like titlePattern)
    On Error GoTo 0

    If isMatch Then
      'leave, we found the right window
      Set GetOpenIEByTitle = candidate
      Exit Function
    End If
  Next

  'no matching window
  Set GetOpenIEByTitle = Nothing
End Function

1 个答案:

答案 0 :(得分:0)

此代码会查找已打开的“Results”窗口,然后将该页面的 HTML 源代码赋值给变量(my_var)。之后,您就可以从该变量中提取所需的内容。

' Find the open instance of IE whose title is "Results - Research Randomizer"
' and copy the HTML of its page body into my_var.
' my_var is left empty when no such window is open.
' NOTE: the final "On Error GoTo 0" restores default error handling; re-arm
' your own handler afterwards if the enclosing procedure uses one.
    Set objShell = CreateObject("Shell.Application")
    IE_count = objShell.Windows.Count
    Set ie = Nothing
    my_var = vbNullString

    For x = 0 To (IE_count - 1)
        ' reset per window so a failed read cannot leave the previous
        ' window's values behind
        my_url = vbNullString
        my_title = vbNullString

        ' some shell windows have no HTML document; ignore errors only
        ' while probing them
        On Error Resume Next
        my_url = objShell.Windows(x).document.Location
        my_title = objShell.Windows(x).document.Title
        On Error GoTo 0

        If my_title = "Results - Research Randomizer" Then
            Set ie = objShell.Windows(x)
            Exit For
        End If
    Next

    ' only read the page when the results window was actually found
    If Not ie Is Nothing Then
        my_var = ie.document.body.innerhtml
    End If