Excel VBA打开谷歌的第一个搜索结果页面

时间:2013-02-06 21:54:08

标签: html vba excel-vba html-parsing msxml

我必须使用excel Macro打开谷歌搜索页面。在excel中提供搜索参数后,我能够成功打开谷歌搜索页面。但是,我的任务是打开第一个返回的搜索结果页面,并在该页面中进行一些数据提取。我使用下面的代码。

假如我搜索“ Sachin Tendulkar wiki ”,我应该可以在搜索结果中打开第一页。到目前为止我的代码如下。

Dim ie As InternetExplorer
Dim RegEx As RegExp, RegMatch As MatchCollection
Dim MyStr As String
Dim pDisp As Object
Set ie = New InternetExplorer
Set RegEx = New RegExp
Dim iedoc As Object

'Search google for "something"
ie.Navigate "http://www.google.com.au/search?hl=en&q=sachin+tendulkar+wiki&meta="

'Loop unitl ie page is fully loaded
Do Until ie.ReadyState = READYSTATE_COMPLETE
Loop



MyStr = ie.Document.body.innertext
Set RegMatch = RegEx.Execute(MyStr)

'If a match to our RegExp searchstring is found then launch this page
If RegMatch.Count > 0 Then
    ie.Navigate RegMatch(0)
    Do Until ie.ReadyState = READYSTATE_COMPLETE
    Loop
         MsgBox "Loaded"
         'show internet explorer
    ie.Visible = True
    'Private Sub ie_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    Set iedoc = ie.Application.Document
    'iedoc.getElementById("divid").Value = "poS0"
    'MsgBox iedoc

    'ie.Navigate iedoc.getelementsbytagname("ol")(0).Children(0).getelementsbytagname("a")(0).href
    ie.Navigate iedoc.getelementsbyclassname("divid")("poS0").href
    Else
    MsgBox "No linkedin profile found"
End If

Set RegEx = Nothing
Set ie = Nothing

我在Google搜索页面中查看了网页来源。我有一个特定的div id =“pos0”,它是第一个搜索结果的id。我必须让IE导航到div id =“pos0”的页面。我无法在VBA中完成这件事。有人可以帮帮我吗?

谢谢&问候, 拉梅什

2 个答案:

答案 0 :(得分:4)

你有几个问题。首先访问文档对象ie.Document而不是ie.Application.Document。我已经更新了您的代码,以显示如何使用子字符串快速找到第一个URL。

Dim ie As InternetExplorer
Dim RegEx As RegExp, RegMatch As MatchCollection
Dim MyStr As String
Dim pDisp As Object
Set ie = New InternetExplorer
Set RegEx = New RegExp
Dim iedoc As Object

'Search google for "something"
ie.Navigate "http://www.google.com.au/search?hl=en&q=sachin+tendulkar+wiki&meta="

'Loop unitl ie page is fully loaded
Do Until ie.ReadyState = READYSTATE_COMPLETE
Loop



MyStr = ie.Document.body.innertext
Set RegMatch = RegEx.Execute(MyStr)

'If a match to our RegExp searchstring is found then launch this page
If RegMatch.Count > 0 Then
    ie.Navigate RegMatch(0)
    Do Until ie.ReadyState = READYSTATE_COMPLETE
    Loop
         MsgBox "Loaded"
         'show internet explorer
    ie.Visible = True
    'Private Sub ie_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    '****************************************
    'EDITS
    '****************************************
    Set iedoc = ie.Document

    'create a variable to hold the text
    Dim extractedHTML As String
    'start and end points for the substring
    Dim iStart, iEnd As Integer
    'get the element with ID of search - this is where the results start
    extractedHTML = iedoc.getElementById("search").innerHTML
    'find the first href as this will be the first link, add 1 to encompass the quote
    iStart = InStr(1, extractedHTML, "href=", vbTextCompare) + Len("href=") + 1
    'locate the next quote as this will be the end of the href
    iEnd = InStr(iStart, extractedHTML, Chr(34), vbTextCompare)
    'extract the text
    extractedHTML = Mid(extractedHTML, iStart, iEnd - iStart)
    'go to the URL
    ie.Navigate extractedHTML

    '****************************************
    'End EDITS
    '****************************************
    Else
    MsgBox "No linkedin profile found"
End If

Set RegEx = Nothing
Set ie = Nothing

答案 1 :(得分:2)

您可以考虑使用xmlHTTP对象而不是使用IE HTTP请求更容易,更快

以下是示例代码

Sub xmlHttp()

    Dim URl As String, lastRow As Long
    Dim xmlHttp As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object


    lastRow = Range("A" & Rows.Count).End(xlUp).Row

    For i = 2 To lastRow

        URl = "https://www.google.co.in/search?q=" & Cells(i, 1)

        Set xmlHttp = CreateObject("MSXML2.XMLHTTP")
        xmlHttp.Open "GET", URl, False
        xmlHttp.setRequestHeader "Content-Type", "text/xml"
        xmlHttp.send

        Set html = CreateObject("htmlfile")
        html.body.innerHTML = xmlHttp.ResponseText
        Set objResultDiv = html.getelementbyid("rso")
        Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
        Set link = objH3.getelementsbytagname("a")(0)


        str_text = Replace(link.innerHTML, "<EM>", "")
        str_text = Replace(str_text, "</EM>", "")

        Cells(i, 2) = str_text
        Cells(i, 3) = link.href
    Next
End Sub

enter image description here

HTH
桑托什