使用VBA搜索网站

时间:2018-05-30 07:13:52

标签: javascript html vba web-scraping

我想要做的是使用VBA搜索website,在左侧框中添加一些单词并在右侧获得结果。

问题是我不知道HTML,我不知道如何引用此框。我使用 GetElementByID ,但我收到了错误:

objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka".   
"Object doesn't support this property or method".

这是我的代码:

Sub www()

    Set objIE = CreateObject("InternetExplorer.Application")

    objIE.Top = 0
    objIE.Left = 0
    objIE.Width = 800
    objIE.Height = 600
    objIE.AddressBar = 0
    objIE.StatusBar = 0
    objIE.Toolbar = 0
    objIE.Visible = True
    objIE.Navigate ("https://pl.pons.com/tłumaczenie-tekstu")

    Do
        DoEvents
    Loop Until objIE.ReadyState = 4

    pagesource = objIE.Document.Body.Outerhtml
    objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka"
    objIE.Document.GetElementByID("qKeyboardInputInitiator").Click

    Do
        DoEvents
    Loop Until objIE.ReadyState = 4

End Sub

2 个答案:

答案 0 :(得分:3)

在不更改任何语言设置的情况下,以下内容翻译了" Hello"

<强>代码:

Option Explicit

Public Sub GetInfo()
    Dim IE As New InternetExplorer, html As HTMLDocument, translation As String
    Const TRANSLATION_STRING As String = "Hello"

    With IE
        .Visible = True
        .navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set html = .document

        With html
            .querySelector("textarea.text-translation-source.source").Value = TRANSLATION_STRING
            .querySelector("button.btn.btn-primary.submit").Click
            Application.Wait Now + TimeSerial(0, 0, 3)
            translation = .querySelector("div.translated_text").innerText
        End With

        Debug.Print translation
        'Quit '<== Remember to quit application
    End With

End Sub

查看:

Output

在即时窗口中打印:

Output

修改

后期绑定版本

Option Explicit

Public Sub GetInfo()
    Dim IE As Object, html As Object

    With CreateObject("InternetExplorer.Application")
        .Visible = True
        .navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set html = CreateObject("htmlfile")
        Set html = .document

        With html

            .getElementsByClassName("text-translation-source source")(0).innerText = "Translate"
            .getElementsByClassName("btn btn-primary submit")(0).Click
             Application.Wait Now + TimeSerial(0, 0, 2)

             Dim i As Long
             For i = 0 To .getElementsByClassName("text-translation-target target").Length - 1
                 Debug.Print .getElementsByClassName("text-translation-target target")(i).innerText
             Next i

            Stop
        End With
        .Quit
    End With

End Sub

答案 1 :(得分:1)

ID为“text-translation-video-ad”的元素是一个没有.Value属性的DIV。您想要访问后面提到的DIV的后代文本区域。

页面上有两个带有“textarea”标签的元素,你感兴趣的是第一个元素,因此(0)索引。 GetElementsByTagName中的标记必须大写。

objIE.Document.GetElementsByTagName("TEXTAREA")(0).Value = "piłka"

您还可以从IE自动化中退出,并采用更快,更可靠的方法,无需浏览器自动化,这将为您提供JSON格式的响应。需要设置对Microsoft HTML对象库的引用。

Option Explicit

Public Sub Scrape()

    Dim WindHttp As Object: Set WindHttp = CreateObject("WinHTTP.WinHTTPRequest.5.1")
    Dim htmlDoc As New HTMLDocument
    Dim urlName As String, myWord As String, requestString As String
    Dim myResults() As String
    Dim resultNum As Long

    urlName = "https://pl.pons.com/_translate/translate"
    myWord = "piłka"

    requestString = "source_language=pl&target_language=en&service=deepl&text=" & _
    myWord & _
    "&lookup=true&requested_by=Web&source_language_confirmed=true"

    Set htmlDoc = postDocument(urlName, WindHttp, requestString)

    myResults = Split(Replace(Split(Split(htmlDoc.body.innerText, ",")(1), ":")(1), Chr(34), vbNullString), vbCrLf)

    For resultNum = LBound(myResults) To UBound(myResults)
        Debug.Print myResults(resultNum)
    Next resultNum

End Sub

Function postDocument(ByVal urlName As String, myRequest As Object, Optional requestString As String) As HTMLDocument

    Set postDocument = New HTMLDocument

    With myRequest

        .Open "POST", urlName, False
        .setRequestHeader "Cache-Control", "no-cache"
        .setRequestHeader "Pragma", "no-cache"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"

        If requestString = vbNullString Then
            .send
        Else
            .send requestString
        End If

        postDocument.body.innerHTML = .responseText

    End With

End Function