使用Excel VBA读取和操作HTML

时间:2016-11-25 12:37:32

标签: html vba excel-vba excel

假设我有一个如下所示的页面,保存在 c:\temp\html_page.html:

      <!-- Excerpt of the sample page: a div with id "xxx1" that contains a single img whose src is to be changed. Closing tags are not shown in this excerpt. -->
      <link rel="stylesheet" href="styles.css">
      <div id="xxx1">
         <img src="test.png">

我想基于Excel数据和VBA,以编程方式调整img的src属性。基本上,我需要一种使用XPath查找div、并调整其中包含的(单个)img标记的方法。


' Load the page with MSXML, report any parse error, and locate the target div.
' html_path is expected to be defined by the surrounding code.
' NOTE: MSXML only loads well-formed XML. HTML with unclosed tags
' (e.g. <link ...> or <img ...> without a closing slash) is rejected and
' reported through parseError in the Else branch below.
Dim XDoc As Object, outer_div As Object

Set XDoc = CreateObject("MSXML2.DOMDocument")
XDoc.async = False: XDoc.validateOnParse = False

If XDoc.Load(html_path) Then
    Debug.Print "Document loaded"

    ' Use full XPath syntax for queries (MSXML 3.0 defaults to XSLPattern).
    XDoc.setProperty "SelectionLanguage", "XPath"

    ' Attribute tests need the @ prefix and the predicate must be closed;
    ' the MSXML method for a single match is selectSingleNode.
    Set outer_div = XDoc.selectSingleNode("//div[@id='xxx1']")
    If outer_div Is Nothing Then
        Debug.Print "div with id 'xxx1' not found"
    Else
        ' ... edit the img attribute
    End If
Else
    Dim strErrText As String
    ' Late-bound like XDoc, so no reference to the MSXML library is required.
    Dim xPE As Object
    ' Obtain the ParseError object
    Set xPE = XDoc.parseError
    With xPE
       strErrText = "Your XML Document failed to load " & _
         "due to the following error." & vbCrLf & _
         "Error #: " & .ErrorCode & ": " & .reason & _
         "Line #: " & .Line & vbCrLf & _
         "Line Position: " & .linepos & vbCrLf & _
         "Position In File: " & .filepos & vbCrLf & _
         "Source Text: " & .srcText & vbCrLf & _
         "Document URL: " & .URL
    End With
    MsgBox strErrText, vbExclamation
End If



2 个答案:

答案 0 :(得分:3)


' Replaces the src of the first <img> in C:\temp\test.html and writes the
' modified markup back to the same file. Uses late binding ("htmlFile"),
' so no library reference is required.
Sub changeImg()

    Const HTML_PATH As String = "C:\temp\test.html"

    Dim dom As Object
    Dim imgs As Object
    Dim img As Object
    Dim src As String
    Dim fileNum As Integer

    Set dom = CreateObject("htmlFile")

    ' Ask VBA for a free handle instead of assuming #1 is unused.
    fileNum = FreeFile
    Open HTML_PATH For Input As #fileNum
        src = Input$(LOF(fileNum), fileNum)
    Close #fileNum

    dom.body.innerHTML = src

    ' Guard against pages without any <img>; dereferencing a missing
    ' element would otherwise raise a runtime error.
    Set imgs = dom.getelementsbytagname("img")
    If imgs.Length = 0 Then
        MsgBox "No <img> element found in " & HTML_PATH, vbExclamation
        Exit Sub
    End If
    Set img = imgs(0)

    img.src = "..."

    fileNum = FreeFile
    Open HTML_PATH For Output As #fileNum
        Print #fileNum, dom.DocumentElement.outerHTML
    Close #fileNum

End Sub


另外,如果你想做更深入的操作,更好的选择是考虑早期绑定。早期绑定时公开的HTML接口与后期绑定时的接口不同,并且支持更多属性——您需要添加对 Microsoft HTML Object Library 的引用:

' Early-bound variant: replaces the src of the first <img> in
' C:\temp\test.html and writes the modified markup back to the same file.
' Requires a reference to the Microsoft HTML Object Library (MSHTML).
Sub changeImg()

    Const HTML_PATH As String = "C:\temp\test.html"

    ' Typed MSHTML declarations are what make this early binding: they give
    ' compile-time checking and IntelliSense on the document and the image.
    Dim dom As MSHTML.HTMLDocument
    Dim imgs As MSHTML.IHTMLElementCollection
    Dim img As MSHTML.HTMLImg
    Dim src As String
    Dim fileNum As Integer

    Set dom = New MSHTML.HTMLDocument

    ' Ask VBA for a free handle instead of assuming #1 is unused.
    fileNum = FreeFile
    Open HTML_PATH For Input As #fileNum
        src = Input$(LOF(fileNum), fileNum)
    Close #fileNum

    dom.body.innerHTML = src

    ' Guard against pages without any <img>; dereferencing a missing
    ' element would otherwise raise a runtime error.
    Set imgs = dom.getElementsByTagName("img")
    If imgs.Length = 0 Then
        MsgBox "No <img> element found in " & HTML_PATH, vbExclamation
        Exit Sub
    End If
    Set img = imgs.Item(0)

    img.src = "..."

    fileNum = FreeFile
    Open HTML_PATH For Output As #fileNum
        Print #fileNum, dom.DocumentElement.outerHTML
    Close #fileNum

End Sub

答案 1 :(得分:0)

为此,您可以使用doc.querySelector("div[id='xxx1'] img")。要更改src属性,请使用img.setAttribute "src", "new.png"。 HTH

Option Explicit

' Add reference to Microsoft Internet Controls (SHDocVw)
' Add reference to Microsoft HTML Object Library

' Opens the sample page in a visible Internet Explorer window and, once it
' has finished loading, points the <img> inside div#xxx1 at a new source.
Sub Demo()
    Dim url As String
    url = "file:///C:/Temp/StackOverflow/html/html_page.html"

    Dim ie As SHDocVw.InternetExplorer
    Set ie = New SHDocVw.InternetExplorer
    ie.Visible = True
    ie.navigate url

    ' Yield to the message loop until the browser reports the page is ready.
    Do While ie.Busy Or ie.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Dim doc As MSHTML.HTMLDocument
    Set doc = ie.document

    ' The CSS selector picks the img nested in the div with id "xxx1".
    Dim img As MSHTML.HTMLImg
    Set img = doc.querySelector("div[id='xxx1'] img")
    If img Is Nothing Then Exit Sub

    img.setAttribute "src", "new.png"
End Sub