我正在尝试写一个宏,用来删除 Excel 数据中的 HTML 标签。我只想查找所有形如 <*> 的内容,并将其替换为空。此外,还需要删除像 “&nbsp;” 这样的特殊字符(HTML 实体),以及 if(typeof(dstb)!="undefined"){dstb();} 这类脚本片段。
到目前为止,我写的代码需要在宏中硬编码工作表名称(即代码中的 sheetname),而这正是我想避免的。
代码:
Sub UnescapeCharacters()
    ' Removes a fixed list of HTML entities and tags from every cell in the
    ' used range of the active worksheet by calling ReplaceCharacter once per
    ' (search, replacement) pair.
    '
    ' The sheet is taken from ActiveSheet instead of a hard-coded name, so the
    ' macro works on whichever worksheet is selected and can live in a
    ' standard module (Me is only valid inside a class/workbook module).
    Dim sheet As Worksheet
    Dim cell As Range
    Dim substitutions As Variant
    Dim i As Long

    ' ActiveSheet can be a chart sheet, or Nothing when no workbook is open.
    If TypeName(ActiveSheet) <> "Worksheet" Then Exit Sub
    Set sheet = ActiveSheet

    ' Flat list of pairs: search text followed by its replacement.
    ' NOTE(review): the first four search strings were literal characters in
    ' the pasted code (the first one was an unterminated string literal and
    ' the others would have deleted every comma and space). They look like
    ' HTML entities that were decoded on paste; the spellings below are an
    ' assumption - confirm them against the actual data.
    substitutions = Array( _
        "&quot;", """", _
        "&#44;", "", _
        "&nbsp;", "", _
        "&bull;", "", _
        "</ul>", "", _
        "<ul>", "", _
        "<b>", "", _
        "</b>", "", _
        "<i>", "", _
        "</i>", "", _
        "</li>", "", _
        "<li>", "", _
        "</br>", "", _
        "<br />", "", _
        "</p>", "", _
        "<p>", "" _
    )

    ' Iterate the cells of UsedRange directly: UsedRange does not have to
    ' start at A1, so counting rows/columns from 1 can miss trailing cells.
    For Each cell In sheet.UsedRange.Cells
        For i = LBound(substitutions) To UBound(substitutions) - 1 Step 2
            ReplaceCharacter cell, CStr(substitutions(i)), CStr(substitutions(i + 1))
        Next i
    Next cell
End Sub
Sub ReplaceCharacter(ByRef cell As Range, ByVal find As String, ByVal replacement As String)
    ' Replaces every occurrence of `find` with `replacement` in the text
    ' stored in `cell`.
    '
    ' cell        - the single cell to clean.
    ' find        - text to search for; an empty string is a no-op.
    ' replacement - text substituted for each occurrence.
    '
    ' Only cells holding a plain string constant are touched. Formula cells
    ' and numeric/date/error/empty cells are left alone so that formulas are
    ' not overwritten with their results and values are not re-typed.
    Dim original As Variant

    If Len(find) = 0 Then Exit Sub

    ' Read the stored value rather than Range.Text: Text is the formatted
    ' display string and turns into "####" when the column is too narrow.
    original = cell.Value
    If VarType(original) <> vbString Then Exit Sub
    If cell.HasFormula Then Exit Sub

    ' Skip the write when there is nothing to replace; the caller invokes this
    ' many times per cell and each write is comparatively expensive.
    If InStr(1, original, find) = 0 Then Exit Sub

    cell.Value = Replace(original, find, replacement, 1, -1)
End Sub
有人可以帮忙吗?
答案 0 :(得分:1)
我更喜欢下面这种做法:让 Internet Explorer 加载页面,再读取文档的 innerText,得到的就是去掉标签后的纯文本。如果这段 HTML 并非来自真实的网站,可以先把它保存为本地文件,然后用 IE.navigate 打开该文件的路径。
Sub testing()
    ' Loads a page in a late-bound Internet Explorer instance and reads the
    ' document's innerText, i.e. the page text without HTML markup.
    ' The browser is always closed, even when navigation or DOM access fails.
    Const READYSTATE_COMPLETE As Long = 4

    Dim IE As Object
    Dim stringWithOutTags As String
    Dim errNumber As Long
    Dim errSource As String
    Dim errDescription As String

    Set IE = CreateObject("InternetExplorer.Application")
    On Error GoTo Failed

    ' HardCode the URL address in
    IE.navigate "http://stackoverflow.com/questions/13824872/writing-macro-in-excel-to-remove-html-code"

    ' Yield to the host while waiting; a bare spin loop pegs the CPU and can
    ' leave Excel unresponsive until the page finishes loading.
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    stringWithOutTags = IE.document.DocumentElement.innerText

    ' Surface the result in the Immediate window; otherwise it is discarded
    ' when the procedure returns.
    Debug.Print stringWithOutTags

CleanUp:
    ' Closing the browser must not mask the original error, if any.
    On Error Resume Next
    IE.Quit
    Set IE = Nothing
    On Error GoTo 0
    If errNumber <> 0 Then Err.Raise errNumber, errSource, errDescription
    Exit Sub

Failed:
    ' Capture the error details before Resume clears the Err object.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Resume CleanUp
End Sub