使用正则表达式,但它不会删除Excel工作表中的html标记

时间:2012-10-10 19:25:47

标签: regex excel-vba excel-2003 vba excel

我尝试使用下面的代码删除html标签,但它没有做任何事情。我已经尝试再次运行它但没有用。有人可以告诉可能是什么问题。使用html标记附加文件的快照。有关信息,数据来自MS Access和MS Access链接到Sharepoint列表。

enter image description here

Sub Import_AccessData()
 Dim strtKeyMsgRange As Range
 Dim KeyMsgRange As Range
 Dim KeyMsgRangeCell As Range
 Dim endKeyMsgRangeCell As Range

Set strtKeyMsgRange = Range("B2")
Set endKeyMsgRange = Range("AC13")

Set KeyMsgRange = Range(strtKeyMsgRange, endKeyMsgRange)

For Each KeyMsgRangeCell In KeyMsgRange
   a = StripHTML(KeyMsgRangeCell)
   KeyMsgRangeCell.Value = a
Next KeyMsgRangeCell

End Sub


Public Function StripHTML(cell As Range) As String
     Dim RegEx As Object
     Set RegEx = CreateObject("vbscript.regexp")

     Dim sInput As String
     Dim sOut As String


     sInput = cell.Value

     With RegEx
   .Global = True
   .IgnoreCase = True
   .MultiLine = True
   .Pattern = "<[^>]+>" 'Regular Expression for HTML Tags.
   .Pattern = "&nbsp;"
   .Pattern = "&amp;"
 End With

 sOut = RegEx.Replace(sInput, "")
 StripHTML = sOut
 Set RegEx = Nothing
End Function

1 个答案:

答案 0 :(得分:1)

您要多次设置Pattern属性,它只会保留指定的最后一个值(&amp;)。

您需要使用3个正则表达式("<[^>]+>" => """&nbsp;" => " ""&amp;" => "&")或一个匹配所有输入("(&amp;)|(&nbsp;)|(<[^>]+>)" => "")的表达式来实际执行此操作