从单元格字符串中删除HTML标记:excel Formula

时间:2013-02-05 10:43:02

标签: excel excel-formula

我在excel表格中有一个带有HTML标签的数据,如下所示:

<b>This is test data<br>Nice
<div> Go on this is next Cell
Very goood <b>.....</b>

所以,基本上我想删除Excel表中的所有HTML标签,或者把它们替换为空格。

3 个答案:

答案 0 :(得分:37)

使用模式 <*> 执行“全部替换”(Replace All)。

replace tags pattern

要打开此功能,请转到功能区Home > Find & Select > Replace...或只需按 CTRL + H

可以使用TRIM函数进一步删除额外的空格。祝你好运!

答案 1 :(得分:5)

在Excel中打开VBA编辑器(Alt + F11),单击右侧项目浏览器中的项目名称(即电子表格名称)。选择 插入 -> 新模块,将下面的用户定义函数粘贴到模块窗口中,然后将工作簿另存为启用宏的 .XLSM 文件。

键入函数'= StripHTML(A2)',假设您的数据在单元格A2中。您也可以在此处下载一个工作示例:

http://jfrancisconsulting.com/how-to-strip-html-tags-in-excel/

Function StripHTML(cell As Range) As String
    ' Worksheet UDF: returns the text of a cell with HTML markup removed.
    '
    ' Parameters:
    '   cell - the range to read; only its top-left cell is used.
    ' Returns:
    '   The cell text with </p> turned into two line feeds, <br> into one
    '   line feed, <li> into "-", every other tag removed, and character
    '   entities decoded.
    '
    ' Processing order matters: tags are stripped BEFORE entities are decoded,
    ' so escaped text such as "1 &lt; 2 and 3 &gt; 1" is kept as literal
    ' "1 < 2 and 3 > 1" instead of being mistaken for a tag and deleted.
    Dim firstCell As Range
    Dim rawValue As Variant
    Dim sInput As String
    Dim RegEx As Object

    Set firstCell = cell.Cells(1, 1)
    rawValue = firstCell.Value
    If VarType(rawValue) = vbString Then
        ' Use the stored string: .Text is the rendered text and can be
        ' "####" when the column is too narrow.
        sInput = rawValue
    Else
        ' Numbers, dates, errors and empty cells keep their displayed form.
        sInput = firstCell.Text
    End If

    If Len(sInput) = 0 Then
        StripHTML = ""
        Exit Function
    End If

    ' VBA string literals have no "\x.." escapes, so the control characters
    ' are spelled with the built-in constants.
    sInput = Replace(sInput, vbCrLf, vbLf)
    sInput = Replace(sInput, vbNullChar, vbLf)

    Set RegEx = CreateObject("vbscript.regexp")
    RegEx.Global = True
    RegEx.IgnoreCase = True   ' tag names are case-insensitive in HTML
    RegEx.MultiLine = True

    ' Replace paragraph ends and HTML breaks with line breaks.
    RegEx.Pattern = "</p\s*>"
    sInput = RegEx.Replace(sInput, vbLf & vbLf)
    RegEx.Pattern = "<br\s*/?>"
    sInput = RegEx.Replace(sInput, vbLf)

    ' Replace bullets with dashes. The optional group stops the pattern
    ' from matching other tags that merely start with "li" (e.g. <link>).
    RegEx.Pattern = "<li(\s[^>]*)?>"
    sInput = RegEx.Replace(sInput, "-")

    ' Remove all the remaining HTML tags.
    RegEx.Pattern = "<[^>]+>"
    sInput = RegEx.Replace(sInput, "")
    Set RegEx = Nothing

    StripHTML = DecodeHtmlEntities(sInput)
End Function

Private Function DecodeHtmlEntities(ByVal content As String) As String
    ' Decodes named entities (the set listed below) and decimal numeric
    ' references (&#NNN;) in content. Hexadecimal references (&#xNN;) are
    ' left untouched.
    '
    ' Characters are produced with ChrW from their Unicode code points so the
    ' result does not depend on the code page the module was saved in.
    Dim table As String
    Dim entry As Variant
    Dim parts() As String
    Dim m As Object
    Dim codePoint As Long

    If InStr(1, content, "&", vbBinaryCompare) = 0 Then
        DecodeHtmlEntities = content
        Exit Function
    End If

    ' name=codepoint pairs. "amp" is deliberately absent: it is decoded last.
    ' nbsp maps to an ordinary space (32), not U+00A0.
    table = "ndash=8211 mdash=8212 iexcl=161 iquest=191 quot=34 ldquo=8220 rdquo=8221" & _
            " lsquo=8216 rsquo=8217 laquo=171 raquo=187 nbsp=32"
    table = table & " cent=162 copy=169 divide=247 gt=62 lt=60 micro=181 middot=183" & _
            " para=182 plusmn=177 euro=8364 pound=163 reg=174 sect=167 trade=8482 yen=165"
    table = table & " aacute=225 Aacute=193 agrave=224 Agrave=192 acirc=226 Acirc=194" & _
            " aring=229 Aring=197 atilde=227 Atilde=195 auml=228 Auml=196" & _
            " aelig=230 AElig=198 ccedil=231 Ccedil=199"
    table = table & " eacute=233 Eacute=201 egrave=232 Egrave=200 ecirc=234 Ecirc=202" & _
            " euml=235 Euml=203 iacute=237 Iacute=205 igrave=236 Igrave=204" & _
            " icirc=238 Icirc=206 iuml=239 Iuml=207"
    table = table & " ntilde=241 Ntilde=209 oacute=243 Oacute=211 ograve=242 Ograve=210" & _
            " ocirc=244 Ocirc=212 oslash=248 Oslash=216 otilde=245 Otilde=213" & _
            " ouml=246 Ouml=214 szlig=223"
    table = table & " uacute=250 Uacute=218 ugrave=249 Ugrave=217 ucirc=251 Ucirc=219" & _
            " uuml=252 Uuml=220 yuml=255"

    ' Entity names are case-sensitive (&Aacute; vs &aacute;), so the compare
    ' mode is forced to binary regardless of any Option Compare setting.
    For Each entry In Split(table, " ")
        parts = Split(entry, "=")
        content = Replace(content, "&" & parts(0) & ";", ChrW(CLng(parts(1))), _
                          1, -1, vbBinaryCompare)
    Next entry

    With CreateObject("vbscript.regexp")
        .Global = True
        .IgnoreCase = False

        ' Decimal numeric references.
        ' NOTE(review): the published listing had three Replace calls whose
        ' search strings were lost (their replacements were a left single
        ' quote, an acute accent and a backtick); they are assumed to have
        ' been numeric references, which this loop covers - confirm.
        .Pattern = "&#(\d{1,5});"
        For Each m In .Execute(content)
            codePoint = CLng(m.SubMatches(0))
            ' 38 ("&") is deferred to the final step below. 128-159 are
            ' control characters in Unicode and are left undecoded.
            If codePoint > 0 And codePoint <= 65535 And codePoint <> 38 _
               And (codePoint < 128 Or codePoint > 159) Then
                If codePoint = 160 Then codePoint = 32   ' same as &nbsp;
                content = Replace(content, m.Value, ChrW(codePoint), _
                                  1, -1, vbBinaryCompare)
            End If
        Next m

        ' Ampersands go last and in a single pass, so that "&amp;lt;" ends
        ' up as the literal text "&lt;" rather than being decoded twice.
        .Pattern = "&(amp|#38);"
        content = .Replace(content, "&")
    End With

    DecodeHtmlEntities = content
End Function

答案 2 :(得分:0)

由于上面的宏对我不起作用,我自己修改了一个。这是我写的第一个脚本,如果你们能改进它、让它更快或补充更多功能,非常欢迎!

好的,我以前没有编程经验(除了6年前学过一点非常基础的Java),但在一些帮助和大量摸索(实际上花了好几个小时)之后,我设法写出了这个脚本。它能很好地删除大多数标签和 &#8 开头的实体文本,但不会把 <BR> 替换为换行符(你可以按 CTRL + H,在“查找内容”中输入 <br>,在“替换为”中按住 ALT 并用数字小键盘输入 0010——替换框中应该会出现一个闪烁的小点——然后点击“全部替换”)。

将下面的代码粘贴到用户模块中(Alt + F11,右键单击 Sheet1 -> Insert -> Module -> 粘贴代码)。

要制作一个按钮,先通过 File -> Options -> Customize Ribbon 选中 Developer 复选框。然后转到“开发工具”选项卡 -> 插入 -> 按钮,放置按钮后右键单击 -> 指定宏 -> 选择 RemoveTags。

Sub RemoveTags()
    ' Strips HTML tags from the selected cells and replaces a fixed set of
    ' numeric character references with a single space.
    '
    ' The selection is first formatted as Text ("@"). Each cell that holds a
    ' string is then cleaned once and written back only if its content
    ' actually changed, so untouched formulas, numbers, error values and
    ' empty cells are left alone.
    Dim target As Range
    Dim r As Range
    Dim original As Variant
    Dim cleaned As String
    Dim entity As Variant

    ' Nothing to do when a chart, shape, etc. is selected instead of cells.
    If TypeName(Selection) <> "Range" Then Exit Sub

    Selection.NumberFormat = "@"  'set cells to text numberformat

    ' Limit the walk to cells that can hold data; selecting whole columns
    ' would otherwise visit every row of the sheet.
    Set target = Intersect(Selection, Selection.Worksheet.UsedRange)
    If target Is Nothing Then Exit Sub

    With CreateObject("vbscript.regexp")
      .Pattern = "\<.*?\>"
      .Global = True

      For Each r In target.Cells
        original = r.Value
        ' Only strings can contain markup; this also skips error values,
        ' which would raise a type mismatch if passed to .Replace.
        If VarType(original) = vbString Then
          cleaned = .Replace(original, "")
          For Each entity In Array("&#8217;", "&#8211;", "&#8216;", _
                                   "&#8232;", "&#8233;", "&#146;s")
            cleaned = Replace(cleaned, entity, " ")
          Next entity
          If StrComp(cleaned, original, vbBinaryCompare) <> 0 Then r.Value = cleaned
        End If
      Next r
    End With
End Sub


' Click handler for a control named CommandButton1. The body is empty, so
' this procedure currently does nothing when invoked.
' NOTE(review): presumably intended to run RemoveTags - confirm before wiring it up.
Private Sub CommandButton1_Click()

End Sub