我的excel单元中有以下内容 - 看起来非常混乱 - 包含大约100行HTML标记:
以下两个例子:
let rollNumber:String = String(format: "%@", rollNumberWhichIsANumber as! CVarArg)
或
<ul class=""list-unstyled"">
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://google.org/"" target=""_blank"">Website</a></li>
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://www.google.com/"" target=""_blank"">Website 2</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""http://amazon.com"" target=""_blank"">Product</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""https://amazon.de/"" target=""_blank"">Product 2</a></li>
<li><span title=""Tags"" class=""glyphicon glyphicon glyphicon-tag text-gray""></span>
<small><span class=""label label-warning"">Available</span></small>
<small><span class=""label label-warning"">Country</span></small>
</li>
</ul>
我的目标是创建一个如下所示的表:
<ul class=""list-unstyled"">
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://google.org/"" target=""_blank"">Website</a></li>
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://www.google.com/"" target=""_blank"">Website 2</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""http://amazon.com"" target=""_blank"">Product</a></li>
<li><span title=""Tags"" class=""glyphicon glyphicon glyphicon-tag text-gray""></span>
<small><span class=""label label-warning"">Not Available</span></small>
<small><span class=""label label-warning"">State</span></small>
</li>
</ul>
老实说,我不知道如何应对这一挑战。
您方有何建议?
答案 0 :(得分:2)
方法是:创建函数,将HTML
代码作为字符串作为参数,并返回带有与表头相同的键的字典。函数的主体是:
Function ParseHTML(str As String) As Scripting.Dictionary
Set ParseHTML = New Scripting.Dictionary
Dim txt As String
Dim website As Long: website = 0
Dim product As Long: product = 0
Dim i As Long: i = 0
Do While True
'get all text between <li> and <\li> tags
'then extract all data from it: title attribute and link
txt = Mid(str, InStr(1, str, "<li>") + 4, InStr(1, str, "</li>") - InStr(1, str, "<li>") - 4)
'select which case it is: website, product or tags
Select Case Mid(txt, InStr(1, txt, "title") + 8, InStr(1, txt, "class") - InStr(1, txt, "title") - 11)
Case Is = "Website"
website = website + 1
'here you extract the link
ParseHTML.Add "Website " & website, Mid(txt, InStr(1, txt, "<a href") + 10, InStr(1, txt, "target") - InStr(1, txt, "<a href") - 13)
Case Is = "Product"
product = product + 1
'here you extract the link
ParseHTML.Add "Product " & product, Mid(txt, InStr(1, txt, "<a href") + 10, InStr(1, txt, "target") - InStr(1, txt, "<a href") - 13)
Case Is = "Tags"
'if we reached Tags, then all websites are over and need different processing
Exit Do
End Select
'delete processed text
str = Mid(str, InStr(1, str, "</li>") + 5)
Loop
'since in your table you have 3 places for websites and products, so we need to add them
For i = website + 1 To 3
ParseHTML.Add "Website " & i, ""
Next i
For i = product + 1 To 3
ParseHTML.Add "Product " & i, ""
Next i
'now txt is the interior of last <li></li> tag and now we focus on what is
'between <small> and </small> tags
'also we don't need str variable anymore, so we can re-use it
str = Mid(txt, InStr(1, txt, "<small>") + 7, InStr(1, txt, "</small>") - InStr(1, txt, "<small>") - 7)
ParseHTML.Add "Available", Mid(str, InStr(1, str, ">") + 1, Len(str) - InStr(1, str, ">") - 7)
'remove processed part of html
txt = Mid(txt, InStr(1, txt, "</small>") + 8)
'take care of last <small> tag
str = Mid(txt, InStr(1, txt, "<small>") + 7, InStr(1, txt, "</small>") - InStr(1, txt, "<small>") - 7)
ParseHTML.Add "Country", Mid(str, InStr(1, str, ">") + 1, Len(str) - InStr(1, str, ">") - 7)
End Function
因此,总而言之,该函数返回带有键的字典
"Website 1"
,"Website 2"
,"Website 3"
,"Product 1"
,"Product 2"
,"Product 3"
,"Available"
,"Country"
。
现在,拥有该功能,您可以轻松填写所需的表格。这是一种方法:
Sub ProcessHTML()
'determine last row in A column
Dim lastRow As Long: lastRow = Cells(Rows.Count, 1).End(xlUp).Row
Dim dict As Scripting.Dictionary
Dim i As Long
Dim j As Long
For i = 2 To lastRow
'parse HTML code with our function
Set dict = ParseHTML(Cells(i, 1).Value)
For j = 2 To 9
'write corresponding values from dictionary to cells in table
Cells(i, j).Value = dict(Cells(1, j).Value)
Next j
'get rid of object
Set dict = Nothing
Next i
End Sub
它适用于那样排列的表(已填充):
将这些标题放在列中非常重要。
重要强>
在运行任何内容之前,在您的VBA编辑器中转到: 工具 - &gt;引用,在弹出的窗口中,您需要选择 Microsoft Scripting Runtime 。
答案 1 :(得分:1)
我有一些想法:
如果您在所有VBA中执行此操作(不使用任何库),您可以将html搜索为字符串&amp;寻找<a> & </a>
标签。一旦你拉出具有这个:<a href=""https://google.org/"" target=""_blank"">Website</a>
的子串,那么你就会对href&amp;网址。
另一种选择是使用正则表达式。看起来VBA脚本DLL具有正则表达式功能,您可以查看它。
最后一个选项是HTML敏捷包。这是专为使用HTML而设计的。我曾经从一个.net项目中使用过它。我现在不记得细节,但我记得很容易合作。
答案 2 :(得分:1)
Sub splithtml()
Dim htmlstring As String
Dim rowcount As Integer
Dim website1str As String, website2str As String, website3str As String
Dim product1str As String, product2str As String
Dim spanstr As String
'All the Attribute Nodes to be extracted are hardcoded
website1str = ">Website</a></li>"
website2str = ">Website 2</a></li>"
website3str = ">Website 3</a></li>"
product1str = ">Product</a></li>"
product2str = ">Product 2</a></li>"
spanstr = "</span></small>"
'Create Headers for the xml parsed table
Cells(1, 2).Value = "Website 1"
Cells(1, 3).Value = "Website 2"
Cells(1, 4).Value = "Website 3"
Cells(1, 5).Value = "Product 1"
Cells(1, 6).Value = "Product 2"
Cells(1, 7).Value = "Available"
Cells(1, 8).Value = "Country"
'Get the number of rows with data in A column
'Assmption:- XML data stored in A column of the sheet
rowcount = Cells(Rows.Count, 1).End(xlUp).row
For i = 2 To rowcount + 1
'Xml is stored in A column and starts from second row, First row is assumed to be header
htmlstring = Cells(i, 1).Value
'Parses each node and stores in the adjacent column of the column where XML is stored
htmlstring = GetValue(htmlstring, website1str, i, 2)
htmlstring = GetValue(htmlstring, website2str, i, 3)
htmlstring = GetValue(htmlstring, website3str, i, 4)
htmlstring = GetValue(htmlstring, product1str, i, 5)
htmlstring = GetValue(htmlstring, product2str, i, 6)
htmlstring = GetValue(htmlstring, spanstr, i, 7)
htmlstring = GetValue(htmlstring, spanstr, i, 8)
Next i
End Sub
Function Trimhtml(Mainhtml, Processedhtml)
'Function to Trim the HTMl nodes that has been parsed
Dim spanstr As String
spanstr = "</span></small>"
Trimhtml = Mainhtml
If Processedhtml = spanstr Then
Trimhtml = Mid(Mainhtml, InStr(Mainhtml, Processedhtml) + 15)
Else
Trimhtml = Mid(Mainhtml, InStr(Mainhtml, Processedhtml))
End If
End Function
Function GetValue(Mainhtml, nodevalue, row, column)
'Function to Get Text value from the attribute passed and stored in the row, column passed
Dim nodestring As String
Dim FirstPoint As Integer, Secondpoint As Integer
Dim spanstr As String
spanstr = "</span></small>"
If InStr(Mainhtml, nodevalue) > 0 Then
nodestring = Left$(Mainhtml, InStr(Mainhtml, nodevalue))
If nodevalue = spanstr Then
FirstPoint = InStrRev(nodestring, ">")
Secondpoint = InStrRev(nodestring, "<")
Returnvalue = Mid(nodestring, FirstPoint + 1, Secondpoint - FirstPoint - 1)
Else
FirstPoint = InStr(nodestring, "<a href=")
Secondpoint = InStr(nodestring, "target=")
Returnvalue = Mid(nodestring, FirstPoint + 10, Secondpoint - FirstPoint - 13)
End If
Cells(row, column).Value = Returnvalue
GetValue = Trimhtml(Mainhtml, nodevalue)
Else
GetValue = Mainhtml
End If
End Function
我编写了一个VB脚本来解析xml数据。
假设: -
答案 3 :(得分:0)
假设您的数据位于单元格A2中,并且您正在单元格B2中为网站应用公式,则可以使用以下公式。
=IF((LEN($A2)-LEN(SUBSTITUTE($A2,"""""Website""""","")))/(LEN("Website")+4)>=COLUMNS($B$1:B1),TRIM(MID(SUBSTITUTE(SUBSTITUTE($A2,"<a href=""""",REPT(" ",LEN($A2)),COLUMNS($B$1:B1)),""""" target",REPT(" ",LEN($A2)),COLUMNS($B$1:B1)),LEN($A2),LEN($A2))),"")
向下复制。
对于细胞E2中的产品
=IF((LEN($A2)-LEN(SUBSTITUTE($A2,"""""Product""""","")))/(LEN("Product")+4)>=COLUMNS($E$1:E1),TRIM(MID(SUBSTITUTE(SUBSTITUTE(MID($A2,FIND("""""Product""""",$A2,1),LEN($A2)),"<a href=""""",REPT(" ",LEN($A2)),COLUMNS($E$1:E1)),""""" target",REPT(" ",LEN($A2)),COLUMNS($E$1:E1)),LEN($A2),LEN($A2))),"")
好处:它使用原生Excel功能,因此不需要VBA。它是非数组公式,即不需要CTRL + SHIFT + ENTER。
缺点:配方很复杂,可能难以维护。
我已经在Dropbox上传了示例文件,以便于理解和实施。
答案 4 :(得分:-1)
您可以在Excel中执行此操作,如果这是您正在寻找的内容。
首先,使用Text to Columns来解析数据。
希望这就是你要找的东西