我的Excel单元格中有以下内容——看起来非常混乱——包含大约100行HTML标记:
// Builds a String from rollNumberWhichIsANumber using the "%@" format specifier.
// NOTE(review): the forced cast (as!) traps at runtime if the value does not conform to CVarArg — confirm the value's type at the call site.
let rollNumber:String = String(format: "%@", rollNumberWhichIsANumber as! CVarArg)
<ul class=""list-unstyled"">
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://google.org/"" target=""_blank"">Website</a></li>
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://www.google.com/"" target=""_blank"">Website 2</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""http://amazon.com"" target=""_blank"">Product</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""https://amazon.de/"" target=""_blank"">Product 2</a></li>
<li><span title=""Tags"" class=""glyphicon glyphicon glyphicon-tag text-gray""></span>
<small><span class=""label label-warning"">Available</span></small>
<small><span class=""label label-warning"">Country</span></small>
<ul class=""list-unstyled"">
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://google.org/"" target=""_blank"">Website</a></li>
<li><span title=""Website"" class=""glyphicon glyphicon-link text-gray""></span> <a href=""https://www.google.com/"" target=""_blank"">Website 2</a></li>
<li><span title=""Product"" class=""glyphicon glyphicon-search text-gray""></span> <a href=""http://amazon.com"" target=""_blank"">Product</a></li>
<li><span title=""Tags"" class=""glyphicon glyphicon glyphicon-tag text-gray""></span>
<small><span class=""label label-warning"">Not Available</span></small>
<small><span class=""label label-warning"">State</span></small>
答案 0 :(得分:2)
Function ParseHTML(str As String) As Scripting.Dictionary
    'Parses one cell's HTML fragment into a dictionary with the keys
    '"Website 1".."Website 3", "Product 1".."Product 3", "Available" and "Country".
    '
    'str - the HTML text of one cell. Attribute quotes are expected to be doubled
    '      (title=""Website""), because all character offsets below are computed
    '      for that form. str is overwritten while parsing; VBA passes arguments
    '      ByRef by default, so a String variable handed in by the caller is changed.
    '
    'Returns a Scripting.Dictionary (requires a reference to Microsoft Scripting Runtime).
    'Website/Product slots up to 3 that are not present in the HTML are added as "".
    Set ParseHTML = New Scripting.Dictionary
    Dim txt As String
    Dim website As Long: website = 0
    Dim product As Long: product = 0
    Dim i As Long: i = 0

    Do While True
        'get all text between <li> and </li> tags
        'then extract all data from it: title attribute and link
        txt = Mid(str, InStr(1, str, "<li>") + 4, InStr(1, str, "</li>") - InStr(1, str, "<li>") - 4)
        'select which case it is: website, product or tags
        '(+8 skips title="" ; -11 removes title="" plus the closing "" and the space before class)
        Select Case Mid(txt, InStr(1, txt, "title") + 8, InStr(1, txt, "class") - InStr(1, txt, "title") - 11)
            Case Is = "Website"
                website = website + 1
                'here you extract the link
                '(+10 skips <a href="" ; -13 removes that prefix plus the closing "" and the space before target)
                ParseHTML.Add "Website " & website, Mid(txt, InStr(1, txt, "<a href") + 10, InStr(1, txt, "target") - InStr(1, txt, "<a href") - 13)
            Case Is = "Product"
                product = product + 1
                'here you extract the link
                ParseHTML.Add "Product " & product, Mid(txt, InStr(1, txt, "<a href") + 10, InStr(1, txt, "target") - InStr(1, txt, "<a href") - 13)
            Case Is = "Tags"
                'if we reached Tags, then all websites are over and need different processing
                Exit Do
        End Select
        'delete processed text
        str = Mid(str, InStr(1, str, "</li>") + 5)
    Loop 'closes the Do While above; without it the function does not compile

    'since in your table you have 3 places for websites and products, so we need to add them
    For i = website + 1 To 3
        ParseHTML.Add "Website " & i, ""
    Next i
    For i = product + 1 To 3
        ParseHTML.Add "Product " & i, ""
    Next i

    'now txt is the interior of last <li></li> tag and now we focus on what is
    'between <small> and </small> tags
    'also we don't need str variable anymore, so we can re-use it
    str = Mid(txt, InStr(1, txt, "<small>") + 7, InStr(1, txt, "</small>") - InStr(1, txt, "<small>") - 7)
    'text between the opening <span ...> and the 7-character closing </span>
    ParseHTML.Add "Available", Mid(str, InStr(1, str, ">") + 1, Len(str) - InStr(1, str, ">") - 7)
    'remove processed part of html
    txt = Mid(txt, InStr(1, txt, "</small>") + 8)
    'take care of last <small> tag
    str = Mid(txt, InStr(1, txt, "<small>") + 7, InStr(1, txt, "</small>") - InStr(1, txt, "<small>") - 7)
    ParseHTML.Add "Country", Mid(str, InStr(1, str, ">") + 1, Len(str) - InStr(1, str, ">") - 7)
End Function
"Website 1"
,"Website 2"
,"Website 3"
,"Product 1"
,"Product 2"
,"Product 3"
Sub ProcessHTML()
    'Walks every data row of the active sheet (row 2 down to the last used row
    'of column A), parses the HTML in column A with ParseHTML and fills columns
    '2 to 9 of that row, using the header text in row 1 as the dictionary key.
    Dim parsed As Scripting.Dictionary
    Dim finalRow As Long
    Dim r As Long
    Dim c As Long

    'last row with content in column A
    finalRow = Cells(Rows.Count, 1).End(xlUp).Row

    r = 2
    Do While r <= finalRow
        'turn the cell's HTML into key/value pairs
        Set parsed = ParseHTML(Cells(r, 1).Value)
        'copy each value under the column whose header matches its key
        For c = 2 To 9
            Cells(r, c).Value = parsed(Cells(1, c).Value)
        Next c
        'release the dictionary before handling the next row
        Set parsed = Nothing
        r = r + 1
    Loop
End Sub
在运行任何代码之前,请在VBA编辑器中转到:工具 -> 引用,并在弹出的窗口中勾选 Microsoft Scripting Runtime。
答案 1 :(得分:1)
如果您完全用VBA来实现(不使用任何库),可以把HTML当作字符串来搜索,并查找 <a> 和 </a>
标签。一旦你拉出具有这个:<a href=""https://google.org/"" target=""_blank"">Website</a>
答案 2 :(得分:1)
Sub splithtml()
    'Splits the HTML stored in column A (row 2 downwards, row 1 being a header)
    'into columns B:H of the same row: three website links, two product links
    'and the texts of the two <small><span> labels.
    Dim htmlstring As String
    'Long, not Integer: a worksheet can have more than 32,767 rows, which would overflow an Integer
    Dim rowcount As Long
    Dim i As Long
    Dim website1str As String, website2str As String, website3str As String
    Dim product1str As String, product2str As String
    Dim spanstr As String

    'All the Attribute Nodes to be extracted are hardcoded
    website1str = ">Website</a></li>"
    website2str = ">Website 2</a></li>"
    website3str = ">Website 3</a></li>"
    product1str = ">Product</a></li>"
    product2str = ">Product 2</a></li>"
    spanstr = "</span></small>"

    'Create Headers for the xml parsed table
    Cells(1, 2).Value = "Website 1"
    Cells(1, 3).Value = "Website 2"
    Cells(1, 4).Value = "Website 3"
    Cells(1, 5).Value = "Product 1"
    Cells(1, 6).Value = "Product 2"
    Cells(1, 7).Value = "Available"
    Cells(1, 8).Value = "Country"

    'Get the number of rows with data in A column
    'Assumption:- XML data stored in A column of the sheet
    rowcount = Cells(Rows.Count, 1).End(xlUp).Row

    'Stop at the last used row; there is no data in row rowcount + 1
    For i = 2 To rowcount
        'Xml is stored in A column and starts from second row, First row is assumed to be header
        htmlstring = Cells(i, 1).Value
        'Parses each node and stores in the adjacent column of the column where XML is stored.
        'Each call returns the remaining HTML, which is fed into the next call.
        htmlstring = GetValue(htmlstring, website1str, i, 2)
        htmlstring = GetValue(htmlstring, website2str, i, 3)
        htmlstring = GetValue(htmlstring, website3str, i, 4)
        htmlstring = GetValue(htmlstring, product1str, i, 5)
        htmlstring = GetValue(htmlstring, product2str, i, 6)
        htmlstring = GetValue(htmlstring, spanstr, i, 7)
        htmlstring = GetValue(htmlstring, spanstr, i, 8)
    Next i
End Sub
Function Trimhtml(Mainhtml, Processedhtml)
    'Function to Trim the HTML nodes that have been parsed.
    '
    'Mainhtml      - the HTML text still to be processed
    'Processedhtml - the marker that has just been handled
    '
    'Returns Mainhtml cut at the first occurrence of Processedhtml:
    '  - for the "</span></small>" marker the result starts right after the marker,
    '    so the next search finds the next label;
    '  - for any other marker the result starts at the marker itself.
    'If the marker does not occur, Mainhtml is returned unchanged.
    Dim spanstr As String
    Dim markerPos As Long
    spanstr = "</span></small>"

    markerPos = InStr(Mainhtml, Processedhtml)
    If markerPos = 0 Then
        'nothing to cut; Mid with a start position of 0 would raise a runtime error
        Trimhtml = Mainhtml
    ElseIf Processedhtml = spanstr Then
        'skip past the whole 15-character marker
        Trimhtml = Mid(Mainhtml, markerPos + Len(spanstr))
    Else
        Trimhtml = Mid(Mainhtml, markerPos)
    End If
End Function
Function GetValue(Mainhtml, nodevalue, row, column)
    'Function to Get Text value from the attribute passed and store it in the row, column passed.
    '
    'Mainhtml  - the HTML text still to be processed
    'nodevalue - the marker to look for (e.g. ">Website</a></li>" or "</span></small>")
    'row, column - the worksheet cell that receives the extracted value
    '
    'Returns the HTML that remains after the marker has been handled (via Trimhtml),
    'or Mainhtml unchanged when the marker does not occur, so that the caller can
    'keep searching the same text for the next marker.
    Dim nodestring As String
    Dim FirstPoint As Long, Secondpoint As Long
    Dim Returnvalue As String
    Dim spanstr As String
    spanstr = "</span></small>"

    If InStr(Mainhtml, nodevalue) > 0 Then
        'everything up to and including the first character of the marker
        nodestring = Left$(Mainhtml, InStr(Mainhtml, nodevalue))
        If nodevalue = spanstr Then
            'label text: between the last ">" and the "<" that starts the marker
            FirstPoint = InStrRev(nodestring, ">")
            Secondpoint = InStrRev(nodestring, "<")
            Returnvalue = Mid(nodestring, FirstPoint + 1, Secondpoint - FirstPoint - 1)
        Else
            'link: the href value, assuming doubled quotes (<a href=""url"" target=)
            '+10 skips <a href="" ; -13 removes that prefix plus the closing "" and the space
            FirstPoint = InStr(nodestring, "<a href=")
            Secondpoint = InStr(nodestring, "target=")
            If FirstPoint > 0 And Secondpoint - FirstPoint - 13 >= 0 Then
                Returnvalue = Mid(nodestring, FirstPoint + 10, Secondpoint - FirstPoint - 13)
            End If
        End If
        Cells(row, column).Value = Returnvalue
        GetValue = Trimhtml(Mainhtml, nodevalue)
    Else
        'marker not present: hand the text back untouched
        GetValue = Mainhtml
    End If
End Function
假设: -
答案 3 :(得分:0)
=IF((LEN($A2)-LEN(SUBSTITUTE($A2,"""""Website""""","")))/(LEN("Website")+4)>=COLUMNS($B$1:B1),TRIM(MID(SUBSTITUTE(SUBSTITUTE($A2,"<a href=""""",REPT(" ",LEN($A2)),COLUMNS($B$1:B1)),""""" target",REPT(" ",LEN($A2)),COLUMNS($B$1:B1)),LEN($A2),LEN($A2))),"")
=IF((LEN($A2)-LEN(SUBSTITUTE($A2,"""""Product""""","")))/(LEN("Product")+4)>=COLUMNS($E$1:E1),TRIM(MID(SUBSTITUTE(SUBSTITUTE(MID($A2,FIND("""""Product""""",$A2,1),LEN($A2)),"<a href=""""",REPT(" ",LEN($A2)),COLUMNS($E$1:E1)),""""" target",REPT(" ",LEN($A2)),COLUMNS($E$1:E1)),LEN($A2),LEN($A2))),"")
好处:它使用原生Excel功能,因此不需要VBA。它是非数组公式,即不需要CTRL + SHIFT + ENTER。
答案 4 :(得分:-1)
首先,使用Text to Columns来解析数据。