我一直想知道,为什么无法用包含 <、>、/ 等字符的分隔符来拆分字符串。
例如,我正在尝试拆分下面的字符串:
<h3 class="r"><a href="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCgQFjAA&url=http://www.site.me/&ei=C2z3UqCeO4mrhQfT_oGQAg&usg=AFQjCNEnOMh79FnzOZLXjrvhaf5jAH_aog&sig2=AnVKWZvmdCXGRU-EcbTzFA" onmousedown="return rwt(this,'','','','1','AFQjCNEnOMh79FnzOZLXjrvhaf5jAH_aog','AnVKWZvmdCXGRU-EcbTzFA','0CCgQFjAA','','',event)" data-href="http://www.site.me/">www.site.me <em>text</em> <b>...</b></a></h3>
页面上共有 10 个 <h3 class="r"> 元素。
因此,为了取得第一个元素的内容,我尝试了以下代码,但它不起作用。
Dim gurl As String = (Split(googlepage, "<h3 class=""r"">")(1).Split("</h3>")(0))
同样如下。
Dim gurl as String = (Split(googleplage, "<cite>")(1).split("</cite")(0))
它没有给出任何错误,但它没有得到任何结果。当我尝试在消息框中输出它时,它什么也没显示。任何帮助将不胜感激!
答案 0 :(得分:2)
抱歉,我在平板电脑上打字比较慢,不过下面的代码演示了 String.Split 的几种用法:
Option Strict On
Module Module1
    ''' <summary>
    ''' Demonstrates several String.Split overloads by breaking a sample HTML
    ''' fragment into headings, tags, attributes and text content, and writing
    ''' each piece to the console.
    ''' </summary>
    ''' <remarks>
    ''' This is an illustration of String.Split, not an HTML parser. Known limits
    ''' of the approach: attribute values that contain whitespace (such as the
    ''' onmousedown handler in the sample) are reported as several attributes,
    ''' and closing tags such as "/a" are reported as tags.
    ''' </remarks>
    Sub Main()
        ' One search-result heading; the sample page is this markup repeated four times.
        Const sampleHeading As String = "<h3 class=""r""><a href=""https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCgQFjAA&url=http://www.site.me/&ei=C2z3UqCeO4mrhQfT_oGQAg&usg=AFQjCNEnOMh79FnzOZLXjrvhaf5jAH_aog&sig2=AnVKWZvmdCXGRU-EcbTzFA"" onmousedown=""return rwt(this,'','','','1','AFQjCNEnOMh79FnzOZLXjrvhaf5jAH_aog','AnVKWZvmdCXGRU-EcbTzFA','0CCgQFjAA','','',event)"" data-href=""http://www.site.me/"">www.site.me <em>text</em> <b>...</b></a></h3>"
        Dim googlepage As String = String.Concat(sampleHeading, sampleHeading, sampleHeading, sampleHeading)

        ' A multi-character delimiter needs the Split(String(), StringSplitOptions) overload.
        ' RemoveEmptyEntries drops the empty piece that follows the final "</h3>".
        Dim headings = googlepage.Split({"</h3>"}, StringSplitOptions.RemoveEmptyEntries)
        For Each heading In headings
            ' No point splitting on <h3 ...> as we've already split on the end tag, so for
            ' illustration split on '<' to get a list of nodes. RemoveEmptyEntries drops the
            ' empty piece in front of the first '<'.
            Dim nodes = heading.Split({"<"c}, StringSplitOptions.RemoveEmptyEntries)
            For Each node In nodes
                ' parts(0) is the tag text (name plus attributes); parts(1), if present,
                ' is the text that follows the tag.
                Dim parts = node.Split(">"c)
                ' The first "attribute" is the tag name.
                Dim attributes = parts(0).Split({vbTab, " ", vbCr, vbLf}, StringSplitOptions.RemoveEmptyEntries)
                For i As Integer = 0 To attributes.Length - 1
                    Console.WriteLine("{0}: {1}", If(i = 0, "Tag", "Attribute"), attributes(i))
                Next
                Console.WriteLine("Content: {0}", If(parts.Length > 1, parts(1), "(empty)"))
            Next
            Console.WriteLine("Next heading:")
        Next
    End Sub
End Module
当然,没有必要自己编写 HTML 解析器,因为 HtmlAgilityPack 已经把这件事做得很好了。