我正在抓取 https://thepiratebay.se 中的内容,想知道如何获取搜索结果的总页数。
这是我的程序:
如您所见,左侧显示搜索结果,右侧是所选结果(此处为排在最前面的结果)的评论。在程序底部可以看到它显示了结果的数量,其右侧则是页码。
我想以 {current page}/{total pages} 的格式显示结果的页数。查找 current page 我并不需要帮助,但我确实想知道如何得到 total pages。
我还希望评论(右侧的列表)也具备这一功能。
这可能有点太多了,但它不应该那么难,因为结果很容易。无法在Google上找到任何内容,因此非常感谢您的帮助。
编辑:抓取搜索结果的代码:
' Scrapes one search-result page for the text in TextBox1 and lists every hit in TreeView1.
' Uses Source, Link, InstanceCount and GetBetween from the enclosing class.
Dim ResultCount, I As Integer
Dim filenamelist, fileurllist, fileseeders, fileleechers, filemagneturl, filesize As New List(Of String)
TreeView1.Nodes.Clear()

' Build the complete request URL BEFORE downloading. The category code has to be part of the
' URL; appending it to the downloaded HTML (as the previous version did) has no effect on the search.
' Uri.EscapeDataString also escapes characters other than spaces (e.g. "&", "#", "?").
Dim searchUrl As String = "http://thepiratebay.se/search/" & Uri.EscapeDataString(TextBox1.Text) & "/0/7/"
' NOTE(review): when several boxes are ticked the codes are simply concatenated ("600500"),
' exactly as before — confirm how the site expects multiple categories to be separated.
If CheckBox6.Checked Then searchUrl &= "600"
If CheckBox5.Checked Then searchUrl &= "500"
If CheckBox7.Checked Then searchUrl &= "400"
If CheckBox4.Checked Then searchUrl &= "300"
If CheckBox3.Checked Then searchUrl &= "200"
If CheckBox2.Checked Then searchUrl &= "100"
If CheckBox1.Checked Then searchUrl &= "0"

' NOTE(review): Link previously received the downloaded HTML; it is later passed to
' DownloadString, so it is presumably meant to hold a URL — confirm against its other users.
Link = searchUrl

Using WC As New System.Net.WebClient
    Source = WC.DownloadString(searchUrl)
End Using

If Source.Contains("No hits. Try adding an asterisk in you search phrase.") Then
    MessageBox.Show("Search returned 0 results.", "Error", MessageBoxButtons.OK, MessageBoxIcon.Stop)
Else
    ' One "vertTh" marker is counted per result row.
    ResultCount = InstanceCount(Source, "vertTh")

    Dim cellStart As String = "<td align=""right"">"
    Dim cellEnd As String = "</td>"

    For I = 0 To ResultCount - 1
        Dim Filename As String = GetBetween(Source, "title=""Details for ", """>", I)
        Dim FileUrl As String = "http://thepiratebay.se/torrent/" & GetBetween(Source, "<a href=""/torrent/", """ class=""detLink""", I)
        ' Each row contributes two right-aligned cells: seeders at 2*I, leechers at 2*I + 1.
        ' CInt validates that the cell really holds a number, as the implicit conversion did before.
        Dim FileSeed As Integer = CInt(GetBetween(Source, cellStart, cellEnd, 2 * I))
        Dim FileLeech As Integer = CInt(GetBetween(Source, cellStart, cellEnd, 2 * I + 1))
        Dim Filemagnet As String = "magnet:" & GetBetween(Source, "<a href=""magnet:", """ title=""Download this torrent using magnet", I)
        ' NOTE(review): the second Replace used to map a space to a space (a no-op after all
        ' spaces were removed); presumably it was meant to turn the "&nbsp;" entity into a
        ' space — confirm against the live markup.
        Dim FS As String = GetBetween(Source, ", Size", ", ULed by ", I).Replace(" ", "").Replace("&nbsp;", " ")

        filemagneturl.Add(Filemagnet)
        filenamelist.Add(Filename)
        fileurllist.Add(FileUrl)
        fileseeders.Add(CStr(FileSeed))
        fileleechers.Add(CStr(FileLeech))
        filesize.Add(FS)
    Next
End If

' Show every collected result as a root node with its details as child nodes.
For I = 0 To filenamelist.Count - 1
    Dim rootNode = TreeView1.Nodes.Add(filenamelist(I))
    rootNode.Nodes.Add("Seeders: " & fileseeders(I))
    rootNode.Nodes.Add("Leechers: " & fileleechers(I))
    rootNode.Nodes.Add(fileurllist(I))
    rootNode.Nodes.Add(filemagneturl(I))
    rootNode.Nodes.Add(filesize(I))
Next
' Set once, after the loop, so a search without hits no longer leaves a stale count behind.
LabelResults.Text = "Results: " & filenamelist.Count
抓取所选结果的评论:
' Downloads the page at Link and lists every comment found on it in TreeView2
' (user name as root node, date and text as child nodes).
' Uses Link, UsernameArr, PostArr, postdatearr, CommentCounter, InstanceCount and GetBetween
' from the enclosing class.
Try
    ' Reset ALL per-page state. postdatearr was previously left untouched, so it kept growing
    ' and its indices no longer lined up with UsernameArr / PostArr.
    UsernameArr.Clear()
    PostArr.Clear()
    postdatearr.Clear()
    ' Mirrors TreeView1.Nodes.Clear() in the result scraper so old comments are not kept.
    TreeView2.Nodes.Clear()

    Dim pageHtml As String
    Using WC As New System.Net.WebClient
        pageHtml = WC.DownloadString(Link)
    End Using

    ' Restrict all further parsing to the comments section of the page.
    Dim commentsHtml As String = GetBetween(pageHtml, "<div id=""comments"">", "<div class=""ads"" id=""sky-banner"">", 0)

    ' One "byline" marker is counted per comment.
    Dim CommentCount As Integer = InstanceCount(commentsHtml, "byline")
    CommentCounter = CommentCount

    If CommentCount < 1 Then
        MessageBox.Show("This torrent has no comments")
    Else
        For commentIndex As Integer = 0 To CommentCount - 1
            Dim User As String = GetBetween(commentsHtml, "<a href=""/user/", "/"" title=""Browse ", commentIndex)
            Dim Post As String = GetBetween(commentsHtml, "comment"">", "</div>", commentIndex).Replace("<br />", " ")
            Dim DateStr As String = GetBetween(commentsHtml, "</a> at ", " CET:", commentIndex)

            UsernameArr.Add(User)
            postdatearr.Add(DateStr)
            PostArr.Add(Post)

            Dim rootNode = TreeView2.Nodes.Add(User)
            rootNode.Nodes.Add(DateStr)
            rootNode.Nodes.Add(Post)
        Next
    End If
Catch ex As Exception
    ' Still best-effort (no message box, no rethrow), but leave a trace so download or
    ' parsing failures can be diagnosed instead of vanishing silently.
    System.Diagnostics.Debug.WriteLine("Loading comments failed: " & ex.ToString())
End Try
获取两个字符串之间的文本:
''' <summary>
''' Returns the text that follows the (<paramref name="index"/> + 1)-th occurrence of
''' <paramref name="str1"/> in <paramref name="input"/>, up to the next occurrence of
''' <paramref name="str2"/>.
''' </summary>
''' <param name="input">Text to search.</param>
''' <param name="str1">Literal start delimiter.</param>
''' <param name="str2">Literal end delimiter; when it does not occur, the rest of the segment is returned.</param>
''' <param name="index">Zero-based number of the <paramref name="str1"/> occurrence to use.</param>
''' <returns>The text between the two delimiters.</returns>
''' <exception cref="IndexOutOfRangeException">
''' <paramref name="str1"/> occurs fewer than <paramref name="index"/> + 1 times.
''' </exception>
Private Function GetBetween(ByVal input As String, ByVal str1 As String, ByVal str2 As String, ByVal index As Integer) As String
    ' Regex.Escape makes both delimiters match literally. Unescaped, characters such as
    ' "(", ".", "|" or "?" were interpreted as pattern syntax and either threw or split
    ' the text in the wrong places.
    Dim segments As String() = Regex.Split(input, Regex.Escape(str1))
    Dim afterStart As String = segments(index + 1)
    Return Regex.Split(afterStart, Regex.Escape(str2))(0)
End Function