据称存在非法字符,而没有非法字符。我的实例让我发疯。问题发生在Dim datastream As Stream = client.OpenRead(url)
。
第一个Dim url As String = GoogleSearch & MovieName
不想接受HTML格式作为字符串。好。我从字符串中删除了https://
,现在它只是'www .____`格式,仍然可以与webclient一起使用。现在,它把这个拉到我身上。为什么?在Visual Studio外部测试时,它可以工作。
我的输入字符串URL是:www.google.com/search?q=imdb+Orville
,这会导致网络客户端出现此错误:
System.ArgumentException:“路径中包含非法字符。”
Public Class Form1
Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load()
End Sub
Public Property status As Boolean
Public Property Id As String
Public Property ImdbURL As String
Private GoogleSearch As String = "www.google.com/search?q=imdb+"
Private BingSearch As String = "www.bing.com/search?q=imdb+"
Private AskSearch As String = "www.ask.com/web?q=imdb+"
Private Function match(ByVal regex As String, ByVal html As String, ByVal Optional i As Integer = 1) As String
Return New Regex(regex, RegexOptions.Multiline).Match(html).Groups(i).Value.Trim()
End Function
Private Function matchAll(ByVal regex As String, ByVal html As String, ByVal Optional i As Integer = 1) As ArrayList
Dim list As ArrayList = New ArrayList()
For Each m As Match In New Regex(regex, RegexOptions.Multiline).Matches(html)
list.Add(m.Groups(i).Value.Trim())
Next
Return list
End Function
Private Function getIMDbUrl(ByVal MovieName As String, ByVal Optional searchEngine As String = "google") As String
Dim url As String = GoogleSearch & MovieName
If searchEngine.ToLower().Equals("bing") Then url = BingSearch & MovieName
If searchEngine.ToLower().Equals("ask") Then url = AskSearch & MovieName
Dim html As String = getUrlData(url)
Dim imdbUrls As ArrayList = matchAll("<a href=""(http://www.imdb.com/title/tt\d{7}/)"".*?>.*?</a>", html)
If imdbUrls.Count > 0 Then
Return CStr(imdbUrls(0))
ElseIf searchEngine.ToLower().Equals("google") Then
Return getIMDbUrl(MovieName, "bing")
ElseIf searchEngine.ToLower().Equals("bing") Then
Return getIMDbUrl(MovieName, "ask")
Else
Return String.Empty
End If
End Function
Private Function getUrlData(ByVal url As String) As String
Dim client As WebClient = New WebClient()
Dim r As Random = New Random()
client.Headers("X-Forwarded-For") = r.[Next](0, 255) & "." & r.[Next](0, 255) & "." & r.[Next](0, 255) & "." & r.[Next](0, 255)
client.Headers("User-Agent") = "Mozilla/" & r.[Next](3, 5) & ".0 (Windows NT " & r.[Next](3, 5) & "." & r.[Next](0, 2) & "; rv:2.0.1) Gecko/20100101 Firefox/" & r.[Next](3, 5) & "." & r.[Next](0, 5) & "." & r.[Next](0, 5)
Dim datastream As Stream = client.OpenRead(url)
Dim reader As StreamReader = New StreamReader(datastream)
Dim sb As StringBuilder = New StringBuilder()
While Not reader.EndOfStream
sb.Append(reader.ReadLine())
End While
Return sb.ToString()
End Function
Private Sub parseIMDbPage(ByVal imdbUrl As String)
Dim html As String = getUrlData(imdbUrl)
Id = match("<link rel=""canonical"" href=""http://www.imdb.com/title/(tt\d{7})/"" />", html)
If Not String.IsNullOrEmpty(Id) Then
status = True
imdbUrl = "http://www.imdb.com/title/" & Id & "/"
End If
End Sub
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
Dim TextFromBox As String = RichTextBox1.Text
Dim imdbUrl As String = getIMDbUrl(TextFromBox)
parseIMDbPage(imdbUrl)
MessageBox.Show(Id)
End Sub
End Class