我正在尝试抓取一些网站,整体运行得非常顺利。但是存在一个重大问题:在某些页面上(数量不多),我得到的是一些奇怪的字符,而不是HTML代码。
看起来像这样:
;�<cS���u�/�qYa$�4l7�.�Q�7&��O����� Z�D}z��/���� ��u����V���lWY|�n5�1�We����GB�U��g{�� �|Ϸ����*�Q��0���nb�o�߯�����[b��/����@CƑ����D{{/n��X�!� �Et�X"����?��˩����8\y��&
如果我在浏览器中打开它,则根本没有问题。 我不明白为什么。
我的HTTP标头说:
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 Accept-Encoding: gzip,deflate,sdch Accept-Language: de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4 Cache-Control: max-age=0 Connection: keep-alive User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36
我认为它与Accept
request.Accept = "*/*"
有关
这是我的网络请求:
Public Class Http
Dim cookieCon As New CookieContainer
Dim request As HttpWebRequest
Dim response As HttpWebResponse
' Issues an HTTP GET for the URL in Params(0) and returns the result.
'
' Params(0): URL to request.
' Params(1): raw value for the Cookie header, or "nocookie" to leave the header unset.
' Returns a two-element String array: (0) request.Address as text,
' (1) the response body exactly as read by a default StreamReader.
' The request and response are stored in the class-level fields of the same name.
' Note: AutomaticDecompression is not set in this function.
Public Function GetRequest(ByVal Params() As Object)
    Dim targetUrl As String = Params(0)
    Dim cookieHeader As String = Params(1)

    'request.AllowAutoRedirect = True
    request = CType(HttpWebRequest.Create(targetUrl), HttpWebRequest)
    With request
        .CookieContainer = New CookieContainer()
        .Method = "GET"
        .Timeout = 20000
        .UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36"
        '.ContentType = "application/x-www-form-urlencoded"
        .Accept = "*/*"
        ' "nocookie" is the caller's sentinel for "send no Cookie header".
        If Not cookieHeader Like "nocookie" Then
            .Headers("Cookie") = cookieHeader
        End If
    End With

    response = CType(request.GetResponse(), HttpWebResponse)

    ' Capture the address first, then drain the body, matching the original order.
    Dim finalAddress As String = request.Address.ToString()
    Dim pageBody As String = New StreamReader(response.GetResponseStream()).ReadToEnd()
    Return New String() {finalAddress, pageBody}
End Function
感谢。
答案 0 :(得分:1)
您正在下载的数据是GZip压缩的。你需要解压缩它。将您的函数更改为:
' Most recent request/response, kept as fields so other members can inspect them.
Dim request As HttpWebRequest
Dim response As HttpWebResponse

' Downloads the resource at Params(0), transparently decompressing gzip/deflate bodies.
'
' Params(0): URL to request.
' Params(1): optional raw Cookie header value. When it is missing, Nothing, empty,
'            or "nocookie", no Cookie header is set.
' Returns a two-element String array: (0) request.Address as text,
' (1) the decoded response body.
' Exceptions raised by the request (for example WebException) are not caught here.
Public Function GetRequest(ByVal Params() As Object) As String()
    Dim url As String = CStr(Params(0))

    ' Tolerate callers that pass only the URL.
    Dim mycookie As String = Nothing
    If Params.Length > 1 Then
        mycookie = CStr(Params(1))
    End If

    'request.AllowAutoRedirect = True
    request = CType(HttpWebRequest.Create(url), HttpWebRequest)
    request.CookieContainer = New CookieContainer()

    ' Only send a Cookie header when the caller actually supplied one.
    ' NOTE(review): with a non-null CookieContainer, .NET Framework may replace a
    ' manually assigned Cookie header with the container's cookies - confirm.
    If Not String.IsNullOrEmpty(mycookie) AndAlso Not mycookie Like "nocookie" Then
        request.Headers("Cookie") = mycookie
    End If

    ' Let the framework decode compressed bodies; servers may answer with either
    ' gzip or deflate, so enable both.
    request.AutomaticDecompression = DecompressionMethods.GZip Or DecompressionMethods.Deflate

    response = CType(request.GetResponse(), HttpWebResponse)

    Dim html(1) As String
    html(0) = request.Address.ToString()

    ' Dispose the reader (and with it the response stream) once the body is read,
    ' so the underlying connection is released.
    Using reader As New StreamReader(response.GetResponseStream())
        html(1) = reader.ReadToEnd()
    End Using

    Return html
End Function
**用法**:
' Usage: fetch a page without sending a custom Cookie header.
Dim params(1) As Object
params(0) = url          ' URL to download
params(1) = "nocookie"   ' sentinel GetRequest checks for to skip the Cookie header
' GetRequest returns an array; element 1 holds the response body.
Dim page As String = GetRequest(params)(1)