我正在使用 HttpWebRequest 从给定的 URL 下载数据,但返回的响应中缺少一些元素。
' Download the raw HTML of the tracking page with a single HTTP GET.
' The response contains only the markup the server sends; anything the page
' builds afterwards with client-side script is not part of it.
Dim Request As HttpWebRequest = CType(WebRequest.Create("https://www.royalmail.com/track-your-item#/tracking-results/37005067200003B0F1FF2"), HttpWebRequest)
Request.Timeout = 2 * 60 * 1000 ' two minutes, in milliseconds
Request.Proxy = Nothing
Request.AutomaticDecompression = DecompressionMethods.Deflate Or DecompressionMethods.GZip
Request.Credentials = System.Net.CredentialCache.DefaultCredentials

' Declared outside the response scope so the downloaded text stays usable afterwards;
' it remains Nothing when the status code is not 200 OK.
Dim sData As String = Nothing

' Using blocks guarantee the response, its stream and the reader are released
' even if reading throws.
Using HttpResp As HttpWebResponse = CType(Request.GetResponse(), HttpWebResponse)
    If HttpResp.StatusCode = HttpStatusCode.OK Then
        Using receiveStream As Stream = HttpResp.GetResponseStream()
            Using readStream As New StreamReader(receiveStream)
                sData = readStream.ReadToEnd()
            End Using
        End Using
    End If
End Using
当我在 Chrome 中打开该 URL(https://www.royalmail.com/track-your-item#/tracking-results/37005067200003B0F1FF2)并检查元素时,可以看到此元素(搜索 37005067200003B0F1FF2);但在 HttpWebRequest 返回的响应中,却找不到这个元素(搜索 37005067200003B0F1FF2)。
使用网络浏览器控件的代码
''' <summary>
''' Creates an off-screen WebBrowser, hooks its DocumentCompleted event to
''' <c>PrintDocument</c> and navigates it to the tracking page.
''' </summary>
''' <param name="sender">The button that raised the event.</param>
''' <param name="e">Unused event data.</param>
Private Sub Button10_Click(sender As Object, e As EventArgs) Handles Button10.Click
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12
    ' Plain literal: String.Format without arguments adds nothing and would throw
    ' if the URL ever contained braces.
    Dim sURL As String = "https://www.royalmail.com/track-your-item#/tracking-results/37005067200003B0F1FF2"
    Dim webBrowserForPrinting As WebBrowser = New WebBrowser()
    webBrowserForPrinting.ScriptErrorsSuppressed = True
    AddHandler webBrowserForPrinting.DocumentCompleted, AddressOf PrintDocument
    ' Navigate exactly once. Assigning the Url property already starts a navigation,
    ' so setting Url and then calling Navigate requested the same page twice.
    webBrowserForPrinting.Navigate(sURL)
End Sub
''' <summary>
''' DocumentCompleted handler: shows the InnerHtml of every element of the
''' completed document in a message box, one element at a time.
''' </summary>
''' <param name="sender">The WebBrowser whose document completed.</param>
''' <param name="e">Event data (unused).</param>
Private Sub PrintDocument(ByVal sender As Object, ByVal e As WebBrowserDocumentCompletedEventArgs)
    Dim loadedDocument As HtmlDocument = CType(sender, WebBrowser).Document

    ' Nothing to show when the browser has no document.
    If loadedDocument Is Nothing Then Return

    For Each element As HtmlElement In loadedDocument.All
        MsgBox(element.InnerHtml)
    Next
End Sub
答案 0 :(得分:1)
您需要激活WebBrowser高级功能才能成功完成解析过程。如果未启用这些功能,则在标准IE7仿真中,WebBrowser将无法完成文档。该故障是由大量脚本错误引起的。
我添加了带有静态方法的类( WebBrowserAdvancedFetures
),以将所需的值添加到注册表中。
WebBrowserAdvancedFetures.ActivateWBAdvancedFeatures
在表单的构造函数中被调用。
要回滚这些更改,可以调用 WebBrowserAdvancedFetures.DeactivateWBAdvancedFeatures。
此过程如何工作:
1. 初始化一个 WebBrowser 类(此处为 Private browser As WebBrowser)。我们也可以使用WebBrowser控件(Form容器可以承载的可见控件版本),这是同一回事。
2. 订阅 DocumentCompleted 事件:当 WebBrowser.Document 中的 HtmlDocuments 之一完成时,它就会被引发。阅读How to get an HtmlElement value inside Frames/IFrames?,了解有关HtmlDocuments嵌套的更多详细信息。
3. 在 DocumentCompleted 处理程序中,确认至少有一个文档已准备好进行分析,并检查 WebBrowser.ReadyState = WebBrowserReadyState.Complete。
4. 满足所有条件后解析 HtmlDocument:找到所需的 HtmlElement 后,引发一个事件以通知解析已完成(此处为使用标准 EventArgs 类的公共自定义事件),并禁止进一步解析 HtmlDocument(在这里,这是通过设置布尔字段来完成的)。
5. 获取结果(此处为一个 String 和一个 DateTime 对象),然后重置在解析过程中使用的字段/变量。
在 Form.FormClosed 事件或自定义类 Dispose() 方法中,请记住要删除处理程序:
' Detach both event handlers; the surrounding text places these statements in the
' Form.FormClosed handler or in a custom class Dispose() method.
RemoveHandler DocumentParsingComplete, AddressOf OnDocumentParsingComplete
RemoveHandler browser.DocumentCompleted, AddressOf browser_DocumentCompleted
' Raised by browser_DocumentCompleted once both the tracking number and the date have been extracted.
Public Event DocumentParsingComplete As EventHandler(Of EventArgs)
' WebBrowser instance used to load and parse the page; created in the constructor.
Private browser As WebBrowser = Nothing
' Inner text of the SPAN whose class name contains "tracking-number-value".
Private trackingNumberValue As String = String.Empty
' Date parsed from the SPAN whose class name equals "ng-binding ng-scope".
Private trackingDateValue As DateTime
' Set to True after a successful parse so later DocumentCompleted events are ignored.
Private documentParsed As Boolean = False
' Passed as the additionalHeaders argument of browser.Navigate, hence the "User-Agent: " header prefix.
Private userAgent As String = "User-Agent: Mozilla/5.0 (Windows NT 10; Win64; x64; rv:48.0) Gecko/20100101 Firefox/48.0"
''' <summary>
''' Initializes the form, writes the WebBrowser feature values for this executable,
''' creates the WebBrowser instance and subscribes the event handlers.
''' </summary>
Public Sub New()
    InitializeComponent()

    ' The feature values are registered under the executable's file name.
    Dim executableFileName As String = Path.GetFileName(Application.ExecutablePath)
    WebBrowserAdvancedFetures.ActivateWBAdvancedFeatures(executableFileName)

    browser = New WebBrowser()
    browser.ScriptErrorsSuppressed = True

    AddHandler browser.DocumentCompleted, AddressOf browser_DocumentCompleted
    AddHandler DocumentParsingComplete, AddressOf OnDocumentParsingComplete
End Sub
''' <summary>
''' Resets the browser to a blank document, clears the parsed flag and navigates
''' to the target URL, sending the custom User-Agent header.
''' </summary>
''' <param name="sender">The button that raised the event.</param>
''' <param name="e">Unused event data.</param>
Private Sub btnNavigate_Click(sender As Object, e As EventArgs) Handles btnNavigate.Click
    browser.Navigate("")
    ' WebBrowser.Document is Nothing while no page is loaded; skip the reset
    ' in that case instead of throwing a NullReferenceException.
    browser.Document?.OpenNew(True)
    ' Allow browser_DocumentCompleted to parse the next document.
    documentParsed = False
    browser.Navigate("[Some URL]", "_self", Nothing, userAgent)
End Sub
''' <summary>
''' Handles DocumentParsingComplete: writes the extracted tracking number and date
''' to the console, then clears both fields for the next run.
''' </summary>
''' <param name="sender">The object that raised the event.</param>
''' <param name="e">Unused event data.</param>
Private Sub OnDocumentParsingComplete(sender As Object, e As EventArgs)
    ' Consume the parsed values (replace with whatever the application needs).
    Console.WriteLine(Me.trackingNumberValue)
    Console.WriteLine(Me.trackingDateValue)

    ' Clear the results so a later parse starts from a clean state.
    Me.trackingDateValue = Date.MinValue
    Me.trackingNumberValue = String.Empty
End Sub
''' <summary>
''' Handles WebBrowser.DocumentCompleted. Once the browser reports ReadyState Complete
''' and the document contains at least one form, extracts the tracking number and the
''' date from the SPAN elements and raises DocumentParsingComplete.
''' </summary>
''' <remarks>
''' Returns without raising the event when the document is not ready or the expected
''' elements or values are not present, so a later DocumentCompleted can try again.
''' </remarks>
''' <param name="sender">The WebBrowser that raised the event.</param>
''' <param name="e">Event data (unused).</param>
Private Sub browser_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs)
    Dim wb As WebBrowser = DirectCast(sender, WebBrowser)
    If documentParsed OrElse wb.ReadyState <> WebBrowserReadyState.Complete Then Return

    ' Document is Nothing while no page is loaded; guard before dereferencing it.
    Dim doc As HtmlDocument = wb.Document
    If doc Is Nothing OrElse doc.Forms.Count = 0 Then Return

    ' Query the SPAN elements once and reuse the list for both lookups.
    Dim spans = doc.GetElementsByTagName("SPAN").OfType(Of HtmlElement)().ToList()

    Dim trackingNumberClass As String = "tracking-number-value"
    Dim trackingElement = spans.FirstOrDefault(Function(elm) elm.GetAttribute("className").Contains(trackingNumberClass))
    Me.trackingNumberValue = trackingElement?.InnerText

    Dim trackingDateClass As String = "ng-binding ng-scope"
    Dim trackingDateElement = spans.FirstOrDefault(Function(elm) elm.GetAttribute("className").Equals(trackingDateClass))
    If trackingDateElement Is Nothing Then Return

    ' InnerText can be Nothing or blank when the element has no text yet.
    Dim dateText As String = trackingDateElement.InnerText
    If String.IsNullOrWhiteSpace(dateText) Then Return

    ' The date is taken from the last whitespace-separated token, minus a trailing period.
    Dim deliveryDate As String = dateText.Split().Last().TrimEnd("."c)

    ' TryParseExact avoids a FormatException when the matched SPAN does not hold a
    ' date; InvariantCulture keeps the result independent of the user's locale.
    Dim parsedDate As Date
    If Not Date.TryParseExact(deliveryDate, "dd-MM-yyyy",
                              System.Globalization.CultureInfo.InvariantCulture,
                              System.Globalization.DateTimeStyles.None, parsedDate) Then Return
    Me.trackingDateValue = parsedDate

    If Not String.IsNullOrEmpty(trackingNumberValue) Then
        ' Both values found: stop parsing further documents and notify subscribers.
        documentParsed = True
        RaiseEvent DocumentParsingComplete(sender, EventArgs.Empty)
    End If
End Sub
使用此类可激活/停用WebBrowser控件的高级功能:
Imports Microsoft.Win32
Imports System.Security.AccessControl
''' <summary>
''' Adds or removes the per-executable Internet Explorer FeatureControl values
''' (browser emulation and GPU rendering) under HKEY_CURRENT_USER.
''' </summary>
Public Class WebBrowserAdvancedFetures
    Private Shared ReadOnly baseKeyName As String = "Software\Microsoft\Internet Explorer\Main\FeatureControl"
    Private Shared ReadOnly featuresKey As String = baseKeyName & "\FEATURE_BROWSER_EMULATION"
    Private Shared ReadOnly hardwareAccelKey As String = baseKeyName & "\FEATURE_GPU_RENDERING"

    ''' <summary>
    ''' Writes the emulation (11001) and GPU rendering (1) DWORD values for the given executable.
    ''' </summary>
    ''' <param name="executableName">File name of the executable, e.g. "MyApp.exe".</param>
    Public Shared Sub ActivateWBAdvancedFeatures(executableName As String)
        ' CreateSubKey opens the key when it exists and creates it otherwise. OpenSubKey
        ' returns Nothing for a missing key, which made SetValue throw a NullReferenceException.
        Using wbFeatureKey = Registry.CurrentUser.CreateSubKey(
            featuresKey, RegistryKeyPermissionCheck.ReadWriteSubTree)
            ' 11001 is the documented FEATURE_BROWSER_EMULATION value for IE11 edge mode.
            wbFeatureKey.SetValue(executableName, 11001, RegistryValueKind.DWord)
        End Using
        Using wbAccelKey = Registry.CurrentUser.CreateSubKey(
            hardwareAccelKey, RegistryKeyPermissionCheck.ReadWriteSubTree)
            wbAccelKey.SetValue(executableName, 1, RegistryValueKind.DWord)
        End Using
    End Sub

    ''' <summary>
    ''' Removes the values written by <see cref="ActivateWBAdvancedFeatures"/>.
    ''' Does nothing when a key or value is already absent.
    ''' </summary>
    ''' <param name="executableName">File name of the executable, e.g. "MyApp.exe".</param>
    Public Shared Sub DeactivateWBAdvancedFeatures(executableName As String)
        Using wbFeatureKey = Registry.CurrentUser.OpenSubKey(
            featuresKey, RegistryKeyPermissionCheck.ReadWriteSubTree, RegistryRights.WriteKey)
            ' The key may not exist (nothing was ever activated); there is then nothing to delete.
            wbFeatureKey?.DeleteValue(executableName, False)
        End Using
        Using wbAccelKey = Registry.CurrentUser.OpenSubKey(
            hardwareAccelKey, RegistryKeyPermissionCheck.ReadWriteSubTree, RegistryRights.WriteKey)
            wbAccelKey?.DeleteValue(executableName, False)
        End Using
    End Sub
End Class