我需要从价格比较网站抓取价格(产品链接:https://www.toppreise.ch/prod_488002.html),但一直无法抓取成功。我想要获取的价格已在下图中突出显示:
请帮我看看如何抓取此页面。
PS:toppreise.ch 在许多国家/地区无法访问,因此请使用 VPN。
我使用以下代码:
' Click handler: issues a synchronous HTTP GET for the URL stored in cell B1
' of the "Data Mining" sheet of this workbook and stores the response body in
' the local variable strhtml.
'
' Side effects: activates the "Data Mining" sheet and selects B1.
' Failures (missing sheet, empty URL, transport error, non-200 status) are
' reported to the Immediate window instead of being silently ignored.
Private Sub SiteInfo_Click()
    ' NOTE(review): xmlHttp and StrUrl were not declared in the original; they
    ' are declared locally here. If other procedures rely on module-level
    ' variables with these names, remove the two Dim lines below.
    Dim strhtml As String
    Dim StrUrl As String
    Dim xmlHttp As Object
    Dim ws As Worksheet

    On Error GoTo Fail

    ' Qualify every range access with the workbook that owns this code so the
    ' macro does not depend on which workbook happens to be active.
    Set ws = ThisWorkbook.Sheets("Data Mining")
    ws.Activate
    ws.Range("B1").Select

    StrUrl = Trim$(CStr(ws.Range("B1").Value))
    If Len(StrUrl) = 0 Then
        Debug.Print "SiteInfo_Click: cell B1 is empty, nothing to request"
        Exit Sub
    End If

    Set xmlHttp = CreateObject("MSXML2.ServerXMLHTTP.6.0")
    xmlHttp.Open "GET", StrUrl, False   ' False = synchronous request
    xmlHttp.Send

    If xmlHttp.Status <> 200 Then
        Debug.Print "SiteInfo_Click: HTTP " & xmlHttp.Status & " " & _
                    xmlHttp.statusText & " for " & StrUrl
        Exit Sub
    End If

    strhtml = xmlHttp.responseText
    Exit Sub

Fail:
    Debug.Print "SiteInfo_Click failed: " & Err.Number & " - " & Err.Description
End Sub
当我运行上面的代码时,我只得到下面这段响应文本,而不是完整的页面。(您可以通过产品链接查看页面源代码,或在此处查看:https://www.dropbox.com/s/ah80jt7a25xcicp/source%20code.txt?dl=0)
<html><head>
<script type="text/javascript" src="//en.toppreise.ch/js/tpjs.js"></script>
<script type="text/javascript" src="//en.toppreise.ch/js/afxp.js"></script>
<script type="text/javascript" src="//en.toppreise.ch/js/jquery.min.js"></script>
<script type="text/javascript" src="//en.toppreise.ch/js/jquery-ui-autocomplete.min.js"></script>
</head><body>...
答案 0 :(得分:0)
以下代码有效,感谢 SIM:
' Downloads https://www.toppreise.ch/index.php?a=488002 and, for every element
' with class "altLinesOdd", writes one row to the active sheet:
'   column A = inner text of the element's first <a>
'   column B = inner text of its first "spaceVert nobreak" element
'              (presumably the price - confirm against the page markup)
' Rows are filled from row 1 downwards. Elements without an <a> are skipped
' entirely, so column B is never written without a matching column A entry.
'
' Uses the early-bound types XMLHTTP60 and HTMLDocument, so the project needs
' references to "Microsoft XML, v6.0" and "Microsoft HTML Object Library".
Sub Get_Price()
    Dim HTTP As New XMLHTTP60, HTML As New HTMLDocument
    Dim post As HTMLDivElement
    Dim R As Long               ' last row written; 0 until the first offer
    Dim hasName As Boolean      ' True when the current element produced a row

    With HTTP
        .Open "GET", "https://www.toppreise.ch/index.php?a=488002", False
        .send
        If .Status <> 200 Then
            Debug.Print "Get_Price: HTTP " & .Status & " " & .statusText
            Exit Sub
        End If
        HTML.body.innerHTML = .responseText
    End With

    For Each post In HTML.getElementsByClassName("altLinesOdd")
        hasName = False
        With post.getElementsByTagName("a")
            If .Length Then
                R = R + 1
                Cells(R, 1) = .Item(0).innerText
                hasName = True
            End If
        End With

        ' Only write column B for a row created from this same element.
        ' Otherwise R could still be 0 (Cells(0, 2) raises error 1004) or
        ' point at the previous offer's row and overwrite its value.
        If hasName Then
            With post.getElementsByClassName("spaceVert nobreak")
                If .Length Then Cells(R, 2) = .Item(0).innerText
            End With
        End If
    Next post
End Sub