我想导入餐厅数据,例如餐厅名称,电话号码,网站和地址,以使其表现出色,但不幸的是,我获得赞助商的结果,也没有获得网站和完整地址,因为当我们单击酒店时,它会显示在内页上名称。我在平台的帮助下创建了一个使用的代码,但没有帮助。请纠正我的代码中的问题。网站:https://www.yelp.com/searchcflt=restaurants&find_loc=San%20Francisco%2C%20CA&start=
这是我的代码:
Sub GetInfo()
Const URL$ = "https://www.yelp.com/search?cflt=restaurants&find_loc=San%20Francisco%2C%20CA&start="
Dim Http As New XMLHTTP60, Html As New HTMLDocument, Htmldoc As New HTMLDocument, page&, I&
For page = 0 To 1 ' this is where you change the last number for the pages to traverse
With Http
.Open "GET", URL & page * 30, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("[class*='searchResult']")
For I = 0 To .Length - 1
Htmldoc.body.innerHTML = .Item(I).outerHTML
On Error Resume Next
r = r + 1: Cells(r, 1) = Htmldoc.querySelector("[class*='heading--h3'] > a").innerText
Cells(r, 2) = Htmldoc.querySelector("[class*='container'] > [class*='display--inline-block']").innerText
' Cells(r, 3) = Htmldoc.querySelector("[class*='container'] > address").innerText
'Cells(r, 4) = Htmldoc.querySelector("[class*='container'] > address").NextSibling.innerText
'Inner loop creation
Cells(r, 5) = Htmldoc.querySelector("[class*='container'] > website").href ' Extract from window after clicking on hotel name
Cells(r, 6) = Htmldoc.querySelector("[class*='container'] > fulladdress").innerText ' Extract from window after clicking on hotel name
On Error GoTo 0
Next I
End With
Next page
End Sub
答案 0 :(得分:3)
您可以使用免费的API从business_search端点获得前50名。在查询字符串中传递sort参数以获得最高评分。
使用json解析器(例如jsonconverter.bas)来处理响应。在该链接中的代码安装到名为JsonConverter的标准模块中之后,请转到VBE>工具>引用>添加对Microsoft脚本运行时的引用。
API指令为here。您需要设置一个test app,它需要一些基本的用户信息,并验证您的电子邮件。然后,您将收到authentication的API密钥,该密钥在授权标头中传递,如下所示。
还返回了其他信息,您可以根据需要对其进行解析。
Option Explicit
Public Sub GetTopRestuarants()
Dim json As Object, headers(), r As Long, c As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://api.yelp.com/v3/businesses/search?term=restuarant&location=san-francisco&limit=50&sort_by=rating", False
.setRequestHeader "Authorization", "Bearer yourAPIkey"
.send
Set json = JsonConverter.ParseJson(.responseText)("businesses")
headers = Array("Restaurant name", "phone", "website", "address")
Dim results(), item As Object
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1
results(r, 1) = item("name")
results(r, 2) = item("phone")
results(r, 3) = item("url")
Dim subItem As Variant, address As String
address = vbNullString
For Each subItem In item("location")("display_address")
address = address & Chr$(32) & subItem
Next
results(r, 4) = Trim$(address)
Next
End With
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
返回前50名中的前20名示例:
偷取者
请注意,指定sort_by是对Yelp搜索的建议(并非严格执行),该搜索会考虑多个输入参数以返回最相关的结果。例如,评级排序不是严格按照评级值排序,而是根据调整后的评级值进行排序,该值考虑了评级的数量,类似于贝叶斯平均值。这是为了防止一次审核就将结果偏向企业。
答案 1 :(得分:2)
这是让您从其内页解析结果的方法之一。我无法再访问该网页来为您提供进一步的帮助。但是,试一试。我想它将起作用:
Sub GetInfo()
Const URL$ = "https://www.yelp.com/search?cflt=restaurants&find_loc=San%20Francisco%2C%20CA&start="
Const base$ = "https://www.yelp.com"
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim oTitle$, oPhone As Object, Htmldoc As New HTMLDocument
Dim R&, newUrl$, I&, oWeb As Object, page&, oAddress As Object
[A1:D1] = [{"Name","Phone","Address","Website"}]
For page = 1 To 3 'this is where you change the last number for this script to traverse
With Http
.Open "GET", URL & page * 30, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("[class*='searchResult'] [class*='heading--h3'] > a")
For I = 0 To .Length - 1
If Not InStr(.item(I).getAttribute("href"), "/adredir?") > 0 Then
oTitle = .item(I).innerText
newUrl = Replace(.item(I).getAttribute("href"), "about:", base)
With Http
.Open "GET", newUrl, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
Htmldoc.body.innerHTML = .responseText
End With
R = R + 1: Cells(R + 1, 1) = oTitle
Set oPhone = Htmldoc.querySelector(".biz-phone")
If Not oPhone Is Nothing Then
Cells(R + 1, 2) = oPhone.innerText
End If
Set oAddress = Htmldoc.querySelector(".map-box-address")
If Not oAddress Is Nothing Then
Cells(R + 1, 3) = WorksheetFunction.Clean(oAddress.innerText)
End If
Set oWeb = Htmldoc.querySelector(".biz-website > a")
If Not oWeb Is Nothing Then
Cells(R + 1, 4) = oWeb.innerText
End If
End If
Next I
End With
Next page
End Sub
顺便说一句,广告已被踢出。