我已经开发了代码,可以将名称,地址,网站,联系方式从页面抓取到excel工作表中。因此,由于我不熟悉JSON和Web抓取,因此无法解决此问题。第一页链接是 https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA;但是此代码无法正常运行。而且,我在其他网站上使用了相同的代码,然后运行良好。这是我的代码
Option Explicit
Public Sub FetchTabularInfo()
Dim Http As XMLHTTP60, Html As HTMLDocument, col As Variant, csrf As Variant, i&, page As Long
Dim headers(), ws As Worksheet, iCol As Collection
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("SrNo", "Name", "Address", "Mobile", "Email")
Set Http = New XMLHTTP60
Set Html = New HTMLDocument
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For page = 1 To 73 'To cover all pages
With Http
.Open "GET", "https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA" & CStr(page), Falsev 'Last letter of URL is page number whose range will be given in outerloop
.send
Html.body.innerHTML = .responseText
End With
Set iCol = New Collection
With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
For i = 0 To .Length - 1
iCol.Add Split(Split(.Item(i).getAttribute("onclick"), "(""")(1), """)")(0)
Next i
End With
Dim r As Long, results()
ReDim results(1 To iCol.Count, 1 To UBound(headers) + 1)
r = 0
For Each col In iCol
r = r + 1
With Http
.Open "GET", "https://www.yelp.com/index.php/ajaxcontroller/get_csrf", False
.send
csrf = .responseText
End With
csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)
Dim json As Object
With Http
.Open "POST", "https://www.yelp.com/index.php/ajaxcontroller/show_ngo_info", False
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "id=" & col & "&csrf_test_name=" & csrf
Set json = JsonConverter.ParseJson(.responseText)
Dim orgName As String, address As String, srNo As Long, city As String
Dim state As String, tel As String, mobile As String, website As String, email As String
On Error Resume Next
orgName = json("registeration_info")(1)("nr_orgName")
address = json("registeration_info")(1)("nr_add")
srNo = r '<unsure where this is coming from.
mobile = json("infor")("0")("Mobile")
email = json("infor")("0")("Email")
On Error GoTo 0
Dim arr()
arr = Array(srNo, orgName, address, tel, email)
For i = LBound(headers) To UBound(headers)
results(r, i + 1) = arr(i)
Next
End With
Next col
Set iCol = Nothing: Set json = Nothing
ws.Cells(GetLastRow(ws) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
Next
End Sub
Public Function GetLastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
GetLastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function