我正在尝试收集此网站的所有房产数据: http://taxsales.lgbs.com/
目前有7,000多个属性,但在页面的任何给定视图中,我只能看到15 - 20,具体取决于屏幕分辨率。
我已经大致了解了如何在HTML中搜索属性名称和详细信息。因为初始搜索有警告屏幕,所以我使用以下代码“点击”“同意”按钮以查看后续搜索页面。您还可以看到我不知道如何仅查找属性详细信息,而是(基本上)从整个站点获取所有HTML并稍后在excel中进行筛选。
问题:
1)有没有办法一次查看所有属性的数据? - 或者 - 如何“遍历”结果的每个部分以最终收集所有这些?
2)我如何只收集物业地址,销售日期,销售类型等数据?
<li ng-if="listing.property.sale_date" class="ng-binding ng-scope"><label>Sale Date:</label> 4/5/18 9:00 AM</li>
物业销售日期HTML:
<a ng-click="listing.addressClick()" class="ng-binding"> 02863 Stouton St, Philadelphia PA 19134-3515 </a>
属性地址HTML:
<a href="" ng-click="selectPage(page + 1, $event)" class="ng-binding">Next</a>
下一步按钮HTML:
{{1}}
答案 0 :(得分:0)
这是一种使用Web请求(xhr)的hacky方法。查看页面请求,它看起来像是返回一个JSON对象。我正在解析这个JSON对象并将其转储到第一个工作表的范围内。这里有一些代码可以帮助您入门。
'You'll need the following references:
'MSXML v6.0
'Microsoft Scripting Runtime
'JSON project from: https://github.com/VBA-tools/VBA-JSON
Public Sub Scraper()
Dim webrequest As MSXML2.XMLHTTP60
Dim JSON As Object
Dim responses As Object
Dim itemdict As Variant
Dim i As Long
Dim j As Long
Dim k As Long
Dim item As Variant
Dim myarray As Variant: ReDim myarray(0 To 20, 0 To 5000)
Dim url As String: url = "http://taxsales.lgbs.com/api/property_sales/?in_bbox=-139.04111793750002%2C7.97834134877145%2C-54.40244606250002%2C61.39968867373271&offset=10&ordering=sale_date%2Caddress_full%2Cuid&sale_type=SALE%2CRESALE%2CSTRUCK+OFF%2CFUTURE+SALE"
For i = 0 To 10 'Do a loop to get SOME of the data, probably need a different loop here
With New MSXML2.XMLHTTP60
.Open "GET", url
.setRequestHeader "accept", "application/json, text/plain, */*"
.send
'Parse the response into a JSON dict
Set JSON = JsonConverter.ParseJson(.responseText)
url = JSON("next") ' the next URl to send a GET request
Set responses = JSON("results") 'Get the results Dict
On Error Resume Next ' getting an error, just ignoring for now
For Each itemdict In responses
j = 0
'add headers
If k = 0 Then
For Each item In itemdict
myarray(j, k) = item
j = j + 1
Next
End If
'add values
For Each item In itemdict
myarray(j, k) = itemdict(item)
j = j + 1
Next
k = k + 1
Next
On Error GoTo 0
End With
Next
ReDim Preserve myarray(0 To 20, 0 To k - 1)
ThisWorkbook.Sheets(1).Range("A1:T" & k - 1).Value = TransposeArray(myarray)
End Sub
'using this function as worksheetfunction.transpose causing issues
Public Function TransposeArray(myarray As Variant) As Variant
Dim X As Long
Dim Y As Long
Dim Xupper As Long
Dim Yupper As Long
Dim tempArray As Variant
Xupper = UBound(myarray, 2)
Yupper = UBound(myarray, 1)
ReDim tempArray(Xupper, Yupper)
For X = 0 To Xupper
For Y = 0 To Yupper
tempArray(X, Y) = myarray(Y, X)
Next
Next
TransposeArray = tempArray
End Function
输出: