我正在试图刮取Indeed.com并为动态IP购买了crawlera服务,因为它确实经常阻塞。
以下是如何将Crawlera与curl命令行工具一起使用的示例:
curl -U key: -x proxy.crawlera.com:8010 http://httpbin.org/ip
要下载HTTPS页面,我必须使用证书文件。我是VBA的初学者,我不知道如何使用代理。我想利用这个代理在VBA中发出请求,以获取信息。
Dim ie As SHDocVw.InternetExplorer
Dim doc As IHTMLDocument
Dim SearchForm As HTMLFormElement
Dim WhatInputBox As HTMLInputElement
Dim WhereInputBox As HTMLInputElement
Dim SubmitButton As HTMLInputButtonElement
Dim HTMLelement As IHTMLElement
Dim oInputs As IHTMLElementCollection
Dim Listings As IHTMLElementCollection
Dim i As Long, j As Long, r As Long, lngRetVal As Long
Dim TotalResumes As Long, ResumesPerPage As Long, TotalPages As Long, PageNo As Long
Dim firstURL As String, nextURL As String
Set ie = New SHDocVw.InternetExplorer
ie.Visible = True
Set myIE = New SHDocVw.InternetExplorer
myIE.Visible = False
Dim StringFound As Boolean
StringFound = False
If Len(Trim(Range("D1").Text)) > 0 Then
StringFound = True
End If
If Len(Trim(Range("F1").Text)) > 0 Then
StringFound = True
If StringFound = True Then
Const cURL = "http://www.indeed.com/resumes?co="
End If
ie.Navigate cURL & Mid(ActiveSheet.Shapes("Country").ControlFormat.List(Range("J1").Value), InStr(1, ActiveSheet.Shapes("Country").ControlFormat.List(Range("J1").Value), "(", vbTextCompare) + 1, 2)
Application.StatusBar = "Going to the Website........."
If Right(Range("H1").Value, 1) = "\" Then
myFolderPath = Range("H1").Value
Else
myFolderPath = Range("H1").Value & "\"
End If
Do While ie.Busy
Loop
Application.StatusBar = "Going to the Website........."
Do
Loop Until ie.ReadyState = READYSTATE_COMPLETE '= 4
Sleep 5000
Dim ieElement As Object
Dim ieElement2 As Object
Set ieElement = ie.Document.getElementById("query")
ieElement.Value = Range("D1")
Set ieElement2 = ie.Document.getElementById("location")
ieElement2.Value = Range("F1")
ieElement.Document.getElementById("submit").Click
Do While ie.Busy
DoEvents
Loop
Application.StatusBar = "Searching for Resumes........."
Do
Loop Until ie.ReadyState = READYSTATE_COMPLETE '= 4
Sleep 5000
Range("A3:K1048576").ClearContents
Set doc = ie.Document
firstURL = doc.Url
If Len(doc.getElementById("result_count").innerHTML) = 0 Then
MsgBox " No results found " & vbCrLf & "Pls check the website", vbCritical
ie.Quit
Set ie = Nothing
myIE.Quit
Set myIE = Nothing
Application.StatusBar = ""
Exit Sub
End If
如何使用crawlera代理发出请求?我试着搜索如何提出此请求,但找不到可能有用的请求。
答案 0 :(得分:0)
试试这个:
Function GetResult(url As String) As String
Dim XMLHTTP As Object, ret As String
Set XMLHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
XMLHTTP.Open "GET", url, False
XMLHTTP.setProxy 2, "proxy.crawlera.com:8010"
XMLHTTP.setProxyCredentials "APIKEY", ""
XMLHTTP.send
ret = XMLHTTP.ResponseText
GetResult = ret
End Function
Sub TestCrawleraVBA()
Debug.Print GetResult("http://httpbin.org/ip")
End Sub
不要忘记将 APIKEY 替换为您的。