通过ajax发布请求从网站检索结果

时间:2018-07-24 11:39:13

标签: javascript ajax post web-scraping http-headers

我正在尝试执行一个简单的ajax发布请求,以从网站中检索一些数据。 详细地说,我正在尝试与网站回忆起包含某些信息的页面联系。 因此,我有一个主要网站和一个用来检索数据的页面。 我使用Google检查部分,特别是在检查器的网络字段的 xhr 部分中找到了该页面。 在我的代码中,我使用了网站用于联系页面的所有标头和有效载荷数据。
这是我用来实现目标的代码:

    var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest;
    var url = 'https://www.remax.pt/Webservices/MainWebService.asmx/GetCityList';
var body = {"SiteRegionID":"12","RegionID":"12","RegionRowID":"78","ProvinceID":"0",
"LanguageCode":"ITA","MinInternetCount":"0","SearchType":"","OfficeAgent":"0",
"EncodingLanguage":"PTG","OfficeAgentId":"0"};

var xhr = new XMLHttpRequest();
xhr.onload = function () {
    var data = xhr.responseText;
    if (xhr.readyState == 4 && xhr.status == "200") {
        console.table("results: "+data);
    } else {
        console.error("error: "+data);
    }
}
    xhr.open("POST", url, true); 
    xhr.setRequestHeader('Content-Type', 'application/json; charset=UTF-8');
    //xhr.setRequestHeader("Content-Type","text/html");
    xhr.setRequestHeader("Access-Control-Allow-Origin","*");
    xhr.setRequestHeader("accept", "application/json, text/javascript, */*; q=0.01");
    xhr.setRequestHeader("authority", "www.remax.pt");
    xhr.setRequestHeader("scheme", "https");
    xhr.setRequestHeader("path", "/Webservices/MainWebService.asmx/GetCityList");
    xhr.setRequestHeader("accept-language","it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7");
    //xhr.setRequestHeader('accept-encoding', 'gzip, deflate, br');
    //xhr.setRequestHeader("host", "https://www.remax.pt");
    //xhr.setRequestHeader('referer', 'https://www.remax.pt/PublicListingList.aspx');
    //xhr.setRequestHeader('content-length', '192');    
    //xhr.setRequestHeader('cookie','__cfduid=dc7dd48ccff40ee4f85840bfc35685b311531384150; PersonalizationMap=; PersonalizationGallery=SelectedCountryID=12; GtTransLang=ITA; SLINGSHOT=LanguageCode=it-IT; SessionId=1ac0ec84-6a03-4965-ba90-7eb686f66bf5; ASP.NET_SessionId=rgia1pblms2abf11ypsbiqgz; GtTrans=ENU; LastSearch=SiteRegionID=12&TransactionTypeUID=260&RegionID=12&RegionRowID=78&LocationText=Porto&LocationValue=YR78&PriceCurrency=EUR&ComRes=2; PersonalizationRegion=#mode=list&tt=260&cr=2&r=78&cur=EUR&la=All&sb=PriceIncreasing&page=1&sc=12&sid=a81a1d1d-ee36-4236-a72e-31343349c574; PersonalizationDate=2018-7-24 10:0:30');
    xhr.setRequestHeader("user-agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36");
    xhr.setRequestHeader("x-requested-with", "XMLHttpRequest");
    xhr.send(JSON.stringify(body);

实际上,我从未收到答案。我认为操作流程永远不会输入onLoad部分,因为从不打印if和else部分中的字符串。 我想指定一些标头被注释,因为我有这种类型的答案: 拒绝设置不安全的标头'nameHeader' 所以我决定暂时不使用它们。

我试图更改某些标头或添加一些新内容,但问题仍然存在,老实说,我不知道这是某些字段的语法问题还是我需要其他东西来执行可接受的请求。

为了完整起见,我插入了在检查器工具中找到的4个字段,这些字段指定了网站传递给调用页面的参数:

常规

 1. Request URL:       
    https://www.remax.pt/Webservices/MainWebService.asmx/GetCityList
 2. Request Method: POST 
 3. Status Code: 200  
 4. Remote Address: 104.25.40.105:443
 5. Referrer Policy: no-referrer-when-downgrade

响应头

  1. 列表项
  2. 缓存控制:私有,最大年龄= 0
  3. cf-ray:43f532b9e9886260-LIS
  4. 内容编码:br
  5. 内容类型:application / json; charset = utf-8
  6. 日期:2018年7月24日星期二09:00:44 GMT
  7. expect-ct:max-age = 604800,report-uri =“ https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct”
  8. 服务器:cloudflare
  9. 状态:200
  10. x-aspnet-version:4.0.30319
  11. 与x-ua兼容:IE = 9,IE = 8

请求标题

  1. 权威:www.remax.pt
  2. 方法:开机自检
  3. 路径:/Webservices/MainWebService.asmx/GetCityList
  4. 方案:https
  5. 接受:application / json,text / javascript, / ; q = 0.01
  6. 接受编码:gzip,deflate,br
  7. 接受语言:it-IT,it; q = 0.9,en-US; q = 0.8,en; q = 0.7
  8. 内容长度:192
  9. 内容类型:application / json; charset = UTF-8
  10. cookie:__cfduid = dc7dd48ccff40ee4f85840bfc35685b311531384150; PersonalizationMap =; PersonalizationGallery = SelectedCountryID = 12; GtTransLang = ITA; SLINGSHOT = LanguageCode = it-IT; SessionId = 1ac0ec84-6a03-4965-ba90-7eb686f66bf5; ASP.NET_SessionId = rgia1pblms2abf11ypsbiqgz; GtTrans = ENU; LastSearch = SiteRegionID = 12&TransactionTypeUID = 260&RegionID = 12&RegionRowID = 78&LocationText = Porto&LocationValue = YR78&PriceCurrency = EUR&ComRes = 2; PersonalizationRegion =#mode = list&tt = 260&cr = 2&r = 78&cur = EUR&la = All&sb = PriceIncreasing&page = 1&sc = 12&sid = a81a1d1d-ee36-4236-a72e-31343349c574; PersonalizationDate = 2018-7-24 10:0:30
  11. 来源:https://www.remax.pt
  12. 引荐来源:https://www.remax.pt/PublicListingList.aspx
  13. 用户代理:Mozilla / 5.0(Windows NT 6.1; Win64; x64)AppleWebKit / 537.36(KHTML,例如Gecko)Chrome / 67.0.3396.99 Safari / 537.36
  14. x-requested-with:XMLHttpRequest

请求有效载荷: {“ SiteRegionID”:“ 12”,“ RegionID”:“ 12”,“ RegionRowID”:“ 78”,“ ProvinceID”:“ 0”,“ LanguageCode”:“ ITA”,“ MinInternetCount”:“ 0”,“ SearchType“:”“,” OfficeAgent“:0,” EncodingLanguage“:” PTG“,” OfficeAgentId“:0}

0 个答案:

没有答案