Scrapy Pagination XHR 400 Bad Request

时间:2017-09-24 11:31:14

标签: python pagination scrapy xmlhttprequest

我正在尝试从https://www.magzter.com/magazines/listAllIssues/503

获取所有网址

页面每组显示12本杂志,向下滚动时触发分页,并继续加载接下来的12本杂志

调试后发现,后续发出的请求如下

https://www.magzter.com/magazines/listAllIssues/503/12
https://www.magzter.com/magazines/listAllIssues/503/24

xml Request

但是通过

https://www.magzter.com/magazines/listAllIssues/503/12请求
400 Bad Request

Scrapy中是否有针对这种情况的实现?如果有,请提供示例脚本。

或者有没有其他能够模拟无限滚动并可与Scrapy框架配合使用的库?

1 个答案:

答案 0 :(得分:1)

问题是请求是一个AJAX请求而不是发送它HANDLE PROTO_HAND::GrabPerfectHandle(const wchar_t *__processName) { if (__processName == nullptr) return reinterpret_cast<HANDLE>(PRH_ERR_BADPARAM); NTSTATUS __returnError; SYSTEM_PROCESS_INFORMATION *__systemProcessInfo; void *__systemInfo; void *__allocationBuffer; __allocationBuffer = VirtualAlloc(0, 1024 * 1024, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); if (!__allocationBuffer) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTALLOC); __systemProcessInfo = reinterpret_cast<SYSTEM_PROCESS_INFORMATION*>(__allocationBuffer); if (!NT_SUCCESS(__returnError = NtQuerySystemInformation(SystemProcessInformation, __systemProcessInfo, 1024 * 1024, 0))) { if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_NTQUERYFAIL); } while (__systemProcessInfo->NextEntryOffset) { if (__systemProcessInfo->ImageName.Buffer != nullptr) { if (wcscmp(__systemProcessInfo->ImageName.Buffer, __processName) == 0) { HANDLE __basicHandle = OpenProcess(PROCESS_ALL_ACCESS, false, __systemProcessInfo->UniqueProcessId); HANDLE __perfectHandle{ 0 }; if (!__basicHandle) { if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_OPENPROCFAIL); } if (!NT_SUCCESS(NtDuplicateObject(GetCurrentProcess(), __basicHandle, GetCurrentProcess(), &__perfectHandle, PROCESS_ALL_ACCESS, 0, DUPLICATE_SAME_ACCESS))) { if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_DUPHANDFAIL); } /*if(!NtClose(__basicHandle)) { if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); if(!CloseHandle(__basicHandle)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTCLOSEHAND); return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTCLOSEHAND); } if(__basicHandle != nullptr) { if 
(!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTCLOSEHAND); }*/ _OBJECT_HANDLE_FLAG_INFORMATION __objectInformation{ 0 }; __objectInformation.ProtectFromClose = { true }; if (!NT_SUCCESS(NtSetInformationObject(__perfectHandle, ObjectHandleFlagInformation, &__objectInformation, sizeof(_OBJECT_HANDLE_FLAG_INFORMATION)))) { if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_PFCFAIL); } if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return __perfectHandle; } } __systemProcessInfo = reinterpret_cast<SYSTEM_PROCESS_INFORMATION*>(reinterpret_cast<BYTE*>(__systemProcessInfo) + __systemProcessInfo->NextEntryOffset); } if (!VirtualFree(__allocationBuffer, 0, MEM_RELEASE)) return reinterpret_cast<HANDLE>(PRH_ERR_CANNOTDEALLOC); return reinterpret_cast<HANDLE>(PRH_ERR_FELLTROUGH); } 标头使它成为一个400错误的请求。无法直接从shell命令行发送标头,因此您需要启动shell并键入命令以使用标头获取请求

X-Requested-With: XMLHttpRequest