我正在尝试使用 Python 的 Scrapy 框架抓取数据,但遇到了一个问题:登录成功之后,爬虫没有打开所需的 URL。我怀疑这与会话(session)有关,但并不确定。下面的代码可以成功登录,但之后不会打开代码中 link 变量所指向的 URL。请仔细阅读代码并给我一些指导。
from scrapy.spider import BaseSpider
from scrapy.http import FormRequest
import scrapy
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from first.items import FirstItem
from scrapy.http import FormRequest, Request
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
import urllib2
import csv
import time
import re
import locale
import os
import sys
import sys
import unicodedata
import base64
class MySpider(BaseSpider):
    """Log in through the start page's form, then fetch and dump one page.

    Flow: ``parse`` submits the login form found on the start page,
    ``after_login`` inspects the login response and requests the target
    URL, and ``parse1`` writes the extracted markup to ``f.txt``.
    """

    name = "craig"
    allowed_domains = ["linkedin.com"]
    start_urls = ["https://www.linkedin.com"]

    # Login credentials. The original code referenced an undefined
    # ``password`` name, which raised NameError inside ``parse``. Keeping
    # them as class attributes lets them be overridden per run with spider
    # arguments (``-a username=... -a password=...``).
    username = "name"
    password = ""

    def parse(self, response):
        """Submit the login form from the start page.

        Returns a form request whose response is handled by
        ``after_login``.
        """
        # NOTE(review): from_response fills in the first form on the page
        # unless told otherwise; confirm that the 'username'/'password'
        # keys match the real field names of the login form.
        return scrapy.FormRequest.from_response(
            response,
            formdata={"username": self.username, "password": self.password},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Check the login result and request the page to scrape.

        Yields nothing when the failure marker is present in the response;
        otherwise yields one request handled by ``parse1``.
        """
        # response.body is bytes, so compare against a bytes literal; this
        # works on both Python 2 and Python 3.
        if b"authentication failed" in response.body:
            self.logger.error("Login failed")
            return

        link = "https://www.linkedin.com/vsearch/someurl"
        # A plain GET needs no form encoding, so Request is used instead of
        # FormRequest. dont_filter=True bypasses the duplicate filter.
        yield Request(link, callback=self.parse1, dont_filter=True)

    def parse1(self, response):
        """Write the extracted markup of the response to ``f.txt``."""
        # "//*" selects every element; the list of extracted strings is
        # stringified exactly as before.
        html = str(response.xpath("//*").extract())
        # The context manager closes (and flushes) the file; the original
        # never closed the handle.
        with open("f.txt", "w") as f:
            f.write(html)