我最近学会了scrapy。我想为每个不同的项添加一个属性,然后传递给下一个方法。具体来说,我想做类似的事情:
{'company_name': 'apple'}
{'company_name': 'ibm'}
但我目前有:
{'c_o_name': 'ibm'}
{'c_o_name': 'ibm'}
我的代码如下:
def start_requests(self):
self.driver.get(self.home_page)
cookies = self.driver.get_cookies()
company = ['apple','ibm']
company_html_url = []
company_item = CompanyItem()
for i in company:
self.driver.get(self.home_page)
self.driver.find_element_by_xpath("//input[@id='companyName' and @class='textbox criterion']").send_keys("",i)
self.driver.find_element_by_link_text("Companies").click()
time.sleep(4)
if len(self.driver.find_elements_by_css_selector('a.companyResultsName')) > 0:
company_item['c_o_name'] = i
company_link = WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "companyResultsName")))
company_link.click()
time.sleep(4)
html_source = self.driver.find_element_by_xpath("//div[@id='profileSection']").get_attribute("outerHTML")
f = open('%s.html'%(i), 'w')
f.write(html_source.encode('utf-8'))
f.close()
company_path = 'file:///Users/cengcengruihong/Desktop/scrapy_learning/zoomtest2/' + '%s.html'%(i)
company_html_url.append(company_path)
else:
print "%s company does not exist in zoominfo"%i
if company_html_url:
for i in company_html_url:
yield scrapy.Request(i, cookies=cookies, callback=self.parse, meta={'item':company_item})
else:
print "no url in company_html_url"
def parse(self, response):
sel = Selector(response)
company = response.meta['item']
print company
我该怎么做?