I need to monitor a web page for available products, and I am using the Scrapy framework. When a product is found, I send a notification. The site's homepage contains a list of products with some of their information; the rest of the information is on each product's own page.
Why does the program pass the line marked in the comment below and then, after a few minutes, shut down without any error?
import json

import scrapy

# Product and send() come from other modules of the project (not shown here).

class Spider(scrapy.Spider):
    name = 'productpage'
    start_urls = ['https://www.productpage.com']

    def parse(self, response):
        for product in response.css('article'):
            link = 'https://www.productpage.com' + product.css('a::attr(href)').get()
            id = link.split('/')[-1]
            title = product.css('a > span::attr(content)').get()
            price = product.css('a > figure::text').get()
            # image and size are filled in later by parse_product
            image = None
            size = []
            product = Product(self.name, id, title, price, image, size, link)
            yield scrapy.Request('{}.json'.format(link), callback=self.parse_product,
                                 meta={'product': product})

        # Re-request the listing page so the spider keeps monitoring it.
        yield scrapy.Request(url=response.url, callback=self.parse, dont_filter=True)
        # The program passes this line, and after a few minutes it closes without any error.

    def parse_product(self, response):
        product = response.meta['product']
        jsonresponse = json.loads(response.text)
        product.image = jsonresponse['images'][0]['small_url']
        for size in jsonresponse['available_sizes']:
            product.size.append(u'{} | {}'.format(size['name'], size['id']))
        send(product)
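For reference, Product and send() are used above but not shown in the question. Below is a minimal, hypothetical sketch of what they might look like, assuming Product is just a container for the scraped fields (in the same order as the call in parse) and send() only prints the notification; the real project presumably sends it somewhere else (email, messaging API, etc.).

# Hypothetical stand-ins for the Product class and send() helper used by the spider.
# Field order matches the call Product(self.name, id, title, price, image, size, link).
class Product:
    def __init__(self, spider_name, id, title, price, image, size, link):
        self.spider_name = spider_name
        self.id = id
        self.title = title
        self.price = price
        self.image = image   # set later in parse_product
        self.size = size     # list, appended to in parse_product
        self.link = link

def send(product):
    # Placeholder notification: print the product that became available.
    print('Available: {} ({}) -> {}'.format(product.title, product.price, product.link))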