我正在学习如何使用Scrapy
import scrapy
class TestSetSpider(scrapy.Spider):
    """Spider that extracts product names from the example page."""

    name = "test_spider"
    start_urls = ['https://example.html']

    def parse(self, response):
        """Yield one ``{'name': ...}`` item per product-name div."""
        product_divs = response.xpath('//div[@class="product-name"]')
        for div in product_divs:
            item_name = div.xpath('h1/text()').extract_first()
            yield {
                'name': item_name,
            }
我用命令 scrapy crawl test_spider -o test.csv 运行这个蜘蛛。
这对 //div[@class="product-name"] 是有效的，但我不知道如何在同一个蜘蛛文件中添加另一个 CSS / XPath 选择器。
我正在尝试这个,但它不起作用
import scrapy
class TestSetSpider(scrapy.Spider):
    """Spider that extracts data from two different div classes.

    Bug fix: the original class defined ``parse`` twice. In Python a
    second method of the same name silently replaces the first, so only
    the ``another-class`` loop ever ran. Both loops now live in a single
    ``parse`` callback, which Scrapy invokes once per response; each
    ``yield`` emits an independent item.
    """

    name = "test_spider"
    start_urls = ['https://example.html']

    def parse(self, response):
        """Yield name items, then color items, from one response."""
        for node in response.xpath('//div[@class="product-name"]'):
            yield {
                'name': node.xpath('h1/text()').extract_first(),
            }
        for node in response.xpath('//div[@class="another-class"]'):
            yield {
                'color': node.xpath('h1/a/text()').extract_first(),
            }
请帮我这样做。
答案 0（得分：0）：
def parse(self, response):
    """Collect items from two div classes and append them to out.csv.

    Fixes over the original answer:
    - ``lst.append('name': ...)`` is a SyntaxError; a dict literal is
      appended instead.
    - The file handle was opened as ``out_file`` but written/closed via
      an undefined name ``out``; ``data`` was also undefined.
    - A ``with`` block now guarantees the file is closed even on error.
    """
    product_name_lst = []
    # Append one dict per matching div (the original's colon syntax
    # inside append() does not parse).
    for node in response.xpath('//div[@class="product-name"]'):
        product_name_lst.append(
            {'name': node.xpath('h1/text()').extract_first()}
        )

    another_product_name_lst = []
    for node in response.xpath('//div[@class="another-product-name"]'):
        another_product_name_lst.append(
            {'name': node.xpath('h1/text()').extract_first()}
        )

    # Mode 'a' appends to the file instead of rewriting it.
    with open('out.csv', 'a') as out_file:
        for item in product_name_lst + another_product_name_lst:
            out_file.write('{}\n'.format(item['name']))
答案 1（得分：0）：
只需使用两个 for 循环：
import scrapy
class TestSetSpider(scrapy.Spider):
    """Spider that collects h1 names from two kinds of divs."""

    name = "test_spider"
    start_urls = ['https://example.html']

    def parse(self, response):
        """Yield a ``{'name': ...}`` item for every div matched by
        either XPath query, product-name divs first."""
        queries = (
            '//div[@class="product-name"]',
            '//div[@class="another-class"]',
        )
        for query in queries:
            for node in response.xpath(query):
                yield {
                    'name': node.xpath('h1/text()').extract_first(),
                }