你好我是python / scrapy世界的新手,我需要将我的产品列表导出到csv,如下例所示: what i want 但我得到这个: what i got
///// 的蜘蛛: /////
import scrapy
import csv
from escrap.items import EscrapItem
class EscrapSpider(scrapy.Spider):
name = "tunisianet"
allowed_domains = ["tunisianet.com.tn"]
start_urls = [
"http://www.tunisianet.com.tn/385-logiciels-informatique-tunisie/"
]
def parse(self, response):
for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
item = EscrapItem()
item['revendeur'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
item['produit'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/text()').extract())
item['lien'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/@href').extract())
item['description'] = '\n'.join(sel.xpath('//*[contains(@class, "product_desc")]/a/text()').extract())
item['prix'] = '\n'.join(sel.xpath('//*[contains(@class, "price")]/text()').extract())
data = [item['revendeur'], item['produit'], item['lien'], item['description'], item['prix']]
out = open('out.csv', 'w')
for row in data:
for column in row:
out.write(column.encode('utf-8'))
return data
///// 的项目: /////
import scrapy
class EscrapItem(scrapy.Item):
revendeur = scrapy.Field()
produit = scrapy.Field()
lien = scrapy.Field()
description = scrapy.Field()
prix = scrapy.Field()
///// 的管道: /////
class EscrapPipeline(object):
# put all words in lowercase
words_to_filter = ['politics', 'religion']
def process_item(self, item, spider):
for word in self.words_to_filter:
if word in unicode([item['revendeur'],item['produit'],item['lien'],item['description'],item ['prix']]).lower():
raise DropItem("Contains forbidden word: %s" % word)
else:
return item
///// 我的设置: /////
BOT_NAME = 'escrap'
SPIDER_MODULES = ['escrap.spiders']
NEWSPIDER_MODULE = 'escrap.spiders'
ITEM_PIPELINES = {'escrap.pipelines.EscrapPipeline': 1}
FEED_EXPORTERS = {
'csv': 'escrap.escrap_csv_item_exporter.EscrapCsvItemExporter',
}
FIELDS_TO_EXPORT = [
'revendeur',
'produit',
'lien',
'description',
'prix'
]
答案 0 :(得分:1)
在解析项目时,您不需要自己创建csv文件,scrapy可以默认导出到csv文件。
将您的USE [CTU SQL Project]
SELECT Advisors.First_Name, Advisors.Last_Name...and so on.
方法更改为:
parse
稍后在调用scrapy时,您可以通过以下方式调用它:
def parse(self, response):
for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
item = EscrapItem()
item['revendeur'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
item['produit'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/text()').extract())
item['lien'] = '\n'.join(sel.xpath('//*[contains(@class, "center_block")]/h2/a/@href').extract())
item['description'] = '\n'.join(sel.xpath('//*[contains(@class, "product_desc")]/a/text()').extract())
item['prix'] = '\n'.join(sel.xpath('//*[contains(@class, "price")]/text()').extract())
yield item
现在您已将所有项目导出到csv文件。
如果您仍想在自己的管道上进行控制,请点击此处create your own exporter。它希望如此:
scrapy crawl myspider -o output.csv
要创建自己的管道,请务必完全read。
答案 1 :(得分:0)
您应该将单元格设置为要为数据写入的位置。类似的东西:
worksheet.write('A1','thing you want to write')
或者可能默认在'A'
中写内容答案 2 :(得分:0)
它导出但不是我想要的形式,我希望像这样的形式:
http://i.imgur.com/r8LaVem.png,但我得到了这个http://i.imgur.com/8IVnlui.png。
这是我的最后一堂课:
def parse(self, response):
item = TfawItem()
data= []
items = []
out = open('out.csv', 'a')
x = response.xpath('//*[contains(@class, "ajax_block_product")]')
for i in range(0, len(x)):
item['revendeur'] = response.xpath('//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet')[i]
item['produit'] = response.xpath('//*[contains(@class, "center_block")]/h2/a/text()').extract()[i]
item['url'] = response.xpath('//*[contains(@class, "center_block")]/h2/a/@href').extract()[i]
item['description'] = response.xpath('//*[contains(@class, "product_desc")]/a/text()').extract()[i]
item['prix'] = response.xpath('//*[contains(@class, "price")]/text()').extract()[i]
data = item['revendeur'], item['produit'], item['url'], item['description'], item['prix']
out.write(str(data))
out.write('\n')