I am using a pipeline to export data from a Scrapy project to a CSV file. Each row takes its data from a single Scrapy item, plus the extraction date.
I would like to know how I can add the URL the item was extracted from at the end of each row.
Here is the code of my pipeline:
# module-level imports used below
import csv
from datetime import datetime

def open_spider(self, spider):
    if spider.name == "LandSalesLinks":
        self.file = csv.writer(open('links.csv', 'w'), quoting=csv.QUOTE_NONE)
    elif spider.name == "LandSalesDetails":
        now = datetime.now()
        filepath = u"E:/Dropbox/Αγορά Ακινήτων/Πωλήσεις Γης/Αρχεία προς ένταξη/"
        filename = str(now.year)+"-"+str(now.month)+"-"+str(now.day)+" "+'details.csv'
        self.file = csv.writer(open(filepath+filename, 'w'), delimiter=';', quoting=csv.QUOTE_NONE)
        # Header row of the details file
        row = [
            "extraction_date",
            "regionA",
            "regionB",
            "regionC",
            "regionD",
            "location_name",
            "category",
            "area",
            "price",
            "city_plan",
            "structure_factor",
            "coverage_factor",
            "facade_length",
            "facade_count",
            "airy",
            "slope",
            "artio",
            "oikodomisimo",
            "me_adia",
            "ktizei",
            "availability",
            "availability_from",
            "antiparoxi",
            "view",
            "dist_from_sea",
            "paling",
            "supplies",
            "drilling",
            "with_building",
            "corner_plot",
            "mesites",
            "epaggelmatiki_xrisi",
            "dimensions",
            "contains"
        ]
        self.file.writerow(row)
def process_item(self, item, spider):
    if spider.name == "LandSalesLinks":
        # Declaring an empty list that represents a row of the table
        row = []
        # First and only column
        row.append("http://www.xe.gr"+str(item['link'][0])+"?mode=spec")
        # Writing the row to the file
        self.file.writerow(row)
        return item
    elif spider.name == "LandSalesDetails":
        def append2(row, item):
            # appends the first value of a field, or "" when the field is empty
            if item != []:
                row.append(item[0].encode('utf-8').strip())
            else:
                row.append("")
        # Declaring an empty list that represents a row of the table
        row = []
        now = datetime.now()
        row.append(str(now.day)+"/"+str(now.month)+"/"+str(now.year))
        # append2(row, item['region']) <-- old
        # Splitting region into regionA, regionB, regionC and regionD
        region = item['region'][0]
        reglist = region.split(" > ")
        # print reglist[0].strip()
        # print reglist[1].strip()
        # print reglist[2].strip()
        # print reglist[3].strip()
        # s = input("stop")
        # Region A
        row.append(reglist[0].strip().encode('utf-8'))
        # Region B
        try:
            row.append(reglist[1].strip().encode('utf-8'))
        except IndexError:
            row.append("")
        # Region C
        try:
            row.append(reglist[2].strip().encode('utf-8'))
        except IndexError:
            row.append("")
        # Region D
        try:
            row.append(reglist[3].strip().encode('utf-8'))
        except IndexError:
            row.append("")
        append2(row, item['location_name'])
        append2(row, item['category'])
        append2(row, item['area'])
        append2(row, item['price'])
        append2(row, item['city_plan'])
        append2(row, item['structure_factor'])
        append2(row, item['coverage_factor'])
        append2(row, item['facade_length'])
        append2(row, item['facade_count'])
        append2(row, item['airy'])
        append2(row, item['slope'])
        append2(row, item['artio'])
        append2(row, item['oikodomisimo'])
        append2(row, item['me_adia'])
        append2(row, item['ktizei'])
        append2(row, item['availability'])
        append2(row, item['availability_from'])
        append2(row, item['antiparoxi'])
        append2(row, item['view'])
        append2(row, item['dist_from_sea'])
        append2(row, item['paling'])
        append2(row, item['supplies'])
        append2(row, item['drilling'])
        append2(row, item['with_building'])
        append2(row, item['corner_plot'])
        append2(row, item['mesites'])
        append2(row, item['epaggelmatiki_xrisi'])
        append2(row, item['dimensions'])
        append2(row, item['contains'])
        # Writing the row to the file
        self.file.writerow(row)
        return item
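As an aside, the file behind each csv.writer here is opened inline and never closed, so the last rows may not be flushed to disk until the interpreter exits. A minimal sketch of closing it through Scrapy's close_spider pipeline hook, assuming the raw handle is also kept on the pipeline as self.csvfile (a name not in the original):

def open_spider(self, spider):
    # keep a reference to the raw file so it can be closed later
    self.csvfile = open('links.csv', 'w')
    self.file = csv.writer(self.csvfile, quoting=csv.QUOTE_NONE)

def close_spider(self, spider):
    # called by Scrapy when the spider finishes; flushes and releases the file
    self.csvfile.close()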
Answer 0 (score: 0)
You just need to add a url field to your item and set it in your spider's callback:

def parse(self, response):
    item = MyItem()
    item['url'] = response.url
    yield item

and then use it in your pipeline:
def process_item(self, item, spider):
    row = []
    row.append(item['url'])
    # ... append the remaining fields here ...
    # and then remove it if you don't want it in your item
    del item['url']
    return item
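To tie this back to the pipeline in the question, the URL would become the last column of both the header and the data rows. A minimal sketch of the pieces involved (MyItem is the class from the snippet above; the rest mirrors the question's code and is not a complete listing):

import scrapy

class MyItem(scrapy.Item):
    # ... all the existing fields ...
    url = scrapy.Field()

# in open_spider, add "url" as the last header column:
row = [
    "extraction_date",
    # ... the other column names ...
    "contains",
    "url"
]

# in process_item, append it right before writing the row:
row.append(item['url'])
self.file.writerow(row)

One caveat: if the url field were populated through an ItemLoader rather than assigned directly, it would arrive as a list like the other fields in the question, and item['url'][0] (or the append2 helper) would be the safer way to read it.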