我的输出如下
0 winner loser
1 winner1
2 loser1
3 winner2
4 loser2
5 winner3
6 loser3
如何删除空单元格以使赢家和输家值在同一行?我试图找到向管道添加新线路参数但没有运气。是否有任何方法可以覆盖管道以仅写入项目是否具有行的值,以便输出可以在同一行?
spider.py
import scrapy
from scrapy_splash import SplashRequest
from scrapejs.items import SofascoreItemLoader
from scrapy import Spider
import scrapy
import json
from scrapy.http import Request, FormRequest
class MySpider(scrapy.Spider):
name = "jsscraper"
start_urls = ["https://www.sofascore.com/tennis/2018-02-07"]
def start_requests(self):
for url in self.start_urls:
yield SplashRequest(url=url,
callback=self.parse,
endpoint='render.html',
args={'wait':3.5})
def parse(self, response):
for row in response.css('.event-team'):
il = SofascoreItemLoader(selector=row)
il.add_css('winner' , '.event-team:nth-
child(2)::text')
il.add_css('loser' , '.event-team:nth-
child(1)::text')
yield il.load_item()
pipline.py
from scrapy.exporters import CsvItemExporter
class ScrapejsPipeline(object):
def process_item(self, item, spider):
return item
class CsvPipeline(object):
def __init__(self):
self.file = open("quotedata2.csv", 'w+b')
self.exporter = CsvItemExporter(self.file, str)
self.exporter.start_exporting()
def close_spider(self, spider):
self.exporter.finish_exporting()
self.file.close()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
items.py
import scrapy
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst, MapCompose,
from operator import methodcaller
from scrapy import Spider, Request, Selector
class SofascoreItem(scrapy.Item):
loser = scrapy.Field()
winner = scrapy.Field()
#date = scrapy.Field()
class SofascoreItemLoader(ItemLoader):
default_item_class = SofascoreItem
default_input_processor = MapCompose(methodcaller('strip'))
default_output_processor = TakeFirst()