Scrapy 导出 CSV 时所有数据都落在同一个单元格中

时间:2016-03-03 23:37:40

标签: python-2.7 csv web-scraping scrapy web-crawler

我想导出一个 CSV 文件,让每条观测数据各占一行。但是,我所有的观测结果都被写进了同一个单元格。我尝试过用 yield item 来代替 items.append(item) 和 return items,但同样无效。

import scrapy
from selenium import webdriver
import time
from scrapy.selector import Selector
from scrapy.selector import HtmlXPathSelector
from gdp.items import gdpItem
import unicodecsv as csv

# Spider from the question: it drives a Firefox browser through Selenium to
# submit the selection form at the start URL, then scrapes the resulting
# table rows into gdpItem objects.
# NOTE(review): as pasted, the class body below has lost its indentation;
# the attributes and methods are meant to be members of gdp_spider2.
class gdp_spider2(scrapy.Spider):
name = 'gdp_spider2'
# NOTE(review): allowed_domains entries are normally bare domain names; the
# trailing slash here looks unintended -- confirm.
allowed_domains = ['statdb.dgbas.gov.tw/']
start_urls = ['http://statdb.dgbas.gov.tw/pxweb/Dialog/varval.asp?ma=NA8101A1Q&ti=Principal%20Figures%282008SNA%29-Quarterly&path=../PXfileE/NationalIncome/&lang=1&strList=L']

def __init__(self):
    # Start one Firefox session; parse() reuses it through self.driver.
    self.driver = webdriver.Firefox()

def parse(self, response):
    # Accumulates the scraped items that are returned to Scrapy.
    items = []

    # Re-load the response URL in the browser and submit the form: click
    # 'Select all', pick the GDP option and the 'Data' option, then press
    # the SUBMIT input.
    driver = self.driver
    driver.get(response.url)
    driver.find_element_by_partial_link_text('Select all').click()
    driver.find_element_by_xpath('//option[contains(text(),"GDP (Million N.T.$,at Current Prices)")]').click()
    driver.find_element_by_xpath('//option[contains(text(),"Data")]').click()
    driver.find_element_by_xpath('//input[@type="SUBMIT"]').click()

    # Parse the HTML currently shown by the browser and select every row of
    # the table whose class is 'pxtable'.
    hxs = HtmlXPathSelector(text=self.driver.page_source)
    data = hxs.xpath("//table[@class='pxtable']//tbody//tr")

    for datum in data:
        item = gdpItem()
        # NOTE(review): an XPath starting with '//' searches the whole
        # document rather than only this row, so each field receives the
        # text of every td[1] / td[2] on the page. This matches the
        # "everything in one cell" symptom described in the question.
        item ["date"] = datum.xpath('//td[1]/text()').extract()
        item ["data"] = datum.xpath('//td[2]/text()').extract()

        items.append(item)
        # NOTE(review): this return sits inside the loop body, so the method
        # returns after the first row with a single item in the list.
        return items

CSV image

1 个答案:

答案 0 :(得分:0)

试试这个:

def parse(self, response):
    """Submit the selection form in the browser and yield one item per row.

    The response URL is re-loaded in the Selenium driver held in
    ``self.driver``; the form is filled in by clicking 'Select all', the GDP
    option and the 'Data' option, and then submitted. The HTML rendered by
    the browser is parsed and every row of the ``pxtable`` table is turned
    into its own ``gdpItem``.

    Args:
        response: The Scrapy response for the start URL; only its ``url``
            is used.

    Yields:
        gdpItem: One item per table row. ``date`` and ``data`` hold the
        lists of text nodes found in the row's first and second ``td``.
    """
    driver = self.driver
    driver.get(response.url)
    driver.find_element_by_partial_link_text('Select all').click()
    driver.find_element_by_xpath('//option[contains(text(),"GDP (Million N.T.$,at Current Prices)")]').click()
    driver.find_element_by_xpath('//option[contains(text(),"Data")]').click()
    driver.find_element_by_xpath('//input[@type="SUBMIT"]').click()

    # Parse what the browser rendered after the submit, not response.body.
    # Selector replaces the deprecated HtmlXPathSelector.
    selector = Selector(text=driver.page_source)
    rows = selector.xpath("//table[@class='pxtable']//tbody//tr")

    for row in rows:
        # Build a fresh item for every row: re-yielding one shared item
        # would make all yielded objects alias the same, last-written data.
        item = gdpItem()
        # The paths are relative (no leading '//'), so they only look at the
        # cells of the current row.
        item["date"] = row.xpath('td[1]/text()').extract()
        item["data"] = row.xpath('td[2]/text()').extract()
        yield item