shutil 复制了文件,但复制出的文件中数据丢失

时间:2018-03-18 19:35:00

标签: python scrapy shutil

以下代码有时可以正常工作:它每次都会把文件复制到目标目录中,但有时 leadparser 目录下复制得到的 csv 文件里没有数据。

import csv
import re
import os
import shutil


class myExporter(object):
    """Write scraped items to a numbered CSV file and mirror it to a folder.

    The class exposes ``process_item(item, spider)`` and ``close_spider``
    hooks, i.e. the shape of a Scrapy item pipeline.  Each run picks the first
    unused ``output<N>.csv`` name in the source directory, writes a header
    row, and after every item copies the file into the destination folder.

    Args:
        srcdir: Directory in which the ``output<N>.csv`` file is created.
        folderdes: Directory that receives a copy of the CSV after each item.
    """

    def __init__(self,
                 srcdir='/Users/poweruser/Applications/pythonwork/bbbscrap2/scrape/',
                 folderdes='/Users/poweruser/Applications/pythonwork/leadparser/newfiles'):
        self.i = 0
        self.filename = 'output%s.csv'
        # ``srcfile`` is a %-format pattern, so literal percent signs in the
        # directory part must be escaped before the pattern is formatted.
        self.srcfile = os.path.join(srcdir.replace('%', '%%'), self.filename)
        while os.path.exists(self.srcfile % self.i):
            self.i += 1
        self.folderdes = folderdes
        # Open the very same absolute path that is later copied; opening the
        # bare relative name only worked when the CWD happened to be srcdir.
        # newline='' is what the csv module requires for writer file objects.
        self._file = open(self.srcfile % self.i, 'w', newline='')
        self.myCSV = csv.writer(self._file)
        self.myCSV.writerow(['Email', 'Website', 'Phone Number', 'Location'])
        self._file.flush()

    def process_item(self, item, spider):
        """Append ``item`` as a CSV row, refresh the copy, and return ``item``.

        Args:
            item: Mapping with ``email``, ``website``, ``phonenumber`` and
                ``location`` keys.
            spider: Unused; accepted for the pipeline call signature.

        Returns:
            The ``item`` that was passed in, unchanged.

        Raises:
            KeyError: If ``item`` lacks one of the four expected keys.
        """
        self.myCSV.writerow([item['email'],
                             item['website'],
                             item['phonenumber'],
                             item['location']])
        # Without this flush the rows may still sit in Python's write buffer,
        # and shutil.copy would then copy an empty or truncated file.
        self._file.flush()
        self.folderPath = os.path.join(
            self.folderdes, os.path.basename(self.srcfile % self.i))
        shutil.copy(self.srcfile % self.i, self.folderPath)

        return item

    def close_spider(self, spider):
        """Close the CSV file handle once the crawl is finished."""
        self._file.close()

1 个答案:

答案 0 :(得分:1)

写入的数据会先留在缓冲区中,只有在文件被刷新或关闭后才会真正写入磁盘;因此必须先关闭文件,再进行复制,才能保证所有数据都已写入:

import csv
import re
import os
import shutil

# %-format pattern for the CSV produced by each run; %s is the run counter.
SRCFILE = '/Users/poweruser/Applications/pythonwork/bbbscrap2/scrape/output%s.csv'
# Folder that receives an up-to-date copy of the CSV after every item.
DESTINATION_FOLDER = '/Users/poweruser/Applications/pythonwork/leadparser/newfiles'


class myExporter(object):
    """Append scraped items to a numbered CSV file and mirror it to a folder.

    The class exposes a ``process_item(item, spider)`` hook, i.e. the shape of
    a Scrapy item pipeline.  The file is reopened and closed for every write,
    so all data is on disk before the file is copied.

    Args:
        srcfile: %-format pattern with one ``%s`` placeholder for the run
            counter; the first counter value whose file does not exist is used.
        destination_folder: Directory the CSV is copied into after each item.
    """

    def __init__(self, srcfile=SRCFILE, destination_folder=DESTINATION_FOLDER):
        i = 0
        while os.path.exists(srcfile % i):
            i += 1
        self.filename = srcfile % i
        self.destination_folder = destination_folder
        self._write_row(['Email', 'Website', 'Phone Number', 'Location'],
                        mode='w')

    def _write_row(self, row, mode='a'):
        """Write one CSV row to ``self.filename`` and close the file."""
        # newline='' is what the csv module requires for writer file objects;
        # without it extra blank lines can appear on some platforms.
        with open(self.filename, mode, newline='') as handle:
            csv.writer(handle).writerow(row)

    def process_item(self, item, spider):
        """Append ``item`` as a CSV row, refresh the copy, and return ``item``.

        Args:
            item: Mapping with ``email``, ``website``, ``phonenumber`` and
                ``location`` keys.
            spider: Unused; accepted for the pipeline call signature.

        Returns:
            The ``item`` that was passed in, unchanged.

        Raises:
            KeyError: If ``item`` lacks one of the four expected keys.
        """
        self._write_row([item['email'],
                         item['website'],
                         item['phonenumber'],
                         item['location']])
        # The file is closed at this point, so the copy sees every row.
        target = os.path.join(self.destination_folder,
                              os.path.basename(self.filename))
        shutil.copy(self.filename, target)
        return item