我写了一个脚本,它读取给定的输入文件(csv),对数据做一些处理,然后写入输出文件(csv)。
就我而言,我的给定输入文件如下所示:
| sku | article_name |
| 1 | MyArticle |
在输出文件中,我需要重新排列这些列(实际的列更多,但只要有人给我指个方向,其余的我应该可以自己解决)。
我的输出文件应如下所示:
| article_name | another_column | sku |
| MyArticle | | 1 |
请注意,another_column 是一个新列,源 csv 文件中并不存在,但必须输出(列的顺序也很重要)。
这是我到目前为止所做的:
#!/usr/bin/env python
# -*- coding: latin_1 -*-
import csv
import argparse
import sys
# Source CSV column name -> column title used in the output file.
header_mappings = {
    'attr_artikel_bezeichnung1': 'ARTICLE LABEL',
    'sku': 'ARTICLE NUMBER',
    'Article label locale': 'Article label locale',
    'attr_purchaseprice': 'EK-Preis',
    'attr_salesPrice': 'EuroNettoPreis',
    'attr_salesunit': 'Einheit',
    'attr_salesvatcode': 'MwSt.-Satz',
    'attr_suppliercode': 'Lieferantennummer',
    'attr_suppliersitemcode': 'Artikelnummer Lieferant',
    'attr_isbatchitem': 'SNWarenausgang',
}

# Output column title -> {raw cell value: replacement written to the output}.
row_mapping = {
    'Einheit': {'pc': 'St.'},
    'MwSt.-Satz': {'3': '19'},
}
def remap_header(header, mappings=None):
    """Yield ``(output_title, value)`` for each mapped column present in *header*.

    header   -- dict keyed by source column name; each value is passed
                through unchanged (the script supplies the column position).
    mappings -- optional dict of source column name -> output title.
                Defaults to the module-level ``header_mappings``.

    Source columns that have no entry in *mappings* are skipped.
    """
    if mappings is None:
        mappings = header_mappings
    for source_name, output_title in mappings.items():
        if source_name in header:
            yield output_title, header[source_name]
def map_header(header):
    """Yield ``(column_name, position)`` for every entry of *header*.

    A name that occurs more than once is always reported with the position
    of its first occurrence, which is what ``header.index(name)`` returns.
    Positions are tracked in a dict so the scan is linear rather than one
    ``index()`` search per column.
    """
    first_position = {}
    for position, name in enumerate(header):
        # setdefault() records the position only the first time a name is seen.
        yield name, first_position.setdefault(name, position)
def read_csv(filename):
    """Yield every row of the ';'-delimited CSV file *filename* as a list.

    The header line is not treated specially; it is simply the first row
    yielded. The file stays open until the generator is exhausted or closed.
    """
    if sys.version_info[0] >= 3:
        # Python 3's csv module needs a text-mode file opened with
        # newline='' so that it can handle line endings itself.
        incsv = open(filename, 'r', newline='')
    else:
        # Python 2's csv module needs a binary-mode file.
        incsv = open(filename, 'rb')
    with incsv:
        for record in csv.reader(incsv, delimiter=';'):
            yield record
def add_header(header, fields=()):
    """Append every name in *fields* to the list *header* and return it.

    *header* is modified in place; the returned object is that same list.
    With the default empty *fields* the list is returned unchanged.
    """
    header.extend(fields)
    return header
def duplicate(csv_row, header_name, fields, header=None):
    """Copy the cell of column *header_name* into column *fields*.

    csv_row     -- output row (list), modified in place and returned.
    header_name -- title of the column to copy from.
    fields      -- title of the single column to copy into (one name,
                   despite the plural parameter name).
    header      -- optional list of output column titles used to locate
                   both columns. Defaults to the script-level
                   ``new_csv_header``, which exists only when the file
                   runs as a script.

    With a list *header*, ``index()`` raises ValueError if either title is
    missing from it.
    """
    if header is None:
        header = new_csv_header
    source_position = header.index(header_name)
    target_position = header.index(fields)
    csv_row[target_position] = csv_row[source_position]
    return csv_row
def do_new_row(csv_row, header=None, column_index=None, value_mapping=None):
    """Yield the cells of one output row, in the order given by *header*.

    csv_row       -- one input row (list of cells). Cells covered by
                     *value_mapping* are replaced in place.
    header        -- output column titles in the order they are written.
                     Defaults to the script-level ``new_csv_header``.
    column_index  -- dict of output column title -> position in *csv_row*.
                     Defaults to the script-level ``mapped_header``.
    value_mapping -- dict of output column title -> {raw value: replacement}.
                     Defaults to the module-level ``row_mapping``.

    Exactly one cell is yielded per entry of *header*. A column with no
    position in *column_index* produces an empty string, so the cells after
    it stay aligned with their titles.
    """
    if header is None:
        header = new_csv_header
    if column_index is None:
        column_index = mapped_header
    if value_mapping is None:
        value_mapping = row_mapping

    # Translate cell values once per row; the result does not depend on the
    # output column being emitted.
    for column, replacements in value_mapping.items():
        position = column_index.get(column)
        if position is None:
            # The input file has no such column, so there is nothing to translate.
            continue
        cell = csv_row[position]
        if cell in replacements:
            csv_row[position] = replacements[cell]

    for column in header:
        position = column_index.get(column)
        # Columns without a source position are emitted as empty cells
        # instead of being skipped, which would shift later cells left.
        yield '' if position is None else csv_row[position]
if __name__ == '__main__':
    # Command line: -i/--infile is the source CSV, -o/--outfile the CSV to write.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', metavar='CSV', required=True)
    parser.add_argument('-o', '--outfile', metavar='CSV', required=True)
    # A bare invocation still prints the usage line and exits with status 0.
    # This check runs before parse_args() so the required options do not
    # turn that case into an argparse error.
    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(0)
    args = parser.parse_args()
    arguments = vars(args)

    csv_reader_iter = read_csv(arguments.get('infile'))

    # The first row of the input is its header.
    csv_header = next(csv_reader_iter, None)
    if csv_header is None:
        parser.error('input file is empty')

    # New CSV header: translated titles of the known source columns, in
    # input order, followed by the two extra columns.
    new_csv_header = [header_mappings[h] for h in csv_header
                      if h in header_mappings]
    new_csv_header = add_header(new_csv_header,
                                ('Article label locale', 'Nummer'))

    # Output column title -> position of its source column in an input row.
    mapped_header = dict(remap_header(dict(map_header(csv_header))))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        outcsv = open(arguments.get('outfile'), 'w', newline='')
    else:
        outcsv = open(arguments.get('outfile'), 'wb')

    with outcsv:
        csv_writer = csv.writer(outcsv, delimiter=';')
        csv_writer.writerow(new_csv_header)
        for row in csv_reader_iter:
            row = list(do_new_row(row))
            # Pad with empty cells if the row came back shorter than the header.
            missing = len(new_csv_header) - len(row)
            if missing > 0:
                row.extend([''] * missing)
            # NOTE: the duplicate() post-processing steps
            # ('SNWarenausgang' -> 'SNWareneingang' and
            # 'Herstellernummer' -> 'Nummer') stay disabled: 'SNWareneingang'
            # and 'Herstellernummer' are not in new_csv_header, so
            # duplicate() would raise ValueError.
            csv_writer.writerow(row)
    print("Done.")
现在,尽管映射里把 "ARTICLE LABEL" 排在最前面,输出时最先写出的却是 sku。我的猜测:这是由输入 csv 文件的列顺序决定的,因为 sku 是其中的第一个字段……对吗?
答案 0 :(得分:2)
如果您使用 csv 库中的 `DictWriter`,则可以指定列的顺序。使用 `DictReader` 把文件中的每一行读取为 dict,然后只需在创建 `DictWriter` 时明确指定键(即列名)的顺序即可。
答案 1 :(得分:0)
正如 riotburn 已经建议的那样,您可以使用 `DictWriter` 及其 `fieldnames` 参数来调整新文件中列的顺序。
重新排序文件可能如下所示:
def read_csv(filename):
    """Yield each data row of the ';'-delimited file *filename* as a dict.

    csv.DictReader takes the dict keys from the first line of the file, so
    that line is consumed as the header and is not yielded.
    """
    with open(filename) as incsv:
        for record in csv.DictReader(incsv, delimiter=';'):
            yield record
# Column order of the output file, including the column that is new.
columns = ['article_name', 'another_column', 'sku']

# Plain write mode is enough: the file is only written, never read back.
with open('newfile.csv', 'w') as f:
    # DictWriter writes every row in the order given by `columns`, whatever
    # order the keys have in the row dict.
    # NOTE: with the default extrasaction='raise', writerow() raises
    # ValueError for any key in a row that is not listed in `columns`;
    # pass extrasaction='ignore' to drop such columns instead.
    writer = csv.DictWriter(f, columns, delimiter=';')
    writer.writeheader()
    for row in read_csv('oldfile.csv'):
        # add a property
        row['another_column'] = 'foo'
        # write row (using the order specified in columns)
        writer.writerow(row)