Question

我有一个小程序只需要从csv文件中读取一行（并且只有一行）并将列值写入一系列文件。该程序有三个系统参数：数据文件的路径，作业ID（uuid）和目标行号，即我要解析的csv中的行。它不起作用，我该如何解决？

import csv
import sys
import itertools

# Command-line arguments: path of the csv file, job id (uuid), and the
# number of the row to extract.
f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]
tmpdir="/tmp/pagekicker/"
folder = tmpdir+uuid

# One output file per extracted column, all under <folder>/csv/.
destination1 = folder + '/csv/row.editedby'
destination3 = folder + '/csv/row.booktitle'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'
# NOTE: f is rebound here from the path string to the open file object.
f = open(f, 'rb')
f1 = open(destination1, 'w')
f3 = open(destination3, 'w')
f4 = open(destination4, 'w')
f5 = open(destination5, 'w')

target_row = int(target_row)
try:
    reader = csv.reader(f)  # creates the reader object
    # NOTE(review): islice(reader, 1, 1) has start == stop, so it yields no
    # rows and the loop body below never runs. target_row is converted to an
    # int above but is not used in the slice.
    for row in itertools.islice(reader,1,1):   # iterates the rows of the file in orders
            editedby = row[0] # we throw away column 2
            booktitle = row[2]
            print row[2]
            seeds = row[3]
            imprint = row[4]
            f1.write(editedby)
            f3.write(booktitle)
            f4.write(seeds)
            f5.write(imprint)
            # These close() calls are part of the loop body, so they only run
            # when a row is actually processed.
            f.close()      
            f1.close()
            f3.close()
            f4.close()
            f5.close()
finally:
    # Printed whether or not the block above raised.
    print 'done'

更新：感谢格雷厄姆贝尔提出的建议代码。他的'with'语句的第一行有两个“f5”，我已将其改正。我的代码现在看起来像这样：

import csv
import sys
import itertools

# Command-line arguments: path of the csv file, job id (uuid), and the
# number of the row to extract.
f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]
tmpdir="/tmp/pagekicker/"
folder = tmpdir+uuid
# os.mkdir(folder)

# One output file per extracted column, all under <folder>/csv/.
destination3 = folder + '/csv/row.booktitle'
destination1 = folder + '/csv/row.editedby'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'

# The with statement closes all five files on exit; f is rebound from the
# path string to the open file object.
with open(f, 'rb') as f, open(destination1, 'w') as f1, open(destination3, 'w') as f3, open(destination4, 'w') as f4, open(destination5, 'w') as f5:
    target_row = int(target_row)
    try:
        reader = csv.reader(f)  # creates the reader object
        # NOTE(review): islice(reader, 1, 1) has start == stop, so it yields
        # no rows; target_row is not used in the slice.
        for row in itertools.islice(reader,1,1):   # iterates the rows of the file in orders
            editedby = row[0] # we throw away column 2
            booktitle = row[2]
            print row[2]
            seeds = row[3]
            imprint = row[4]
            f1.write(editedby)
            f3.write(booktitle)
            f4.write(seeds)
            f5.write(imprint)
        # NOTE(review): this except is indented one level deeper than its
        # try and has no trailing colon, which is what the interpreter
        # rejects as invalid syntax.
        except
            print 'done'

如果不写 except，运行时会报“unexpected unindent”（意外的取消缩进）；加上 except 之后，又提示 except 这一行是无效语法（invalid syntax）。

Answer 1

csv库DictReader（）对象能够显示当前行号：

# Wrap an already-open csv file object in a DictReader.
reader = csv.DictReader(csv_file)
# line_num: number of lines read from csv_file so far (see the csv module docs).
reader.line_num

你可以迭代并做任何事情，直到你得到你需要的正确行号，如下所示：

for row in reader:
    # line_num is the number of physical lines read so far; with a
    # DictReader that includes the header line.
    if reader.line_num == row_you_want:
        do_something(row)  # placeholder: process the matching row here
        break  # line_num never repeats, so nothing later can match

DictReader类还允许您将CSV文件中的第一行作为标题列，然后您可以像这样访问它们：

# Columns are looked up on each row (a dict keyed by the header names),
# not on the reader object itself.
row["title_of_column1"]

这可能会为你节省一些工作，你也应该在处理这样的文件时使用python with block：

# Open the input and the four outputs together; the with statement closes
# every file on exit, even if an error occurs.
# Python 2's csv module expects the input in binary mode ('rb'); on
# Python 3 open it with open(f, newline='') instead.
with open(f, 'rb') as f, \
        open(destination1, 'w') as f1, \
        open(destination3, 'w') as f3, \
        open(destination4, 'w') as f4, \
        open(destination5, 'w') as f5:
    target_row = int(target_row)
    reader = csv.reader(f)  # creates the reader object
    # islice(reader, n - 1, n) skips the first n - 1 rows and yields only
    # row n, counting rows from 1.
    for row in itertools.islice(reader, target_row - 1, target_row):
        editedby = row[0]  # column index 1 is deliberately skipped
        booktitle = row[2]
        print(row[2])
        seeds = row[3]
        imprint = row[4]
        f1.write(editedby)
        f3.write(booktitle)
        f4.write(seeds)
        f5.write(imprint)

这样您就不必操心逐个关闭所有文件了。

Answer 2

假设您从1（而不是0）开始计算行数，这里有一个独立的函数可以执行此操作：

import csv
import itertools
import os
import sys
from contextlib import contextmanager

@contextmanager
def multi_file_manager(files, mode='r'):
    """Open several files at once and close all of them on exit.

    Args:
        files: Iterable of file paths to open.
        mode: Mode string passed to ``open`` for every path.

    Yields:
        A list of the open file objects, in the same order as ``files``.
    """
    opened = []
    try:
        for path in files:
            opened.append(open(path, mode))
        yield opened
    finally:
        # Runs on normal exit, when the with-body raises, and when a later
        # open() fails part-way through, so no handle is leaked.
        for handle in opened:
            handle.close()

# This is the standalone function
def csv_read_row(filename, n):
    """Read and return the nth row of a csv file, counting from 1.

    Args:
        filename: Path of the csv file.
        n: 1-based number of the row to return.

    Returns:
        The requested row as a list of strings.

    Raises:
        ValueError: If n is less than 1.
        IndexError: If the file has fewer than n rows.
    """
    if n < 1:
        raise ValueError('row number must be >= 1, got %r' % (n,))
    # The csv module wants binary mode on Python 2 and text mode with
    # newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open(filename, 'rb')
    else:
        f = open(filename, newline='')
    with f:
        reader = csv.reader(f)
        # A default of None keeps a bare StopIteration from escaping when
        # the file is too short; a parsed row is always a list, never None.
        row = next(itertools.islice(reader, n - 1, n), None)
    if row is None:
        raise IndexError('%s has fewer than %d rows' % (filename, n))
    return row

# Command-line driver: copy selected columns of one csv row into separate
# files under <tmpdir>/<uuid>/csv/.
if len(sys.argv) != 4:
    print('usage: utility <csv filename> <uuid> <target row>')
    sys.exit(1)

tmpdir = "/tmp/pagekicker"
f = sys.argv[1]
uuid = sys.argv[2]
target_row = int(sys.argv[3])
folder = os.path.join(tmpdir, uuid)

destinations = [folder+dest for dest in ('/csv/row.editedby',
                                         '/csv/row.booktitle',
                                         '/csv/row.seeds',
                                         '/csv/row.imprint')]

# Source column for each destination, in the same order as `destinations`;
# column index 1 is intentionally skipped.
columns = (0, 2, 3, 4)

# Read the row and pick its columns before opening the outputs: mode 'w'
# truncates the files, so a bad row number or a short row must fail first.
row = csv_read_row(f, target_row)
values = [row[col] for col in columns]

with multi_file_manager(destinations, mode='w') as files:
    for out, value in zip(files, values):
        out.write(value+'\n')

如何仅选择CSV文件中的特定行？

2 个答案: