使用python在CSV文件中搜索字符串并写入结果

时间:2014-02-13 20:44:53

标签: python string search csv

   #!/usr/bin/python

import csv
import re

string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')

#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "

#def does_match(string):
#    stringl = string.lower()
#    return any(s in stringl for s in searched)

inFile  = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile  = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

for row in reader:
    found = False
    for col in row:
        if col in [string_1, string_2, string_3] and not found:
            writer.writerow(row)
            found = True


#for row in reader:
 #   if any(does_match(col) for col in row):
  #      writer.writerow(row[:2]) # write only 2 first columns

inFile.close()
outFile.close()

我正在试图弄清楚如何在CSV文件中搜索3个项目。如果存在这些项目则打印该行。理想情况下,我希望只将第1列和第3列打印到新文件中。

示例数据文件

LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300

3 个答案:

答案 0 :(得分:3)

  

我正在试图弄清楚如何在CSV文件中搜索3个项目。如果存在这些项目,则打印该行。理想情况下,我希望只将第1列和第3列打印到新文件中。

试试这个:

import csv

# Exact values to look for in the first column of each row.
search_for = ['OneTouch AT','LinkRunner AT','AirCheck']
# Zero-based indices of the columns to copy: columns 1 and 3, as requested.
keep_cols = (0, 2)

with open('in.csv') as inf, open('out.csv','w') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # csv.reader yields an empty list for a blank line; indexing row[0]
        # on it would raise IndexError, so skip such rows.
        if not row:
            continue
        if row[0] in search_for:
            print('Found: {}'.format(row))
            # Rows with fewer than three columns contribute only the
            # requested columns they actually have.
            writer.writerow([row[i] for i in keep_cols if i < len(row)])

答案 1 :(得分:0)

#!/usr/bin/python

import csv
import numpy as np

class search_csv(object):
    """Filter the rows of a delimited file and write selected columns out.

    The whole input file is read into memory on construction.  Rows in which
    at least one cell is exactly equal to a search item are written, reduced
    to the requested columns, to a tab-delimited output file.
    """

    def __init__(self, infile, outfile, in_delimiter='\t'):
        """Read *infile* completely and open *outfile* for writing.

        infile       -- path of the file to search.
        outfile      -- path of the tab-delimited file to create.
        in_delimiter -- field separator of *infile*; defaults to a tab.
                        Pass ',' for ordinary comma-separated data.

        NOTE(review): the 'rb'/'wb' modes follow the Python 2 csv convention;
        Python 3's csv module expects text-mode files -- confirm the target
        interpreter.
        """
        # The rows are materialised immediately, so the input handle can be
        # released as soon as reading is done.
        with open(infile, 'rb') as handle:
            read_infile = list(csv.reader(handle, delimiter=in_delimiter, quotechar='"', quoting=csv.QUOTE_MINIMAL))
        self.non_numpy_data = read_infile
        self.data = np.array(read_infile, dtype=None)
        self.outfile = open(outfile, 'wb')
        self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    def write_to(self, matched_values):
        """Write every row in *matched_values* to the output and return True."""
        self.writer_.writerows(matched_values)
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print(' Matched Values Written ')
        return True

    def searcher(self, items, return_cols=(0, 2)):
        """Write *return_cols* of every row containing one of *items* (numpy).

        items       -- list of exact cell values to look for,
                       e.g. ['OneTouch AT', 'LinkRunner AT', 'AirCheck'].
        return_cols -- zero-based column indices to keep.

        Closes the output file when done and returns True.
        """
        find_these = np.array(items, dtype=None)
        if self.data.ndim == 2:
            # One boolean per row, so a row matching in several columns is
            # still written only once.
            row_mask = np.isin(self.data, find_these).any(axis=1)
            matching_data = self.data[row_mask][:, list(return_cols)]
        else:
            # Not a 2-D table (e.g. an empty input): there are no columns to
            # select from, so nothing matches.
            matching_data = []
        self.write_to(matching_data)
        self.outfile.close()
        return True

    def non_numpy_search(self, items, return_cols=(0, 2)):
        """Write *return_cols* of every row containing one of *items*.

        Pure-Python counterpart of ``searcher``.  A row matches when any of
        its cells is exactly equal to any entry of *items*; each matching row
        is written once.  Raises IndexError if a matching row lacks one of
        the requested columns.  Returns True.
        """
        lst = []
        for row in self.non_numpy_data:
            # Every search item is tried against the row; the row is
            # recorded once on the first hit.
            if any(item in row for item in items):
                lst.append([row[idx] for idx in return_cols])
        self.write_to(lst)
        # Push the rows out of the buffer without closing the file, so the
        # output is complete even if the caller never closes it.
        self.outfile.flush()
        return True


### now use the class ###

# Exact cell values to search for; must be a list.
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']

IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'

# Instantiate the class (search_csv) and then call its method;
# non_numpy_search is a method, not a module-level callable.
search_csv(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)

从你问题的措辞来看,我假设你只是想完成手头的任务,而并不真正关心具体如何实现。因此,请复制并粘贴这段代码,将你的数据文件名用作“IN_FILE”的值,并将要写入的文件名用作“OUT_FILE”的值。然后,将要搜索的值放入“SEARCHING_FOR”列表中。

注意事项...... SEARCHING_FOR应该是一个列表。

SEARCHING_FOR中的值按完全匹配(区分大小写)进行比较,因此“A”与“a”不匹配。如果你想要使用正则表达式或更复杂的匹配方式,请告诉我。

在函数'non_numpy_search'中有一个'return_cols'参数。它默认为第一列和第三列。

如果你没有numpy,请告诉我。

答案 2 :(得分:0)

#!/usr/bin/python

import csv
import re
import sys
import gdata.docs.service


#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')

# Lower-case search terms; a term's position in this list is its group index.
searched = ['aircheck', 'linkrunner at', 'onetouch at']

def find_group(row):
    """Return the group index of a row.

    Each column is lower-cased and tested, in order, for containing one of
    the terms in ``searched`` as a substring.  The index of the first term
    found in the first column that contains any term is returned:

        0 if that term is searched[0]
        1 if that term is searched[1]
        etc
        -1 if no column contains any term (including an empty row)
    """
    for col in row:
        col = col.lower()
        for j, s in enumerate(searched):
            if s in col:
                return j
    # Reached only after every column has been examined without a hit.
    return -1

def does_match(string):
    """Return True if the lower-cased *string* contains any term in ``searched``."""
    lowered = string.lower()
    for term in searched:
        if term in lowered:
            return True
    return False

# Open the input for reading and the output for writing.  The with-block
# closes both files even if processing a row raises part-way through.
# NOTE: the binary modes ("rb"/"wb") are the Python 2 csv convention.
with open('data.csv', "rb") as inFile, open('data2.csv', "wb") as outFile:
    reader = csv.reader(inFile)
    writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)

    # Build a list of items to sort.  If row 12 contains 'LinkRunner AT'
    # (group 1), a triple (1, 12, row) is stored.  Sorting the triples puts
    # all rows of group 0 first, then all rows of group 1, etc., keeping the
    # original file order within each group.
    stored = []
    for i, row in enumerate(reader):
        g = find_group(row)
        # Guard against a None result as well as -1, so a row that
        # find_group cannot classify is skipped rather than compared with 0.
        if g is not None and g >= 0:
            stored.append((g, i, row))
    stored.sort()

    for g, i, row in stored:
        writer.writerow(tuple(row[k] for k in (0, 2)))  # output columns 1 and 3