我想按第 2 列的内容对原始文件的各行进行排序,并获取该列中的唯一元素:
原始文件:
qoow_12_xx7_21 wer1 rwty3
asss_x17_211 aqe3 sda4
acyi_112_werxc xcu12 weqa1
qwer_234_ssd aqe3 wers
期望输出的排序后数据:
asss_x17_211 aqe3 sda4
qwer_234_ssd aqe3 wers
qoow_12_xx7_21 wer1 rwty3
acyi_112_werxc xcu12 weqa1
期望输出的第 2 列唯一值:
aqe3
wer1
xcu12
我尝试过但无法运行的代码:
from operator import itemgetter
import itemgetter
# Presumably meant to return the distinct items of `data` joined by tabs.
# NOTE(review): indentation was lost in this paste; the intended nesting is
# presumably def > for > if, with `return` after the loop -- confirm.
def get_unique(data):
# `seen` is a str, so the `in` test below is a substring test.
seen=""
for e in data:
if e not in seen:
# NOTE(review): `e` is never added to `seen`; this joins the characters of
# `seen` itself, so `seen` is only ever rebuilt from its own contents.
seen="\t".join(seen)
return seen
# Asker's attempt: read tab-separated lines from myfile.txt and write sorted
# data plus the unique column-2 values to out.xls.
# NOTE(review): indentation was lost in this paste; presumably the statements
# after `for line in infile:` were meant to be nested under it -- confirm.
col2=""
with open("myfile.txt", "r") as infile, open("out.xls","w") as outfile:
for line in infile:
# NOTE(review): `rstrip` is referenced but not called (no parentheses), so
# `.split` is looked up on a method object and raises AttributeError.
data=line.rstrip.split("\t")
# NOTE(review): this sorts the fields of a single line, not the lines of the
# file; `e` is a str, which has no `itemgetter` attribute.
sorted_data=sorted(data, key=lambda e: e.itemgetter)
# `data[1]` is a str, so "".join(...) returns it unchanged; `col2` is
# overwritten on each assignment rather than accumulated.
col2="".join(data[1])
uniq_col2=get_unique(col2)
# NOTE(review): `sorted_data` is a list; a text-mode file's write() expects a
# str, so this call raises TypeError.
outfile.write(sorted_data)# tab-delimited sorted data
outfile.write(uniq_col2) # sorted column 2 data
有人能帮我让这段代码正常工作吗?谢谢。
答案 0 :(得分:1)
试试这个:
from operator import itemgetter
# Sort the whitespace-separated rows of test.txt by their 2nd column, write
# them tab-delimited to out.txt, then append the distinct 2nd-column values.
with open('test.txt') as infile, open('out.txt', 'w') as outfile:
    # Split each line into fields; blank lines yield an empty list and are
    # dropped so that itemgetter(1) does not raise IndexError on them.
    rows = [fields for fields in map(str.split, infile) if fields]

    # sorted() is stable, so rows sharing a 2nd-column value keep input order.
    sorted_rows = sorted(rows, key=itemgetter(1))

    # Output the sorted rows, tab-delimited, one per line.
    outfile.writelines('\t'.join(row) + '\n' for row in sorted_rows)

    # dict.fromkeys keeps first-seen order and drops repeats; because the rows
    # are already sorted, this gives the unique 2nd-column values in order.
    unique_col2 = dict.fromkeys(row[1] for row in sorted_rows)
    outfile.writelines(item + '\n' for item in unique_col2)