这段代码的目的是:把一个所有数据都写在同一行、格式很奇怪的 .csv 文件,转换成每行一对数据的多行 csv 文件。
import csv
import re
filenmi = "original.csv"
filenmo = "data-out.csv"

# Reads every line of the input file, strips it down to digits, dots and
# commas, and writes the remaining values to the output file two per row.
# Both files are opened in a single `with` so they are closed even if an
# error occurs while processing.
with open(filenmi, 'r') as infile, open(filenmo, 'w+') as outfile:
    for line in infile:
        print('read data :', line)
        # Remove every character that is not a digit, '|', '^', ',' or '.'.
        # This drops the "L" markers and the trailing newline.
        line2 = re.sub('[^0-9|^,^.]', '', line)
        # Collapse the double comma left behind where an inner "L" was removed.
        line2 = re.sub(',,', ',', line2)
        print('clean data: ', line2)
        wordlist = line2.split(",")
        n = (len(wordlist)) / 2
        print('num data pairs: ', n)
        i = 0
        print('data paired :')
        # NOTE(review): the comma that followed the leading "L" is still at the
        # start of line2, so wordlist[0] is an empty string and every pair
        # below is shifted by one value.
        while i < n * 2:
            # Take the next two values; the final slice may hold only one.
            pairlst = wordlist[i:i + 2]
            pairstr = ','.join(pairlst)
            print(' ', i / 2 + 1, ' ', pairstr)
            pairstr = pairstr + '\n'
            outfile.write(pairstr)
            i = i + 2
我希望这段代码能把一个格式混乱的 .txt 文件,例如
L,39,100,50.5,83,L,50.5,83
转换为正常格式的csv文件,如下例所示
39,100
50.5,83
50.5,83
但是我的数据是这样的
,39
100,50.5
83,50.5
83,
我不确定出了什么问题或如何解决。因此,如果有人可以提供帮助,那就太好了
::Data Set::
L,39,100,50.5,83,L,50.5,83,57.5,76,L,57.5,76,67,67.5,L,67,67.5,89,54,L,89,54,100.5,49,L,100.5,49,111.5,45.5,L,111.5,45.5,134,42,L,134,42,152.5,44,L,152.5,44,160,46.5,L,160,46.5,168,52,L,168,52,170,56.5,L,170,56.5,162,64.5,L,162,64.5,152.5,70,L,152.5,70,126,85.5,L,126,85.5,113.5,94,L,113.5,94,98,105.5,L,98,105.5,72.5,132,L,72.5,132,64.5,145,L,64.5,145,57.5,165.5,L,57.5,165.5,57,176,L,57,176,63.5,199.5,L,63.5,199.5,69,209,L,69,209,76,216.5,L,76,216.5,83.5,222,L,83.5,222,90.5,224.5,L,90.5,224.5,98,225.5,L,98,225.5,105.5,225,L,105.5,225,115,223,L,115,223,124.5,220,L,124.5,220,133.5,216.5,L,133.5,216.5,142,212,L,142,212,149,207,L,149,207,156.5,201.5,L,156.5,201.5,163.5,195.5,L,163.5,195.5,172.5,185.5,L,172.5,185.5,175,180.5,L,175,180.5,177,173,L,177,173,177.5,154,L,177.5,154,174.5,142.5,L,174.5,142.5,168.5,133.5,L,168.5,133.5,150,131.5,L,150,131.5,135,136.5,L,135,136.5,120.5,144.5,L,120.5,144.5,110.5,154,L,110.5,154,104,161.5,L,104,161.5,99.5,168.5,L,99.5,168.5,98,173,L,98,173,97.5,176,L,97.5,176,99.5,178,L,99.5,178,105,179.5,L,105,179.5,112.5,179,L,112.5,179,132,175.5,L,132,175.5,140.5,175,L,140.5,175,149.5,175,L,149.5,175,157,176.5,L,157,176.5,169.5,181.5,L,169.5,181.5,174,185.5,L,174,185.5,178,206,L,178,206,176.5,214.5,L,176.5,214.5,161,240.5,L,161,240.5,144.5,251,L,144.5,251,134.5,254,L,134.5,254,111.5,254.5,L,111.5,254.5,98,253,L,98,253,71.5,248,L,71.5,248,56,246,
答案 0 :(得分:0)
您的代码之所以失败,是因为执行 `line2 = re.sub('[^0-9|^,^.]','',line)` 之后,得到的结果是 `,39,100,50.5,83,,50.5,83`。
在该行中,您使用 `re` 把所有不是数字、点或逗号的字符替换为空字符串 `''`。这会删除输入中的 `L`,但紧跟在它后面的那个逗号(即第二个字符)会保留下来,于是结果以逗号开头,后面的数据配对就整体错开了一位。
我已经修复了这个问题,并对生成 csv 行列表的方式做了一些修改。下面的代码可以正常工作。
import csv
import re
filenmi = "original.csv"
filenmo = "data-out.csv"

# Compiled once: matches any character that is not a digit, a dot or a comma.
_NOT_NUMERIC = re.compile(r'[^0-9.,]')


def _pair_values(line):
    """Return the numeric fields of *line* grouped into consecutive pairs.

    Every character except digits, dots and commas is discarded (this removes
    the ``L`` markers and the trailing newline). Empty fields, such as the
    ones left by a removed marker or a trailing comma, are dropped, and the
    remaining values are paired in order.

    If an odd number of values is left, a warning is printed and the
    unpaired last value is ignored.
    """
    cleaned = _NOT_NUMERIC.sub('', line)
    # Filtering empty fields replaces both the ",," substitution and the
    # unconditional removal of the first character, so a line that does not
    # start with a marker no longer loses its first digit.
    values = [field for field in cleaned.split(',') if field]
    if len(values) % 2:
        print("Data set needs cleanup\n")
    # zip() stops at the shorter slice, so a dangling last value is skipped.
    return list(zip(values[0::2], values[1::2]))


def convert(src=filenmi, dst=filenmo):
    """Rewrite the one-line data file *src* as a two-column csv file *dst*.

    Pairs from every input line are accumulated before writing, so input
    spread over several lines is kept in full, and an empty input produces
    an empty output file.
    """
    pairs = []
    with open(src, 'r') as infile:
        for line in infile:
            pairs.extend(_pair_values(line))
    # newline='' lets the csv writer control line endings itself;
    # lineterminator='\n' makes each row end in a single newline character.
    with open(dst, 'w', newline='') as outfile:
        csv.writer(outfile, lineterminator='\n').writerows(pairs)


if __name__ == "__main__":
    convert()