使用python移出数据

时间:2019-02-04 04:42:02

标签: python csv

这段代码的目的是:把一个所有数据都写在同一行、格式很奇怪的 .csv 文件,转换成每行一对数值的多行 csv 文件。

import csv
import re
# Script from the question: reads a one-line data file and tries to write
# the numbers it contains as two-column CSV rows.
filenmi = "original.csv"
filenmo = "data-out.csv"
# Both files are opened manually and closed explicitly at the end of the script.
infile  = open(filenmi,'r')
outfile = open(filenmo,'w+')

for line in infile:
  print ('read data :',line)
  # Remove every character except digits, '|', '^', ',' and '.'.
  # The 'L' markers (and the newline) disappear, but the comma that
  # followed each 'L' stays, so the cleaned line starts with a comma.
  line2 = re.sub('[^0-9|^,^.]','',line)
  # Collapse the doubled commas left behind where an 'L' was removed.
  line2 = re.sub(',,',',',line2)
  print ('clean data: ',line2)
  # A leading comma makes the first field an empty string, which shifts
  # every following value by one position when the fields are paired.
  wordlist = line2.split(",")
  # Half the number of fields, used as the number of pairs to emit.
  n=(len(wordlist))/2
  print ('num data pairs: ',n)
  i=0
  print ('data paired :')

  # Walk the field list two entries at a time and write one row per step.
  while i < n*2 :
    # NOTE(review): `pairlst` is never assigned in the code shown here;
    # presumably a line such as a two-element slice of `wordlist` starting
    # at `i` was lost -- confirm against the original post.
    pairstr = ','.join( pairlst )
    print('  ',i/2+1,'  ',pairstr)
    pairstr = pairstr + '\n'
    outfile.write( pairstr )
    i=i+2

infile.close()                         
outfile.close()

我希望这段代码能把一个格式混乱的 .txt 文件,例如下面这一行

L,39,100,50.5,83,L,50.5,83

转换为正常格式的csv文件,如下例所示

39,100
50.5,83
50.5,83

但是我实际得到的输出却是这样的(每个数值都错开了一位)

,39
100,50.5
83,50.5
83,

我不确定出了什么问题或如何解决。因此,如果有人可以提供帮助,那就太好了

::Data Set::

L,39,100,50.5,83,L,50.5,83,57.5,76,L,57.5,76,67,67.5,L,67,67.5,89,54,L,89,54,100.5,49,L,100.5,49,111.5,45.5,L,111.5,45.5,134,42,L,134,42,152.5,44,L,152.5,44,160,46.5,L,160,46.5,168,52,L,168,52,170,56.5,L,170,56.5,162,64.5,L,162,64.5,152.5,70,L,152.5,70,126,85.5,L,126,85.5,113.5,94,L,113.5,94,98,105.5,L,98,105.5,72.5,132,L,72.5,132,64.5,145,L,64.5,145,57.5,165.5,L,57.5,165.5,57,176,L,57,176,63.5,199.5,L,63.5,199.5,69,209,L,69,209,76,216.5,L,76,216.5,83.5,222,L,83.5,222,90.5,224.5,L,90.5,224.5,98,225.5,L,98,225.5,105.5,225,L,105.5,225,115,223,L,115,223,124.5,220,L,124.5,220,133.5,216.5,L,133.5,216.5,142,212,L,142,212,149,207,L,149,207,156.5,201.5,L,156.5,201.5,163.5,195.5,L,163.5,195.5,172.5,185.5,L,172.5,185.5,175,180.5,L,175,180.5,177,173,L,177,173,177.5,154,L,177.5,154,174.5,142.5,L,174.5,142.5,168.5,133.5,L,168.5,133.5,150,131.5,L,150,131.5,135,136.5,L,135,136.5,120.5,144.5,L,120.5,144.5,110.5,154,L,110.5,154,104,161.5,L,104,161.5,99.5,168.5,L,99.5,168.5,98,173,L,98,173,97.5,176,L,97.5,176,99.5,178,L,99.5,178,105,179.5,L,105,179.5,112.5,179,L,112.5,179,132,175.5,L,132,175.5,140.5,175,L,140.5,175,149.5,175,L,149.5,175,157,176.5,L,157,176.5,169.5,181.5,L,169.5,181.5,174,185.5,L,174,185.5,178,206,L,178,206,176.5,214.5,L,176.5,214.5,161,240.5,L,161,240.5,144.5,251,L,144.5,251,134.5,254,L,134.5,254,111.5,254.5,L,111.5,254.5,98,253,L,98,253,71.5,248,L,71.5,248,56,246,

1 个答案:

答案 0 :(得分:0)

您的代码出错的原因是:执行 line2 = re.sub('[^0-9|^,^.]','',line) 之后,得到的结果是 ,39,100,50.5,83,,50.5,83 (注意行首多出了一个逗号)。

在该行中,您使用 re 将所有不是数字、点或逗号的字符替换为空字符串 ''。这会删除输入中的 L,但紧跟在 L 后面的逗号会保留下来。于是行首多出一个逗号,split 之后第一个元素是空字符串,后面所有数值的配对都因此错开了一位。

我已经修复了该问题,并对创建csv列表的方式做了一些修改。下面的代码有效。

import csv
import re
filenmi = "original.csv"
filenmo = "data-out.csv"

# One unsigned integer or decimal value such as "39", "50.5" or ".5".
# Everything else on a line (the "L" markers, commas, the newline) is
# treated as a separator, so leading, trailing or doubled commas can no
# longer produce empty fields that shift the pairing.
NUMBER_PATTERN = re.compile(r"\d+(?:\.\d*)?|\.\d+")


def extract_pairs(line):
    """Return the numbers on *line* grouped two by two as CSV rows.

    Args:
        line: One line of raw text, e.g. "L,39,100,50.5,83,L,50.5,83".

    Returns:
        A list of "x,y" strings, each terminated by a newline, in input
        order.  When the line holds an odd number of values, the last
        (unpaired) value is dropped and a warning is printed.
    """
    values = NUMBER_PATTERN.findall(line)
    if len(values) % 2:
        print("Data set needs cleanup\n")
    # zip stops at the shorter slice, which discards an unpaired last value.
    return [x + "," + y + "\n" for x, y in zip(values[::2], values[1::2])]


def pairs_from_lines(lines):
    """Return the CSV rows for every line in *lines*, in order.

    Each line is paired independently, so an odd number of values on one
    line does not shift the pairing of the lines that follow it.
    """
    rows = []
    for line in lines:
        rows.extend(extract_pairs(line))
    return rows


def convert(in_path=filenmi, out_path=filenmo):
    """Read *in_path*, pair up its numbers and write them to *out_path*.

    Every line of the input is converted, not only the last one, and an
    empty input file simply produces an empty output file.

    Returns:
        The list of rows that was written.
    """
    with open(in_path, "r") as infile:
        rows = pairs_from_lines(infile)
    with open(out_path, "w") as outfile:
        outfile.writelines(rows)
    return rows


if __name__ == "__main__":
    convert()