我有一个要清理的csv文件。我正在尝试查看第一列,并删除第一列中该行除chars之外的任何行(我现在正在清理第一列具有^或。的行)。看来我所有的尝试都无能为力,或者核对了整个csv文件。
足够有趣的是,我有可以识别问题行的代码,而且看起来工作正常
def FindProblemRows():
with open('Data.csv') as csvDataFile:
ProblemRows = []
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
for i in range (0,length):
if data[i][0].find('^')!=-1 or data[i][0].find('.')!=-1:
ProblemRows.append(i)
return (ProblemRows)
下面是我最近的三个失败尝试。我要去哪里错了,应该怎么改变?其中哪一个最接近?
'''
def Clean():
with open("Data.csv", "w", newline='') as f:
data = list(csv.reader(f))
writer = csv.writer(f)
Problems = FindProblemRows()
data = list(csv.reader(f))
length = len(data)
for row in data:
for i in Problems:
for j in range (0, length):
if row[j] == i:
writer.writerow(row)
Problems.remove(i)
def Clean():
Problems = FindProblemRows()
with open('Data.csv') as csvDataFile:
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
width = len(data[0])
with open("Data.csv","r") as csvFile:
csvReader = csv.reader( csvFile )
with open("CleansedData.csv","w") as csvResult:
csvWrite = csv.writer( csvResult )
for i in Problems:
for j in range (0, length):
if data[j] == i:
del data[j]
for j in range (0, length):
csvWrite.writerow(data[j])
'''
def Clean():
with open("Data.csv", 'r') as infile , open("CleansedData.csv", 'w') as outfile:
data = [row for row in infile]
for row in infile:
for column in row:
if "^" not in data[row][0]:
if "." not in data[row][0]:
outfile.write(data[row])
更新
现在我有
def Clean():
df = pd.read_csv('Data.csv')
df = df['^' not in df.Symbol]
df = df['.' not in df.Symbol]
但是我得到KeyError:True
那不行吗?
答案 0 :(得分:1)
您应检查Symbol
列是否包含任何感兴趣的字符。方法contains
采用正则表达式:
bad_rows = df.Symbol.str.contains('[.^]')
df_clean = df[~bad_rows]