我有一个这样写的 txt 文件（原文此处为一张图片，未能保留）：
speed weight color fuel cost
0 180 3 black 95 NaN
1 160 NaN green 92 NaN
2 200 5 NaN 95 NaN
3 180 3 black 95 30000
我想创建一个像上面表格这样的数组（DataFrame），下面是我目前的代码：
import re
import pandas as pd
# Prefix that marks a car-characteristics attribute path. The dots are
# escaped so they only match literal dots; the final "." wildcard requires at
# least one more character after "characteristics". The spelling "vechicle"
# is intentional: it is the text the script searches for in the input.
_PATH_RE = re.compile(r'vechicle\.car\.characteristics.')

# First "[<digits>]" group in the path; its number is the record (row) index.
_INDEX_RE = re.compile(r'\[(\d+)\]')


def _parse_record(line):
    """Split one input line into ``(index, field, value)``.

    A line is a record when it has at least three ``|``-separated fields and
    its second field is a characteristics path that carries a ``[N]`` index
    and at least four dot-separated components. The fourth component is the
    field (column) name and the third ``|`` field is the value.
    Presumably paths look like ``vechicle.car.characteristics[0].speed`` --
    confirm against the real data file.

    Returns ``None`` for anything else (blank lines, unrelated lines,
    malformed paths) instead of raising ``IndexError``.
    """
    fields = line.rstrip().split('|')
    if len(fields) < 3:
        return None

    path = fields[1]
    if _PATH_RE.search(path) is None:
        return None

    index_match = _INDEX_RE.search(path)
    path_parts = path.split('.')
    if index_match is None or len(path_parts) < 4:
        return None

    return int(index_match.group(1)), path_parts[3], fields[2]


def parse_car_records(lines):
    """Group consecutive characteristics lines into one dict per record.

    Args:
        lines: Any iterable of text lines (an open text file works).

    Returns:
        A list of ``{field: value}`` dicts, one per run of consecutive
        records sharing the same ``[N]`` index, in input order. Values are
        kept as the strings read from the input. The list is empty when no
        line qualifies as a record.
    """
    rows = []
    current_row = {}
    # None means "no record seen yet"; this avoids assuming the first index
    # is 0, which would emit a spurious empty row for other starting indices.
    current_index = None

    for line in lines:
        record = _parse_record(line)
        if record is None:
            continue
        index, field, value = record

        # A change of index closes the row being built and starts a new one.
        if current_index is not None and index != current_index:
            rows.append(current_row)
            current_row = {}

        current_row[field] = value
        current_index = index

    # Flush the last row, but only if at least one record was read.
    if current_index is not None:
        rows.append(current_row)
    return rows


if __name__ == "__main__":
    with open('sample_car.txt', encoding='utf-8') as file:
        df_output_dict = parse_car_records(file)
    # Fields missing from a record become NaN in the resulting frame.
    df_output = pd.DataFrame(df_output_dict)
    print(df_output)
这就是我得到的:
1 !
2 "
3 #
4 $
5 %