我有一个像这样的csv文件:
#CODE FOR THE DATAFRAME
# Twenty records: four classifiers, each listed against the same five stages.
raw_data = dict(
    Max_Acc=[
        90.71, 87.98, 92.62, 78.93, 73.69,
        92.62, 94.17, 92.62, 83.81, 79.76,
        85.36, 89.23, 88.88, 56.23, 89.54,
        36.25, 14.52, 85.45, 75.45, 45.54,
    ],
    # The five stages repeat once per classifier.
    Stage=['AWA', 'Rem', 'S1', 'S2', 'SWS'] * 4,
    Elec=[
        'Fp1', 'Fp2', 'C4', 'Cz', 'Pz',
        'C4', 'T3', 'Fp1', 'P4', 'Fp2',
        'Cz', 'P3', 'P4', 'T4', 'Cp1',
        'Cp2', 'T3', 'T4', 'Fp2', 'Fp1',
    ],
    # Each classifier label covers five consecutive rows.
    Clf=[name for name in ('RF', 'XG', 'SVM', 'KNN') for _ in range(5)],
)
# Column order follows the insertion order of the dict keys.
df = pd.DataFrame(raw_data, columns=list(raw_data))
df
#CODE FOR THE PLOT
# Draws one bar per row of `df`, coloured by electrode, with a legend of
# electrodes on top and two rows of x tick labels (stage and classifier).
sns.set(style="white")

# `uelec` holds the distinct electrode names, `uind` the index of each row's
# electrode within `uelec`.
uelec, uind = np.unique(df["Elec"], return_inverse=True)

#To use seaborn palette
# One colour per distinct electrode, so indexing by `uind` can never run past
# the end of the palette (seaborn cycles "Set1" when asked for more colours
# than the palette defines).
palette = sns.color_palette("Set1", len(uelec))
colors = [palette[i] for i in uind]

fig, ax = plt.subplots(figsize=(15, 5))

l = len(df)
half = l // 2
idx = np.arange(l)
# With an even number of rows, the first half is shifted left by one bar width
# (0.4) and the second half is not, so rows k and k + half sit side by side
# around x = k.
pos = idx % half + (idx // half - 1) * 0.4
ax.bar(pos, df["Max_Acc"], width=0.4, align="edge", ec="k", color=colors)

# Legend swatches: use `fc` rather than `color`, because `color` would also
# override the edge colour and drop the black outline the bars have.
handles = [plt.Rectangle((0, 0), 1, 1, fc=swatch, ec="k") for swatch in palette]
legend = ax.legend(
    bbox_to_anchor=(0., 1.15, 1., .102),
    handles=handles,
    labels=list(uelec),
    prop={'size': 10},
    loc="upper center",
    ncol=8,
    title=r'Best algorithm using Max_Acc after undersampling',
)
legend.get_frame().set_linewidth(0.0)
plt.setp(legend.get_title(), fontsize='24')

# Major ticks: one per bar pair, labelled with the stage of the first-half row.
ax.set_xticks(range(half))
ax.set_xticklabels(df["Stage"][:half])
ax.set_ylim(0, 110)
ax.get_yaxis().set_visible(False)
ax.spines['top'].set_visible(False)

#Double x-axis
# Minor ticks sit at the centre of every bar (left edge + half the bar width)
# and carry the classifier name of that row.
ax.set_xticks(pos + 0.2, minor=True)
clf = df['Clf'].tolist()
ax.set_xticklabels(clf, minor=True)
plt.setp(ax.get_xticklabels(), rotation=0)
# Push the major (stage) labels down so they sit below the minor labels.
ax.tick_params(axis='x', which='major', pad=25, size=0)
def annotateBars(row=None, ax=ax):
    """Label every bar on *ax* with its height, formatted to two decimals.

    Parameters
    ----------
    row :
        Ignored. Kept (now optional) so the function can still be passed to
        ``DataFrame.apply(..., axis=1)`` as before.
    ax :
        Axes whose ``patches`` are annotated. Defaults to the ``ax`` that is
        in scope when this function is defined.

    Each label is centred horizontally on its bar, anchored at the bar top,
    offset 20 points upwards and rotated 90 degrees.
    """
    for p in ax.patches:
        ax.annotate(
            "%.2f" % p.get_height(),
            (p.get_x() + p.get_width() / 2., p.get_height()),
            ha='center', va='center', fontsize=11, color='gray', rotation=90,
            xytext=(0, 20), textcoords='offset points',
        )


# One call already walks over every bar. Calling it once per DataFrame row
# would draw each label len(df) times on top of itself.
annotateBars(ax=ax)
当我尝试使用此命令导入python时:
ATTRIBUTE_1;.....;ATTRIBUTE_N
null;01;M;N;;N;1108;1;F205;;N;F;13;;N;S;2;N;6000000;;A010;40;B;2;10;42;N;;61;MI;01;N;N;S;;-1;N;N;01;;;;;;;;;;;;;;;;;;;;;;;;;;778,69
null;01;M;N;;N;1108;1;F205;;N;F;13;;N;S;2;N;6000000;;A010;40;B;2;10;42;N;;61;MI;01;N;N;S;;-1;N;N;01;;;;;;;;;;;;;;;;;;;;;;;;;;778,71
null;01;M;N;;N;1108;1;F205;;N;F;13;;N;S;2;N;6000000;;A010;40;B;2;10;42;N;;61;MI;01;N;N;S;;-1;N;N;01;;;;;;;;;;;;;;;;;;;;;;;;;;778,72
我的输出是:
# No `sep` is passed, so pandas splits on its default "," delimiter rather than ";".
data = pd.read_csv(r"C:\...\file.csv")
如何按列导入csv?像这样:
0 null;01;M;N;;N;1108;1;F205;;N;F;13;;N;S;2;N;60...
答案 0 :(得分:0)
问题在于文件的每一行都以 " 开头和结尾,因此需要加上参数 quoting=3(即 csv.QUOTE_NONE,不对引号做任何特殊处理):
# quoting=3 turns quote handling off, so the " characters stay in the data
# and are removed by hand below.
df = pd.read_csv('file.csv', sep=';', quoting=3)

# Only the outermost columns carry the stray quote: the first value of each
# line starts with it and the last value ends with it.
for edge in (0, -1):
    df.iloc[:, edge] = df.iloc[:, edge].str.strip('"')

# The header line is wrapped the same way, so clean the column names too.
df.columns = df.columns.map(lambda label: label.strip('"'))

print(df.head())
SIGLA TARGA CATEGORIA TARIFFARIA - LIVELLO 3 SESSO \
0 null 1 M
1 null 1 M
2 null 1 M
3 null 1 M
4 null 1 M
RCA - PATTO PER I GIOVANI VALORE FRANCHIGIA TIPO TARGA CILINDRATA \
0 N NaN N 1108
1 N NaN N 1108
2 N NaN N 1108
3 N NaN N 1108
4 N NaN N 1108
CODICE FORMA CONTRATTUALE RCA - RECUPERO COMUNE PRA \
0 1 F205
1 1 F205
2 1 F205
3 1 F205
4 1 F205
CODICE WORKSITE MARKETING ... Unnamed: 55 Unnamed: 56 \
0 NaN ... NaN NaN
1 NaN ... NaN NaN
2 NaN ... NaN NaN
3 NaN ... NaN NaN
4 NaN ... NaN NaN
Unnamed: 57 Unnamed: 58 Unnamed: 59 Unnamed: 60 Unnamed: 61 Unnamed: 62 \
0 NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN
Unnamed: 63 PREMIO FINALE
0 NaN 778,69
1 NaN 778,70
2 NaN 778,71
3 NaN 778,72
4 NaN 778,73
[5 rows x 65 columns]