我需要绘制几个直方图:属性数量与类别、属性描述与类别,以及实例数量与类别。我是编程新手,以下是我到目前为止所做的尝试。
import numpy as np
import pandas as pd
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
# Load the mushroom table into a DataFrame. The path is kept exactly as
# given; it must name the actual file on disk, extension included.
data = pd.read_csv('mushroom')

# Select the label column by name. The previous `df.'Class'` was a syntax
# error (attribute access cannot take a string literal) and referred to an
# undefined `df`; the DataFrame is bound to `data`.
column = data['Class']

num_bins = 5

# Plot the distribution of the class labels.
# NOTE(review): 'Class' appears to hold categorical letter codes, so a bar
# chart of data['Class'].value_counts() may be a better fit than binning.
n, bins, patches = plt.hist(column, num_bins, facecolor='blue', alpha=0.5)
plt.show()
这就是我的数据的样子
cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,Class
,f,g,f,n,f,c,n,p,e,s,s,w,w,p,w,o,p,k,v,u,p
,f,g,f,n,f,c,n,n,e,s,s,w,w,p,w,o,p,k,y,u,p
x,f,g,f,n,f,w,b,k,t,s,f,w,w,p,w,o,e,n,s,g,e
,f,g,f,n,f,c,n,g,e,s,s,w,w,p,w,o,p,n,y,u,e
x,f,w,f,n,f,w,b,p,t,f,s,w,w,p,w,o,e,n,a,g,e
s,f,n,f,n,f,c,n,n,e,s,s,w,w,p,w,o,p,k,v,u,e
f,f,n,f,n,f,c,n,n,e,s,s,w,w,p,w,o,p,n,v,u,e
x,f,g,f,n,f,c,n,p,e,s,s,w,w,p,w,o,p,n,y,u,e
f,s,g,f,n,f,w,b,n,t,s,f,w,w,p,w,o,e,n,s,g,e
x,f,w,f,n,f,w,b,n,t,f,f,w,w,p,w,o,e,n,a,g,e
x,s,n,f,n,f,w,b,p,t,f,f,w,w,p,w,o,e,k,s,g,e
x,s,w,f,n,f,w,b,h,t,f,s,w,w,p,w,o,e,n,s,g,p
f,f,w,f,n,f,w,b,p,t,f,s,w,w,p,w,o,e,k,s,g,p
x,f,g,f,n,f,w,b,p,t,f,f,w,w,p,w,o,e,n,s,g,e
答案 0 :(得分:0)
Class 列是分类变量(也称为因子,参见 http://www.statisticshowto.com/what-is-a-categorical-variable/ )。只有当变量是连续变量(参见 http://www.statisticshowto.com/continuous-variable/ )时,分箱和直方图才有意义。
我假设您实际需要的是频数图,即用条形图显示分类数据中每个取值出现的次数。
如果我的假设是正确的,以下代码将解决您的问题。
import pandas
import matplotlib.pyplot as plt
# Read the dataset from disk.
data = pandas.read_csv('mushroom.csv')

# Create the figure and axes the bar chart will be drawn on.
figure, axes = plt.subplots()

# Count how often each distinct value occurs in the 'Class' column, then
# draw one bar per value on the prepared axes.
class_counts = data['Class'].value_counts()
class_counts.plot(kind='bar', ax=axes)

plt.show()