Question

我的csv文件非常复杂..它包含数字和字符串属性。这就是我的csv文件的样子我想绘制一个过程直方图与cpuid

Answer 1

您可以使用read_csv，indexing with str并按hist绘图：

import pandas as pd
import matplotlib.pyplot as plt
import io

temp=u"""kmem_kmalloc;{cpu_id=1}
kmem_kmalloc;{cpu_id=1}
kmem_kmalloc;{cpu_id=1}
kmem_kmalloc;{cpu_id=1}
kmem_kfree;{cpu_id=1}
kmem_kfree;{cpu_id=1}
power_cpu_idle;{cpu_id=0}
power_cpu_idle;{cpu_id=0}
power_cpu_idle;{cpu_id=3}"""

s = pd.read_csv(io.StringIO(temp), #after testing replace io.StringIO(temp) to filename
                sep=";", #set separator, if sep=',' can be omited (default sep = ,)
                header=None, #no header in csv
                names=[None,'cpuid'], #set names of columns, (first is None because index)
                index_col=0, #first column set to index
                squeeze=True) #try convert DataFrame to Series
print s
kmem_kmalloc      {cpu_id=1}
kmem_kmalloc      {cpu_id=1}
kmem_kmalloc      {cpu_id=1}
kmem_kmalloc      {cpu_id=1}
kmem_kfree        {cpu_id=1}
kmem_kfree        {cpu_id=1}
power_cpu_idle    {cpu_id=0}
power_cpu_idle    {cpu_id=0}
power_cpu_idle    {cpu_id=3}
Name: cpuid, dtype: object

#if max cpu <= 9, use Indexing with .str 
s = s.str[-2].astype(int)

#if cpu > 9 
#s= s.str.extract('(\d)', expand=False)
print s
kmem_kmalloc      1
kmem_kmalloc      1
kmem_kmalloc      1
kmem_kmalloc      1
kmem_kfree        1
kmem_kfree        1
power_cpu_idle    0
power_cpu_idle    0
power_cpu_idle    3
Name: cpuid, dtype: int32

plt.figure();
s.hist(alpha=0.5)
plt.show()

使用matplotlib和pandas从csv文件绘制直方图

1 个答案: