Question

我在一个目录中有多个 .txt 文件，例如 d:\memdump\0.txt、1.txt、……、10.txt。示例文本文件的内容如下：

 Applications Memory Usage (kB):
 Uptime: 7857410 Realtime: 7857410
** MEMINFO in pid 23875 [com.example.twolibs] **
                 Shared  Private     Heap     Heap     Heap
               Pss    Dirty     Dirty     Size    Alloc     Free
              ------   ------   ------   ------   ------   ------
  Native        0        0        0       13504    10836      459
  Dalvik     6806     7740     6580       24076    18523     5553
  Stack       80        0       80                           
  Cursor        0        0        0                           
  Ashmem        0        0        0                           
  Other dev    14741      836     1028                           
 .so mmap     1367      448     1028                           
 .jar mmap        0        0        0                           
 .apk mmap      225        0        0                           
 .ttf mmap        0        0        0                           
 .dex mmap     1225      340       16                           
  Other mmap        5        8        4                           
  Unknown     3473      564     3432                           
  TOTAL    27922     9936    12168    37580    29359     6012
  Objects
       Views:       62         ViewRootImpl:        2
  AppContexts:        5           Activities:        2
      Assets:        3        AssetManagers:        3
  Local Binders:        9        Proxy Binders:       18
  Death Recipients:        0
  OpenSSL Sockets:        0
  SQL
    MEMORY_USED:        0
   PAGECACHE_OVERFLOW:        0          MALLOC_SIZE:        0

我需要解析这些文件，获取 PID、Native Heap Size、Native Heap Alloc、Dalvik Heap Size、Dalvik Heap Alloc 的值，并用这些堆大小绘制如下所示的图表：

enter image description here

我使用以下代码来实现此目的：

import glob
import os
import re
import numpy as np
import matplotlib.pyplot as plt

# Work from the directory that holds the memory dump (*.txt) files.
# Raw string: the backslashes of the Windows path must never be read as escapes.
os.chdir(r"D:\Python_Trainings\MemInfo\Data")

# Values collected from the dump files, one list per quantity.
pid_arr = []
native_heapsize_arr = []
dalvik_heapsize_arr = []
native_heapalloc_arr = []
dalvik_heapalloc_arr = []
# Distinct package names seen in the dump headers.
pkg_name_arr = []

#Method to parse the memory dump files
def _dump_sort_key(file_name):
    """Sort key that orders "<number>.txt" names numerically (2.txt before 10.txt).

    Names whose stem is not a number sort after the numbered ones, alphabetically.
    """
    stem = os.path.splitext(os.path.basename(file_name))[0]
    try:
        return (0, int(stem), file_name)
    except ValueError:
        return (1, 0, file_name)


def parse_dumpFiles():
    """Parse every *.txt memory dump in the current directory.

    Appends to the module-level lists:
      * pid_arr               - PID from each "pid <n>" occurrence (int)
      * native_heapsize_arr   - 4th numeric column of the "Native" row (int)
      * native_heapalloc_arr  - 5th numeric column of the "Native" row (int)
      * dalvik_heapsize_arr   - 4th numeric column of the "Dalvik" row (int)
      * dalvik_heapalloc_arr  - 5th numeric column of the "Dalvik" row (int)
      * pkg_name_arr          - each distinct bracketed package name (str)

    Files are visited in numeric file-name order so that the position of a
    value in the lists corresponds to the sample number in the file name.
    A file that cannot be opened or read is reported and skipped.
    """
    # Compile once instead of once per line.
    pid_re = re.compile(r'pid\s+(\d+)')
    # Dotted name of any depth inside square brackets.
    pkg_name_re = re.compile(r'\[([\w.]+)\]')
    # Row name, three numeric columns, then heap size and (when present) heap alloc.
    heap_rows = (
        (re.compile(r'Native\s+\d+\s+\d+\s+\d+\s+(\d+)(?:\s+(\d+))?'),
         native_heapsize_arr, native_heapalloc_arr),
        (re.compile(r'Dalvik\s+\d+\s+\d+\s+\d+\s+(\d+)(?:\s+(\d+))?'),
         dalvik_heapsize_arr, dalvik_heapalloc_arr),
    )

    # glob.glob() returns names in arbitrary order; sort them explicitly.
    for data_file in sorted(glob.glob("*.txt"), key=_dump_sort_key):
        try:
            # "with" closes the file even on errors and never touches an
            # unbound name when open() itself fails.
            with open(data_file, "r") as fo:
                for line in fo:
                    pid_match = pid_re.search(line)
                    if pid_match:
                        pid_arr.append(int(pid_match.group(1)))

                    for heap_re, size_arr, alloc_arr in heap_rows:
                        heap_match = heap_re.search(line)
                        if not heap_match:
                            continue
                        size_arr.append(int(heap_match.group(1)))
                        if heap_match.group(2) is not None:
                            alloc_arr.append(int(heap_match.group(2)))

                    pkg_name_match = pkg_name_re.search(line)
                    if pkg_name_match:
                        pkg_name = pkg_name_match.group(1)
                        if pkg_name not in pkg_name_arr:
                            pkg_name_arr.append(pkg_name)
        except IOError:
            print("Error: can't find file or read data")
#end of parse_dumpFiles() Method

#Method to plot from Memory Dumps
def plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr,   native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr):
    """Plot the heap series as lines and list them, with the PIDs, in a table.

    Args:
        pid_arr: PID of each sample.
        native_heapsize_arr: native heap size of each sample.
        dalvik_heapsize_arr: Dalvik heap size of each sample.
        native_heapalloc_arr: native heap allocated of each sample.
        dalvik_heapalloc_arr: Dalvik heap allocated of each sample.
        pkg_name_arr: package names; the first one is used as the title.

    The five value sequences must have the same length and hold values that
    convert to int. Sample i is drawn at x = 10 * i. Every table column whose
    PID differs from the PID of the first sample is painted red.

    Raises:
        ValueError: if there are no samples to plot.
    """
    # Table data as an integer array; one row per quantity, one column per sample.
    tableData = np.asarray([native_heapsize_arr, dalvik_heapsize_arr,
                            native_heapalloc_arr, dalvik_heapalloc_arr,
                            pid_arr], dtype=int)
    sample_count = tableData.shape[1]
    if sample_count == 0:
        raise ValueError("no memory dump samples to plot")

    # Create a figure and axes with room for the table
    fig = plt.figure()
    ax = plt.axes([0.2, 0.2, 0.7, 0.7])

    # One x position / column label per sample: 0, 10, 20, ...
    xAxis_val = [10 * idx for idx in range(sample_count)]
    colLabels = tuple(str(x_val) for x_val in xAxis_val)

    # (label, marker) of each plotted series, in the same order as the first
    # four rows of tableData, so row labels can never drift from their data.
    series = (
        ("Native Heap Size", "o"),
        ("Dalvik Heap Size", "D"),
        ("Native Heap Allocated", "^"),
        ("Dalvik Heap Allocated", "h"),
    )

    # Make the line plots and remember each line's colour for its table row.
    rowColours = []
    for row, (label, marker) in enumerate(series):
        line, = ax.plot(xAxis_val, tableData[row], linewidth=2.5,
                        marker=marker, label=label)
        rowColours.append(line.get_color())
    rowColours.append('w')  # the PID row has no line of its own
    rowLabels = tuple(label for label, _ in series) + ('PID',)

    # Show the table
    the_table = ax.table(cellText=tableData, rowLabels=rowLabels,
                         rowColours=rowColours, colLabels=colLabels,
                         loc='bottom')

    # show legend
    plt.legend(loc='upper right', fontsize=10)

    # Colour every column whose PID differs from the first PID.
    # Table row 0 holds the column headers, so data rows are 1..len(rowLabels).
    cells = the_table.get_celld()
    first_pid = tableData[-1, 0]
    for col in range(sample_count):
        if tableData[-1, col] != first_pid:
            for table_row in range(1, len(rowLabels) + 1):
                cells[(table_row, col)].set_color('r')

    # Turn off x-axis ticks; the table columns label the samples instead
    plt.xticks([])

    # Configure Y axis
    plt.ylim(0, 60000)
    plt.yticks([10000, 20000, 30000, 40000, 50000, 60000])
    plt.grid(True)

    # Fall back to a placeholder when no package name was collected.
    pkg_name = pkg_name_arr[0] if pkg_name_arr else "Unknown package"

    # Setting the name of the window title of the plot (only possible when
    # the figure is attached to a window manager)
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title(pkg_name + "- Memory Dump Plot")

    # Setting the Title of the plot
    plt.title(pkg_name, color='r', fontsize=20)

    # Setting Y Label
    plt.ylabel('Heap Size', fontsize=14, color='r')

    # show plot
    plt.show()
#end of plt_MemDump() Method

# Fill the module-level lists from the dump files, then draw the chart from them.
parse_dumpFiles()
plt_MemDump(
    pid_arr=pid_arr,
    native_heapsize_arr=native_heapsize_arr,
    dalvik_heapsize_arr=dalvik_heapsize_arr,
    native_heapalloc_arr=native_heapalloc_arr,
    dalvik_heapalloc_arr=dalvik_heapalloc_arr,
    pkg_name_arr=pkg_name_arr,
)

现在我想用一些颜色标记表格的列，其中PID值与第一个PID值不同。（例如，第30,60和100列）。

有人可以帮助我实现这个目标吗？

Answer 1

matplotlib.pyplot.table为您提供了满足您需求的工具：

cellColours（形状与cellText相同）：让您为每个单元格选择颜色
colColours：与 rowColours 类似，但作用于列标题

如果您希望列中的所有单元格都具有特定颜色，则可以执行此类操作

# One colour per table cell, same shape as the cell text; start with white everywhere
# so that cells which are not highlighted still get a valid colour.
cellcolours = np.empty_like(tableData, dtype='object')
cellcolours[:] = 'w'
for i, cl in enumerate(colLabels):
    # colLabels holds strings such as '60', so convert before comparing numerically
    if int(cl) > 50:
        cellcolours[:, i] = 'r'

然后在调用 plt.table（不过我建议改用 ax.table）时加上 cellColours=cellcolours 关键字参数。

如果您还希望列标题也带颜色，可以按同样的方式设置 colColours。

如果您希望在创建表后能够更改单元格，table.get_celld()将返回单元格字典，其键是每个单元格位置的元组，例如：

{(0, 0): <matplotlib.table.Cell at 0x5d750d0>,  # these are column headers
 (0, 1): <matplotlib.table.Cell at 0x5d75290>,
 (0, 2): <matplotlib.table.Cell at 0x5d75450>,
 (0, 3): <matplotlib.table.Cell at 0x5d75610>,
 [...]
 (1, -1): <matplotlib.table.Cell at 0x5d757d0>, # this is a row header
 (1, 0): <matplotlib.table.Cell at 0x5a58110>,  # this is a cell
 (1, 1): <matplotlib.table.Cell at 0x5d74150>,
 (1, 2): <matplotlib.table.Cell at 0x5d74290>,
 (1, 3): <matplotlib.table.Cell at 0x5d743d0>,
 [...]
 (2, -1): <matplotlib.table.Cell at 0x5d75990>, # this is a row header
 [...] }

您可以使用元组访问每个单元格，例如

# Keys are (row, column) tuples; (1, 1) is one of the regular cells in the listing above
c=table.get_celld()[(1,1)]

您可以使用 c.set_color() 设置单元格颜色，并使用 c.get_text().get_text() 检查单元格中的文本（第一个 get_text 返回一个 Text 实例，第二个 get_text 返回其中的字符串）。

如何在matplotlib表中设置列的背景颜色

1 个答案: