Question

我有一个包含几千行数据的文件，如下所示：

defaultdict(<type 'int'>, {'2012021310': 76422, '2012021311': 94188, '2012021323': 139363, '2012021312': 111817, '2012021307': 71316, '2012021306': 82418, '2012021305': 65217, '2012021313': 127002, '2012021314': 141099, '2012021315': 147830, '2012021316': 136330, '2012021317': 122252, '2012021318': 118619, '2012021319': 115763, '2012021322': 137658, '2012021321': 130022, '2012021320': 121393, '2012021309': 69406, '2012021308': 66833}) 
defaultdict(<type 'int'>, {'2012021408': 139745, '2012021409': 143658, '2012021414': 288286, '2012021418': 31216, '2012021416': 268214, '2012021400': 207878, '2012021401': 269296, '2012021402': 270258, '2012021403': 275882, '2012021404': 232521, '2012021405': 195062, '2012021406': 166669, '2012021407': 142855, '2012021417': 245582, '2012021411': 194360, '2012021413': 262078, '2012021410': 158954, '2012021415': 296457, '2012021412': 237083}) 
defaultdict(<type 'int'>, {'2012021523': 676350, '2012021522': 670147, '2012021521': 650984, '2012021520': 617401, '2012021501': 170448, '2012021503': 246600, '2012021502': 250013, '2012021505': 363866, '2012021504': 300809, '2012021507': 333080, '2012021506': 370454, '2012021509': 343671, '2012021508': 330452, '2012021512': 549736, '2012021513': 622690, '2012021510': 387871, '2012021511': 456171, '2012021516': 647559, '2012021517': 600969, '2012021514': 692257, '2012021515': 706377, '2012021518': 579669, '2012021519': 587969}) 
defaultdict(<type 'int'>, {'2012021608': 333986, '2012021609': 344126, '2012021602': 651692, '2012021603': 676458, '2012021600': 664484, '2012021601': 686408, '2012021620': 932692, '2012021621': 1065501, '2012021604': 589033, '2012021605': 465191, '2012021623': 1316907, '2012021606': 389669, '2012021607': 342613, '2012021619': 828190, '2012021618': 617836, '2012021622': 1111334, '2012021611': 467532, '2012021610': 387220, '2012021613': 634585, '2012021612': 560227, '2012021615': 718498, '2012021614': 704008, '2012021617': 606396, '2012021616': 665030})

密钥是日期/小时，如'2012021310'是“02-13-2012 10am”，这些值只是发生事件的计数。

我对使用matplotlib进行绘图有一个很好的理解但是我不确定如何处理数据。是否有一种简单的方法可以读取这些行中的每一行并在类似于使用csv2rec读取两列的方式中对它们进行处理？或者有更好的方法吗？

编辑：

这取代了defaultdict（带有“data =”

import fileinput

filein = 'list.txt'

for line in fileinput.input([filein]):
        line = line.replace("defaultdict(<type 'int'>,", "data =")
        line = line.replace(")", " ")
        print line

这导致：

data = {'2012021310': 76422, '2012021311': 94188, '2012021323': 139363, '2012021312': 111817, '2012021307': 71316, '2012021306': 82418, '2012021305': 65217, '2012021313': 127002, '2012021314': 141099, '2012021315': 147830, '2012021316': 136330, '2012021317': 122252, '2012021318': 118619, '2012021319': 115763, '2012021322': 137658, '2012021321': 130022, '2012021320': 121393, '2012021309': 69406, '2012021308': 66833}  

data = {'2012021408': 139745, '2012021409': 143658, '2012021414': 288286, '2012021418': 31216, '2012021416': 268214, '2012021400': 207878, '2012021401': 269296, '2012021402': 270258, '2012021403': 275882, '2012021404': 232521, '2012021405': 195062, '2012021406': 166669, '2012021407': 142855, '2012021417': 245582, '2012021411': 194360, '2012021413': 262078, '2012021410': 158954, '2012021415': 296457, '2012021412': 237083}  

data = {'2012021523': 676350, '2012021522': 670147, '2012021521': 650984, '2012021520': 617401, '2012021501': 170448, '2012021503': 246600, '2012021502': 250013, '2012021505': 363866, '2012021504': 300809, '2012021507': 333080, '2012021506': 370454, '2012021509': 343671, '2012021508': 330452, '2012021512': 549736, '2012021513': 622690, '2012021510': 387871, '2012021511': 456171, '2012021516': 647559, '2012021517': 600969, '2012021514': 692257, '2012021515': 706377, '2012021518': 579669, '2012021519': 587969}  

data = {'2012021608': 333986, '2012021609': 344126, '2012021602': 651692, '2012021603': 676458, '2012021600': 664484, '2012021601': 686408, '2012021620': 932692, '2012021621': 1065501, '2012021604': 589033, '2012021605': 465191, '2012021623': 1316907, '2012021606': 389669, '2012021607': 342613, '2012021619': 828190, '2012021618': 617836, '2012021622': 1111334, '2012021611': 467532, '2012021610': 387220, '2012021613': 634585, '2012021612': 560227, '2012021615': 718498, '2012021614': 704008, '2012021617': 606396, '2012021616': 665030}

我仍然不确定从哪里开始

编辑：

我越来越接近每个人的建议：

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
from matplotlib.mlab import csv2rec

    filein = 'list.txt'
    output_name = "image.png"
    dicts = []
    line = ""

    for line in fileinput.input([filein]):
            line = line.replace("defaultdict(<type 'int'>,", "data = ")
            line = line.replace(")", "")
            line = line.strip()
            exec(line)
            objects = data.items()
            print objects

返回：

[('2012021307', 71316), ('2012021306', 82418), ('2012021305', 65217), ('2012021309', 69406), ('2012021310', 76422), ('2012021311', 94188), ('2012021312', 111817), ('2012021313', 127002), ('2012021314', 141099), ('2012021315', 147830), ('2012021316', 136330), ('2012021317', 122252), ('2012021318', 118619), ('2012021319', 115763), ('2012021308', 66833), ('2012021321', 130022), ('2012021320', 121393), ('2012021323', 139363), ('2012021322', 137658)]
[('2012021408', 139745), ('2012021409', 143658), ('2012021403', 275882), ('2012021418', 31216), ('2012021400', 207878), ('2012021416', 268214), ('2012021402', 270258), ('2012021414', 288286), ('2012021404', 232521), ('2012021405', 195062), ('2012021406', 166669), ('2012021407', 142855), ('2012021417', 245582), ('2012021411', 194360), ('2012021401', 269296), ('2012021413', 262078), ('2012021410', 158954), ('2012021415', 296457), ('2012021412', 237083)]
[('2012021523', 676350), ('2012021522', 670147), ('2012021521', 650984), ('2012021520', 617401), ('2012021501', 170448), ('2012021503', 246600), ('2012021502', 250013), ('2012021505', 363866), ('2012021504', 300809), ('2012021507', 333080), ('2012021506', 370454), ('2012021509', 343671), ('2012021508', 330452), ('2012021512', 549736), ('2012021513', 622690), ('2012021510', 387871), ('2012021511', 456171), ('2012021516', 647559), ('2012021517', 600969), ('2012021514', 692257), ('2012021515', 706377), ('2012021518', 579669), ('2012021519', 587969)]
[('2012021605', 465191), ('2012021608', 333986), ('2012021609', 344126), ('2012021602', 651692), ('2012021603', 676458), ('2012021600', 664484), ('2012021601', 686408), ('2012021606', 389669), ('2012021607', 342613), ('2012021622', 1111334), ('2012021623', 1316907), ('2012021620', 932692), ('2012021621', 1065501), ('2012021619', 828190), ('2012021618', 617836), ('2012021604', 589033), ('2012021611', 467532), ('2012021610', 387220), ('2012021613', 634585), ('2012021612', 560227), ('2012021615', 718498), ('2012021614', 704008), ('2012021617', 606396), ('2012021616', 665030)]
[('2012021605', 465191), ('2012021608', 333986), ('2012021609', 344126), ('2012021602', 651692), ('2012021603', 676458), ('2012021600', 664484), ('2012021601', 686408), ('2012021606', 389669), ('2012021607', 342613), ('2012021622', 1111334), ('2012021623', 1316907), ('2012021620', 932692), ('2012021621', 1065501), ('2012021619', 828190), ('2012021618', 617836), ('2012021604', 589033), ('2012021611', 467532), ('2012021610', 387220), ('2012021613', 634585), ('2012021612', 560227), ('2012021615', 718498), ('2012021614', 704008), ('2012021617', 606396), ('2012021616', 665030)]

所以我绝对更接近：

编辑：

而且我还有：

import fileinput
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
from matplotlib.mlab import csv2rec 

filein = 'list.txt'
output_name = "image.png"
dicts = []
times = []
values = []

line = ""
for line in fileinput.input([filein]):
        line = line.replace("defaultdict(<type 'int'>,", "data = ")
        line = line.replace(")", "")
        line = line.strip()
        exec(line)
        for k in sorted(data.iterkeys()):
                times.append(k)
                values.append(data[k])

fig = plt.figure()

ax = fig.add_subplot(111)
ax.plot(times, values)
hours = mdates.HourLocator()
fmt = mdates.DateFormatter('%Y - %M:%D:%H')
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(fmt)

fig.autofmt_xdate(bottom=0.2, rotation=90, ha='left')

ax.grid()
plt.savefig(output_name)

这让我到了至少生成情节的地步。现在的问题是x访问不会从输入的数据中正确生成标签。我认为这是由于某种非传统的时间戳。还有最后的建议吗？

Answer 1

如果可能的话，我会找到一种方法将输出变成更有用的东西。如果没有，你可以替换它

defaultdict(<type 'int'>,

带

data =

在循环遍历行时，使用string.replace（），然后使用exec（）函数执行该行中的文字代码。

Answer 2

尝试：

dicts = []
with open(filename,'r') as f:
    for l in f:
        exec("dicts.append(" + l[l.index('{'):l.index('}')+1] + ")")

应该读入所有dicts并将其存储在列表dicts中。然后，您可以使用以下内容构建键和值对的列表：

tsvals = []
for d in dicts:
    tsvals.append(d.items())

如果你想要它们排序，你当然可以在构建它之后对它们进行排序。一旦我拥有它们，我可能会保存键和值对。一个csv文件可能就足够了。

但exec命令只应用于可信数据。这是非常不安全的，但如果您是生成数据的人，或者如果您信任生成数据的人只给您提供良好的数据，那么我认为这是最好的方法。

Python Matplotlib每小时保存在文件中的Defaultdict行中的数据

2 个答案: