# Open the output file for reading and writing ('w+' truncates existing content).
# NOTE(review): the handle is neither written to nor closed in this snippet --
# presumably the code that follows does both; confirm, or switch to a `with` block.
file = open(outFile, 'w+')
# Nested counter: the outer key is a tag from self.goldenTags, the inner key is
# the tag at the same position in self.myTags; unseen pairs default to 0.
matrix = defaultdict(lambda: defaultdict(int))
for s, golden_sentence in enumerate(self.goldenTags):
    for w, golden_token in enumerate(golden_sentence):
        # self.myTags is indexed with the same (s, w) position, so it must be
        # at least as long as self.goldenTags at every level.
        matrix[golden_token.tag][self.myTags[s][w].tag] += 1
我创建了一个嵌套字典,表示POS标记器的混淆矩阵,它看起来像:
'VBP': defaultdict(<class 'int'>,
{'CD': 4,
'FW': 1,
'JJ': 5,
'JJS': 1,
'NN': 61,
'NNP': 6,
'NNPS': 1,
'SYM': 2,
'UH': 19,
'VB': 72,
'VBD': 5,
'VBG': 2,
'VBP': 537,
'VBZ': 1}),
这有点难看。我希望将其作为一种整齐的矩阵格式保存到txt文件中,最好不使用任何库。有什么好办法呢?
Tag Tag Tag Tag Tag
Tag 1 0 2 inf 4
Tag 4 2 0 1 5
Tag inf inf 1 0 3
Tag 3 4 5 3 0
答案 0 :(得分:1)
# Sample nested dictionary: outer keys are the row labels, inner keys the
# column labels, values the counts.
d = {'VBP': {'CD': 4, 'FW': 1, 'JJ': 5, 'NN': 61, 'NNP': 6, 'NNPS': 1,
             'SYM': 2, 'VB': 72, 'VBD': 5, 'VBG': 2, 'VBZ': 1},
     'xyz': {'CD': 4, 'FW': 1, 'JJS': 1, 'NN': 61, 'NNP': 6, 'NNPS': 1,
             'UH': 19, 'VB': 72, 'VBD': 5, 'VBP': 537, 'VBZ': 1}}
# text shown for a (row, column) pair that has no entry
missing = 'inf'
# find all the columns and all the rows, sort them
columns = sorted(set(key for dictionary in d.values() for key in dictionary))
rows = sorted(d)
# figure out how wide each column is: the longest label plus 3 spaces of
# padding, but never narrower than the longest cell plus one separating space
# (otherwise a large count would run into the neighbouring column).
# `default=0` keeps this working when there are no rows or no columns.
label_width = max((len(thing) for thing in columns + rows), default=0)
cell_width = max(
    (len(str(value)) for dictionary in d.values() for value in dictionary.values()),
    default=0,
)
cell_width = max(cell_width, len(missing))
col_width = max(label_width + 3, cell_width + 1)
# preliminary format string : one column with specific width, right justified
fmt = '{{:>{}}}'.format(col_width)
# format string for all columns plus a 'label' for the row
fmt = fmt * (len(columns) + 1)
# print the header (the top-left corner cell is left empty)
print(fmt.format('', *columns))
# print the rows
for row in rows:
    dictionary = d[row]
    s = fmt.format(row, *(dictionary.get(col, missing) for col in columns))
    print(s)
>>>
CD FW JJ JJS NN NNP NNPS SYM UH VB VBD VBG VBP VBZ
VBP 4 1 5 inf 61 6 1 2 inf 72 5 2 inf 1
xyz 4 1 inf 1 61 6 1 inf 19 72 5 inf 537 1
>>>
将它放进一个生成字符串(而不是直接打印)的函数中;然后遍历该函数的返回值,并把每一行写入文件。
答案 1 :(得分:0)
不使用任何库,您仍然可以使用列表创建csv样式的输出。
# create a nested dictionary: outer keys are row ids, inner keys are column ids
d = {'x': {'v1': 4, 'v2': 5, 'v3': 12},
     'y': {'v1': 2, 'v2': 1, 'v3': 11},
     'z': {'v2': 5, 'v3': 1}}
# get all of the row and column ids, sorted
row_ids = sorted(d)
col_ids = sorted({k for v in d.values() for k in v})
# build the table as a list of lists: the header first (with an empty corner
# cell), then one list per row id
out = [[''] + col_ids]
# a column missing from a row's inner dictionary is filled with 0
out.extend([r] + [d[r].get(c, 0) for c in col_ids] for r in row_ids)
# in an interactive session this echoes the finished table
out
# returns
[['', 'v1', 'v2', 'v3'],
['x', 4, 5, 12],
['y', 2, 1, 11],
['z', 0, 5, 1]]
答案 2 :(得分:0)
与其“重新发明轮子”,不如使用 .xml、.json 或 .ini 格式。有很多库可以处理这些格式以及更多格式。简单示例请参见 https://docs.python.org/3/library/configparser.html 。