我正在尝试创建一个在python 2和3上均可使用的shannon_entropy函数。以下代码在python 3上适用,但是,用于评估norm_counts的语句在python 2上返回一个等于0的narray,并在python 3中正确返回
我已经分解并简化了以下代码:
import unittest
import numpy as np
def shannon_ent(labels, base=256):
value, counts = np.unique(labels, return_counts=True)
sum_counts = counts.sum()
norm_counts = counts / sum_counts
print(norm_counts)
base = e if base is None else base
logged_counts = np.log(norm_counts)
logged_base = np.log(base)
logged = logged_counts/logged_base
final = -(norm_counts * logged)
return final.sum()
class function_tests(unittest.TestCase):
def test_shannon_ent(self):
chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
ent = shannon_ent(chunk)
print('*** is: {}'.format(ent))
self.assertEqual(ent, 0.8212319510413685)
if __name__ == '__main__':
unittest.main()
给出以下输出:
Python 2
# python unittest_binGraph.py
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "unittest_binGraph.py", line 39, in test_shannon_ent
ent = shannon_ent(chunk)
File "unittest_binGraph.py", line 22, in shannon_ent
logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log
----------------------------------------------------------------------
Ran 1 test in 0.007s
FAILED (errors=1)
Python 3
# python unittest_binGraph.py
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s
OK
除非有人有更好的方法来计算熵?!我目前还在代码中使用scripy和statistics模块。
答案 0 :(得分:1)
可能是整数除法。添加
from __future__ import division
位于文件的顶部。