Question

我正在尝试创建一个在python 2和3上均可使用的shannon_entropy函数。以下代码在python 3上适用，但是，用于评估norm_counts的语句在python 2上返回一个等于0的narray，并在python 3中正确返回

我已经分解并简化了以下代码：

import unittest   

import numpy as np

def shannon_ent(labels, base=256):

    value, counts = np.unique(labels, return_counts=True)    
    sum_counts = counts.sum()

    norm_counts = counts / sum_counts

    print(norm_counts)

    base = e if base is None else base

    logged_counts = np.log(norm_counts)
    logged_base = np.log(base)
    logged = logged_counts/logged_base
    final = -(norm_counts * logged)

    return final.sum()


class function_tests(unittest.TestCase):

    def test_shannon_ent(self):

        chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
        ent = shannon_ent(chunk)
        print('*** is: {}'.format(ent))

        self.assertEqual(ent, 0.8212319510413685)

if __name__ == '__main__':
    unittest.main()

给出以下输出：

Python 2

# python unittest_binGraph.py 
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "unittest_binGraph.py", line 39, in test_shannon_ent
    ent = shannon_ent(chunk)
  File "unittest_binGraph.py", line 22, in shannon_ent
    logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log

----------------------------------------------------------------------
Ran 1 test in 0.007s

FAILED (errors=1)

Python 3

# python unittest_binGraph.py 
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s

OK

除非有人有更好的方法来计算熵？！我目前还在代码中使用scripy和statistics模块。

Answer 1

可能是整数除法。添加

from __future__ import division

位于文件的顶部。

用于熵计算的numpy数组除法

1 个答案: