我有以下代码:
import numpy as np
from numpy import array
from scipy.optimize import nnls
def by_nnls(A=None, B=None):
    """Solve ``min ||A x - B||`` subject to ``x >= 0`` and normalize the result.

    Non-finite entries (NaN/inf) of both inputs are replaced with finite
    numbers via ``np.nan_to_num`` before calling ``scipy.optimize.nnls``.

    Args:
        A: 2-D coefficient matrix (array-like).
        B: 1-D right-hand-side vector with one entry per row of ``A``.

    Returns:
        The non-negative least-squares solution, rescaled so that its
        components sum to 1. If the solver returns the all-zero vector it
        is returned unscaled, because dividing by a zero sum would only
        produce NaNs.
    """
    A = np.nan_to_num(A)
    B = np.nan_to_num(B)
    x, _rnorm = nnls(A, B)
    total = x.sum()
    # NNLS guarantees x >= 0, so the sum is zero only for the all-zero vector.
    if total > 0:
        x = x / total
    return x
def f(arrA, arrB):
    """Return True when the two matrices share at least one identical row."""
    # Rows are converted to tuples so they can be hashed and compared.
    rows_of_a = {tuple(row) for row in arrA}
    return any(tuple(row) in rows_of_a for row in arrB)
基本上,它使用 NNLS(非负最小二乘)对给定的矩阵 A 和向量 B 进行线性规划求解。我有两组数据:A1、B1 和 A2、B2。
A1是A2的子集,B1是B2的子集。
# Right-hand-side vector of the smaller problem: 14 entries, one per row of A1.
B1 = array([ 22.133, 197.087, 84.344, 1.466, 3.974, 0.435,
8.291, 45.059, 5.755, 0.519, 0. , 30.272,
24.92 , 10.095])
# Coefficient matrix of the smaller problem: 14 rows x 9 columns.
# Several rows are exact duplicates of one another.
A1 = array([[ 46.35, 80.58, 48.8 , 80.31, 489.01, 40.98,
29.98, 44.3 , 5882.96],
[ 2540.73, 49.53, 26.78, 30.49, 48.51, 20.88,
19.92, 21.05, 19.39],
[ 2540.73, 49.53, 26.78, 30.49, 48.51, 20.88,
19.92, 21.05, 19.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 15.99, 223.27, 655.79, 1978.2 , 18.21, 20.51,
19. , 16.19, 15.91],
[ 15.99, 223.27, 655.79, 1978.2 , 18.21, 20.51,
19. , 16.19, 15.91],
[ 16.49, 20.56, 19.08, 18.65, 4568.97, 20.7 ,
17.4 , 17.62, 25.51],
[ 33.84, 26.58, 18.69, 40.88, 19.17, 5247.84,
29.39, 25.55, 18.9 ],
[ 42.66, 83.59, 99.58, 52.11, 46.84, 64.93,
43.8 , 7610.12, 47.13],
[ 42.66, 83.59, 99.58, 52.11, 46.84, 64.93,
43.8 , 7610.12, 47.13],
[ 41.63, 204.32, 4170.37, 86.95, 49.92, 87.15,
51.88, 45.38, 42.89],
[ 81.34, 60.16, 357.92, 43.48, 36.92, 39.13,
1772.07, 68.43, 38.07]])
# Right-hand-side vector of the larger problem: 31 entries, one per row of A2.
B2=array([ 5.144, 30.771, 22.133, 0. , 0.427, 0. ,
1.329, 197.087, 84.344, 1.466, 3.974, 0.435,
8.291, 45.059, 5.755, 0.519, 21.662, 41.665,
5.544, 0.482, 1.953, 4.214, 0.576, 0. ,
30.272, 2.262, 24.92 , 30.32 , 0.948, 1.107, 10.095])
# Coefficient matrix of the larger problem: 31 rows x 9 columns.
# Several rows are exact duplicates of one another.
A2 = array([[ 29.95, 36.96, 51.68, 35.95, 34.18, 49.8 ,
30.47, 4565.08, 34.25],
[ 29.95, 36.96, 51.68, 35.95, 34.18, 49.8 ,
30.47, 4565.08, 34.25],
[ 46.35, 80.58, 48.8 , 80.31, 489.01, 40.98,
29.98, 44.3 , 5882.96],
[ 26. , 151.76, 2850.8 , 64.88, 29.08, 110.99,
23.67, 23.69, 25.84],
[ 26. , 151.76, 2850.8 , 64.88, 29.08, 110.99,
23.67, 23.69, 25.84],
[ 26. , 151.76, 2850.8 , 64.88, 29.08, 110.99,
23.67, 23.69, 25.84],
[ 19.33, 23.2 , 28.65, 22.05, 5271.38, 24.4 ,
20.88, 20.28, 23.45],
[ 2540.73, 49.53, 26.78, 30.49, 48.51, 20.88,
19.92, 21.05, 19.39],
[ 2540.73, 49.53, 26.78, 30.49, 48.51, 20.88,
19.92, 21.05, 19.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 30.95, 1482.24, 100.48, 35.98, 35.1 , 38.65,
31.57, 87.38, 33.39],
[ 15.99, 223.27, 655.79, 1978.2 , 18.21, 20.51,
19. , 16.19, 15.91],
[ 15.99, 223.27, 655.79, 1978.2 , 18.21, 20.51,
19. , 16.19, 15.91],
[ 16.49, 20.56, 19.08, 18.65, 4568.97, 20.7 ,
17.4 , 17.62, 25.51],
[ 33.84, 26.58, 18.69, 40.88, 19.17, 5247.84,
29.39, 25.55, 18.9 ],
[ 21.18, 4213.22, 1172.23, 39.19, 33.68, 32.09,
17.48, 47.11, 26.52],
[ 21.18, 4213.22, 1172.23, 39.19, 33.68, 32.09,
17.48, 47.11, 26.52],
[ 23.61, 28.03, 22.92, 27.01, 24.62, 27.2 ,
23.86, 23.1 , 925.26],
[ 23.61, 28.03, 22.92, 27.01, 24.62, 27.2 ,
23.86, 23.1 , 925.26],
[ 23.61, 28.03, 22.92, 27.01, 24.62, 27.2 ,
23.86, 23.1 , 925.26],
[ 111.35, 24.3 , 42.06, 23.83, 118.74, 21.12,
1464.47, 86.8 , 157.35],
[ 111.35, 24.3 , 42.06, 23.83, 118.74, 21.12,
1464.47, 86.8 , 157.35],
[ 42.66, 83.59, 99.58, 52.11, 46.84, 64.93,
43.8 , 7610.12, 47.13],
[ 42.66, 83.59, 99.58, 52.11, 46.84, 64.93,
43.8 , 7610.12, 47.13],
[ 21.01, 14.49, 9.62, 19.74, 10.73, 2396.79,
15.9 , 11.78, 10.06],
[ 41.63, 204.32, 4170.37, 86.95, 49.92, 87.15,
51.88, 45.38, 42.89],
[ 2500.99, 60.14, 59. , 64.74, 72.4 , 61.48,
64.68, 51.07, 51.89],
[ 62.61, 121.71, 161.71, 2949.63, 74.98, 82.22,
63.01, 719.7 , 72.93],
[ 62.61, 121.71, 161.71, 2949.63, 74.98, 82.22,
63.01, 719.7 , 72.93],
[ 81.34, 60.16, 357.92, 43.48, 36.92, 39.13,
1772.07, 68.43, 38.07]])
然后我发现了一件奇怪的事:某个分量的解在使用 A1 时非零,但在使用 A2 时却变成了 0。结果如下:
In [9]: by_nnls(A=A1,B=B1)
Out[9]:
array([ 0.70089761, 0. , 0.06481495, 0.14325696, 0.01218972,
0. , 0.02125942, 0.01906576, 0.03851557])
In [10]: by_nnls(A=A2,B=B2)
Out[10]:
array([ 0.72020358, 0.0992065 , 0.04161968, 0.04371648, 0.00698775,
0. , 0. , 0.03533024, 0.05293576])
使用 A1 时,下标为 6(从 0 开始计数)的解分量为 0.02125942,但使用 A2 时该分量为 0。
这是为什么?我该如何解决这个问题?
我的理解是,当添加更多证据(即更多的行)时,该值应该会增加。也许这种直觉是错的?
如果您想验证 A1 与 A2 是否有共同的行(注意:f 只检查两者是否至少共有一行,并不能严格证明 A1 是 A2 的子集):
In [15]: f(A1,A2)
Out[15]: True