在尝试使用 scipy.sparse.hstack 将密集数据和稀疏数据组合在一起时,我偶尔会遇到以下错误:
Traceback (most recent call last):
File "hstack_error.py", line 3, in <module>
X = scipy.sparse.hstack(hstack_parts)
File "/usr/lib/python2.7/dist-packages/scipy/sparse/construct.py", line 263, in hstack
return bmat([blocks], format=format, dtype=dtype)
File "/usr/lib/python2.7/dist-packages/scipy/sparse/construct.py", line 329, in bmat
raise ValueError('blocks must have rank 2')
ValueError: blocks must have rank 2
重现此错误的最小代码是:
import scipy.sparse
hstack_parts = [[[0.17968359700312667, -0.23497267759562843, 5.5625, 12.0, 12.0, -0.3514978725245902, 4.562932312249999, 7.578125000000001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.43775723232977204, -0.04553734061930783, 4.486910994764398, 12.0, 12.0, -0.33614476914571956, 2.8162986569528794, 4.74869109947644, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.403883732290472, -0.04826958105646641, 1.7142857142857142, 12.0, 12.0, -0.32207319092531883, 0.933412042503896, 1.851948051948052, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.29203081876806636, -0.11020036429872503, 1.5376623376623375, 12.0, 12.0, -0.31131701908652093, 0.964088085825974, 1.851948051948052, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.30639528566925406, -0.08743169398907111, 1.505, 12.0, 12.0, -0.3014608089744991, 0.917490079365, 1.745, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [1.138331763811077, 0.0, 3.2350000000000003, 12.0, 12.0, -0.5323457206576151, 0.9805158730150001, 3.2350000000000003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [1.0770851496658955, -0.002941176470588277, 3.2375, 12.0, 12.0, -0.5199720995117647, 1.0401185770749999, 3.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [1.0152399481191077, -0.002941176470588277, 3.1140776699029122, 12.0, 12.0, -0.5052406417111764, 1.0414827890558251, 3.126213592233009, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.961141824125552, -0.0029359953024075576, 2.643776824034335, 12.0, 12.0, -0.4915900561438638, 0.8579874128476395, 2.6545064377682404, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, 1.0], [0.9079651211907968, -0.004110393423370539, 1.726688102893891, 12.0, 12.0, -0.4780357379095714, 0.4291079394533763, 1.7379421221864957, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, 1.0], [0.8545562907561834, -0.010569583088667041, 1.6746031746031749, 12.0, 12.0, -0.46648671607163833, 0.4421795595714286, 
1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, 1.0], [0.824431155068869, -0.005871990604815115, 1.687301587301587, 12.0, 12.0, -0.4551024813223723, 0.4729531338222223, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, 1.0], [0.7862017310261765, -0.007633587786259692, 1.6825396825396823, 12.0, 12.0, -0.44442646372108047, 0.5018122734650794, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, 1.0], [0.7565618927494311, -0.007633587786259692, 1.6825396825396823, 12.0, 12.0, -0.43505183830416916, 0.5271535228063493, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.7120208806607795, -0.013505578391074599, 1.6666666666666667, 12.0, 12.0, -0.4237836507997651, 0.5576134010920637, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.6783481869678059, -0.013505578391074599, 1.6666666666666667, 12.0, 12.0, -0.4122230242395773, 0.5888637932063492, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.6499276254106391, -0.010569583088667041, 1.6746031746031749, 12.0, 12.0, -0.4003188978273635, 0.6210427253968255, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0], [0.6213577120617446, -0.008807985907222673, 1.6793650793650792, 12.0, 12.0, -0.38866543347034654, 0.6525440742857141, 1.7031746031746033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, 1.0, 1.0], [0.6018164150167221, -0.005284791544333521, 1.602150537634409, 12.0, 12.0, -0.3790079817322373, 0.624499857311828, 1.6159754224270355, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, 1.0, 1.0], [0.569013826241389, -0.007046388725778097, 1.5621212121212122, 12.0, 12.0, -0.3671479532765708, 0.6329500538939395, 1.5803030303030305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, 1.0, 1.0], [0.5431497867155388, -0.005871990604815115, 1.5651515151515152, 12.0, 12.0, -0.3557799651379918, 0.6622829081363637, 1.5803030303030305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 5.0, -1.0, 1.0, 1.0], [0.5210546429944948, -0.002348796241926171, 1.5170370370370367, 12.0, 12.0, -0.3441056122783324, 0.654797247837037, 1.522962962962963, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, 1.0, 1.0], [0.4957918898245967, -0.0017615971814445763, 1.4045261669024045, 12.0, 12.0, -0.33263550256605995, 0.607527212347949, 1.4087694483734088, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, -1.0, -1.0, -1.0]]]
scipy.sparse.hstack(hstack_parts)
此错误意味着什么,以及如何修复数据以使其不再发生?
答案 0 :(得分:4)
您尝试连接的各个部分不是稀疏矩阵对象,而是普通的密集数据(嵌套的 Python 列表)。您可以先用这些内容构造稀疏矩阵,然后再进行拼接(下面的示例假设 hstack_parts 至少包含两个部分):
x_sparse = scipy.sparse.coo_matrix(hstack_parts[0])
y_sparse = scipy.sparse.coo_matrix(hstack_parts[1])
z_sparse = scipy.sparse.hstack([x_sparse, y_sparse])
要将结果转换回密集表示,您可以使用:
z = z_sparse.todense()
请参阅 scipy.sparse.coo_matrix 的文档,它可以帮助您确定该格式是否适合您的问题。