我正在尝试根据大小索引矩阵。在 ipdb 中运行时,循环里的下面这一行抛出 TypeError: argument has wrong size,我不知道那里发生了什么:
images[i,:] = img[ind[i]*rows*cols:(ind[i]+1)*rows*cols]
而循环里的下面这一行工作正常:
labels[i] = lbl[ind[i]]
示例数据:
def read(digits, target_index):
# Question's version of read(): selects the samples whose label is in
# `digits` from `dataset` and copies them, one row per sample, into two
# cvxopt matrices (images, labels), which are returned as a tuple.
# `dataset`, `np` and `matrix` are not defined in this snippet; they come
# from the surrounding script.
# NOTE: the indentation of this snippet was lost when it was posted; the
# statements are kept exactly as shown.
# Each sample is handled as a block of rows*cols (= 784) pixel values.
rows=28
cols=28
#lbl = array("b", reformatData['target'])
# target_index says which column of `dataset` holds the label:
# -1 -> last column, 0 -> first column; the remaining columns are pixels.
# NOTE(review): no branch handles any other value, so `data`/`target` would
# presumably be unbound in that case -- confirm against the original layout.
if target_index == -1:
data = dataset[:, :-1]
target = dataset[:, -1]
elif target_index == 0:
data = dataset[:, 1:]
print "data size: ", data.size,"length", type(data)
#output: data size: 791840 length <type 'numpy.ndarray'>
target = dataset[:, 0]
print "data size: ", target.size,"length", type(target) #output data size: 1010 length <type 'numpy.ndarray'>
lbl =np.asarray(target.flatten())
print "length labell", lbl.size, "length", type(lbl) # output length labell 1010 length <type 'numpy.ndarray'>
# NOTE: only row 0 of `data` is flattened here, so `img` has 784 entries
# (see the printed size), not the 791840 entries of the whole `data` array.
img=np.asarray(data[0].flatten())
print "length of image: ",img.size, "length", type(img) #output length of image: 784 length <type 'numpy.ndarray'>
# Positions of the samples whose label is one of the requested digits.
ind = [ k for k in xrange(lbl.size) if lbl[k] in digits]
print "index length ", len(ind), "length", type(ind) #output index length 1010 length <type 'list'>
# Zero-filled output matrices: one row of rows*cols values per selected
# sample, and one label per selected sample.
images = matrix(0, (len(ind), rows*cols))
print "images length ", images.size, "images type", type(images) #output: images length (1010, 784) images type <type 'cvxopt.base.matrix'>
labels = matrix(0, (len(ind), 1))
print "lables length ", labels.size, "labels type", type(labels) #output: lables length (1010, 1) labels type <type 'cvxopt.base.matrix'>
for i in xrange(len(ind)):
# The slice treats `img` as all samples laid end to end, but `img` holds
# only 784 values, so for ind[i] >= 1 the slice starts past the end of
# `img` and is empty instead of rows*cols long. This is consistent with
# the "argument has wrong size" TypeError reported for this line.
# NOTE(review): likely cause, not verified by running the code.
images[i,:] = img[ind[i]*rows*cols:(ind[i]+1)*rows*cols] #output: see error below
labels[i] = lbl[ind[i]]
return images, labels
# Driver call; `digits1` and `target_index` are defined outside this snippet.
print read(digits=digits1, target_index=target_index)
Traceback (most recent call last):
File "svm.py", line 79, in <module>
print read(digits=digits1, target_index=target_index)
File "svm.py", line 74, in read
images[i,:] = img[ind[i]*rows*cols:(ind[i]+1)*rows*cols]
TypeError: argument has wrong size
SOLUTION:
经过一系列调试后,我终于找到了一个解决方案,它将标签转换为数组包中的数组:3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 41 146 146 48 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 129 253 253 253 250 163 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 133 253 253 253 253 253 253 229 70 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 101 253 252 145 102 107 237 253 247 128 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 181 253 167 0 0 0 61 235 253 253 163 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255 253 43 0 0 0 0 58 193 253 253 164 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 187 253 32 0 0 0 0 0 55 236 253 253 86 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 146 253 32 0 100 190 87 87 87 147 253 253 123 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 94 253 78 40 248 253 253 253 253 253 253 253 223 84 15 0 0 0 0 0 0 0 0 0 0 0 0 0 14 92 12 35 240 253 253 253 253 253 253 253 253 253 244 89 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 75 161 179 253 253 253 253 253 253 253 253 253 209 43 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 16 16 39 38 16 16 145 243 253 253 185 48 0 0 0 0 0 0 0 0 0 0 0 0 0 20 58 0 0 0 0 0 0 0 0 58 209 253 253 183 0 0 0 0 0 0 0 0 0 0 0 0 77 221 247 79 0 0 0 0 0 0 0 0 13 219 253 240 72 0 0 0 0 0 0 0 0 0 0 0 90 247 253 252 57 0 0 0 0 0 0 0 0 53 251 253 191 0 0 0 0 0 0 0 0 0 0 0 0 116 253 253 59 0 0 0 0 0 0 0 0 99 252 253 145 0 0 0 0 0 0 0 0 0 0 0 0 14 188 253 221 158 38 0 0 0 0 111 211 246 253 253 145 0 0 0 0 0 0 0 0 0 0 0 0 0 12 221 246 253 251 249 249 249 249 253 253 253 253 200 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 95 183 228 253 253 253 253 253 253 195 124 23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 37 138 74 126 88 37 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
。在此之前,我通过 data = ([int(i) for i in (dataset[:, 1:].flatten())])
将 numpy.ndarray 展平为整数列表,然后用 img=array("B", data) 把它转换为 array 模块的数组。调试花了四个小时。这种做法可能不是很高效,但现在可以工作。在这里发布解决方案,希望对试图解决 scikit 库中分类问题的人有帮助。
答案 0 :(得分:1)
经过一系列调试后,我终于找到了一个解决方案:把标签和图像数据都转换为 array 模块中的数组,例如:img=array("B", data)
。在此之前,我先通过 data = ([int(i) for i in (dataset[:, 1:].flatten())])
把 numpy.ndarray 展平为整数列表。调试花了四个小时。这种做法可能效率不高,但现在可以使用。在这里发布解决方案,希望对试图解决 scikit 库中分类问题的人有帮助。
def read(digits, target_index):
    """Extract the samples whose label is in ``digits`` as cvxopt matrices.

    Reads the module-level ``dataset`` (a 2-D array supporting numpy-style
    slicing and ``flatten()``), where every row is one sample made of a
    label column plus pixel columns.

    Args:
        digits: Container of label values to keep.
        target_index: Column holding the label: ``0`` for the first column
            or ``-1`` for the last column. The remaining columns are
            treated as pixel values.

    Returns:
        A tuple ``(images, labels)`` of matrices built with ``matrix``:
        ``images`` has one row of ``rows * cols`` values per selected
        sample, and ``labels`` has one row with that sample's label.

    Raises:
        ValueError: If ``target_index`` is neither ``0`` nor ``-1``.
    """
    rows = 18
    cols = 15
    # Assumes every dataset row holds exactly rows*cols pixel values;
    # otherwise the per-sample slices below misalign -- TODO confirm.
    pixels_per_image = rows * cols

    # Pick the label column and the pixel columns once, so that both the
    # labels and the image data honour target_index.
    if target_index == -1:
        pixel_block = dataset[:, :-1]
        label_column = dataset[:, -1]
    elif target_index == 0:
        pixel_block = dataset[:, 1:]
        label_column = dataset[:, 0]
    else:
        raise ValueError(
            "target_index must be 0 or -1, got %r" % (target_index,)
        )

    # Plain Python ints packed into array.array buffers: assigning these
    # (instead of numpy arrays) into the matrix rows is what avoids the
    # "argument has wrong size" TypeError.
    data = [int(value) for value in pixel_block.flatten()]
    lbl = array("b", [int(value) for value in label_column.flatten()])
    img = array("B", data)

    # Positions of the samples whose label was requested.
    ind = [k for k, label in enumerate(lbl) if label in digits]
    print("index length ", len(ind), "length", type(ind))

    images = matrix(0, (len(ind), pixels_per_image))
    print("images length ", images.size, "images type", type(images))
    labels = matrix(0, (len(ind), 1))
    print("lables length ", labels.size, "labels type", type(labels))

    for i, k in enumerate(ind):
        # img stores all samples back to back; sample k occupies
        # [k * pixels_per_image, (k + 1) * pixels_per_image).
        images[i, :] = img[k * pixels_per_image:(k + 1) * pixels_per_image]
        labels[i] = lbl[k]
    return images, labels
# Driver call: prints the (images, labels) tuple for digit 3, with the label
# taken from the first dataset column.
print(read(digits=[3], target_index=0))