I'm trying to train a network that outputs values in the range -1.0..1.0. So far there are only six features, all floats. I'm having trouble getting the types and shapes to line up. Here's what I have so far:
#!/usr/bin/env python3
import lasagne
import numpy as np
import sys
import theano
import theano.tensor as T
infilename = sys.argv[1]
split_size = 500
epochs = 100
theano.config.exception_verbosity = 'high'
examples = np.genfromtxt(infilename, delimiter=' ')
np.random.shuffle(examples)
examples = examples.reshape(-1, 7)
train, test = examples[:split_size,:], examples[split_size:,:]
# input and target
train_y = train[:,0]
train_X = train[:,1:]
test_y = test[:,0]
test_X = test[:,1:]
input_var = T.matrix()
target_var = T.vector()
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
# nn structure
from lasagne.nonlinearities import tanh, softmax, leaky_rectify
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var)
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh)
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=softmax)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.aggregate(prediction, target_var)
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)
# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.02, momentum=0.9)
# training function
train_fn = theano.function([input_var, target_var], loss, updates=updates)
for epoch in range(epochs):
    loss = 0
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, 50, shuffle=True):
        print('input', input_batch.shape)
        print('target', target_batch.shape)
        loss += train_fn(input_batch, target_batch)
    print('epoch', epoch, 'loss', loss / len(training_data))
test_prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))
print('predicted score for first test input', predict_fn(test_X[0]))
print(net_output)
The input data is a file of space-separated floats in 7 columns. Here are a few sample rows (a quick shape-check sketch follows them):
-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0
1.0 1.0 3.0 0.0 0.0 4.0 1.0
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0
1.0 1.0 1.0 1.0 0.0 2.0 0.0
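As a quick sanity check of the load-and-split step, here is a minimal numpy sketch (the filename matches the one used in the answer below; the assertions are an addition, not part of the script above):

import numpy as np

# Load the space-separated 7-column file and check shapes before training.
examples = np.genfromtxt('tt_lasagne.input', delimiter=' ')
assert examples.ndim == 2 and examples.shape[1] == 7   # 1 target + 6 features
assert not np.isnan(examples).any()  # genfromtxt fills unparsable fields with NaN

split_size = 500
train, test = examples[:split_size, :], examples[split_size:, :]
train_y, train_X = train[:, 0], train[:, 1:]
print(train_X.shape, train_y.shape)  # e.g. (500, 6) and (500,)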
This is based entirely on cookie-cutter reference examples. The error that comes up is:
/usr/local/lib/python3.5/dist-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
"downsample module has been moved to the theano.tensor.signal.pool module.")
input (50, 6)
target (50,)
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__
outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "./nn_cluster.py", line 66, in <module>
loss += train_fn(input_batch, target_batch)
File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 871, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "/usr/local/lib/python3.5/dist-packages/theano/gof/link.py", line 314, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "/usr/lib/python3/dist-packages/six.py", line 685, in reraise
raise value.with_traceback(tb)
File "/usr/local/lib/python3.5/dist-packages/theano/compile/function_module.py", line 859, in __call__
outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 50)
Apply node that caused the error: Elemwise{Mul}[(0, 0)](SoftmaxWithBias.0, InplaceDimShuffle{x,0}.0)
Toposort index: 21
Inputs types: [TensorType(float64, matrix), TensorType(float64, row)]
Inputs shapes: [(50, 1), (1, 50)]
Inputs strides: [(8, 8), (400, 8)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(Elemwise{Mul}[(0, 0)].0)]]
Debugprint of the apply node:
Elemwise{Mul}[(0, 0)] [id A] <TensorType(float64, matrix)> ''
|SoftmaxWithBias [id B] <TensorType(float64, matrix)> ''
| |Dot22 [id C] <TensorType(float64, matrix)> ''
| | |Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)] [id D] <TensorType(float64, matrix)> ''
| | | |Dot22 [id E] <TensorType(float64, matrix)> ''
| | | | |<TensorType(float64, matrix)> [id F] <TensorType(float64, matrix)>
| | | | |W [id G] <TensorType(float64, matrix)>
| | | |InplaceDimShuffle{x,0} [id H] <TensorType(float64, row)> ''
| | | |b [id I] <TensorType(float64, vector)>
| | |W [id J] <TensorType(float64, matrix)>
| |b [id K] <TensorType(float64, vector)>
|InplaceDimShuffle{x,0} [id L] <TensorType(float64, row)> ''
|<TensorType(float64, vector)> [id M] <TensorType(float64, vector)>
Storage map footprint:
- Elemwise{Composite{tanh((i0 + i1))}}[(0, 0)].0, Shape: (50, 10), ElemSize: 8 Byte(s), TotalSize: 4000 Byte(s)
- <TensorType(float64, matrix)>, Input, Shape: (50, 6), ElemSize: 8 Byte(s), TotalSize: 2400 Byte(s)
- W, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s)
- <TensorType(float64, matrix)>, Shared Input, Shape: (6, 10), ElemSize: 8 Byte(s), TotalSize: 480 Byte(s)
- SoftmaxWithBias.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
- InplaceDimShuffle{x,0}.0, Shape: (1, 50), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
- SoftmaxGrad.0, Shape: (50, 1), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
- <TensorType(float64, vector)>, Input, Shape: (50,), ElemSize: 8 Byte(s), TotalSize: 400 Byte(s)
- W, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
- b, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
- <TensorType(float64, vector)>, Shared Input, Shape: (10,), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
- <TensorType(float64, matrix)>, Shared Input, Shape: (10, 1), ElemSize: 8 Byte(s), TotalSize: 80 Byte(s)
- TensorConstant{0.02}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- b, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
- TensorConstant{0.0001}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- TensorConstant{(1, 1) of 0.9}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
- TensorConstant{4.00000000..000001e-06}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- TensorConstant{(1,) of 0.02}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
- Constant{0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- Subtensor{int64}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- TensorConstant{(1,) of 0.9}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
- Constant{1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- Subtensor{int64}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
- TensorConstant{(1, 1) of 1.0}, Shape: (1, 1), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
- <TensorType(float64, vector)>, Shared Input, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
TotalSize: 8984.0 Byte(s) 0.000 GB
TotalSize inputs: 4168.0 Byte(s) 0.000 GB
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
A similar exception occurs when using lasagne.objectives.squared_error. Any ideas? I can't work out where the data shapes go wrong, whether that is even the problem, or whether this is the right way to use the objective function.
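For what it's worth, the traceback above already pins down where the shapes diverge; here is a short annotated sketch (names refer to the code above; the flatten() line is a suggestion rather than tested code):

# From the failing Apply node in the traceback:
#   prediction (SoftmaxWithBias.0): shape (50, 1) -- a DenseLayer with
#       num_units=1 yields a column matrix, one row per example
#   target_var (InplaceDimShuffle{x,0}.0): shape (1, 50) -- the (50,)
#       target vector becomes a row when combined elementwise with a matrix
# Elemwise{Mul} of (50, 1) with (1, 50) fails because Theano does not
# broadcast a matrix dimension of size 1 unless it is marked broadcastable.
# Note the multiply exists at all because aggregate() treats its second
# positional argument as *weights*: aggregate(loss, weights=None, mode='mean').
# Flattening the prediction to a (50,) vector gives both operands one shape:
prediction = lasagne.layers.get_output(net).flatten()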
Answer 0 (score: 0)
I copied your code and your input data, modified a few things, and it ran without errors.
Code:
import lasagne
import numpy as np
import sys
import theano
import theano.tensor as T
infilename = 'tt_lasagne.input' #sys.argv[1]
split_size = 500
epochs = 100
theano.config.exception_verbosity = 'high'
examples = np.genfromtxt(infilename, delimiter=' ')
np.random.shuffle(examples)
examples = examples.reshape(-1, 7)
train, test = examples[:split_size,:], examples[split_size:,:]
# input and target
train_y = train[:,0]
train_X = train[:,1:]
test_y = test[:,0]
test_X = test[:,1:]
input_var = T.matrix()
target_var = T.vector()
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
# nn structure
from lasagne.nonlinearities import tanh, softmax, leaky_rectify
net = lasagne.layers.InputLayer(shape=(None, 6), input_var=input_var)
net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=tanh)
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=softmax)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.aggregate(prediction, target_var)
loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(net, lasagne.regularization.l2)
# parameter update expressions
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate = 0.02, momentum=0.9)
# training function
train_fn = theano.function([input_var, target_var], loss, updates=updates)
for epoch in range(epochs):
    loss = 0
    for input_batch, target_batch in iterate_minibatches(train_X, train_y, 50, shuffle=True):
        print('input', input_batch.shape)
        print('target', target_batch.shape)
        loss += train_fn(input_batch, target_batch)
    print('epoch', epoch, 'loss', loss / len(train_X))
#test_prediction = lasagne.layers.get_output(net, deterministic=True)
#predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))
#print('predicted score for first test input', predict_fn(test_X[0]))
#print(net_output)
tt_lasagne.input:
-0.4361711835021444 0.9926778242677824 1.0 0.0 0.0 0.0 0.0
1.0 0.9817294281729428 1.0 1.7142857142857142 0.0 0.42857142857142855 1.7142857142857142
-0.4356014580801944 0.9956764295676429 1.0 0.0 0.0 0.0 0.0
1.0 1.0 3.0 0.0 0.0 4.0 1.0
-0.4361977186311787 0.9925383542538354 1.0 0.0 0.0 0.0 0.0
-0.46511627906976744 1.0 0.5 0.0 0.0 0.0 0.0
-0.4347826086956522 1.0 1.0 0.0 0.0 0.0 0.0
-0.4378224895429426 0.9840306834030683 1.0 0.0 0.0 0.0 0.0
-0.4377155764476054 0.9845885634588564 1.0 0.0 0.0 0.0 0.0
1.0 1.0 1.0 1.0 0.0 2.0 0.0
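Beyond the changes above, for a target in [-1, 1] the usual recipe would be a tanh output unit trained with squared error rather than softmax; here is a minimal sketch replacing the output layer and loss lines of the listing (a suggestion, not part of the original answer):

# softmax over a single unit always outputs exactly 1.0, so the network
# above cannot produce values across [-1, 1]; tanh can.
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=tanh)

# Flatten the (batch, 1) prediction to (batch,) so it matches target_var,
# then take the element-wise squared error and average it.
prediction = lasagne.layers.get_output(net).flatten()
loss = lasagne.objectives.squared_error(prediction, target_var)
loss = lasagne.objectives.aggregate(loss, mode='mean')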