我正在尝试调整the code以使用1-D输入实现1-D卷积。该模型是可编译的,因此您可以在.summary()
中查看图层和形状,但在.fit()
模型时会抛出错误。它似乎发生在损失计算中。以下是我的代码:
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D, UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics
num_conv = 6
batch_size = 100
latent_dim = 2
intermediate_dim = 128
epochs = 50
epsilon_std = 1.0
x = Input(batch_shape=(batch_size, 310, 1))
conv_1 = Conv1D(1, kernel_size=num_conv,
padding='same', activation='relu')(x)
conv_2 = Conv1D(64, kernel_size=num_conv,
padding='same', strides=2, activation='relu')(conv_1)
conv_3 = Conv1D(64, kernel_size=num_conv,
padding='same', activation='relu')(conv_2)
flatten = Flatten()(conv_3)
hidden = Dense(intermediate_dim, activation='relu')(flatten)
z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)
def sampling(args):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(batch_size, latent_dim),
mean=0., stddev=epsilon_std)
return(z_mean + K.exp(z_log_var/2) * epsilon)
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
decoder_h = Dense(256, activation='relu')(z)
decoder = Dense(155, activation='relu')(decoder_h)
decoder = Reshape((155, 1))(decoder)
de_conv_1 = Conv1D(64, kernel_size=num_conv,
padding='same', activation='relu')(decoder)
de_conv_2 = Conv1D(64, kernel_size=num_conv,
padding='same', activation='relu')(de_conv_1)
upsamp = UpSampling1D(2)(de_conv_2)
x_decoded_mean = Conv1D(1, kernel_size=num_conv,
padding='same', activation='relu')(upsamp)
x_decoded_mean = Reshape([310, 1])(x_decoded_mean)
def vae_loss(x, x_decoded_mean):
x_ = x[:, 150:160, :]
x_decoded_mean_ = x_decoded_mean[:, 150:160, :]
xent_loss = 10 * metrics.mean_squared_error(x_, x_decoded_mean_)
kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) -K.exp(z_log_var), axis=-1)
return(xent_loss + kl_loss)
vae = Model(x, x_decoded_mean)
vae.summary()
vae.compile(optimizer='rmsprop', loss=vae_loss)
输入数据形状为(n_sample,310,1)。 它是一维时间序列,但我包括前后150帧 预测中间10帧,导致310帧作为输入。
在vae_loss()
中,x
和x_decoded_mean
被切片的原因是目的是使用前后150帧的附加信息重建中间10帧。因此,我想强制模型专注于仅从中间10帧计算的损失。
当我.fit()
模型时出现以下错误:
# X.shape == (n_samples, 310, 1)
# n_samples % batch_size == 0
vae.fit(x=X, y=X, batch_size=batch_size,
epochs=epochs,
shuffle=True)
下面的长错误:
Epoch 1/50
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/Users/yjluo/WORK/pitchPerfect/vae/model2.py in <module>()
77 vae.fit(x=X, y=X, batch_size=batch_size,
78 epochs=epochs,
---> 79 shuffle=True)
/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
1496 val_f=val_f, val_ins=val_ins, shuffle=shuffle,
1497 callback_metrics=callback_metrics,
-> 1498 initial_epoch=initial_epoch)
1499
1500 def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):
/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch)
1150 batch_logs['size'] = len(batch_ids)
1151 callbacks.on_batch_begin(batch_index, batch_logs)
-> 1152 outs = f(ins_batch)
1153 if not isinstance(outs, list):
1154 outs = [outs]
/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
2227 session = get_session()
2228 updated = session.run(self.outputs + [self.updates_op],
-> 2229 feed_dict=feed_dict)
2230 return updated[:len(self.outputs)]
2231
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
776 try:
777 result = self._run(None, fetches, feed_dict, options_ptr,
--> 778 run_metadata_ptr)
779 if run_metadata:
780 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
980 if final_fetches or final_targets:
981 results = self._do_run(handle, final_targets, final_fetches,
--> 982 feed_dict_string, options, run_metadata)
983 else:
984 results = []
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1030 if handle is None:
1031 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032 target_list, options, run_metadata)
1033 else:
1034 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1050 except KeyError:
1051 pass
-> 1052 raise type(e)(node_def, op, message)
1053
1054 def _extend_graph(self):
InvalidArgumentError: Incompatible shapes: [100,10] vs. [100]
[[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]
Caused by op u'gradients_4/add_121_grad/BroadcastGradientArgs', defined at:
File "/usr/local/bin/ipython", line 11, in <module>
sys.exit(start_ipython())
File "/usr/local/lib/python2.7/site-packages/IPython/__init__.py", line 119, in start_ipython
return launch_new_instance(argv=argv, **kwargs)
File "/usr/local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/ipapp.py", line 355, in start
self.shell.mainloop()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 493, in mainloop
self.interact()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 484, in interact
self.run_cell(code, store_history=True)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-475083cdc0be>", line 1, in <module>
get_ipython().magic(u'run model2.py')
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2160, in magic
return self.run_line_magic(magic_name, magic_arg_s)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2081, in run_line_magic
result = fn(*args,**kwargs)
File "<decorator-gen-58>", line 2, in run
File "/usr/local/lib/python2.7/site-packages/IPython/core/magic.py", line 188, in <lambda>
call = lambda f, *a, **k: f(*a, **k)
File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 742, in run
run()
File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 728, in run
exit_ignore=exit_ignore)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2483, in safe_execfile
self.compile if kw['shell_futures'] else None)
File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
builtin_mod.execfile(filename, *where)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 79, in <module>
shuffle=True)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1481, in fit
self._make_train_function()
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1013, in _make_train_function
self.total_loss)
File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 197, in get_updates
grads = self.get_gradients(loss, params)
File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 47, in get_gradients
grads = K.gradients(loss, params)
File "/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2264, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 368, in _MaybeCompile
return grad_fn() # Exit early
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 598, in _AddGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 411, in _broadcast_gradient_args
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
...which was originally created as op u'add_121', defined at:
File "/usr/local/bin/ipython", line 11, in <module>
sys.exit(start_ipython())
[elided 16 identical lines from previous traceback]
File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
builtin_mod.execfile(filename, *where)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 68, in <module>
vae.compile(optimizer='rmsprop', loss=vae_loss)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 910, in compile
sample_weight, mask)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 436, in weighted
score_array = fn(y_true, y_pred)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 64, in vae_loss
return(xent_loss + kl_loss)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 821, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 73, in add
result = _op_def_lib.apply_op("Add", x=x, y=y, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [100,10] vs. [100]
[[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]
基于行Incompatible shapes: [100,10] vs. [100]
,我相信它发生在损失计算中,但我无法找出解决方案。此外,即使我不在vae_loss()
进行切片,错误仍显示为Incompatible shapes: [100,310] vs. [100]
。有人可以给我一些建议吗?
答案 0 :(得分:1)
问题在于xent_loss
是具有形状(100, 10)
的2D张量,kl_loss
是具有形状(100)
的1D张量。在tensorflow中,添加这两个张量是无效的。请参阅官方文档中的this section。
考虑前面的例子,不是将标量添加到(2,3)矩阵,而是将维度(3)的向量添加到维度矩阵(2,3)。如果不指定广播,则此操作无效。要正确请求矩阵向量加法,请将广播维度指定为(1),这意味着向量的维度与矩阵的维度1匹配。
这是因为metrics.mean_squared_error()
取特征轴的平均值,而不是时间轴。
要解决此问题,请在时间轴上再选择K.mean()
:
xent_loss = 10 * K.mean(metrics.mean_squared_error(x_, x_decoded_mean_), axis=-1)
或者,在将张量输入K.squeeze()
之前使用metrics.mean_squared_error()
删除要素轴(但这仅适用于1D时间序列):
x_ = K.squeeze(x[:, 150:160, :], axis=-1)
x_decoded_mean_ = K.squeeze(x_decoded_mean[:, 150:160, :], axis=-1)
xent_loss = 10 * metrics.mean_squared_error(x_, x_decoded_mean_)
但是,最好的方法是忘记metrics.mean_squared_error()
,并使用正确的axis
参数自行计算MSE。
xent_loss = 10 * K.mean(K.square(x_ - x_decoded_mean_), axis=[1, 2])