我正在尝试用 Python 和 TensorFlow 实现一个深度学习模型,但遇到了一些问题,具体如下:
此层计算两个向量之间的余弦距离。我必须计算查询和文档矩阵之间的距离。
// Returns the result of byteBuddyState.loadProxy for persistentClass, passing a
// TypeCache.SimpleKey built from `key` and a lambda that configures the ByteBuddy builder.
// NOTE(review): this Java/ByteBuddy fragment looks unrelated to the surrounding
// TensorFlow question — confirm it belongs here.
return byteBuddyState.loadProxy( persistentClass, new TypeCache.SimpleKey(key), byteBuddy -> byteBuddy
// Apply the Groovy getMetaClass filter supplied by the proxy definition helpers.
.ignore( byteBuddyState.getProxyDefinitionHelpers().getGroovyGetMetaClassFilter() )
// Name the generated class from persistentClass.getName() plus PROXY_NAMING_SUFFIX.
.with( new NamingStrategy.SuffixingRandom( PROXY_NAMING_SUFFIX, new NamingStrategy.SuffixingRandom.BaseNameResolver.ForFixedValue( persistentClass.getName() ) ) )
// Subclass persistentClass only when exactly one interface is given; otherwise subclass Object.
.subclass( interfaces.length == 1 ? persistentClass : Object.class, ConstructorStrategy.Default.IMITATE_SUPER_CLASS_OPENING )
.implement( (Type[]) interfaces )
// Methods matched by the "virtual, not finalizer" filter are delegated to the interceptor dispatcher.
.method( byteBuddyState.getProxyDefinitionHelpers().getVirtualNotFinalizerFilter() )
.intercept( byteBuddyState.getProxyDefinitionHelpers().getDelegateToInterceptorDispatcherMethodDelegation() )
// Methods matched by the Hibernate-generated-method filter call straight through to the super method.
.method( byteBuddyState.getProxyDefinitionHelpers().getHibernateGeneratedMethodFilter() )
.intercept( SuperMethodCall.INSTANCE )
// Private field holding the interceptor; ProxyConfiguration is implemented via the field accessor below.
.defineField( ProxyConfiguration.INTERCEPTOR_FIELD_NAME, ProxyConfiguration.Interceptor.class, Visibility.PRIVATE )
.implement( ProxyConfiguration.class )
.intercept( byteBuddyState.getProxyDefinitionHelpers().getInterceptorFieldAccessor() )
);
但是,当我运行整个代码时会出现错误。相关代码如下(完整的错误输出见代码之后):
# Layer 1: projects the query and document batches from TRIGRAM_D input features
# to L1_N hidden units, sharing one weight matrix and one bias between both inputs.
with tf.name_scope('L1'):
    try:
        # Symmetric range for the uniform initialisation (set empirically).
        l1_par_range = np.sqrt(6.0 / (TRIGRAM_D + L1_N))
        # Weight matrix, shape [TRIGRAM_D, L1_N].
        weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range))
        # Bias vector, shape [L1_N]; broadcast over the batch dimension when added.
        bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range))
        variable_summaries(weight1, 'L1_weights')
        variable_summaries(bias1, 'L1_biases')
        # X.W + b via the sparse-by-dense matmul (the op requires SparseTensor inputs).
        # Dense alternative kept for reference:
        #   query_l1 = tf.matmul(tf.to_float(query_batch), weight1) + bias1
        query_l1 = tf.sparse_tensor_dense_matmul(query_batch, weight1) + bias1
        # Dense alternative kept for reference:
        #   doc_l1 = tf.matmul(tf.to_float(doc_batch), weight1) + bias1
        doc_l1 = tf.sparse_tensor_dense_matmul(doc_batch, weight1) + bias1
        # ReLU-activated layer outputs consumed by layer 2.
        query_l1_out = tf.nn.relu(query_l1)
        doc_l1_out = tf.nn.relu(doc_l1)
    except Exception:
        # Print the original marker, then re-raise: swallowing the error would leave
        # query_l1_out / doc_l1_out undefined and hide the real cause behind a later
        # NameError. Only graph-construction errors can surface here; errors raised
        # while the graph executes appear at sess.run instead.
        print("L1")
        raise
# Layer 2: maps the L1_N-dimensional layer-1 outputs to L2_N units, again sharing
# one weight matrix and one bias between the query and document branches.
with tf.name_scope('L2'):
    try:
        # Symmetric range for the uniform initialisation.
        l2_par_range = np.sqrt(6.0 / (L1_N + L2_N))
        # Weight matrix, shape [L1_N, L2_N].
        weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range))
        # Bias vector, shape [L2_N].
        bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range))
        variable_summaries(weight2, 'L2_weights')
        variable_summaries(bias2, 'L2_biases')
        query_l2 = tf.matmul(query_l1_out, weight2) + bias2
        doc_l2 = tf.matmul(doc_l1_out, weight2) + bias2
        # ReLU-activated layer-2 outputs for queries and documents.
        query_y = tf.nn.relu(query_l2)
        doc_y = tf.nn.relu(doc_l2)
    except Exception:
        # Print the original marker, then re-raise: swallowing the error would leave
        # query_y / doc_y undefined for the code that follows.
        print("L2")
        raise
# Builds negative documents by appending NEG row-rotated copies of the document
# batch to doc_y.
with tf.name_scope('FD_rotate'):
    try:
        # tf.tile with multiples [1, 1] is an identity copy. temp keeps the original
        # batch so that every rotation slices the un-extended documents.
        temp = tf.tile(doc_y, [1, 1])
        for i in range(NEG):
            # Rotation offset drawn at graph-construction time from
            # [i * BS / NEG, (i + 1) * BS / NEG), truncated to an int.
            rand = int((random.random() + i) * BS / NEG)
            # Append temp rotated by `rand` rows: rows [rand, BS) followed by rows [0, rand).
            # NOTE(review): the slice sizes assume temp has exactly BS rows at run time —
            # confirm the fed document batch always has BS rows.
            # TODO(AP): the original note flags this concat call as deprecated — verify
            # against the TensorFlow version in use.
            doc_y = tf.concat([doc_y,
                               tf.slice(temp, [rand, 0], [BS - rand, -1]),
                               tf.slice(temp, [0, 0], [rand, -1])], 0)
        # After the loop doc_y holds the original batch followed by NEG rotated copies.
    except Exception:
        # Print the original marker, then re-raise so the real graph-construction
        # error is not hidden.
        print("OWO")
        raise
# Cosine similarity between each query and its positive / rotated negative documents.
with tf.name_scope('Cosine_Similarity'):
    try:
        # Row-wise L2 norms, kept as column vectors. The query norms are repeated
        # NEG + 1 times along axis 0 to line up with the extended doc_y.
        query_norm = tf.tile(tf.sqrt(tf.reduce_sum(tf.square(query_y), 1, True)), [NEG + 1, 1])
        doc_norm = tf.sqrt(tf.reduce_sum(tf.square(doc_y), 1, True))
        # Debug output. get_shape() reports the static shape known at graph-construction
        # time; the shapes of the data actually fed are only checked when the graph runs.
        print(tf.tile(query_y, [NEG + 1, 1]).get_shape().as_list())
        print(doc_y.get_shape().as_list())
        print(query_y.get_shape().as_list())
        # Row-wise dot product of the repeated queries with the documents.
        prod_first = tf.tile(query_y, [NEG + 1, 1])
        prod_temp = tf.multiply(prod_first, doc_y)
        # keepdims=True keeps the sum as a column vector, matching the norm tensors.
        prod = tf.reduce_sum(prod_temp, axis=1, keepdims=True)
        print("AAAAAA")
        norm_prod = tf.multiply(query_norm, doc_norm)
        cos_sim_raw = tf.truediv(prod, norm_prod)
        # Regroup into one row per query with NEG + 1 columns, then scale by 20.
        # NOTE(review): the reshape assumes exactly BS queries per batch — confirm.
        cos_sim = tf.transpose(tf.reshape(tf.transpose(cos_sim_raw), [NEG + 1, BS])) * 20
    except Exception:
        # Print the original marker, then re-raise so cos_sim is never silently left
        # undefined. Shape mismatches in the fed data are raised by sess.run, not here.
        print("Some exception in Cosine similarity")
        raise
完整的运行输出和错误消息如下:
2019-02-05 22:02:03.674482: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Loading data from HDD to memory: 0.00s
[52224, 120]
[52224, 120]
AAAAAA
-------------------------------------------------------------------------------------
Train data 1/9 is loaded in 0.00s
0.00% EpochTraceback (most recent call last):
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1334, in _do_call
return fn(*args)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1319, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1407, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [51000,120] vs. [52224,120]
[[{{node Cosine_Similarity/Mul}} = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "dssm.py", line 271, in <module>
sess.run(train_step, feed_dict=feed_dict(True, batch_idx % FLAGS.pack_size))
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 929, in run
run_metadata_ptr)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
run_metadata)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [51000,120] vs. [52224,120]
[[node Cosine_Similarity/Mul (defined at dssm.py:160) = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]
Caused by op 'Cosine_Similarity/Mul', defined at:
File "dssm.py", line 160, in <module>
prod_temp = tf.multiply(prod_first, doc_y)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 203, in multiply
return gen_math_ops.mul(x, y, name)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 5042, in mul
"Mul", x=x, y=y, name=name)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
op_def=op_def)
File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [51000,120] vs. [52224,120]
[[node Cosine_Similarity/Mul (defined at dssm.py:160) = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]
从上面的错误消息可以看到,由于 `tf.tile(query_y, [NEG + 1, 1])` 和 `doc_y` 的形状不一致,`tf.multiply` 产生了错误。
要事先检查形状,我已经打印了两个矩阵的形状,并将它们分别为:[52224,120]和[52224,120]。这应该不会导致错误,对吧?
此外,错误消息指出输入形状实际上是[51000,120]和[52224,120]。
为了调试这个问题,我还放置了一个 `try-except` 块来捕获所有异常,但它没有捕获到任何异常。
我还尝试在每个代码块中都加上 `try-except`,但它们同样从未捕获到任何异常。唯一能捕获到异常的地方是我初始化并运行会话的位置。
注意,我没有上传完整的代码,因为时间太长,而且我不知道是否应该这样做。
Research Paper Link。该图请参阅第3页。