Question

我试图使用Tensorflow在Python上实现深度学习模型。我遇到了一些问题，如下所述：

此层计算两个向量之间的余弦距离。我必须计算查询和文档矩阵之间的距离。

// NOTE(review): this Java statement looks unrelated to the surrounding TensorFlow
// question (the preceding text announces a cosine-distance layer) -- it may have
// been inserted by mistake; verify against the original post.
//
// Returns the result of byteBuddyState.loadProxy for persistentClass, keyed by a
// TypeCache.SimpleKey built from `key`. The lambda describes the proxy type to build.
return byteBuddyState.loadProxy( persistentClass, new TypeCache.SimpleKey(key), byteBuddy -> byteBuddy
        // Ignore whatever the Groovy getMetaClass filter matches.
        .ignore( byteBuddyState.getProxyDefinitionHelpers().getGroovyGetMetaClassFilter() )
        // Naming: fixed base name persistentClass.getName() combined with PROXY_NAMING_SUFFIX.
        .with( new NamingStrategy.SuffixingRandom( PROXY_NAMING_SUFFIX, new NamingStrategy.SuffixingRandom.BaseNameResolver.ForFixedValue( persistentClass.getName() ) ) )
        // Subclass persistentClass when exactly one interface is supplied, otherwise Object.
        .subclass( interfaces.length == 1 ? persistentClass : Object.class, ConstructorStrategy.Default.IMITATE_SUPER_CLASS_OPENING )
        // The generated type also implements every supplied interface.
        .implement( (Type[]) interfaces )
        // Methods matched by the "virtual, not finalizer" filter go to the interceptor-dispatcher delegation.
        .method( byteBuddyState.getProxyDefinitionHelpers().getVirtualNotFinalizerFilter() )
                .intercept( byteBuddyState.getProxyDefinitionHelpers().getDelegateToInterceptorDispatcherMethodDelegation() )
        // Methods matched by the Hibernate-generated-method filter call the super implementation.
        .method( byteBuddyState.getProxyDefinitionHelpers().getHibernateGeneratedMethodFilter() )
                .intercept( SuperMethodCall.INSTANCE )
        // Private field of type ProxyConfiguration.Interceptor, named by INTERCEPTOR_FIELD_NAME.
        .defineField( ProxyConfiguration.INTERCEPTOR_FIELD_NAME, ProxyConfiguration.Interceptor.class, Visibility.PRIVATE )
        // ProxyConfiguration is implemented through the interceptor field accessor.
        .implement( ProxyConfiguration.class )
                .intercept( byteBuddyState.getProxyDefinitionHelpers().getInterceptorFieldAccessor() )
);

但是，当我运行整个代码（如下）时，会出现错误，错误信息附在代码之后：

with tf.name_scope('L1'):
    # Layer 1: a single weight matrix and bias shared by the query and the
    # document inputs, followed by ReLU on each branch.
    # These statements only add ops to the TF1 graph; shape mismatches against
    # the data that is actually fed are reported later by sess.run, not here.
    try:
        # Symmetric uniform init range sqrt(6 / (fan_in + fan_out)).
        # Original author's note: set empirically.
        l1_par_range = np.sqrt(6.0 / (TRIGRAM_D + L1_N))
        # Weight matrix, shape [TRIGRAM_D, L1_N].
        weight1 = tf.Variable(tf.random_uniform([TRIGRAM_D, L1_N], -l1_par_range, l1_par_range))
        # Bias vector, shape [L1_N]; broadcast over the rows when added below.
        bias1 = tf.Variable(tf.random_uniform([L1_N], -l1_par_range, l1_par_range))
        variable_summaries(weight1, 'L1_weights')
        variable_summaries(bias1, 'L1_biases')

        # X.W + b through the sparse-dense matmul op.
        # Presumably query_batch / doc_batch are tf.SparseTensor inputs -- confirm.
        query_l1 = tf.sparse_tensor_dense_matmul(query_batch, weight1) + bias1
        doc_l1 = tf.sparse_tensor_dense_matmul(doc_batch, weight1) + bias1

        # Activated outputs consumed by the next layer.
        query_l1_out = tf.nn.relu(query_l1)
        doc_l1_out = tf.nn.relu(doc_l1)
    except Exception:
        # Keep the original marker so existing logs still show where the graph
        # build failed, but re-raise: swallowing the error would leave
        # query_l1_out / doc_l1_out undefined and fail later with a NameError.
        print("L1")
        raise

with tf.name_scope('L2'):
    # Layer 2: dense layer shared by the query and document branches, applied
    # to the layer-1 activations and followed by ReLU.
    try:
        # Symmetric uniform init range sqrt(6 / (fan_in + fan_out)).
        l2_par_range = np.sqrt(6.0 / (L1_N + L2_N))

        # Weight matrix, shape [L1_N, L2_N]; bias vector, shape [L2_N].
        weight2 = tf.Variable(tf.random_uniform([L1_N, L2_N], -l2_par_range, l2_par_range))
        bias2 = tf.Variable(tf.random_uniform([L2_N], -l2_par_range, l2_par_range))
        variable_summaries(weight2, 'L2_weights')
        variable_summaries(bias2, 'L2_biases')

        query_l2 = tf.matmul(query_l1_out, weight2) + bias2
        doc_l2 = tf.matmul(doc_l1_out, weight2) + bias2
        query_y = tf.nn.relu(query_l2)  # Query output of layer 2
        doc_y = tf.nn.relu(doc_l2)  # Document output of layer 2
    except Exception:
        # Keep the original marker, but re-raise: swallowing the error would
        # leave query_y / doc_y undefined for the code that follows.
        print("L2")
        raise

with tf.name_scope('FD_rotate'):
    # Build NEG "negative" document blocks by rotating the rows of the positive
    # document outputs, and append each block to doc_y along axis 0. After the
    # loop doc_y holds the original rows followed by NEG rotated copies.
    try:
        # Tiling with multiples [1, 1] yields a tensor equal to doc_y. It is kept
        # under its own name as the un-rotated source, because doc_y itself is
        # rebound to the growing concatenation inside the loop.
        temp = tf.tile(doc_y, [1, 1])

        for i in range(NEG):
            # Rotation offset for block i, drawn from the i-th slice of width
            # BS / NEG. random.random() runs once, while the graph is being
            # built, so the offsets are fixed for every later sess.run call.
            # NOTE(review): int() can give rand == 0 (for i == 0 whenever
            # random.random() < NEG / BS); the appended block is then an
            # unrotated copy of temp -- confirm that this is acceptable.
            rand = int((random.random() + i) * BS / NEG)
            # Appends temp[rand:BS] followed by temp[0:rand], i.e. temp rotated
            # by `rand` rows. The slice sizes use the Python constant BS, so the
            # graph assumes temp has exactly BS rows at run time.
            doc_y = tf.concat([doc_y,
                               tf.slice(temp, [rand, 0], [BS - rand, -1]),
                               tf.slice(temp, [0, 0], [rand, -1])], 0)
    except Exception:
        # Keep the original marker, but re-raise instead of silently continuing
        # with a doc_y that is only partially built (or not rebuilt at all).
        print("OWO")
        raise

with tf.name_scope('Cosine_Similarity'):
    # Cosine similarity between each query row and the matching row of doc_y.
    # The query tensors are tiled NEG + 1 times along axis 0 so that they line
    # up row-for-row with doc_y.
    try:
        # Row-wise L2 norms, kept as column vectors (keep_dims=True).
        query_norm = tf.tile(tf.sqrt(tf.reduce_sum(tf.square(query_y), 1, True)), [NEG + 1, 1])
        doc_norm = tf.sqrt(tf.reduce_sum(tf.square(doc_y), 1, True))

        # Build the tiled query once and reuse it for the debug print below,
        # instead of adding a second, otherwise unused Tile op to the graph.
        prod_first = tf.tile(query_y, [NEG + 1, 1])

        # Debug output. get_shape() reports the static shape known while the
        # graph is being built; the shape of the data fed at run time can differ.
        print(prod_first.get_shape().as_list())
        print(doc_y.get_shape().as_list())
        print(query_y.get_shape().as_list())

        # Row-wise dot product. keepdims=True keeps a column vector so that it
        # divides element-wise by norm_prod.
        prod_temp = tf.multiply(prod_first, doc_y)
        prod = tf.reduce_sum(prod_temp, axis=1, keepdims=True)
        print("AAAAAA")
        norm_prod = tf.multiply(query_norm, doc_norm)

        # NOTE(review): norm_prod is 0 for any row that is all zeros after ReLU,
        # which would make this division produce inf/NaN -- confirm whether an
        # epsilon is needed.
        cos_sim_raw = tf.truediv(prod, norm_prod)
        # Regroup the column into NEG + 1 blocks of BS rows, then transpose so
        # the result has shape [BS, NEG + 1]. The factor 20 is presumably a
        # smoothing factor for a later softmax -- confirm.
        cos_sim = tf.transpose(tf.reshape(tf.transpose(cos_sim_raw), [NEG + 1, BS])) * 20
    except Exception:
        # Keep the original message, but re-raise so that a graph-construction
        # failure is not hidden and cos_sim is never silently left undefined.
        print("Some exception in Cosine similarity")
        raise

错误输出如下：

2019-02-05 22:02:03.674482: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Loading data from HDD to memory: 0.00s
[52224, 120]
[52224, 120]
AAAAAA
-------------------------------------------------------------------------------------
Train data 1/9 is loaded in 0.00s
0.00% EpochTraceback (most recent call last):
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1334, in _do_call
    return fn(*args)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1319, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1407, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [51000,120] vs. [52224,120]
  [[{{node Cosine_Similarity/Mul}} = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "dssm.py", line 271, in <module>
    sess.run(train_step, feed_dict=feed_dict(True, batch_idx % FLAGS.pack_size))
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 929, in run
    run_metadata_ptr)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
    run_metadata)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [51000,120] vs. [52224,120]
  [[node Cosine_Similarity/Mul (defined at dssm.py:160) = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]

Caused by op 'Cosine_Similarity/Mul', defined at:
  File "dssm.py", line 160, in <module>
    prod_temp = tf.multiply(prod_first, doc_y)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 203, in multiply
    return gen_math_ops.mul(x, y, name)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 5042, in mul
    "Mul", x=x, y=y, name=name)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/major/Documents/DSSMStuff/DSSM/venv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [51000,120] vs. [52224,120]
  [[node Cosine_Similarity/Mul (defined at dssm.py:160) = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Cosine_Similarity/Tile_2, FD_rotate/concat_49)]]

从错误消息中可以看到，tf.multiply 的两个输入形状不一致：tf.tile(query_y, [NEG + 1, 1]) 的结果与 doc_y 的形状不匹配，因此 tf.multiply 报错。

为了事先检查形状，我打印了这两个矩阵的形状，它们分别是 [52224, 120] 和 [52224, 120]。这应该不会导致错误，对吧？

此外，错误消息指出输入形状实际上是[51000,120]和[52224，120]。

为了调试这个问题，我还加了一个 try-except 块来捕获所有异常，但它没有捕获到任何异常。

我还尝试给所有 with tf.name_scope 块都加上 try-except，但它们同样从未捕获到任何异常。唯一能捕获到异常的地方，是我初始化会话（session）的位置。

注意，我没有上传完整的代码，因为代码太长，而且我不确定是否应该这样做。

Research Paper Link。该图请参阅第3页。

tensorflow.multiply 报告形状不一致错误，但这种不一致（看起来）并不存在

0 个答案: