如何获得用户警告:将稀疏的IndexedSlices转换为形状未知的密集张量

时间:2019-03-28 18:13:11

标签: python tensorflow deep-learning nlp recurrent-neural-network

我收到以下警告:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:95: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "

对于以下代码:

import random
import numpy as np
import os
from pathlib import Path
import tensorflow as tf
from tensorflow.contrib.rnn import HighwayWrapper, LSTMCell, DropoutWrapper
from tensorflow.python.ops import array_ops
from socket import gethostname
import os
from pathlib import Path
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
import json
import tensorflow as tf
import time
import argparse
from pathlib import Path
import socket
import os

import datetime
from google.colab import drive
print(print(tf.__version__))

def orthonorm(shape, dtype=tf.float32,  # TODO only works for square (recurrent) weights
              partition_info=None):  # pylint: disable=unused-argument
    """Variable initializer that produces a random orthonormal matrix."""
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError("Expecting square shape, got %s" % shape)
    _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
    return u


def _reverse(input_, seq_lengths, seq_dim, batch_dim):  # reverses sequences with right-padding correctly
    return array_ops.reverse_sequence(
        input=input_, seq_lengths=seq_lengths,
        seq_dim=seq_dim, batch_dim=batch_dim)





def bilstms_interleaved(inputs, num_layers, size, keep_prob, lengths):

    outputs = inputs
    print('interleaved')
    for layer in range(num_layers):
        direction = 'backw.' if layer % 2 else 'forw.'
        print('Layer {}: Creating {} LSTM'.format(layer, direction))  # backwards if layer odd
        with tf.variable_scope('{}_lstm_{}'.format(direction, layer)):
            # cell
            cell = HighwayWrapper(DropoutWrapper(LSTMCell(size),
                                                 variational_recurrent=True,
                                                 dtype=tf.float32,
                                                 state_keep_prob=keep_prob))
            # calc either bw or fw - interleaving is done at graph construction (not runtime)
            if direction == 'backw.':
                outputs_reverse = _reverse(outputs, seq_lengths=lengths, seq_dim=1, batch_dim=0)
                tmp, _ = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=outputs_reverse,
                                           sequence_length=lengths,
                                           dtype=tf.float32)
                outputs = _reverse(tmp, seq_lengths=lengths, seq_dim=1, batch_dim=0)
            else:
                outputs, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=outputs,
                                               sequence_length=lengths,
                                               dtype=tf.float32)

    return outputs


class Model():
    def __init__(self, config, embeddings, num_labels, g):

        # embedding
        with tf.device('/cpu:0'):
            self.word_ids = tf.placeholder(tf.int32, [None, None], name='word_ids')
            embedded = tf.nn.embedding_lookup(embeddings, self.word_ids, name='embedded')

        # stacked bilstm
        with tf.device('/gpu:0'):

            self.predicate_ids = tf.placeholder(tf.float32, [None, None], name='predicate_ids')
            self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
            self.lengths = tf.placeholder(tf.int32, [None], name='lengths')
            inputs = tf.concat([embedded, tf.expand_dims(self.predicate_ids, -1)], axis=2, name='lstm_inputs')          

            final_outputs = bilstms_interleaved(inputs,
                                                config.num_layers,
                                                config.cell_size,
                                                self.keep_prob,
                                                    self.lengths)


            # projection
            shape0 = tf.shape(final_outputs)[0] * tf.shape(final_outputs)[1]  # both batch_size and seq_len are dynamic
            final_outputs_2d = tf.reshape(final_outputs, [shape0, config.cell_size], name='final_outputs_2d')
            wy = tf.get_variable('Wy', [config.cell_size, num_labels])
            by = tf.get_variable('by', [num_labels])
            logits = tf.nn.xw_plus_b(final_outputs_2d, wy, by, name='logits')  # need [shape0, num_labels]

            # loss
            self.label_ids = tf.placeholder(tf.int32, [None, None], name='label_ids')  # [batch_size, max_seq_len]
            label_ids_flat = tf.reshape(self.label_ids, [-1])  # need [shape0]
            mask = tf.greater(label_ids_flat, 0, 'mask')
            self.nonzero_label_ids_flat = tf.boolean_mask(label_ids_flat, mask,
                                                     name='nonzero_label_ids_flat')  # removes elements
            nonzero_logits = tf.boolean_mask(logits, mask, name='nonzero_logits')
            nonzero_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=nonzero_logits,
                                                                            labels=self.nonzero_label_ids_flat,
                                                                            name='nonzero_losses')
            self.nonzero_mean_loss = tf.reduce_mean(nonzero_losses, name='nonzero_mean_loss')

            # update
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=config.learning_rate, rho=0.95,epsilon=config.epsilon)
            gradients, variables = zip(*optimizer.compute_gradients(self.nonzero_mean_loss))
            gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm)
            self.update = optimizer.apply_gradients(zip(gradients, variables), name='update')

            # predictions
            self.nonzero_predicted_label_ids = tf.cast(tf.argmax(tf.nn.softmax(nonzero_logits), axis=1), tf.int32,
                                                  name='nonzero_predicted_label_ids')

            # tensorboard
            tf.summary.scalar('nonzero_accuracy', tf.reduce_mean(tf.cast(tf.equal(self.nonzero_predicted_label_ids,
                                                                                  self.nonzero_label_ids_flat),
                                                                         tf.float32)))
            tf.summary.scalar('nonzero_mean_xe', self.nonzero_mean_loss)
            self.scalar_summaries = tf.summary.merge_all()
            p = Path("/content/drive/My Drive/DLSRL/Tensorboard log") 
            self.train_writer = tf.summary.FileWriter(str(p), g)

            # confusion matrix
            nonzero_cm = tf.confusion_matrix(self.nonzero_label_ids_flat, self.nonzero_predicted_label_ids)
            size = tf.shape(nonzero_cm)[0]
            self.cm_summary = tf.summary.image('nonzero_cm', tf.reshape(tf.cast(nonzero_cm, tf.float32),
                                                                              [1, size, size, 1]))  # needs 4d






我正在尝试使用tensorflow实现语义角色标签。在运行模型时,尽管没有必要调整超参数,但由于在论文中已进行了说明,并且在检查张量板图模型是否正确时,仍会出现此错误和性能下降。另一个问题是,加载检查点时出现错误,并且错误与无法找到/加载IndexedSlices有关。因此,我认为此警告正在给模型造成问题,但我无法弄清原因。我应该如何修改代码以使其起作用并删除该警告?

0 个答案:

没有答案