TensorFlow 2.0 SparseCategoricalCrossentropy ValueError: Shape mismatch: The shape of labels should equal the shape of logits except for the last dimension

Asked: 2020-02-29 02:16:09

Tags: python python-3.x tensorflow keras

New to ML and TensorFlow in general. I run into this issue when I try to execute the loss calculation (loss = loss_object(labels, predictions)) inside the train_step function.

I feel like I'm missing something super small and silly! I checked other solutions, but from what I can gather they target older versions of TF or differ in syntax and structure. The code snippet below is executable. I just don't feel I understand this well enough after googling. Any help is appreciated.

Error received:

ValueError: Shape mismatch: The shape of labels (received (30,)) should equal the shape of logits except for the last dimension (received (2, 10)).
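
For context: SparseCategoricalCrossentropy expects integer class indices with one fewer dimension than the logits, so feeding it one-hot rows triggers exactly this flavor of mismatch. A minimal, self-contained sketch (with illustrative shapes, not my real data):

import tensorflow as tf

sparse_loss = tf.keras.losses.SparseCategoricalCrossentropy()
preds = tf.random.uniform((2, 10))              # (batch, num_classes) softmax outputs
print(sparse_loss(tf.constant([3, 7]), preds))  # integer labels of shape (batch,): works
one_hot = tf.one_hot([3, 7], depth=10)          # (2, 10) one-hot labels
sparse_loss(one_hot, preds)                     # raises the shape-mismatch ValueError above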

I am following this writeup and adding my own spin where possible: GCP TF sample writeup

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import tensorflow as tf

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from tensorflow.keras import backend as K

import nltk
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords

# try sklearn
from sklearn.model_selection import train_test_split

EPOCHS = 10

# staging and vars
data = pd.read_csv('../rando.csv')
data = data[pd.notnull(data['utext'])]
data = data[data.type != 'None']

# encode unique values of the types
le = LabelEncoder()
data['type'] = le.fit_transform(data['type'])

training_data = [] 
testTrain_data = []
# create a dictionary of data based on type
for index, row in data.iterrows():
    training_data.append({"class": row["type"], "sentence": row["fulltext"]})

words = []
classes = []
documents= []

not_required= ['?']
# create our training data
training = []
output = []

lanStemmer = LancasterStemmer()

def stemDocWord(words=words, classes=classes):
    # loop through each sentence in our training data
    for pattern in training_data:
        # tokenize each word in the sentence
        w = nltk.word_tokenize(pattern['sentence'])
        # add to our words list
        words.extend(w)

        documents.append((w, pattern['class']))
        # add to our classes list
        if pattern['class'] not in classes:
            classes.append(pattern['class'])

    # stem and lower each word and remove duplicates
    # (slice assignment so the module-level lists are updated in place,
    # rather than rebinding the local names and discarding the result)
    stemmer = PorterStemmer()
    words[:] = list({stemmer.stem(w.lower()) for w in words if w not in not_required})

    # remove duplicates
    classes[:] = list(set(classes))

    print(len(documents), "documents")


def listWordTokensForPattern():
    # create an empty array for our output
    output_empty = [0] * len(classes)

    # training set, bag of words for each sentence
    for doc in documents:
        # initialize our bag of words
        bag = []
        # list of tokenized words for the pattern
        pattern_words = doc[0]
        # stem each word
        pattern_words = [lanStemmer.stem(word.lower()) for word in pattern_words]
        # create our bag of words array
        for w in words:
            bag.append(1 if w in pattern_words else 0)

        training.append(bag)
        # output is a '0' for each tag and '1' for current tag
        output_row = list(output_empty)
        output_row[classes.index(doc[1])] = 1
        output.append(output_row)

    print("# output", len(output))
    print("# training", len(training))

# og training function
stemDocWord()
listWordTokensForPattern()
X = np.array(training)
y = np.array(output)

print(X.shape)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=23)
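# NOTE: this /255 scaling is carried over from the MNIST image example; these
# bag-of-words features are already 0/1, so it only shrinks their scale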
x_train, x_test = x_train / 255.0, x_test / 255.0 

# Add two trailing dimensions so each sample becomes (num_words, 1, 1) for Conv2D
x_train = x_train[..., tf.newaxis, tf.newaxis]
x_test = x_test[..., tf.newaxis, tf.newaxis]

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(100).batch(2)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(2)

print(x_train)
print(K.image_data_format())

# inputs_ = tf.compat.v1.placeholder(tf.float32, [None, 32, 32, 3])
# inputs_ = tf.Variable(tf.ones(shape=(0 ,32, 32, 3)), name="inputs_")
class CustomModel(Model):
  def __init__(self):
    super(CustomModel, self).__init__()
    self.conv1 = Conv2D(2, 1, activation='relu')  # (also tried input_shape=x_train.shape here)
    self.flatten = Flatten()
    self.d1 = Dense(128, activation='relu')
    self.d2 = Dense(10, activation='softmax')
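    # note: the 10 output units here must match the number of label classes (len(classes))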

  def call(self, x):
    x = self.conv1(x)
    x = self.flatten(x)
    x = self.d1(x)
    return self.d2(x)

model = CustomModel()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none')
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    predictions = model(images)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
  predictions = model(images)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)

for epoch in range(EPOCHS):
  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
  print (template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

# Save the weights
model.save_weights('fashion_mnist_weights')

The CSV file looks similar to this:

utext,fulltext,type
t1,"some random sentence",type1
t2,"other random sentences",type2
t3,"some random text",type3


Here is the output I get, per the first comment:

WARNING:tensorflow:Entity <function train_step at 0x000001E9480B11E0> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, 'export AUTOGRAPH_VERBOSITY=10') and attach the full output. Cause: converting <function train_step at 0x000001E9480B11E0>: AttributeError: module 'gast' has no attribute 'Str'
WARNING:tensorflow:Entity <bound method CustomModel.call of <__main__.CustomModel object at 0x000001E947C9D358>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, 'export AUTOGRAPH_VERBOSITY=10') and attach the full output. Cause: converting <bound method CustomModel.call of <__main__.CustomModel object at 0x000001E947C9D358>>: AssertionError: Bad argument number for Name: 3, expecting 4
2020-02-29 12:14:46.018316: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 2683371520 exceeds 10% of system memory.
2020-02-29 12:14:47.459793: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 2683371520 exceeds 10% of system memory.
2020-02-29 12:14:47.869789: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 2683371520 exceeds 10% of system memory.

1 Answer:

Answer 0 (score: 1)

Solved. The issue was that my training labels (y) are one-hot encoded, so my loss function was incorrect. Simply changed the Keras loss to the non-sparse variant and bingo.
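
For reference, a minimal sketch of that change, reusing the names from the snippet above; swapping the sparse accuracy metrics for CategoricalAccuracy is my assumption, since one-hot labels trip those up the same way:

# one-hot labels -> use the non-sparse loss (and, presumably, the non-sparse accuracy metric)
loss_object = tf.keras.losses.CategoricalCrossentropy(reduction='none')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')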