Question

我一直在尝试训练模型。我写了一个脚本来构造图像数据集中的TFrecords。

这是训练脚本的代码：

import numpy as np
import tensorflow as tf
import cv2
from PIL import Image
import glob
import sys

batch_size=32
img_size=50
classes_names=[]
dataset_file=glob.glob("dataset/asl_alphabet_train/*")
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for f in dataset_file:
    classes_names.append(f[27:]) # the name of the subfolders is the name of the class
num_classes=len(classes_names)

def parser(record):
    # a parsing function to parse the tfrecords
    keys_to_features={
        "img_raw": tf.FixedLenFeature([], tf.string),
        "label": tf.FixedLenFeature([], tf.int64)


    }
    parsed=tf.parse_single_example(record, keys_to_features)#parsing one example from the example buffer from the tfrecord using the keys 
    image= tf.decode_raw(parsed["img_raw"], tf.uint8)# decoding ( bytes -> tf.float32)
    image= tf.cast(image, tf.float32)
    #image= tf.reshape(image, shape=[50,50,3])#reshaping images
    label=tf.cast(parsed["label"], tf.int32)# casting labels to int32
    return  image, label

def input_fn(filenames, train_bool=True):
    #from tfrecord to iterable data 
    dataset=tf.data.TFRecordDataset(filenames=filenames, num_parallel_reads=40)# instantiantion of an object from class TFRecordDataset
    dataset= dataset.map(parser) # maps a function to the dataset
    if train_bool: 
        dataset= dataset.shuffle(buffer_size=2048)
        repeat=None # if in training mode allow reading data infinitely
    else :
        repeat=1 # if in validation or test allow max 1 read
    dataset= dataset.repeat(repeat)
    dataset= dataset.batch(batch_size)# define bach size
    iterator= dataset.make_one_shot_iterator()# making the iterator
    images_batch, labels_batch=iterator.get_next()# getting the data
    x= {'image': images_batch}
    y= labels_batch
    return x, y

def train_input_fn():
    return input_fn(filenames=["TFrecords/train.tfrecords"])

def val_input_fn():
    return input_fn(filenames=["TFrecords/val.tfrecords"],train_bool=False)

def test_input_fn():
    return input_fn(filenames=["TFrecords/test.tfrecords"])

def model_fn(features, labels, mode, params):

    Input=features['image']# transform the features to a tensor 
    Input=tf.reshape(Input, [-1,img_size,img_size,3])# reshaping to tensorflow networks compatible shapes

    output=tf.layers.conv2d(inputs=Input, name='conv1',
                filters=32, kernel_size=3,
                padding='same', activation=tf.nn.relu)
    output=tf.layers.max_pooling2d(inputs=output, pool_size=2, strides=2)

    output=tf.layers.conv2d(inputs=output, name='conv2',
                filters=64, kernel_size=3,
                padding='same', activation=tf.nn.relu)
    output=tf.layers.max_pooling2d(inputs=output, pool_size=2, strides=2)

    output=tf.layers.conv2d(inputs=output, name='conv3',
                filters=64, kernel_size=3,
                padding='same', activation=tf.nn.relu)
    output=tf.layers.max_pooling2d(inputs=output, pool_size=2, strides=2)

    output=tf.layers.flatten(output)


    output=tf.layers.dense(inputs=output, name='fc1',
                units=128, activation=tf.nn.relu)
    output=tf.layers.dropout(inputs=output, rate=0.5, noise_shape=None,
                 seed=None, training=(mode==tf.estimator.ModeKeys.TRAIN))
    output=tf.layers.dense(inputs=Input, name='fc2',units=num_classes)

    logits=output


    y_pred=tf.identity(tf.nn.softmax(logits=logits), name="logits")


    y_pred_cls=tf.identity(tf.argmax(y_pred,axis=1), name="classes")

    if mode == tf.estimator.ModeKeys.PREDICT:
        spec = tf.estimator.EstimatorSpec(mode=mode,predictions=y_pred_cls)

    else: 
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=labels)

        loss=tf.reduce_mean(cross_entropy)

        optimizer= tf.train.AdamOptimizer(learning_rate=params["learning_rate"])
        train_op= optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        metrics={ "accuracy":tf.metrics.accuracy(labels, y_pred_cls) }
        spec=tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=metrics)
    return spec

model = tf.estimator.Estimator(model_fn=model_fn,
                   params={"learning_rate":1e-2},
                   model_dir="Model/")

count=0
while(count<100000):
    model.train(input_fn=train_input_fn,steps=1000)
    result=model.evaluate(input_fn=val_input_fn)
    print(count)
    print("Classification accuracy: {0:.2%}".format(result["accuracy"]))
    sys.stdout.flush()
    count=count+1

运行脚本时，出现以下错误： ValueError：等级不匹配：标签的等级（接收到1）应等于对数的等级减去1（接收到4）。

此外，我尝试使用迭代器获取每批图像，并测试该批的大小，并测试是否收到正确的批大小，然后获得正确的批大小。但是，在模型的第一行中，当我尝试将张量整形为[batch_size，img_size，imge，size，3]时，我收到一个错误，即所接收的张量的数量是预期批处理数：0的4倍。

错误跟踪如下：

Traceback (most recent call last):
  File "Train.py", line 122, in <module>
    model.train(input_fn=train_input_fn,steps=1000)
  File "/home/hch/miniconda3/envs/DL_CUDA/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 366, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/home/hch/miniconda3/envs/DL_CUDA/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1119, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/home/hch/miniconda3/envs/DL_CUDA/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1132, in _train_model_default
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "/home/hch/miniconda3/envs/DL_CUDA/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1107, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "Train.py", line 101, in model_fn
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logits)
  File "/home/hch/miniconda3/envs/DL_CUDA/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 2052, in sparse_softmax_cross_entropy_with_logits
    (labels_static_shape.ndims, logits.get_shape().ndims))
ValueError: Rank mismatch: Rank of labels (received 1) should equal rank of logits minus 1 (received 4).

您怎么看？谢谢

Answer 1

我知道了！这是一个愚蠢的错误。在最后一个输出层，当我应该使用“输出”时，我输入“输入”：output = tf.layers.dense（inputs = Input，name ='fc2'，units = num_classes

Tensorflow-ValueError：排名不匹配：

1 个答案: