Tensorflow Inception resnet v2输入张量

时间:2016-09-28 14:35:16

标签: python computer-vision tensorflow deep-learning

我试图运行此代码

import os
import tensorflow as tf
from datasets import imagenet
from nets import inception_resnet_v2
from preprocessing import inception_preprocessing

# Directory joined with the checkpoint file name further down.
checkpoints_dir = 'model'

slim = tf.contrib.slim

# NOTE(review): batch_size and image_size are defined but never referenced
# in this snippet (the size 299 is hard-coded in the model call instead).
batch_size = 3
image_size = 299

with tf.Graph().as_default():

# NOTE(review): as pasted, this block is not indented under the `with` above.
with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):
    # The first argument here is a plain Python list of four integers, not an
    # image tensor. Presumably this is the call behind the error quoted below
    # the snippet ("rank of shape must be at least 4 not: 1") - TODO confirm.
    logits, _ = inception_resnet_v2.inception_resnet_v2([1, 299, 299, 3], num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    # Build a callable that assigns the 'InceptionResnetV2' model variables
    # from the checkpoint file.
    init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(checkpoints_dir, 'inception_resnet_v2_2016_08_30.ckpt'),
    slim.get_model_variables('InceptionResnetV2'))

    with tf.Session() as sess:
        init_fn(sess)

        imgPath = '.../image_3.jpeg'
        # NOTE(review): the file object is never explicitly closed.
        testImage_string = tf.gfile.FastGFile(imgPath, 'rb').read()
        # The decoded image is only fetched in sess.run below; it is never
        # passed to inception_resnet_v2, so the network does not see it.
        testImage = tf.image.decode_jpeg(testImage_string, channels=3)

        # From here on `probabilities` names the fetched value, not the tensor.
        np_image, probabilities = sess.run([testImage, probabilities])
        # Keep the first row of the fetched result.
        probabilities = probabilities[0, 0:]
        # Indices ordered by descending probability (ascending sort on the negated values).
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]

        names = imagenet.create_readable_names_for_imagenet_labels()
        # Print the first 15 (probability, label) pairs in that order.
        for i in range(15):
            index = sorted_inds[i]
            print((probabilities[index], names[index]))

但TF显示错误:ValueError: rank of shape must be at least 4 not: 1

我认为问题出在输入张量的形状[1, 299, 299, 3]上。应该如何为一张3通道JPEG图像构造输入张量?

还有一个类似的问题(Using pre-trained inception_resnet_v2 with Tensorflow)。我在那里的代码中看到了input_tensor——不幸的是,没有解释input_tensor是什么。也许我问的是不言而喻的事情,但我卡住了!非常感谢您的任何建议!

2 个答案:

答案 0 :(得分:3)

您必须预处理图像。这是一个代码:

import os
import tensorflow as tf
from datasets import imagenet
from nets import inception_resnet_v2
from preprocessing import inception_preprocessing

# Directory that contains the pre-trained checkpoint file.
checkpoints_dir = 'model'

slim = tf.contrib.slim

batch_size = 3  # not referenced in this snippet
image_size = 299  # height and width handed to the preprocessing step

with tf.Graph().as_default():
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()):

        imgPath = '.../cat.jpg'
        # Read the encoded bytes inside a `with` block so the file handle is
        # closed as soon as the data has been read.
        with tf.gfile.FastGFile(imgPath, 'rb') as image_file:
            testImage_string = image_file.read()
        testImage = tf.image.decode_jpeg(testImage_string, channels=3)

        # Preprocess the decoded image, then add a leading axis so that a
        # batch containing this single image is what the network receives.
        processed_image = inception_preprocessing.preprocess_image(
            testImage, image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        logits, _ = inception_resnet_v2.inception_resnet_v2(
            processed_images, num_classes=1001, is_training=False)
        probabilities = tf.nn.softmax(logits)

        # Callable that assigns the 'InceptionResnetV2' model variables from
        # the checkpoint file.
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_resnet_v2_2016_08_30.ckpt'),
            slim.get_model_variables('InceptionResnetV2'))

        with tf.Session() as sess:
            init_fn(sess)

            # Fetch into a separate name so that `probabilities` keeps
            # referring to the graph tensor instead of being overwritten.
            np_image, np_probabilities = sess.run([processed_images, probabilities])
            # Only one image was fed, so keep the first (only) row.
            np_probabilities = np_probabilities[0, 0:]
            # Indices ordered by descending probability; sorting on the
            # negated value with a stable sort keeps ties in index order.
            sorted_inds = sorted(range(len(np_probabilities)),
                                 key=lambda idx: -np_probabilities[idx])

            names = imagenet.create_readable_names_for_imagenet_labels()
            # Report the 15 most probable classes.
            for index in sorted_inds[:15]:
                print((np_probabilities[index], names[index]))

运行结果是:

(0.1131034, 'tiger cat')
(0.079478227, 'tabby, tabby cat')
(0.052777905, 'Cardigan, Cardigan Welsh corgi')
(0.030195976, 'laptop, laptop computer')
(0.027841948, 'bathtub, bathing tub, bath, tub')
(0.026694898, 'television, television system')
(0.024981709, 'carton')
(0.024039172, 'Egyptian cat')
(0.018425584, 'tub, vat')
(0.018221909, 'Pembroke, Pembroke Welsh corgi')
(0.015066789, 'skunk, polecat, wood pussy')
(0.01377619, 'screen, CRT screen')
(0.012509955, 'monitor')
(0.012224807, 'mouse, computer mouse')
(0.012188354, 'refrigerator, icebox')

答案 1 :(得分:-1)

您可以使用tf.expand_dims(your_tensor_3channel, axis=0)将其展开为批量格式。