Tensorflow模型输出的概率值大于1

时间:2018-10-12 08:17:38

标签: python tensorflow machine-learning scientific-notation

我正在研究这个分类程序,正在训练我的模型以预测对象是螺母还是螺钉。我没有得到任何数据集,因此创建了自己的数据集。我训练了模型,但没有得到正确的预测。值的概率超过1,基本上我得到了垃圾值。 我得到了这个预测值:[[9.990779e-01 9.220659e-04]]

#培训代码

import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os

#添加种子,以使随机初始化保持一致
    从numpy.random导入种子

seed(1)
from tensorflow import set_random_seed

set_random_seed(2)

batch_size = 20

# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)

# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'

# We shall load all the training and validation images and labels into 
 memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes, 
validation_size=validation_size)

print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training- 
set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation- 
set:\t{}".format(len(data.valid.labels)))

session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 
    num_channels], name='x')

## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], 
         name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32

filter_size_conv2 = 3
num_filters_conv2 = 32

filter_size_conv3 = 3
num_filters_conv3 = 32

fc_layer_size = 128


def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))


def create_biases(size):
    return tf.Variable(tf.constant(0.05, shape=[size]))


def create_convolutional_layer(input,
                           num_input_channels,
                           conv_filter_size,
                           num_filters):
## We shall define the weights that will be trained using create_weights function.
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
## We create biases using the create_biases function. These are also trained.
biases = create_biases(num_filters)

## Creating the convolutional layer
layer = tf.nn.conv2d(input=input,
                     filter=weights,
                     strides=[1, 1, 1, 1],
                     padding='SAME')

layer += biases

## We shall be using max-pooling.
layer = tf.nn.max_pool(value=layer,
                       ksize=[1, 2, 2, 1],
                       strides=[1, 2, 2, 1],
                       padding='SAME')
## Output of pooling is fed to Relu which is the activation function for us.
layer = tf.nn.relu(layer)

return layer


def create_flatten_layer(layer):
# We know that the shape of the layer will be [batch_size img_size img_size num_channels]
# But let's get it from the previous layer.
    layer_shape = layer.get_shape()

## Number of features will be img_height * img_width* num_channels. But we shall calculate it in place of hard-coding it.
    num_features = layer_shape[1:4].num_elements()

## Now, we Flatten the layer so we shall have to reshape to num_features
    layer = tf.reshape(layer, [-1, num_features])

return layer


def create_fc_layer(input,
                num_inputs,
                num_outputs,
                use_relu=True):
# Let's define trainable weights and biases.
     weights = create_weights(shape=[num_inputs, num_outputs])
     biases = create_biases(num_outputs)

# Fully connected layer takes input x and produces wx+b.Since, these are matrices, we use matmul function in Tensorflow
layer = tf.matmul(input, weights) + biases
if use_relu:
    layer = tf.nn.relu(layer)

return layer


layer_conv1 = create_convolutional_layer(input=x,
                                     num_input_channels=num_channels,
                                     conv_filter_size=filter_size_conv1,
                                     num_filters=num_filters_conv1)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
                                     num_input_channels=num_filters_conv1,
                                     conv_filter_size=filter_size_conv2,
                                     num_filters=num_filters_conv2)

layer_conv3 = create_convolutional_layer(input=layer_conv2,
                                     num_input_channels=num_filters_conv2,
                                     conv_filter_size=filter_size_conv3,
                                     num_filters=num_filters_conv3)

layer_flat = create_flatten_layer(layer_conv3)

layer_fc1 = create_fc_layer(input=layer_flat,
                        num_inputs=layer_flat.get_shape() 
[1:4].num_elements(),
                        num_outputs=fc_layer_size,
                        use_relu=True)

layer_fc2 = create_fc_layer(input=layer_fc1,
                        num_inputs=fc_layer_size,
                        num_outputs=num_classes,
                        use_relu=False)

y_pred = tf.nn.softmax(layer_fc2, name='y_pred')

y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
                                                    labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

session.run(tf.global_variables_initializer())


def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation 
Accuracy: {2:>6.1%},  Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))


total_iterations = 0

saver = tf.train.Saver()


def train(num_iteration):
    global total_iterations

    for i in range(total_iterations,
                   total_iterations + num_iteration):

        x_batch, y_true_batch, _, cls_batch = 
            data.train.next_batch(batch_size)
        x_valid_batch, y_valid_batch, _, valid_cls_batch = 
             data.valid.next_batch(batch_size)

        feed_dict_tr = {x: x_batch,
                    y_true: y_true_batch}
        feed_dict_val = {x: x_valid_batch,
                     y_true: y_valid_batch}

        session.run(optimizer, feed_dict=feed_dict_tr)

        if i % int(data.train.num_examples / batch_size) == 0:
            val_loss = session.run(cost, feed_dict=feed_dict_val)
            epoch = int(i / int(data.train.num_examples / batch_size))

            show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
            saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')

    total_iterations += num_iteration


train(num_iteration=3000)





#Prediction code

import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse


# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
image_size=128
num_channels=3
images = []
# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done 
exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0) 
#The input to the network is of shape [None image_size image_size 
num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)

## Let us restore the saved model 
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))

# Accessing the default graph which we have restored
graph = tf.get_default_graph()

# Now, let's get hold of the op that we can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the 
network
y_pred = graph.get_tensor_by_name("y_pred:0")

## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0") 
y_true = graph.get_tensor_by_name("y_true:0") 
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))


### Creating the feed_dict that is required to be fed to calculate y_pred 
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probabiliy_of_nuts probability_of_screws]
print(result)

1 个答案:

答案 0 :(得分:2)

9.990779e-01实际上低于1。您可能会看到它是:9.990779 *(-01的指数)。