数据加载方式会导致不同的输出精度?

时间:2019-02-07 14:37:09

标签: tensorflow

自定义导入MNIST和TensorFlow官方导入方法导致完全不同的计算精度。为什么?

import tensorflow as tf
import numpy as np
import random
import os
import gzip
from tensorflow.examples.tutorials.mnist import input_data

def load_data(data_folder):

    files = [
      'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
      't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]

    paths = []
    for fname in files:
        paths.append(os.path.join(data_folder,fname))

    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(
        imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)

    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(
        imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)

    return (x_train, y_train), (x_test, y_test)

(x_train, y_train), (x_test, y_test) = load_data('./MNIST_data/')
x_train = x_train.reshape(len(x_train), 28*28)
x_test = x_test.reshape(len(x_test), 28*28)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
y_test = y_test.astype('uint8')
y_train = y_train.astype('uint8')

# one-hot
with tf.Session() as sess:
    y_test, y_train = sess.run([tf.one_hot(y_test, depth=len(set(y_test))), tf.one_hot(y_train, depth=len(set(y_train)))])
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_test.dtype, y_train.dtype)


# 
mnist = input_data.read_data_sets("MNIST_data",one_hot=True, validation_size=0, seed=1)
print(mnist.train.images.shape, mnist.train.labels.shape, mnist.test.images.shape, mnist.test.labels.shape)


def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples)) 
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = np.array(indices[i: min(i + batch_size, num_examples)])
        yield features.take(j, axis=0), labels.take(j, axis=0) 

batch_size = 50

n_batch = 60000 // batch_size

x = tf.placeholder(tf.float32,[None,784])
y = tf.placeholder(tf.float32,[None,10])

# model
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
prediction = tf.nn.softmax(tf.matmul(x,W)+b)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,logits=prediction))
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
init = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(prediction,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

### Use custom data import methods
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(5):

        for batch_xs, batch_ys in data_iter(batch_size, x_train, y_train):
            sess.run(train_step,feed_dict={x: batch_xs, y: batch_ys})
        acc = sess.run(accuracy, feed_dict={x: x_test, y: y_test})
        print("Iter " + str(epoch) + ",Testing Accuracy " + str(acc))

### Use the official data import method of TensorFlow
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(5):

        for batch_xs, batch_ys in data_iter(batch_size, mnist.train.images, mnist.train.labels):
            sess.run(train_step,feed_dict={x: batch_xs, y: batch_ys})
        acc = sess.run(accuracy, feed_dict={x: x_test, y: y_test})
        print("Iter " + str(epoch) + ",Testing Accuracy " + str(acc))

第一种导入数据的结果:

0,测试准确度0.3635

第1次测试精度为0.4344

第2次测试精度为0.5097

第3次,测试精度为0.5484

第4次测试准确度0.5836

第二种导入数据的结果:

0,测试精度0.8959

第1次测试精度为0.907

第2次,测试精度0.9116

第3次,测试精度为0.9133

第4次测试精度为0.9171

0 个答案:

没有答案