在 TensorFlow 中使用我自己的 .csv 文件

时间:2017-01-13 06:37:27

标签: python csv tensorflow

我之前就同一段代码问过一个问题:“where and how to put the filename in this tensorflow code?”

不确定我是否应将其合并到此问题中或保留原样。

以下代码来自 Sirajology 的 GitHub。我没有找到一份简单明了的教程来说明如何把自己的 .csv 文件输入到一个简单的 TensorFlow 神经网络中,所以希望这个帖子能为以后搜索到这里的人提供相应的说明。

代码如下

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

# Global variables.
NUM_LABELS = 2  # Number of distinct class labels (width of the one-hot rows).
BATCH_SIZE = 5  # Number of training examples fed to the graph per training step.

# Define the flags usable from the command line.
tf.app.flags.DEFINE_string('train', None,
                           'File containing the training data (labels & features).')
tf.app.flags.DEFINE_string('test', None,
                           'File containing the test data (labels & features).')
# num_epochs scales the number of training steps taken in main(); it is not a
# validation-set size, so the help text says what the flag actually controls.
tf.app.flags.DEFINE_integer('num_epochs', 1,
                            'Number of passes over the training data.')
tf.app.flags.DEFINE_boolean('verbose', False, 'Produce verbose output.')
FLAGS = tf.app.flags.FLAGS

# Extract numpy representations of the labels and features given rows consisting of:
#   label, feat_0, feat_1, ..., feat_n
def extract_data(filename, num_labels=None):
    """Load a CSV file of labelled examples into numpy arrays.

    Each non-blank row must have the form ``label, feat_0, ..., feat_n``:
    an integer label followed by float features.

    Args:
        filename: Path of the CSV file to read.
        num_labels: Number of classes, i.e. the width of each one-hot row.
            Defaults to the module-level ``NUM_LABELS``.

    Returns:
        A pair ``(features, labels_onehot)``: a float32 ``np.matrix`` with one
        row per example, and a float32 array holding one one-hot row per
        example.

    Raises:
        ValueError: If a label or feature cannot be parsed as a number.
    """
    if num_labels is None:
        num_labels = NUM_LABELS

    # Accumulators for the labels and the feature vectors.
    labels = []
    fvecs = []

    # The file() builtin was removed in Python 3; open() works on both major
    # versions, and the with-block closes the handle even if a row fails to
    # parse.
    with open(filename) as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an extra newline at end of file.
                continue
            row = line.split(",")
            labels.append(int(row[0]))
            fvecs.append([float(x) for x in row[1:]])

    # Convert the list of float lists into a numpy float matrix.
    fvecs_np = np.matrix(fvecs).astype(np.float32)

    # Labels are stored as uint8, so they are expected to lie in 0..255.
    labels_np = np.array(labels).astype(dtype=np.uint8)

    # Broadcasting a column of labels against [0, 1, ..., num_labels - 1]
    # yields one boolean row per example with a single True at the label index.
    labels_onehot = (np.arange(num_labels) == labels_np[:, None]).astype(np.float32)

    return fvecs_np, labels_onehot

def main(argv=None):
    """Train a softmax classifier on the --train CSV and score it on --test.

    Reads both files with extract_data(), builds a single-layer softmax model,
    runs mini-batch gradient descent for ``--num_epochs`` passes over the
    training data, and prints the accuracy on the test data. With
    ``--verbose`` it also prints progress, the learned parameters and the
    model's output for the first test row.

    Args:
        argv: Command-line arguments passed in by tf.app.run(); unused here
            because all options are read from FLAGS.

    Raises:
        ValueError: If --train or --test was not supplied.
    """
    # Be verbose?
    verbose = FLAGS.verbose

    # Get the data file names; both flags default to None.
    train_data_filename = FLAGS.train
    test_data_filename = FLAGS.test
    if not train_data_filename or not test_data_filename:
        raise ValueError('Both --train and --test must name a CSV file.')

    # Extract the data into numpy matrices.
    train_data, train_labels = extract_data(train_data_filename)
    test_data, test_labels = extract_data(test_data_filename)

    # Get the shape of the training data.
    train_size, num_features = train_data.shape

    # Get the number of epochs for training.
    num_epochs = FLAGS.num_epochs

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes are fed a batch of training data at each
    # training step through the feed_dict argument of the run() calls below.
    x = tf.placeholder("float", shape=[None, num_features])
    y_ = tf.placeholder("float", shape=[None, NUM_LABELS])

    # Define and initialize the network.

    # These are the weights that inform how much each feature contributes to
    # the classification.
    W = tf.Variable(tf.zeros([num_features, NUM_LABELS]))
    b = tf.Variable(tf.zeros([NUM_LABELS]))
    y = tf.nn.softmax(tf.matmul(x, W) + b)

    # Optimization.
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    # Evaluation.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Create a local session to run this computation.
    with tf.Session() as s:
        # Run all the initializers to prepare the trainable parameters.
        tf.initialize_all_variables().run()
        if verbose:
            print('Initialized!')
            print()
            print('Training.')

        # Iterate and train. range() replaces xrange(), which does not exist
        # in Python 3.
        for step in range(num_epochs * train_size // BATCH_SIZE):
            if verbose:
                # Keep the step counters on one line, separated by spaces.
                print(step, end=' ')

            # Walk through the training set in BATCH_SIZE slices, wrapping
            # around at the end; the final slice of a pass may be shorter.
            offset = (step * BATCH_SIZE) % train_size
            batch_data = train_data[offset:(offset + BATCH_SIZE), :]
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            train_step.run(feed_dict={x: batch_data, y_: batch_labels})

            # End the progress line once the last batch of a pass is done.
            if verbose and offset >= train_size - BATCH_SIZE:
                print()

        # Give very detailed output.
        if verbose:
            print()
            print('Weight matrix.')
            print(s.run(W))
            print()
            print('Bias vector.')
            print(s.run(b))
            print()
            print("Applying model to first test instance.")
            first = test_data[:1]
            print("Point =", first)
            print("Wx+b = ", s.run(tf.matmul(first, W) + b))
            print("softmax(Wx+b) = ", s.run(tf.nn.softmax(tf.matmul(first, W) + b)))
            print()

        print("Accuracy:", accuracy.eval(feed_dict={x: test_data, y_: test_labels}))


# Script entry point: tf.app.run() handles the command-line flags and then
# calls main() with the remaining arguments.
if __name__ == "__main__":
    tf.app.run()

当我在终端(Windows 10 命令行)中用下面的命令运行代码时:python YourScript.py --train FileName.csv --test TestName.csv --num_epochs 5 --verbose True,我收到了以下这些错误。非常感谢任何帮助!

错误#1  文件 "softmax.py",第133行,在 <module> 中:tf.app.run()

tf.app.run()

错误#2  文件 "C:\ app.py",第43行,在 run 中:sys.exit(main(sys.argv[:1] + flags_passthrough))

labels_onehot = (np.arange(NUM_LABELS) == labels_np[:, None]).astype(np.float32) 

错误#3  文件 "softmax.py",第57行,在 main 中:train_data,train_labels = extract_data(train_data_filename)

train_data,train_labels = extract_data(train_data_filename) 
test_data, test_labels = extract_data(test_data_filename)

错误#4  文件 "softmax.py",第31行,在 extract_data 中:for file in file(filename): NameError: name 'file' is not defined(名称 'file' 未定义)

for line in file(filename):
        row = line.split(",")
        labels.append(int(row[7]))
        fvecs.append([float(x) for x in row[1:6]])

1 个答案:

答案 0 :(得分:1)

看起来这个问题来自于这一行,它使用了Python 3.5中没有的内置函数(file()):

for line in file(filename):

使用以下行替换它应该可以解决错误:

for line in open(filename):