Question

我的数据存放在一个csv文件中。我希望在给定其他所有列的情况下，预测第1列是1还是0。我该如何开始训练一个程序（最好使用神经网络），让它利用所有给定的数据来进行预测？有人可以给我看一些示例代码吗？我尝试过向它传入numpy.ndarray、FIFOQueue（如果我拼错了请见谅）以及DataFrame，但都没有成功。下面是我运行的代码，一直运行到报错为止 -

import tensorflow as tf
import numpy as np
from numpy import genfromtxt

# Load the whole CSV into a single numpy.ndarray.
data = genfromtxt('cs-training.csv',delimiter=',')

# Softmax regression: 11 input columns, 2 output classes.
# NOTE(review): the sample rows shown with the question have 11 values per row
# *including* the label in column 0, so the feature width may need to be 10 -- confirm.
x = tf.placeholder("float", [None, 11])
W = tf.Variable(tf.zeros([11,2]))
b = tf.Variable(tf.zeros([2]))

# Predicted class probabilities, and a placeholder for the one-hot true labels.
y = tf.nn.softmax(tf.matmul(x,W) + b)
y_ = tf.placeholder("float", [None,2])

# Cross-entropy summed over every example and class in the fed batch.
cross_entropy = -tf.reduce_sum(y_*tf.log(y))

# One gradient-descent step (learning rate 0.01) on the cross-entropy.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

init = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init)

for i in range(1000):
    # This is the line that raises AttributeError: `data` is the numpy.ndarray
    # returned by genfromtxt above, and an ndarray has no `.train` attribute.
    batch_xs, batch_ys = data.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

此时我遇到了这个错误 -

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-128-b48741faa01b> in <module>()
      1 for i in range(1000):
----> 2     batch_xs, batch_ys = data.train.next_batch(100)
      3     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

AttributeError: 'numpy.ndarray' object has no attribute 'train'

非常感谢任何帮助。我需要做的只是预测第1列是1还是0。哪怕您只是帮我解决这一个错误，后面的部分我应该也能自己完成。

编辑：当我打印出来时，这就是csv的样子。

[[1,0.766126609,45,2,0.802982129,9120,13,0,6,0,2],
[0,0.957151019,40,0,0.121876201,2600,4,0,0,0,1],
[0,0.65818014,38,1,0.085113375,3042,2,1,0,0,0],
[0,0.233809776,30,0,0.036049682,3300,5,0,0,0,0]]

我正在尝试预测第一列。

Answer 1

下面的代码从CSV文件中读取数据并构建TensorFlow程序。该示例使用Iris（鸢尾花）数据集，因为它可能是一个更有意义的示例。不过，它应该同样适用于您的数据。

请注意，第一列的取值为0、1或2，因为鸢尾花共有3个品种。

#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from numpy import genfromtxt

# Build Example Data is CSV format, but use Iris data
from sklearn import datasets
from sklearn.cross_validation import train_test_split
import sklearn
def _write_labeled_csv(path, features, labels):
    """Write one CSV row per sample to ``path``: the label first, then its features."""
    # The ``with`` block closes the file even if a write fails part-way through.
    with open(path, 'w') as f:
        for label, row in zip(labels, features):
            # np.append concatenates into one array, so the label is written
            # with the same dtype as the feature values.
            values = np.append(np.array(label), row)
            f.write(",".join([str(s) for s in values]) + '\n')


def buildDataFromIris():
    """Create example train/test CSV files from the Iris dataset.

    Loads Iris, splits it with ``test_size=0.33`` and ``random_state=42``,
    and writes the training part to ``cs-training.csv`` and the test part
    to ``cs-testing.csv`` in the current working directory. Each row holds
    the class label in the first column followed by that sample's features.
    Existing files with those names are overwritten.
    """
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)
    _write_labeled_csv('cs-training.csv', X_train, y_train)
    _write_labeled_csv('cs-testing.csv', X_test, y_test)


# Convert to one hot
def convertOneHot(data, num_classes=None):
    """Split the label column off ``data`` and one-hot encode it.

    Args:
        data: Iterable of rows whose first element is the class label. The
            label is truncated to ``int`` and used as an index into the
            one-hot row, so labels are expected to be non-negative.
        num_classes: Width of every one-hot row. When ``None`` (the default)
            the width is ``max(label) + 1`` for the labels found in ``data``.
            Pass it explicitly to encode several arrays (e.g. train and test)
            with the same width even if one of them lacks the highest class.

    Returns:
        A tuple ``(y, y_onehot)``: ``y`` is a 1-D numpy array of the integer
        labels and ``y_onehot`` is a list with one ``[0, ..., 1, ..., 0]``
        list per row. Empty input yields an empty array and an empty list.

    Raises:
        IndexError: If a label is not smaller than ``num_classes``.
    """
    y = np.array([int(row[0]) for row in data])
    if y.size == 0:
        # Nothing to encode; also avoids calling max() on an empty array.
        return (y, [])

    if num_classes is None:
        # Compute the width once instead of once per row.
        num_classes = int(y.max()) + 1

    y_onehot = []
    for label in y:
        encoded = [0] * num_classes
        encoded[label] = 1
        y_onehot.append(encoded)
    return (y, y_onehot)


# Generate cs-training.csv and cs-testing.csv from the Iris dataset.
buildDataFromIris()


data = genfromtxt('cs-training.csv',delimiter=',')  # Training data
test_data = genfromtxt('cs-testing.csv',delimiter=',')  # Test data

# Column 0 holds the label; every remaining column is a feature.
x_train = data[:, 1:]
y_train,y_train_onehot = convertOneHot(data)

x_test = test_data[:, 1:]
# NOTE(review): convertOneHot sizes the one-hot rows from the largest label in
# the array it is given, so the test labels must contain the highest class for
# their width to match B below.
y_test,y_test_onehot = convertOneHot(test_data)


#  A number of features, 4 in this example
#  B = 3 species of Iris (setosa, virginica and versicolor)
A=data.shape[1]-1 # Number of features, Note first is y
B=len(y_train_onehot[0])
tf_in = tf.placeholder("float", [None, A]) # Features
tf_weight = tf.Variable(tf.zeros([A,B]))
tf_bias = tf.Variable(tf.zeros([B]))
tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)

# Training via backpropagation
tf_softmax_correct = tf.placeholder("float", [None,B])  # One-hot true labels
tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))

# Train using tf.train.GradientDescentOptimizer
tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)

# Add accuracy checking nodes
tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

# Initialize and run
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

print("...")
# Run the training
for i in range(30):
    # One gradient-descent step over the full training set.
    sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})

    # Print accuracy on the test set after every step.
    result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
    # Function-call form works on both Python 2 and Python 3.
    print("Run {},{}".format(i,result))


"""
Below is the output
  ...
  Run 0,0.319999992847
  Run 1,0.300000011921
  Run 2,0.379999995232
  Run 3,0.319999992847
  Run 4,0.300000011921
  Run 5,0.699999988079
  Run 6,0.680000007153
  Run 7,0.699999988079
  Run 8,0.680000007153
  Run 9,0.699999988079
  Run 10,0.680000007153
  Run 11,0.680000007153
  Run 12,0.540000021458
  Run 13,0.419999986887
  Run 14,0.680000007153
  Run 15,0.699999988079
  Run 16,0.680000007153
  Run 17,0.699999988079
  Run 18,0.680000007153
  Run 19,0.699999988079
  Run 20,0.699999988079
  Run 21,0.699999988079
  Run 22,0.699999988079
  Run 23,0.699999988079
  Run 24,0.680000007153
  Run 25,0.699999988079
  Run 26,1.0
  Run 27,0.819999992847
  ...

 Ref:
 https://gist.github.com/mchirico/bcc376fb336b73f24b29#file-tensorflowiriscsv-py
"""

我希望这会有所帮助。

Answer 2

您只需提供形状与x、y_相匹配的输入。

x = tf.placeholder("float", [None, 11])
y_ = tf.placeholder("float", [None,2])

因此，不要使用data.train.next_batch(100)，而是创建并使用一个函数“my_csv_batch(count)”，让它返回两个数组，形状分别为[count, 11]和[count, 2]：第一个数组是您的x，第二个数组是您的y_标签。

my_csv_batch会从csv文件中返回一批数据（如果您的数据量很大，可以随机抽取）。

顺便说一句，评估模型时也需要类似的做法：您必须以同样的方式生成一批数据和标签。

训练TensorFlow以预测csv文件中的列

2 个答案: