I am learning how to work with TensorFlow and how to predict some data from a .csv file. I am basically following the guide at https://blog.goodaudience.com/first-experience-of-building-a-lstm-model-with-tensorflow-e632bde911e1, but I ran into problems when I got to multi-class classification. According to the article, I should use tensorflow.nn.softmax_cross_entropy_with_logits on these values, but every time I do so I run into shape-incompatibility problems. I then found that if I normalize the data I can use tf.nn.sigmoid_cross_entropy_with_logits instead. It works fine with a binary label, but when I switch it to another column (with integer values) it fails with
InvalidArgumentError (see above for traceback): Incompatible shapes: [544] vs. [32]
[[Node: logistic_loss/mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](Reshape, _recv_Placeholder_1_0/_1)]]
[[Node: Mean/_15 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_265_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
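To pin down where the shapes come from, here is a minimal sketch of how I understand the problem (my own reduced reproduction, not the real pipeline): the model output has shape [batch_size, n_classes], the reshape flattens it, but the labels I feed stay [batch_size]. Since 544 / 32 = 17, I assume n_classes resolves to 17 for this column.

# Minimal reproduction of the shape mismatch as I understand it (assumption:
# n_classes ends up as 17 for my integer column, since 544 / 32 = 17).
import numpy as np
import tensorflow as tf

batch_size, n_classes = 32, 17

# stand-in for the model output: logits of shape [batch, n_classes]
logits_2d = tf.placeholder(tf.float32, [batch_size, n_classes])
# like my yplaceholder: no static shape, so the mismatch only shows up at run time
labels = tf.placeholder(tf.float32)

flat_logits = tf.reshape(logits_2d, [-1])  # shape [544] = 32 * 17
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=flat_logits, labels=labels))

with tf.Session() as sess:
    sess.run(cost, feed_dict={logits_2d: np.zeros((batch_size, n_classes), np.float32),
                              labels: np.zeros(batch_size, np.float32)})
    # raises InvalidArgumentError: Incompatible shapes: [544] vs. [32]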
The full code is here:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
from tensorflow.contrib import rnn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
#default settings
epochs = 8
n_units = 200
batch_size = 32
n_classes = 1
n_features = 29
def read_data(filename):
    dataset = pd.read_csv(filename)
    dataset = dataset.fillna(method='ffill')

    list_of_features = ["col2", "col3"]
    label = "col1"

    # to normalize the dataset
    train_set = dataset[list_of_features] / dataset[list_of_features].max()
    label_set = dataset[label] / dataset[label].max()

    X_train, X_test, y_train, y_test = train_test_split(train_set, label_set, test_size=0.3, shuffle=False, random_state=42)

    n_classes = len(dataset[label].unique()) - 1
    n_features = len(list_of_features)

    xplaceholder = tf.placeholder('float', [None, n_features])
    yplaceholder = tf.placeholder('float')

    train_neural_network(X_train, X_test, y_train, y_test, xplaceholder, yplaceholder, n_classes, n_features, batch_size)


def recurrent_neural_network_model(xplaceholder, n_features, n_classes):
    layer = {'weights': tf.Variable(tf.random_normal([n_units, n_classes])),
             'bias': tf.Variable(tf.random_normal([n_classes]))}

    x = tf.split(xplaceholder, n_features, 1)
    print(x)

    lstm_cell = rnn.BasicLSTMCell(n_units)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    output = tf.matmul(outputs[-1], layer['weights']) + layer['bias']
    return output


def train_neural_network(X_train, X_test, y_train, y_test, xplaceholder, yplaceholder, n_classes, n_features, batch_size):
    logit = recurrent_neural_network_model(xplaceholder, n_features, n_classes)
    logit = tf.reshape(logit, [-1])

    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit, labels=yplaceholder))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        for epoch in range(epochs):
            epoch_loss = 0
            i = 0
            for i in range(int(len(X_train) / batch_size)):
                start = i
                end = i + batch_size
                batch_x = np.array(X_train[start:end])
                batch_y = np.array(y_train[start:end])

                _, c = sess.run([optimizer, cost], feed_dict={xplaceholder: batch_x, yplaceholder: batch_y})
                epoch_loss += c
                i += batch_size

            print('Epoch', epoch, 'completed out of', epochs, 'loss:', epoch_loss)

        pred = tf.round(tf.nn.sigmoid(logit)).eval({xplaceholder: np.array(X_test), yplaceholder: np.array(y_test)})
        print(pred)

        f1 = f1_score(np.array(y_test), pred, average='macro')
        accuracy = accuracy_score(np.array(y_test), pred)
        recall = recall_score(y_true=np.array(y_test), y_pred=pred)
        precision = precision_score(y_true=np.array(y_test), y_pred=pred)

        print("F1 Score:", f1)
        print("Accuracy Score:", accuracy)
        print("Recall:", recall)
        print("Precision:", precision)


def main():
    read_data("data2.csv")


if __name__ == '__main__':
    main()
Dataset: col1: int, col2: float, col3: float, col4: bool (represented as 0 or 1)
System: OS X El Capitan 10.11.6, Python 3.6, tensorflow-gpu 1.1
Please help me understand what is going wrong, because in principle this should work, so I must be missing something important (again, it works fine with the bool label). My guess is that the problem is in the number of classes.
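For completeness, this is roughly how I imagine the softmax path from the guide would need to be wired for my data. It is only a sketch reusing the names from the code above (not runnable on its own), and the one-hot encoding of the labels is my own assumption, not something I have working:

# Sketch only: the changes I think the multi-class case needs.
n_classes = len(dataset[label].unique())   # with or without the -1? unsure
yplaceholder = tf.placeholder('float', [None, n_classes])

logit = recurrent_neural_network_model(xplaceholder, n_features, n_classes)  # [batch, n_classes]
# no tf.reshape(logit, [-1]) here, since the softmax loss expects 2-D logits and labels
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=yplaceholder))

# and inside the batch loop the integer labels would be fed one-hot encoded:
batch_y = np.eye(n_classes)[np.array(y_train[start:end], dtype=int)]

Is that the right direction, or am I misunderstanding how the number of classes should be handled?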