import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv('FuelConsumption.csv', skip_blank_lines = True)
feature = np.asanyarray(df[["ENGINE SIZE","CYLINDERS","Mcity", "Mhwy", "Mcmb", "McmbMPG"]])
label = np.asanyarray(df['CO2'])
X_train, X_test, Y_train, Y_test = train_test_split(feature, label, test_size=0.2)
learning_rate = 0.01
training_epochs = 1000
cost_history = np.empty(shape=[1],dtype=float)
n_dim = feature.shape[1]
print(feature.shape)
print(label.shape)
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,1])
W = tf.Variable(tf.ones([n_dim,1]))
init = tf.global_variables_initializer()
y_ = tf.matmul(X, W)
cost = tf.reduce_mean(tf.square(y_ - Y))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
sess = tf.Session()
sess.run(init)
for epoch in range(training_epochs):
sess.run(training_step,feed_dict={X:X_train,Y:Y_train})
cost_history = np.append(cost_history,sess.run(cost,feed_dict={X: X_train,Y: Y_train}))
plt.plot(range(len(cost_history)),cost_history)
plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()
我正在使用上面的代码执行线性回归。我想将训练数据传递到tensorflow占位符。当我运行上面的代码时,出现以下错误
Traceback (most recent call last):
File "C:/Users/snaglapu/Desktop/LinReg/LinReg.py", line 39, in <module>
sess.run(training_step,feed_dict={X:X_train,Y:Y_train})
File "C:\Users\snaglapu\Desktop\LinReg\venv\lib\site-packages\tensorflow\python\client\session.py", line 929, in run
run_metadata_ptr)
File "C:\Users\snaglapu\Desktop\LinReg\venv\lib\site-packages\tensorflow\python\client\session.py", line 1128, in _run
str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (873,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)'
特征的维数是(1096,6)。为什么标注的尺寸不是(1092,1)而是(1092,)?
答案 0 :(得分:2)
重塑您的训练数据Y_train
。它必须是2D:
Y_train = Y_train.reshape(-1, 1)
您甚至可以更早开始:
label = label.reshape(-1, 1)
原因是label
仅占一列,因此在训练/测试拆分期间停留一列。但是您需要一个2D数组用于张量流。