我刚接触机器学习,除了 https://www.youtube.com/watch?v=-vHQub0NXI4 这个视频之外,我在网上没有找到太多相关资源。
我按照这个教程做了一遍,确实很有帮助,但我不知道如何把训练好的模型用在新的、未参与训练的数据集上。也就是说,我不知道如何把新的输入送入机器学习模型来生成预测。
我有一个回归模型,该模型接受两个输入并预测一个称为KT的输出
这就是我尝试过的
# Predict KT for a brand-new data set with the already-trained model.
# This snippet relies on `new_dataset`, `norm`, `model` and `plt` being
# defined by the main script.

# Split off the true KT values so they can be compared with the predictions.
newdata_labes = new_dataset.pop('KT')

# Keep the new data as one row per sample and one column per input feature.
# The earlier describe()/transpose() steps were dropped: describe() was
# overwritten immediately, and transpose() turned the features into rows,
# which no longer matches the input shape the model expects.
brand_new_data = new_dataset
brand_new_data

# Scale the new inputs with the same norm() that is applied to the training
# and test inputs before they are fed to the model.
normed_brand_new_data = norm(brand_new_data)

test_predictions2 = model.predict(normed_brand_new_data).flatten()

# Scatter plot of predicted against true KT; the straight line is the
# perfect-prediction diagonal.
plt.scatter(newdata_labes, test_predictions2)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()
我的代码
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Load the brand-new data (to be predicted later) and the data set that is
# split into training and test portions.
new_dataset = pd.read_csv(new_data_path)
dataset = pd.read_csv(data_path)

# head must be *called*; printing the bare attribute prints the bound method.
print(dataset.head())

# Random 80 % sample (fixed seed) for training; the remaining rows for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

# Summary statistics of the training inputs.  train_stats was previously used
# without ever being assigned; it has to be created from describe() first.
# After the transpose there is one row per input column with 'mean' and 'std'
# columns, which is what norm() reads.
train_stats = train_dataset.describe()
train_stats.pop('KT')
train_stats = train_stats.transpose()
train_stats

# Separate the target column (KT) from the input columns.
train_labels = train_dataset.pop('KT')
test_labels = test_dataset.pop('KT')
def norm(x):
    """Standardise *x* with the training-set statistics.

    Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``,
    both read from the module-level ``train_stats`` table.

    Args:
        x: Data whose columns line up with the index of ``train_stats``.

    Returns:
        The scaled data, same layout as *x*.
    """
    return (x - train_stats['mean']) / train_stats['std']
# Scale every input set with the same training-set statistics.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# Normalise the new inputs in their row-per-sample layout (not the transposed
# frame).  KT is dropped if it is still present, because train_stats has no
# KT entry and the model only takes the input columns.
normed_brand_new_data = norm(new_dataset.drop(columns=['KT'], errors='ignore'))
def build_model():
    """Build and compile the KT regression network.

    The network has two hidden Dense layers of 128 ReLU units followed by a
    single linear output unit.  The input width is the number of columns in
    the module-level ``train_dataset``, so the target column must already
    have been removed from it when this is called.

    Returns:
        The compiled ``keras.Sequential`` model (MSE loss, Adam optimiser with
        learning rate 0.001, MAE and MSE reported as metrics).
    """
    model = keras.Sequential([
        layers.Dense(128, activation=tf.nn.relu,
                     input_shape=[len(train_dataset.keys())]),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(1),
    ])
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model
# Build the network once and print its layer/parameter summary.
model = build_model()
model.summary()
# ---- Training with a large number of epochs ----
class PrintDot(keras.callbacks.Callback):
    """Minimal progress display: one dot per finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for this epoch, starting a new line every 100 epochs.

        Args:
            epoch: Zero-based index of the epoch that just ended.
            logs: Metric values supplied by Keras; not used here.
        """
        if epoch % 100 == 0:
            print('')
        print('.', end='')
# Maximum number of training epochs handed to model.fit().
EPOCHS = 1000
# NOTE: the triple-quoted string below is a disabled training run (without
# early stopping).  As a bare string expression it has no effect when run.
"""history = model.fit(
normed_train_data, train_labels,
epochs=EPOCHS, validation_split = 0.2, verbose=0,
callbacks=[PrintDot()])
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()"""
def _plot_metric(hist, metric, ylabel, ymax):
    """Draw the train and validation curves of one metric on a new figure."""
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(hist['epoch'], hist[metric], label='Train Error')
    plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
    plt.ylim([0, ymax])
    plt.legend()


def plot_history(history):
    """Plot the training curves recorded by ``model.fit``.

    Two figures are produced, one for mean absolute error and one for mean
    squared error, each showing the training and the validation curve per
    epoch.  ``plt.show()`` is called once at the end.

    Args:
        history: Object returned by ``model.fit``; its ``history`` mapping
            and ``epoch`` list are read.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    _plot_metric(hist, 'mean_absolute_error', 'Mean Abs Error [KT]', 0.2)
    _plot_metric(hist, 'mean_squared_error', 'Mean Square Error [$KT^2$]', 0.02)
    plt.show()
# Build a new model instance for the early-stopping training run.
model = build_model()

# The patience parameter is the number of epochs to wait for an improvement
# of val_loss before training is stopped.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split=0.20, verbose=0,
                    callbacks=[early_stop, PrintDot()])
plot_history(history)

# Evaluate on the held-out test split.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

print("Testing set Mean Abs Error: {:5.2f} KT".format(mae))

# Predicted against true KT on the test split; the straight line is the
# perfect-prediction diagonal.
test_predictions = model.predict(normed_test_data).flatten()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

Y_pred = model.predict(normed_test_data)

# Distribution of the prediction error.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [KT]")
_ = plt.ylabel("Count")
# Without an explicit show() the histogram is never displayed when this runs
# as a plain script.
plt.show()

# Look at each prediction value vs. the true value.
for true_kt, predicted_kt in zip(test_labels, test_predictions):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))
我期望的是,它接受两列/行的输入并预测输出(KT)
错误消息
    x, check_steps=True, steps_name='steps', steps=steps)
  File "/home/mking/PycharmProjects/Propeller_Porformance/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 2651, in _standardize_user_data
    exception_prefix='input')
  File "/home/mking/PycharmProjects/Propeller_Porformance/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_utils.py", line 385, in standardize_input_data
    str(data_shape))
ValueError: Error when checking input: expected dense_3_input to have shape (2,) but got array with shape (1,)
答案 0 :(得分:0)
更新:我已经让它正常运行了
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
data_path = "/home/mking/Desktop/Copy of Podded Propulsor Validation DataI.csv"
new_data_path = "/home/mking/Desktop/0.8to1.2.csv"

new_dataset = pd.read_csv(new_data_path)
dataset = pd.read_csv(data_path)

# head must be *called*; printing the bare attribute prints the bound method.
print(dataset.head())

# Used to train the model: random 80 % sample with a fixed seed.
train_dataset = dataset.sample(frac=0.8, random_state=0)
# Used for testing the model: the rows that were not sampled for training.
test_dataset = dataset.drop(train_dataset.index)

# Brand-new data set: every KT value in it is to be predicted.
# Summary statistics of its input columns (one row per column after the
# transpose, with 'mean' and 'std' columns).
trian_new_stat = new_dataset.describe()
trian_new_stat.pop('KT')
trian_new_stat = trian_new_stat.transpose()
trian_new_stat
# True KT of the new data, compared against the predictions later on.
newdata_labes = new_dataset.pop('KT')

# Summary statistics of the training inputs, laid out the same way.
train_stats = train_dataset.describe()
train_stats.pop('KT')
train_stats = train_stats.transpose()
train_stats

# KT of the training rows: the target passed to fit().
train_labels = train_dataset.pop('KT')
# KT of the test rows: the true values the predictions are compared with.
test_labels = test_dataset.pop('KT')
def norm_new(x):
    """Standardise brand-new input data before prediction.

    The model is fitted on inputs scaled with the *training* mean and std, so
    new data has to go through exactly the same transformation.  Scaling it
    with its own statistics would shift and stretch the inputs relative to
    what the model saw during training.

    Args:
        x: New input data whose columns line up with the index of the
            module-level ``train_stats`` table.

    Returns:
        The scaled data, same layout as *x*.
    """
    return (x - train_stats['mean']) / train_stats['std']
# Scale the brand-new inputs; the result is later passed to model.predict().
normed_brand_new_data = norm_new(new_dataset)
def norm(x):
    """Standardise *x* with the training-set statistics.

    Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``,
    both read from the module-level ``train_stats`` table.

    Args:
        x: Data whose columns line up with the index of ``train_stats``.

    Returns:
        The scaled data, same layout as *x*.
    """
    return (x - train_stats['mean']) / train_stats['std']
# Scale the training and test inputs with norm().
normed_train_data = norm(train_dataset)
# The scaled test inputs are what model.evaluate()/model.predict() receive
# below; per the author these are the J and AZIA columns.
normed_test_data = norm(test_dataset)
def build_model():
    """Build and compile the KT regression network.

    The network has two hidden Dense layers of 128 ReLU units followed by a
    single linear output unit.  The input width is the number of columns in
    the module-level ``train_dataset``, so the target column must already
    have been removed from it when this is called.

    Returns:
        The compiled ``keras.Sequential`` model (MSE loss, Adam optimiser with
        learning rate 0.001, MAE and MSE reported as metrics).
    """
    model = keras.Sequential([
        layers.Dense(128, activation=tf.nn.relu,
                     input_shape=[len(train_dataset.keys())]),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(1),
    ])
    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model
# Initial model instance; it is replaced by a fresh build_model() call right
# before training further down.
model = build_model()
class PrintDot(keras.callbacks.Callback):
    """Minimal progress display: one dot per finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for this epoch, starting a new line every 100 epochs.

        Args:
            epoch: Zero-based index of the epoch that just ended.
            logs: Metric values supplied by Keras; not used here.
        """
        if epoch % 100 == 0:
            print('')
        print('.', end='')
# Maximum number of epochs handed to model.fit(); the EarlyStopping callback
# used below may end training sooner.
EPOCHS = 1000
def _plot_metric(hist, metric, ylabel, ymax):
    """Draw the train and validation curves of one metric on a new figure."""
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(hist['epoch'], hist[metric], label='Train Error')
    plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
    plt.ylim([0, ymax])
    plt.legend()


def plot_history(history):
    """Plot the training curves recorded by ``model.fit``.

    Two figures are produced, one for mean absolute error and one for mean
    squared error, each showing the training and the validation curve per
    epoch.  ``plt.show()`` is called once at the end.

    Args:
        history: Object returned by ``model.fit``; its ``history`` mapping
            and ``epoch`` list are read.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    _plot_metric(hist, 'mean_absolute_error', 'Mean Abs Error [KT]', 0.2)
    _plot_metric(hist, 'mean_squared_error', 'Mean Square Error [$KT^2$]', 0.02)
    plt.show()
# Build a new model instance for the early-stopping training run.
model = build_model()

# The patience parameter is the number of epochs to wait for an improvement
# of val_loss before training is stopped.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Fit on the scaled training inputs; 20 % of them are held out for validation.
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split=0.20, verbose=0,
                    callbacks=[early_stop, PrintDot()])
plot_history(history)

# Evaluate on the held-out test split.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)

hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

print("Testing set Mean Abs Error: {:5.2f} KT".format(mae))

# Predicted against true KT on the test split; the straight line is the
# perfect-prediction diagonal.
test_predictions = model.predict(normed_test_data).flatten()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

Y_pred = model.predict(normed_test_data)

# Distribution of the prediction error on the test split.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [KT]")
_ = plt.ylabel("Count")
# Without an explicit show() the histogram is never displayed when this runs
# as a plain script.
plt.show()

# Look at each prediction value vs. the true value.
for true_kt, predicted_kt in zip(test_labels, test_predictions):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))

# Completely brand-new data: predict KT and compare with the recorded values.
test_predictions2 = model.predict(normed_brand_new_data).flatten()
plt.scatter(newdata_labes, test_predictions2)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

# Iterate over the new-data arrays themselves.  Using the length of
# test_predictions here would raise IndexError or silently truncate whenever
# the two data sets differ in size.
for true_kt, predicted_kt in zip(newdata_labes, test_predictions2):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))
我想我的模型效果很差