如何使用新的未经训练的数据集来预测输出值?

时间:2019-07-15 15:57:21

标签: python tensorflow keras linear-regression

我对机器学习还很陌生,除了https://www.youtube.com/watch?v=-vHQub0NXI4之外,互联网上没有太多资源。

我遵循了本教程,这确实很有帮助,但是我不知道如何使用新的未经训练的数据集。意思是我不知道如何将新输入插入机器学习模型以生成预测。

我有一个回归模型,该模型接受两个输入并预测一个称为KT的输出

这就是我尝试过的

# Split the known KT values off the new data; the remaining columns are the
# model inputs.
newdata_labes = new_dataset.pop('KT')

# Keep the new data in the same layout as the training data: one row per
# sample, one column per input feature. The previous version first assigned
# train_dataset.describe() (immediately overwritten) and then transposed the
# frame, which turned samples into columns so the model no longer received
# rows with the expected number of features.
brand_new_data = new_dataset
brand_new_data

# normed_brand_new_data is norm(brand_new_data), i.e. the new inputs scaled
# with the training-set statistics.
test_predictions2 = model.predict(normed_brand_new_data).flatten()

# Predicted vs. true KT for the new data; the line marks perfect agreement.
plt.scatter(newdata_labes, test_predictions2)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

plt.show()

我的代码

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


# Load the brand-new data to predict on and the data used to fit the model.
# NOTE: data_path and new_data_path must be defined before this point.
new_dataset = pd.read_csv(new_data_path)
dataset = pd.read_csv(data_path)
dataset.head()
# head must be *called*; printing dataset.head shows the bound method instead
# of the first rows.
print(dataset.head())


# 80 % of the rows (fixed seed) are used for training, the rest for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

# Summary statistics of the training inputs, one row per feature after the
# transpose. This assignment was missing, so train_stats was used before it
# existed.
train_stats = train_dataset.describe()
train_stats.pop('KT')
train_stats = train_stats.transpose()
train_stats
# Separate the target column KT from the input features.
train_labels = train_dataset.pop('KT')
test_labels = test_dataset.pop('KT')


def norm(x):
  """Scale ``x`` with the mean and standard deviation stored in ``train_stats``.

  Computes ``(x - train_stats['mean']) / train_stats['std']``.
  Presumably ``x`` is a DataFrame whose columns match the index of
  ``train_stats`` -- verify against the callers.
  """
  centered = x - train_stats['mean']
  return centered / train_stats['std']

# Every frame that is fed to the model goes through the same scaling.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
normed_brand_new_data = norm(brand_new_data)

def build_model():
  """Create and compile the regression network.

  The network has two hidden Dense layers of 128 ReLU units and a single
  output unit. Its input width is the number of columns in ``train_dataset``.
  It is compiled with mean-squared-error loss and Adam(0.001), and tracks
  mean absolute error and mean squared error.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_inputs = len(train_dataset.keys())
  first_hidden = layers.Dense(128, activation=tf.nn.relu, input_shape=[n_inputs])
  second_hidden = layers.Dense(128, activation=tf.nn.relu)
  output_layer = layers.Dense(1)
  network = keras.Sequential([first_hidden, second_hidden, output_layer])

  network.compile(
      loss='mean_squared_error',
      optimizer=tf.keras.optimizers.Adam(0.001),
      metrics=['mean_absolute_error', 'mean_squared_error'])
  return network


# Build the network and print its layer summary.
model = build_model()

model.summary()

########3 large epochs
class PrintDot(keras.callbacks.Callback):
  """Progress callback: one dot per epoch, a line break every 100 epochs."""

  def on_epoch_end(self, epoch, logs):
    # Start a fresh output line at epochs 0, 100, 200, ...
    starts_new_row = epoch % 100 == 0
    if starts_new_row:
      print('')
    print('.', end='')

# Maximum number of training epochs; passed as epochs= to model.fit().
EPOCHS = 1000

# NOTE: the triple-quoted string below is never executed. It holds a disabled
# training run (fit without early stopping, then a look at the tail of the
# history table) and is kept for reference only.
"""history = model.fit(
  normed_train_data, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[PrintDot()])



hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()"""

def plot_history(history):
  """Plot training and validation error per epoch.

  Draws two figures from ``history.history``: mean absolute error and mean
  squared error, each with a training curve and a validation curve, then
  shows them.

  Args:
    history: object with ``history`` (dict of per-epoch metrics) and
      ``epoch`` attributes, as returned by ``model.fit``.
  """
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  # (training column, validation column, y-axis label, upper y limit)
  panels = [
    ('mean_absolute_error', 'val_mean_absolute_error',
     'Mean Abs Error [KT]', 0.2),
    ('mean_squared_error', 'val_mean_squared_error',
     'Mean Square Error [$KT^2$]', 0.02),
  ]

  for train_col, val_col, y_label, y_max in panels:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(hist['epoch'], hist[train_col], label='Train Error')
    plt.plot(hist['epoch'], hist[val_col], label='Val Error')
    plt.ylim([0, y_max])
    plt.legend()

  plt.show()


# Start from a freshly initialised network for the early-stopping run.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Hold out 20 % of the training data for validation; stop early once
# val_loss stops improving.
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split = 0.20, verbose=0, callbacks=[early_stop, PrintDot()])

plot_history(history)


# Evaluate on the held-out test split.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
print("Testing set Mean Abs Error: {:5.2f} KT".format(mae))


test_predictions = model.predict(normed_test_data).flatten()

# Predicted vs. true KT; the line marks perfect agreement.
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

plt.show()

# Distribution of the prediction errors on the test split.
Y_pred=model.predict(normed_test_data)
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [KT]")
_ = plt.ylabel("Count")
# Without this call the histogram is never displayed when run as a script.
plt.show()

# look at each prediction value vs true value
for true_kt, predicted_kt in zip(test_labels.values, test_predictions):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))

我期望的是,它接受两列/行的输入并预测输出(KT)

错误消息

    x, check_steps=True, steps_name='steps', steps=steps)
  File "/home/mking/PycharmProjects/Propeller_Porformance/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 2651, in _standardize_user_data
    exception_prefix='input')
  File "/home/mking/PycharmProjects/Propeller_Porformance/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_utils.py", line 385, in standardize_input_data
    str(data_shape)))
ValueError: Error when checking input: expected dense_3_input to have shape (2,) but got array with shape (1,)

1 个答案:

答案 0 :(得分:0)

更新:我已经让它可以正常运行了

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

data_path="/home/mking/Desktop/Copy of Podded Propulsor Validation DataI.csv"
new_data_path="/home/mking/Desktop/0.8to1.2.csv"

new_dataset=pd.read_csv(new_data_path)
dataset= pd.read_csv(data_path)
dataset.head()
# head must be *called*; printing dataset.head shows the bound method instead
# of the first rows.
print(dataset.head())

# used to train the model
train_dataset = dataset.sample(frac=0.8,random_state=0)
# used for testing the model
test_dataset = dataset.drop(train_dataset.index)
# brand new dataset: predictions are wanted for all of its rows

# Summary statistics of the new data's input columns (one row per feature).
trian_new_stat = new_dataset.describe()
trian_new_stat.pop('KT')
trian_new_stat = trian_new_stat.transpose()
trian_new_stat
newdata_labes= new_dataset.pop('KT') # true KT of the new data; compare with predictions

# Summary statistics of the training inputs (one row per feature).
train_stats = train_dataset.describe()
train_stats.pop('KT')
train_stats = train_stats.transpose()
train_stats
train_labels = train_dataset.pop('KT')  # KT targets used in fit()
test_labels = test_dataset.pop('KT')  # true KT values the test predictions are compared against

def norm_new(x):
    """Normalize brand-new input data before it is passed to the model.

    The model is fitted on inputs scaled with the training-set mean and
    standard deviation (``train_stats``), so new data has to be scaled with
    those same statistics. Scaling it with its own mean/std, as was done
    before, maps the new inputs onto a different scale than the one the
    network was trained on and distorts the predictions.

    Args:
        x: new input features; presumably a DataFrame with the same columns
            as the training inputs -- verify against the caller.

    Returns:
        ``(x - train_stats['mean']) / train_stats['std']``.
    """
    return (x - train_stats['mean']) / train_stats['std']

normed_brand_new_data = norm_new(new_dataset)

def norm(x):
  """Scale ``x`` with the mean and standard deviation stored in ``train_stats``.

  Computes ``(x - train_stats['mean']) / train_stats['std']``.
  Presumably ``x`` is a DataFrame whose columns match the index of
  ``train_stats`` -- verify against the callers.
  """
  centered = x - train_stats['mean']
  return centered / train_stats['std']

normed_train_data = norm(train_dataset)
# Inputs later passed to model.predict (the J and AZIA columns, per the
# original author's note).
normed_test_data = norm(test_dataset)

def build_model():
  """Create and compile the regression network.

  The network has two hidden Dense layers of 128 ReLU units and a single
  output unit. Its input width is the number of columns in ``train_dataset``.
  It is compiled with mean-squared-error loss and Adam(0.001), and tracks
  mean absolute error and mean squared error.

  Returns:
    The compiled ``keras.Sequential`` model.
  """
  n_inputs = len(train_dataset.keys())
  first_hidden = layers.Dense(128, activation=tf.nn.relu, input_shape=[n_inputs])
  second_hidden = layers.Dense(128, activation=tf.nn.relu)
  output_layer = layers.Dense(1)
  network = keras.Sequential([first_hidden, second_hidden, output_layer])

  network.compile(
      loss='mean_squared_error',
      optimizer=tf.keras.optimizers.Adam(0.001),
      metrics=['mean_absolute_error', 'mean_squared_error'])
  return network


model = build_model()

class PrintDot(keras.callbacks.Callback):
  """Progress callback: one dot per epoch, a line break every 100 epochs."""

  def on_epoch_end(self, epoch, logs):
    # Start a fresh output line at epochs 0, 100, 200, ...
    starts_new_row = epoch % 100 == 0
    if starts_new_row:
      print('')
    print('.', end='')

# Maximum number of training epochs; passed as epochs= to model.fit().
EPOCHS = 1000

def plot_history(history):
  """Plot training and validation error per epoch.

  Draws two figures from ``history.history``: mean absolute error and mean
  squared error, each with a training curve and a validation curve, then
  shows them.

  Args:
    history: object with ``history`` (dict of per-epoch metrics) and
      ``epoch`` attributes, as returned by ``model.fit``.
  """
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  # (training column, validation column, y-axis label, upper y limit)
  panels = [
    ('mean_absolute_error', 'val_mean_absolute_error',
     'Mean Abs Error [KT]', 0.2),
    ('mean_squared_error', 'val_mean_squared_error',
     'Mean Square Error [$KT^2$]', 0.02),
  ]

  for train_col, val_col, y_label, y_max in panels:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(hist['epoch'], hist[train_col], label='Train Error')
    plt.plot(hist['epoch'], hist[val_col], label='Val Error')
    plt.ylim([0, y_max])
    plt.legend()

  plt.show()


#plot_history(history)
########

# Start from a freshly initialised network for the early-stopping run.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Fit on the normalized training inputs, holding out 20 % for validation and
# stopping early once val_loss stops improving.
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split = 0.20, verbose=0, callbacks=[early_stop, PrintDot()])

plot_history(history)


# Evaluate on the held-out test split.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
print("Testing set Mean Abs Error: {:5.2f} KT".format(mae))


test_predictions = model.predict(normed_test_data).flatten()

# Predicted vs. true KT on the test split; the line marks perfect agreement.
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

plt.show()

# Distribution of the prediction errors on the test split.
Y_pred=model.predict(normed_test_data)
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [KT]")
_ = plt.ylabel("Count")
# Show the histogram on its own; otherwise the scatter plot of the new data
# below is drawn onto the same axes.
plt.show()

# look at each prediction value vs true value
for true_kt, predicted_kt in zip(test_labels.values, test_predictions):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))


# Predictions for the brand-new data set.
test_predictions2 = model.predict(normed_brand_new_data).flatten()

plt.scatter(newdata_labes, test_predictions2)
plt.xlabel('True Values [KT]')
plt.ylabel('Predictions [KT]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

plt.show()

# Iterate over the new data itself. The previous loop used
# len(test_predictions), the size of the *test* split, which raises IndexError
# or silently truncates the output when the two data sets differ in length.
for true_kt, predicted_kt in zip(newdata_labes.values, test_predictions2):
    print("KT=%s, Predicted_KT=%s" % (true_kt, predicted_kt))
#Completly brand new data

我想我的模型效果很差

New input data