import pandas as pd
import matplotlib.pyplot as plt
# Load the height/weight dataset, convert both columns to metric units,
# plot them, and preview the first few converted values.
csv = 'C:\\Users\\Alex\\Downloads\\weight-height.csv'
df = pd.read_csv(csv)
# A bare `df.head` only references the method and shows nothing when run as
# a script; call it and print the result so the first rows are displayed.
print(df.head())

# Heights into centimetres (factor 2.54 — assumes the CSV column is in inches).
x_train = df['Height'].values * 2.54
# Weights into kilograms (divisor 2.2046226218 — assumes the column is in pounds).
y_train = df['Weight'].values / 2.2046226218

# Quick visual check of the height/weight relationship.
plt.figure()
plt.scatter(x_train, y_train)
plt.show()

# Preview the converted data. `X` and `y` are only assigned further down the
# script, so printing them at this point raised NameError.
print(x_train[:10])
print(y_train[:10])
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import numpy as np
# Turn the 1-D training arrays into (n_samples, 1) column vectors and keep
# only the first 5000 rows.
X = np.array(x_train).reshape(-1, 1)[:5000]
y = np.array(y_train).reshape(-1, 1)[:5000]

# Three stacked Dense layers: 36 units with ReLU, 18 units, then 1 output unit.
model = Sequential([
    Dense(36, activation='relu'),
    Dense(18),
    Dense(1),
])

# NOTE(review): 'accuracy' is kept exactly as written; it is a classification
# metric and is not informative for a mean-squared-error regression.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for a single epoch, one sample per batch, holding out 10% of the
# rows for validation.
history = model.fit(X, y, validation_split=0.1, epochs=1, batch_size=1)

# Disabled plotting of the training/validation metric curves:
#plt.plot(history.history['acc'])
#plt.plot(history.history['val_acc'])
我是个新手,正在尝试用 Keras 从头构建自己的线性回归模型,但不明白为什么损失(loss)会这么大。我想知道问题出在我使用的优化器、损失函数,还是数据本身。该数据集只是一份身高和体重的列表。
答案 0 :(得分:0)
我会尝试:
对身高和体重进行归一化,使二者各自的最大值约为 1。当数值在 1 附近时,深度学习通常更容易训练。显然,您需要对测试数据做同样的缩放(除以相同的常数),然后将模型的预测结果乘以体重所用的缩放常数,才能还原为实际数值。
将指标更改为 "mse" 或 "mae"(均方误差或平均绝对误差)。这不会改变您的损失,但会让您感觉更好,因为对回归问题来说,它是衡量模型表现的更有意义的指标。
尝试一下:
# Convert heights to centimetres, then divide by the fixed constant 175.0
# to rescale the inputs.
x_train = df['Height'].values * 2.54 / 175.0
# Convert weights to kilograms, then divide by the fixed constant 80.0
# to rescale the targets.
y_train = df['Weight'].values / 2.2046226218 / 80.0
...
# Same loss and optimizer as before; report mean squared error as the metric.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mse'],
)
然后用一些新的数值进行测试:
# Predict the weight for a new height of 187 cm.
x_test = 187
x_test = np.array(x_test).reshape(-1, 1)
# 187 is already in centimetres (it is far too large to be inches), so the
# inch-to-centimetre factor 2.54 must NOT be applied a second time. Only the
# same /175.0 rescale that was applied to the training heights is needed.
x_test = x_test / 175.0
pred = model.predict(x_test)
# Undo the /80.0 target scaling so the prediction is back in kilograms.
pred = pred * 80.0