我正在生成人工数据,并在其上进行神经网络实验。最终目标是寻找复杂的非线性信号,但起初我想让网络先学会线性信号,所以我只使用一个带有1个输出、线性激活的Dense层。我有10^6个数据点用于训练,10^5个数据点用于验证,并且只有8个特征,它们之间的相关性不高,因此我期望权重能轻松收敛到线性回归的解。然而事实并非如此:在训练样本上,预测值与真实值之间的相关性接近线性模型的水平,但在验证集上,损失却陷入了局部极小值。
我尝试过调整学习率,但效果并不理想,因此这里略去相关实验。下面是重现全部结果的代码;把它复制到 Jupyter 笔记本中运行会更方便。为了可重复性,我附上了数据生成代码;我采用这种生成方式的原因并不重要(与非线性信号有关)。
有人能提供有用的见解吗?
### Import libraries, set seed
import numpy as np, pandas as pd, statsmodels.api as sm, tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import Input, layers, optimizers
from tensorflow.keras.models import Model
np.random.seed(2)
# Generate data
n_train = 1000000; n_valid = n_train // 10; n_tot = n_train + n_valid
# Latent driver g1; the target r mixes the "current" and lagged driver values.
g1 = np.random.randn(n_tot + 1)
r = np.sqrt(0.95) * g1[1:] + np.sqrt(0.05) * g1[:-1]
# Each predictor p_i is the lagged driver scaled by a random per-sample
# coefficient c_i plus unit-variance noise, then standardized.
# The loop preserves the exact RNG call order of the original unrolled code,
# so results are bit-for-bit reproducible under np.random.seed(2).
columns = {"r": r}
for i, upper in enumerate([0.8, 0.2, 0.5, 0.3], start=1):
    c = np.random.uniform(0.0, upper, size=n_tot)
    p = c * g1[:-1] + np.random.randn(n_tot)
    columns[f"p{i}"] = p / np.std(p)
    columns[f"c{i}"] = c
# BUG FIX: the original filled train_data["p4"] from p2[:n_train], so the
# training features were inconsistent with the validation features.
train_data = pd.DataFrame({k: v[:n_train] for k, v in columns.items()})
valid_data = pd.DataFrame({k: v[n_train:n_tot] for k, v in columns.items()},
                          index=np.arange(n_train, n_tot))
### Fit linear model and check correlation on the validation set
label_column = "r"
feature_columns = ["p1", "p2", "p3", "p4", "c1", "c2", "c3", "c4"]
y_train = train_data[label_column].values
x_train = train_data[feature_columns].values
y_valid = valid_data[label_column].values
x_valid = valid_data[feature_columns].values
# NOTE: sm.OLS fits WITHOUT an intercept unless sm.add_constant is used;
# here that is deliberate and matches the NN's Dense(..., use_bias=False).
ols = sm.OLS(y_train, x_train)
result = ols.fit()
# Outside Jupyter a bare `result.summary()` expression is silently discarded,
# so print it explicitly.
print(result.summary())
predictions = result.predict(x_valid)
print("Linear model validation correlation: ", np.corrcoef(y_valid, predictions)[0,1])
### Fit a single-layer, linear activation NN on the data
def correlation(y_true, y_pred):
    """Pearson correlation coefficient between y_true and y_pred (Keras metric).

    Computed as cov(y_true, y_pred) / (std(y_true) * std(y_pred)) over the
    whole batch, using Keras backend ops so it works inside model.compile.
    """
    centered_true = y_true - K.mean(y_true)
    centered_pred = y_pred - K.mean(y_pred)
    covariance = K.sum(centered_true * centered_pred)
    scale = K.sqrt(K.sum(K.square(centered_true)) * K.sum(K.square(centered_pred)))
    return covariance / scale
label_column = "r"; feature_columns = ["p1", "p2", "p3", "p4", "c1", "c2", "c3", "c4"]
y_train = train_data[label_column].values; x_train = train_data[feature_columns].values
y_valid = valid_data[label_column].values; x_valid = valid_data[feature_columns].values
# Seed TF as well: np.random.seed alone does not make Keras weight
# initialization reproducible.
tf.random.set_seed(2)
# One Dense unit, no bias, default (linear) activation == plain linear
# regression, matching the no-intercept OLS baseline above.
input_tensor = Input(shape=(len(feature_columns),))
output_tensor = layers.Dense(1, use_bias=False)(input_tensor)
model = Model(input_tensor, output_tensor)
# Lower-case name: the original shadowed the optimizers.RMSprop class.
rmsprop = optimizers.RMSprop(learning_rate=0.0001)
model.compile(optimizer=rmsprop, loss="mean_squared_error", metrics=[correlation])
# model.summary() prints directly; display() is IPython-only and raises
# NameError when this script runs outside a notebook.
model.summary()
# validation_data must be a tuple — Keras interprets a list as multiple inputs.
model.fit(x=x_train, y=y_train, batch_size=512, epochs=100,
          validation_data=(x_valid, y_valid))