我目前正致力于将stft音频文件映射到MFCC功能的神经网络。
但是出于某种原因,validation_loss 一直保持在 200 左右。期望的输出范围是 (-100, 100),所以 NN 的输出是不可取的。
我尝试了不同的实现..
简单NN 1层,2层,3层=>没有区别。
CNN,1层,2层 => 相同的结果。尝试了随机森林 (random forest) => 从未完成,出现内存分配错误。
所以......
我做错了什么? 我尝试对输入和输出进行规范化,但是当我对数据集进行非规范化时,同样的错误又回来了......所以...
输入的形状是(x,2050)和输出的形状(x,13)......
我为什么会得到这么糟糕的结果?
标准化:
def numpy_minmax(X):
    """Scale X linearly into the range [-0.9, 0.9].

    Maps X.min() -> -0.9 and X.max() -> 0.9.  Any denormalization
    (see numpy_unorm) must invert exactly this mapping.
    """
    xmin = X.min()
    xmax = X.max()
    print(xmin)
    print(xmax)
    # 2*(X - xmin)/(xmax - xmin) - 1 maps to [-1, 1]; * 0.9 shrinks to [-0.9, 0.9].
    return (2 * (X - xmin) / (xmax - xmin) - 1) * 0.9
def numpy_unorm(x):
    """Invert numpy_minmax for the training-set value range.

    numpy_minmax maps [xmin, xmax] -> [-0.9, 0.9]; this applies the exact
    inverse using the hard-coded min/max observed on the training data.

    BUG FIX: the previous body, x*(xmax-xmin)+xmin, inverts a plain [0, 1]
    min-max scaling, not the [-0.9, 0.9] scaling numpy_minmax produces, so
    denormalized values (and the Lambda output layer that uses this) were
    wrong.
    """
    xmax = 109.2991
    xmin = -97.23664
    # Undo the *0.9 shrink, then the 2*u - 1 shift, then the min-max scaling.
    return ((x / 0.9 + 1.0) / 2.0) * (xmax - xmin) + xmin
# Collect full paths and bare filenames of the training/test sound files.
files_train_path = [dnn_train + f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_path = [dnn_test + f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]
files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))]
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))]

os.chdir(dnn_train)

# Load the STFT inputs and MFCC targets.
# BUG FIX: the third return value was bound to `max`, shadowing the builtin.
train_data, train_output_data, data_max = load_sound_files(files_train_path)

# Stack the per-file arrays into single 2-D design matrices:
# inputs (x, 2050), outputs (x, 13).
train_set_data_vstacked = np.vstack(train_data)
train_set_output_vstacked = np.vstack(train_output_data)
print(train_set_data_vstacked.shape)

# Scale both inputs and outputs into [-0.9, 0.9].
# NOTE(review): the training code later fits against the *unnormalized*
# outputs while un-normalizing the net's output through a Lambda layer —
# make sure the normalization and the output layer stay consistent.
train_set_data_vstacked_normalized = numpy_minmax(train_set_data_vstacked)
train_set_output_vstacked_normalized = numpy_minmax(train_set_output_vstacked)
目前尝试了网络结构选项:
############################### Training setup ##################################
# 10-fold cross-validation over the stacked, normalized training data,
# using a 3-layer dense network (2050 -> 2050 -> 13).
seed = 7
np.random.seed(seed)
# shuffle=False => random_state must be None (it would be ignored anyway).
kfold = KFold(n_splits=10, shuffle=False, random_state=None)
print("Splits")
cvscores_acc = []
cvscores_loss = []
hist = []  # one Keras History object per fold

# BUG FIX: `i` was declared but never incremented, so every fold overwrote
# training_0.csv; enumerate() now gives each fold its own log file.
for i, (train, test) in enumerate(kfold.split(train_set_data_vstacked_normalized)):
    print("Model definition!")
    model = Sequential()
    # 2050 -> 2050 -> 13, ending in tanh so the raw output lies in [-1, 1].
    model.add(Dense(output_dim=2050, input_dim=2050, init="normal", activation='relu'))
    model.add(Dense(output_dim=2050, input_dim=2050, init="he_normal", activation='tanh'))
    model.add(Dense(output_dim=13, input_dim=2050, init="he_normal", activation='tanh'))
    # Map the [-1, 1] tanh output back to the MFCC value range, so the MSE
    # loss is computed against the *unnormalized* targets fed to fit() below.
    model.add(Lambda(lambda x: numpy_unorm(x)))

    print("Compiling")
    model.compile(loss='mean_squared_error', optimizer="RMSprop")
    print("Compile done! ")
    print('\n')

    print("Train start")
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=3,
                                  verbose=1, mode='auto', epsilon=0.0001,
                                  cooldown=0, min_lr=1e-18)
    stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5,
                         verbose=1, mode='auto')
    log = CSVLogger('training_' + str(i) + '.csv')

    # validation_split removed: Keras ignores it when validation_data is given,
    # so passing both only obscures which data is actually validated on.
    hist_current = model.fit(train_set_data_vstacked_normalized[train],
                             train_set_output_vstacked[train],
                             shuffle=False,
                             validation_data=(train_set_data_vstacked_normalized[test],
                                              train_set_output_vstacked[test]),
                             nb_epoch=150,
                             verbose=1,
                             callbacks=[reduce_lr, log, stop])
    hist.append(hist_current)
卷积:
# Each frame length = 118
# Each feature length = 13
############################### Training setup ##################################
# Convolutional variant: 10-fold CV on data reshaped to (x, 1, 2050).
seed = 7
np.random.seed(seed)
kfold = KFold(n_splits=10, shuffle=False, random_state=None)
print("Splits")
cvscores_acc = []
cvscores_loss = []
hist = []  # one Keras History object per fold

# Insert a length-1 "time" axis so Convolution1D accepts the 2-D matrices.
# NOTE(review): with a single timestep, a width-1 Convolution1D reduces to a
# per-frame Dense layer — the convolution adds nothing over the dense model.
train_set_data_vstacked_reshaped = train_set_data_vstacked[:, newaxis, :]
train_set_output_vstacked_reshaped = train_set_output_vstacked[:, newaxis, :]
print(train_set_data_vstacked_reshaped.shape)
print(train_set_output_vstacked_reshaped.shape)

# BUG FIX: `i` was declared but never incremented, so every fold overwrote
# training_0.csv; enumerate() now gives each fold its own log file.
for i, (train, test) in enumerate(kfold.split(train_set_data_vstacked_reshaped)):
    print("Model definition!")
    model = Sequential()
    # 1x1 convolutions: 2050 -> 2050 -> 13 channels.
    model.add(Convolution1D(2050, 1, border_mode='same', input_dim=2050))
    model.add(Convolution1D(13, 1, border_mode='same', input_dim=2050))

    print("Compiling")
    model.compile(loss='mean_squared_error', optimizer="Adam")
    print("Compile done! ")
    print('\n')

    print("Train start")
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=3,
                                  verbose=1, mode='auto', epsilon=0.01,
                                  cooldown=0, min_lr=1e-18)
    stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5,
                         verbose=1, mode='auto')
    log = CSVLogger('training_' + str(i) + '.csv')

    # validation_split removed: Keras ignores it when validation_data is given.
    hist_current = model.fit(train_set_data_vstacked_reshaped[train],
                             train_set_output_vstacked_reshaped[train],
                             shuffle=False,
                             validation_data=(train_set_data_vstacked_reshaped[test],
                                              train_set_output_vstacked_reshaped[test]),
                             nb_epoch=150,
                             verbose=1,
                             callbacks=[reduce_lr, log, stop])
    hist.append(hist_current)