我尝试用不同的方法解决XOR任务。第一个使用顺序模型:
# First approach: a Sequential model that takes both XOR operands as one 2-vector.
result = Sequential()
result.add(Dense(2, input_shape=(2,), activation='sigmoid'))
result.add(Dense(1, input_shape=(2,), activation='sigmoid'))  # NOTE(review): input_shape on a non-first layer is ignored by Keras
ada_grad = Adagrad(lr=0.1, epsilon=1e-08, decay=0.0)
result.compile(optimizer=ada_grad, loss=_loss_tensor)  # _loss_tensor is a custom loss defined elsewhere — not shown in this excerpt
基于测试数据:
# Training inputs: all four XOR input pairs, one pair per row.
X = np.array([ [1, 1], [1, 0], [0, 1], [0, 0] ])
# Labels appended in the same order as the rows of X (XOR truth table).
# NOTE(review): Y_train is assumed to be a list created earlier — not shown in this excerpt.
Y_train.append(0)
Y_train.append(1)
Y_train.append(1)
Y_train.append(0)
我训练模型,准确度大约为1。
如果我尝试通过以下方式解决相同的任务:
# Second approach: functional API with one separate 1-d input per XOR operand.
first_input = Input(shape=(1,), name='x1')
input_dense = Dense(1, activation='sigmoid', )(first_input)
second_input = Input(shape=(1,), name='x2')
second_dense = Dense(1, activation='sigmoid', )(second_input)
# Join the two single-unit branches into one 2-d tensor before the shared layers.
merge_one = concatenate([input_dense, second_dense])
merge_one_dense2 = Dense(2, activation='sigmoid', )(merge_one)
merge_one_dense3 = Dense(1, activation='sigmoid', )(merge_one_dense2)
result = Model(inputs=[first_input, second_input], outputs=merge_one_dense3)
ada_grad = Adagrad(lr=0.1, epsilon=1e-08, decay=0.0)
result.compile(optimizer=ada_grad, loss=_loss_tensor)  # same custom loss as the first model — defined elsewhere
以下测试数据:
# Same XOR data as before, but split per operand for the two-input model.
X1 = np.array([ [1], [0], [1], [0] ])
X2 = np.array([ [1], [1], [0], [0] ])
# Labels match row-wise pairs (X1[i], X2[i]) — the XOR truth table again.
Y_train.append(0)
Y_train.append(1)
Y_train.append(1)
Y_train.append(0)
我的准确率约为0.5。
我错过了什么或没有考虑过?
UPD: 经过一些调查,我可以说这些模型之间没有任何区别。答案不同的原因是我使用不同的(随机)初始条件。如果使用相同的初始条件启动模型模拟结果将是相同的。
答案 0（得分：1）：
第二个模型比第一个模型更复杂,也许你应该训练更多的步骤。
这是我的代码,acc是simple_acc:0.7923,complex_acc:0.7244。你可以自己调整一下。
#coding: utf-8
import numpy as np

from keras.layers import Input, Dense, merge, concatenate
from keras.models import Sequential, Model
from keras.optimizers import Adagrad
def simple_model():
    """Build and compile the Sequential XOR model.

    Two-layer MLP: 2 sigmoid hidden units over the 2-d input, then a
    single sigmoid output unit. Compiled with Adagrad and hinge loss.

    Returns:
        A compiled (untrained) keras Sequential model.
    """
    result = Sequential()
    result.add(Dense(2, input_shape=(2,), activation='sigmoid'))
    # FIX: the original also passed input_shape=(2,) here; Keras ignores
    # input_shape on non-first layers, so it was misleading dead weight.
    result.add(Dense(1, activation='sigmoid'))
    # No metrics are requested, so model.evaluate() will return the loss only.
    ada_grad = Adagrad(lr=0.001, epsilon=1e-08, decay=0.0)
    result.compile(optimizer=ada_grad, loss='hinge')
    return result
def complex_model():
    """Build and compile the functional-API XOR model (one input per operand).

    Each 1-d operand passes through its own single-unit sigmoid layer; the
    two branches are concatenated and fed through a 2-unit and then a
    1-unit sigmoid layer. Compiled with Adagrad and hinge loss.

    Returns:
        A compiled (untrained) keras Model with inputs [x1, x2].
    """
    first_input = Input(shape=(1,), name='x1')
    input_dense = Dense(1, activation='sigmoid')(first_input)
    second_input = Input(shape=(1,), name='x2')
    second_dense = Dense(1, activation='sigmoid')(second_input)
    # FIX: keras.layers.merge(..., mode='concat', concat_axis=1) is Keras-1
    # API that was removed in Keras 2, which this file already targets via
    # Model(inputs=..., outputs=...) and fit(epochs=...). concatenate() is
    # the Keras-2 equivalent (axis=1 matches the old concat_axis=1).
    merge_one = concatenate([input_dense, second_dense], axis=1)
    merge_one_dense2 = Dense(2, activation='sigmoid')(merge_one)
    merge_one_dense3 = Dense(1, activation='sigmoid')(merge_one_dense2)
    result = Model(inputs=[first_input, second_input], outputs=merge_one_dense3)
    ada_grad = Adagrad(lr=0.001, epsilon=1e-08, decay=0.0)
    result.compile(optimizer=ada_grad, loss='hinge')
    return result
def simple_data():
    """Return the XOR truth table as (inputs, labels) for the 2-d-input model."""
    inputs = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
    labels = np.array([[0], [1], [1], [0]])
    return inputs, labels
def complex_data():
    """Return XOR data split per operand: ([x1, x2], labels)."""
    first_operand = np.array([[1], [0], [1], [0]])
    second_operand = np.array([[1], [1], [0], [0]])
    labels = np.array([[0], [1], [1], [0]])
    return [first_operand, second_operand], labels
def test_simple_model():
    """Train the Sequential XOR model for 1000 epochs and return its score.

    Trains and validates on the same four XOR samples (the full input
    space). With no metrics compiled in, evaluate() yields the loss value.
    """
    net = simple_model()
    data, targets = simple_data()
    net.summary()
    net.fit(data, targets, batch_size=4, epochs=1000, verbose=1,
            validation_data=(data, targets))
    return net.evaluate(data, targets, verbose=0)
def test_complex_model():
    """Train the functional-API XOR model for 1000 epochs and return its score.

    Trains and validates on the same four XOR samples (the full input
    space). With no metrics compiled in, evaluate() yields the loss value.
    """
    net = complex_model()
    data, targets = complex_data()
    net.summary()
    net.fit(data, targets, batch_size=4, epochs=1000, verbose=1,
            validation_data=(data, targets))
    return net.evaluate(data, targets, verbose=0)
def main():
    """Run both XOR experiments and print their final evaluate() scores.

    NOTE(review): despite the '..._acc' labels, these values are the hinge
    loss — compile() requested no metrics, so evaluate() returns loss only.
    """
    simple_acc = test_simple_model()
    complex_acc = test_complex_model()
    # FIX: bare `print 'x'` statements are Python-2-only syntax and a
    # SyntaxError under Python 3; the parenthesized single-argument form
    # behaves identically on both interpreters.
    print('simple_acc: %.4f' % simple_acc)
    print('complex_acc: %.4f' % complex_acc)


if __name__ == '__main__':
    main()