I have the following binary-classification Keras model. It trains poorly, but it does train:
from keras import layers, models, optimizers  # imports implied by the snippet

def vgg_stack(self):
    def func(x):
        x = layers.Conv2D(64, (3, 3), activation='relu')(x)
        x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
        x = layers.Conv2D(128, (3, 3), activation='relu')(x)
        x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = layers.Conv2D(128, (3, 3), activation='relu')(x)
        x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = layers.Conv2D(64, (3, 3), activation='relu')(x)
        x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = layers.Flatten()(x)
        x = layers.Dense(512, activation='relu')(x)
        x = layers.Dense(256, activation='relu')(x)
        x = layers.Dense(1, activation='sigmoid')(x)
        return x
    return func
def implement(self):
    self.inputs = layers.Input((self.input_width, self.input_height, self.input_depth))
    self.outputs = self.vgg_stack()(self.inputs)
    self.opt = optimizers.Adam(lr=self.learning_rate)
    self.model = models.Model(inputs=self.inputs, outputs=self.outputs)
    self.model.compile(loss='binary_crossentropy', optimizer=self.opt)
def fit_predict(self):
    ...
    self.model.fit(data_train, actuals_train, batch_size=self.batch_size, epochs=10, verbose=1,
                   validation_data=[data_validation, actuals_validation], callbacks=[self])
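For context, the elided `...` presumably prepares arrays along these lines (the shapes and sizes below are my assumptions, not from the question):

import numpy as np

# Hypothetical shapes: N images in NHWC float32, labels as a 0/1 column vector.
data_train = np.random.rand(1000, 64, 64, 3).astype(np.float32)
actuals_train = np.random.randint(0, 2, size=(1000, 1)).astype(np.float32)
data_validation = np.random.rand(200, 64, 64, 3).astype(np.float32)
actuals_validation = np.random.randint(0, 2, size=(200, 1)).astype(np.float32)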
Its predictions look like this:
[[ 0.58952832]
[ 0.89163774]
[ 0.99083483]
...,
[ 0.52727282]
[ 0.72056866]
[ 0.99504411]]
I.e. it produces something: the predictions at least vary from sample to sample.
Then I tried to convert the model to pure TensorFlow and ended up with:
import numpy as np
import tensorflow as tf  # imports implied by the snippet (TF 1.x API)

def conv2drelu(self, x, filters, kernel_size, padding='VALID'):
    input_depth = int(x.get_shape()[-1])
    weights = tf.Variable(tf.truncated_normal([kernel_size[0], kernel_size[1], input_depth, filters],
                                              dtype=tf.float32, stddev=self.init_stddev))
    self.var_list.append(weights)
    biases = tf.Variable(tf.constant(0.0, shape=[filters], dtype=tf.float32))
    self.var_list.append(biases)
    y = tf.nn.conv2d(x, weights, [1, 1, 1, 1], padding=padding)
    y = tf.nn.bias_add(y, biases)
    y = tf.nn.relu(y)
    return y
def maxpooling(self, x, pool_size, strides, padding='VALID'):
    y = tf.nn.max_pool(x, ksize=[1, pool_size[0], pool_size[1], 1], strides=[1, strides[0], strides[1], 1],
                       padding=padding)
    return y

def flatten(self, x):
    shape = int(np.prod(x.get_shape()[1:]))
    y = tf.reshape(x, [-1, shape])
    return y
def dense(self, x, units, activation):
    shape = int(x.get_shape()[1])
    weights = tf.Variable(tf.truncated_normal([shape, units], dtype=tf.float32, stddev=self.init_stddev))
    self.var_list.append(weights)
    biases = tf.Variable(tf.constant(0.0, shape=[units], dtype=tf.float32))
    self.var_list.append(biases)
    y = tf.matmul(x, weights)
    y = tf.nn.bias_add(y, biases)
    if activation == 'relu':
        y = tf.nn.relu(y)
    elif activation == 'sigmoid':
        y = tf.nn.sigmoid(y)
    return y
def vgg_stack(self, x):
    x = self.conv2drelu(x, 64, (3, 3))
    x = self.maxpooling(x, (3, 3), strides=(2, 2))
    x = self.conv2drelu(x, 128, (3, 3))
    x = self.maxpooling(x, (2, 2), strides=(2, 2))
    x = self.conv2drelu(x, 128, (3, 3))
    x = self.maxpooling(x, (2, 2), strides=(2, 2))
    x = self.conv2drelu(x, 64, (3, 3))
    x = self.maxpooling(x, (2, 2), strides=(2, 2))
    x = self.flatten(x)
    x = self.dense(x, 512, activation='relu')
    x = self.dense(x, 256, activation='relu')
    x = self.dense(x, 1, activation='sigmoid')
    return x
def implement(self):
    self.var_list = []
    self.input_data = tf.placeholder(tf.float32, shape=(None, self.width, self.height, self.depth))
    self.prediction = self.vgg_stack(self.input_data)
    self.actual = tf.placeholder(tf.float32, shape=(None, 1))
    self.log_loss = tf.losses.log_loss(self.actual, self.prediction)
    opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    # self.step = opt.minimize(self.mean_squared_error, var_list=self.var_list)
    self.step = opt.minimize(self.log_loss, var_list=self.var_list)
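The training loop is not shown above; a minimal sketch of how this graph would be trained, assuming a method on the same class and the same data_train / actuals_train / data_validation arrays as in the Keras fit_predict (mini-batching omitted):

def fit_predict(self):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(10):
            # One full-batch step per epoch; a real loop would iterate over mini-batches.
            _, loss = sess.run([self.step, self.log_loss],
                               feed_dict={self.input_data: data_train,
                                          self.actual: actuals_train})
            print('epoch %d: log loss %.4f' % (epoch, loss))
        return sess.run(self.prediction,
                        feed_dict={self.input_data: data_validation})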
That is, I tried to write a function equivalent to each Keras layer and then combine them into the same structure, using all the same numbers. Unfortunately, the network outputs something degenerate:
[[ 0.46732453]
[ 0.46732453]
[ 0.46732453]
...,
[ 0.46732453]
[ 0.46732453]
[ 0.46732453]]
I.e. the value is identical for every sample.
What could be the reason for this?
Answer (score: 2)
The conversion itself is correct. I wrote unit tests for the convolution layers in Keras and TensorFlow and found that they produce numerically identical results.
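A minimal sketch of such a unit test (my own reconstruction, not the author's code; it assumes TF 1.x with the standalone keras package, NHWC layout, and 'VALID' padding as in the model):

import numpy as np
import tensorflow as tf
from keras import layers, models

np.random.seed(0)
x = np.random.rand(2, 8, 8, 3).astype(np.float32)
w = (np.random.randn(3, 3, 3, 64) * 0.1).astype(np.float32)
b = np.zeros(64, dtype=np.float32)

# Keras side: one Conv2D layer with fixed weights.
inp = layers.Input((8, 8, 3))
out = layers.Conv2D(64, (3, 3), activation='relu', padding='valid')(inp)
m = models.Model(inp, out)
m.layers[1].set_weights([w, b])
keras_y = m.predict(x)

# Raw TF side: the same convolution built by hand.
ty = tf.nn.relu(tf.nn.bias_add(
    tf.nn.conv2d(tf.constant(x), tf.constant(w), [1, 1, 1, 1], 'VALID'),
    tf.constant(b)))
with tf.Session() as sess:
    tf_y = sess.run(ty)

print(np.allclose(keras_y, tf_y, atol=1e-5))  # expected: True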
In addition, I changed the optimization objective from plain log loss to sigmoid_cross_entropy_with_logits, but that did not help either.
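That change looks roughly like this (the variable names here are mine; note that the last dense layer must return raw logits, i.e. no sigmoid, for this loss):

# In vgg_stack: the final layer returns raw logits instead of probabilities.
logits = self.dense(x, 1, activation=None)

# In implement: the loss consumes logits; sigmoid is applied only for prediction.
self.prediction = tf.nn.sigmoid(logits)
self.log_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=self.actual, logits=logits))
self.step = tf.train.AdamOptimizer(self.learning_rate).minimize(
    self.log_loss, var_list=self.var_list)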
The actual problem was the initialization stddev: it was far too small. I assumed any small value would do as long as it broke the symmetry, and set it to 1e-8 or 1e-5, but that was wrong: values that small are practically indistinguishable from zero, and after a few layers the network starts producing identical results for all samples. After I changed the stddev to 1e-1, the network started performing just like the Keras one.
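For reference, a sketch of the fix. Keras layers default to glorot_uniform initialization, which scales the sampling range with fan-in and fan-out instead of using one fixed stddev; the second variant below mimics that (both are illustrative, not the author's exact code):

# Variant 1: the fix described above, one fixed stddev for every layer.
self.init_stddev = 1e-1

# Variant 2: Glorot/Xavier uniform, as Keras uses by default; e.g. inside dense():
limit = np.sqrt(6.0 / (shape + units))
weights = tf.Variable(tf.random_uniform([shape, units], minval=-limit, maxval=limit,
                                        dtype=tf.float32))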