I am trying to train a TensorFlow network, but every run ends with a different result: each training run produces a different accuracy even though I use the same training data and the same test data. I cannot reproduce the model and cannot find my mistake.
The data are vectorized addresses of people.
import numpy as np
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words over Cyrillic tokens, plus one binary court-type feature.
vec = CountVectorizer(token_pattern='(?u)\\b[а-яё0-9]+\\b', min_df=3)
X = sp.csc_matrix(sp.hstack([vec.fit_transform(df.Addr_upd),
                             (df.CourtType == 'MS').astype(np.int).values.reshape(df.shape[0], 1)]))
df.Addr_upd is a string address column, for example:

Addr_upd
300041 г Тула, Тульская обл, ул Каминского, 54
300034 г Тула, Тульская обл, ул Демонстрации, 145
300004 г Тула, Тульская обл, Комбайновый пер, 11
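For context, this preprocessing step is deterministic (CountVectorizer has no random state), so the run-to-run variation cannot come from it. A self-contained toy version of the snippet above, with min_df lowered so the two sample rows survive:

import pandas as pd
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer

# Toy frame with the same assumed columns as the real one (Addr_upd, CourtType).
df = pd.DataFrame({
    'Addr_upd': ['300041 г Тула, Тульская обл, ул Каминского, 54',
                 '300034 г Тула, Тульская обл, ул Демонстрации, 145'],
    'CourtType': ['MS', 'RS'],
})
vec = CountVectorizer(token_pattern='(?u)\\b[а-яё0-9]+\\b', min_df=1)  # min_df lowered for 2 rows
X = sp.csc_matrix(sp.hstack([vec.fit_transform(df.Addr_upd),
                             (df.CourtType == 'MS').astype(int).values.reshape(df.shape[0], 1)]))
print(X.shape)  # (2, vocabulary size + 1 court-type column)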
I train the network with this code:
import time
from datetime import timedelta

import numpy as np
import tensorflow as tf

batch_size = 4096

def new_weights(shape, name, is_trainable=True):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05), name=name, trainable=is_trainable)

def new_biases(length, name):
    # Note: tf.constant is not a Variable, so these biases stay fixed at 1.0.
    return tf.constant(1.0, shape=[length], name=name)
def new_fc_layer(input,           # The previous layer.
                 weights,
                 biases,
                 name,            # Name to save
                 dropout=None,    # Dropout probability
                 use_relu=True):  # Use Rectified Linear Unit (ReLU)?
    layer = tf.matmul(input, weights) + biases
    if dropout:
        layer = tf.nn.dropout(layer, 1 - dropout)
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
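Incidentally, tf.nn.dropout draws a fresh random mask on every run. It is not enabled anywhere in the graph below, but if it ever is, the TF 1.x API accepts an op-level seed; a minimal sketch:

import tensorflow as tf

x = tf.ones([4, 4])
y = tf.nn.dropout(x, keep_prob=0.5, seed=123)  # op-level seed pins the dropout mask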
def split(start, end, step):
    # Produce half-open (start, end) index pairs covering [start, end) in chunks of `step`.
    res = []
    start_ = start
    for current_ in np.arange(step, end, step):
        res.append((start_, current_))
        start_ = current_
    res.append((start_, end))
    return res
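Given the definition above, split is deterministic; for example:

print(split(0, 10, 4))  # [(0, 4), (4, 8), (8, 10)]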
def print_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    # `predicted` (defined below) counts correct predictions in the feed.
    train_pred_correct = session.run(predicted, feed_dict=feed_dict_train)
    test_pred_correct = session.run(predicted, feed_dict=feed_dict_validate)
    acc = train_pred_correct / feed_dict_train[y_true].shape[0]
    val_acc = test_pred_correct / feed_dict_validate[y_true].shape[0]
    msg = "Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))
def optimize(session, optimizer, X_train, X_test, y_train, y_test, epoch=1):
    start_time = time.time()
    for e in np.arange(epoch):
        for start, end in split(0, X_train.shape[0], batch_size):
            x_batch, y_true_batch = X_train[start:end].toarray(), y_train[start:end]
            feed_dict_train = {inp: x_batch, y_true: y_true_batch}
            session.run(optimizer, feed_dict=feed_dict_train)
        end_time = time.time()
        time_dif = end_time - start_time
        feed_dict_validate = {inp: X_test.toarray(), y_true: y_test}
        val_loss = session.run(cost, feed_dict=feed_dict_validate)
        print_progress(e, feed_dict_train, feed_dict_validate, val_loss)
        print("Time elapsed: " + str(timedelta(seconds=int(round(time_dif)))))
sess.close()  # close the session left over from a previous run, if any
tf.reset_default_graph()
ALPHA = 1e-5
inp = tf.placeholder(tf.float32, shape=[None, X_train.shape[1]], name='inp')
y_true = tf.placeholder(tf.float32, shape=[None, y_train2.shape[1]], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
fc1_w = new_weights([X_train.shape[1], y_train1.shape[1]], name='fc1_w', is_trainable=False)
fc1_b = new_biases(length=y_train1.shape[1], name='fc1_b')
layer_1 = new_fc_layer(input=inp, weights=fc1_w, biases=fc1_b, name='l1')
outs_1 = tf.nn.softmax(layer_1, name='outs_1')
union = tf.concat([inp, outs_1], axis=1)
w2_3_size=4096
fc2_w = new_weights([X_train.shape[1] + y_train1.shape[1], w2_3_size], name='fc2_w')
fc2_b = new_biases(length=w2_3_size, name='fc2_b')
layer_2 = new_fc_layer(input=union, weights=fc2_w, biases=fc2_b, name='l2')
loss = tf.nn.l2_loss(fc2_w) * ALPHA
outs_2 = tf.nn.relu(layer_2, name='outs_2')
fc3_w = new_weights([w2_3_size, y_train2.shape[1]], name='fc3_w')
loss = loss + tf.nn.l2_loss(fc3_w) * ALPHA
fc3_b = new_biases(length=y_train2.shape[1], name='fc3_b')
layer_3 = new_fc_layer(input=outs_2, weights=fc3_w, biases=fc3_b, name='l3')
outs = tf.nn.softmax(layer_3, name='outs')
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_3, labels=y_true)
y_pred_cls = tf.argmax(outs, axis=1)
acc = tf.cast(tf.equal(y_pred_cls, y_true_cls), tf.float32)  # float32: a float16 running sum cannot count past 2048 exactly
predicted = tf.reduce_sum(acc)
cost = tf.reduce_mean(cross_entropy) + loss
optimizer01 = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(cost)
optimizer001 = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
optimizer0001 = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
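Each minimize call above creates its own set of Adam slot variables (the m/v moment accumulators), so the three training phases keep separate optimizer state. A possible alternative, sketched with assumed names lr and train_op, is a single Adam instance whose learning rate is fed at run time:

# Sketch (assumed names): one optimizer, learning rate supplied per run call.
lr = tf.placeholder(tf.float32, shape=[], name='lr')
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
# later: session.run(train_op, feed_dict={inp: x_batch, y_true: y_true_batch, lr: 1e-2})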
loader = tf.train.Saver({"fc1_w": fc1_w})
saver = tf.train.Saver({"fc1_w": fc1_w, "fc2_w": fc2_w, "fc3_w": fc3_w})
# Create a summary to monitor cost tensor
tf.summary.scalar("loss", cost)
# Create a summary to monitor accuracy tensor
tf.summary.scalar("accuracy", acc)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()
with tf.Session() as session:
    # tf.set_random_seed(123)
    session.run(tf.global_variables_initializer())
    loader.restore(session, STEP1)
    # saver.restore(session, FINAL_PATH)
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    feed_dict_validate = {inp: X_test.toarray()}
    optimize(session, optimizer01, X_train, X_test, y_train2, y_test2, epoch=20)
    optimize(session, optimizer001, X_train, X_test, y_train2, y_test2, epoch=10)
    optimize(session, optimizer0001, X_train, X_test, y_train2, y_test2, epoch=20)
    saver.save(session, FINAL_PATH)
This model trains to a different result every time on the same data. I tried adding a seed (the commented-out tf.set_random_seed(123) above), but it did not help. Maybe I am doing something wrong; how can I fix this?
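For reference, my current understanding, which may be wrong: in TF 1.x, tf.set_random_seed sets a graph-level seed and only affects ops created after the call, so placing it inside the session block, after new_weights has already created the tf.truncated_normal ops, has no effect. A minimal sketch of the placement I plan to try (GPU kernels may still be non-deterministic):

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
np.random.seed(123)      # pins NumPy-side randomness (e.g. any shuffling)
tf.set_random_seed(123)  # must run BEFORE the graph is built, so that
                         # tf.truncated_normal derives its op seed from it
# ... build the graph and run the training session as above ...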