假设我有一个评分矩阵 R,我想使用 TensorFlow 将其分解为矩阵 U 和 V。
不使用批量(batch)的简单情形可以通过以下代码解决:
# Factorize the whole matrix R at once: R ~= u . v
# define Variables: u is (R_dim_1, output_dim), v is (output_dim, R_dim_2)
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')

# predict rate by multiplication
# (u and v are created as float32 above, so no extra cast is needed)
predicted_R = tf.matmul(u, v)

# cost function and train step
# The cost is the sum of absolute errors over every entry. reduce_sum without
# an axis already reduces to a scalar, so one call is enough. The `-` operator
# is used instead of tf.sub because tf.sub was renamed in TensorFlow 1.0.
cost = tf.reduce_sum(tf.abs(predicted_R - R))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    for i in range(no_epochs):
        # One optimizer step on the full matrix; fetch the cost for logging.
        _, this_cost = sess.run([train_step, cost])
        # Single-argument print() gives the same output on Python 2 and 3.
        print('cost:  %s' % (this_cost,))
我决定改用批量更新来解决这个问题:我的做法是传入 U 和 V 的索引,用这些索引选出的部分来预测评分矩阵 R 中对应的子块,并且只更新被选中的那些索引。
这是我的代码(如果觉得太长,只看注释即可):
# Batched attempt: train on a random sub-block of R per epoch and write the
# trained slice back into u / v by hand.
# define variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')
idx1 = tf.placeholder(tf.int32, shape=batch_size1, name='idx1')
idx2 = tf.placeholder(tf.int32, shape=batch_size2, name='idx2')

# get current U and current V by slicing U and V
# NOTE(review): cur_u / cur_v are separate Variables whose *initial value* is
# the gathered slice. They appear to be filled only when their initializer
# runs (with the idx1 / idx2 fed at that moment); feeding different indices
# later does not re-slice u / v -- confirm this is intended.
cur_u = tf.Variable(tf.gather(u, idx1), dtype=tf.float32, name='cur_u')
cur_v = tf.transpose(v)
cur_v = tf.gather(cur_v, idx2)
cur_v = tf.Variable(tf.transpose(cur_v), dtype=tf.float32, name='cur_v')

# predict rate by multiplication (both operands are already float32)
predicted_R = tf.matmul(cur_u, cur_v)

# get needed rate from rate matrix by slicing it:
# rows idx1 first, then (via transpose) columns idx2
cur_rate = tf.gather(R, idx1)
cur_rate = tf.transpose(cur_rate)
cur_rate = tf.gather(cur_rate, idx2)
cur_rate = tf.transpose(cur_rate)

# cost function and train step
# Sum of absolute errors on the selected sub-block. A single reduce_sum
# already yields a scalar; `-` replaces tf.sub, which was renamed in TF 1.0.
cost = tf.reduce_sum(tf.abs(predicted_R - cur_rate))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    # initialize variables
    # u and v are initialized first because the initial values of
    # cur_u / cur_v read them.
    init_new_vars_op = tf.initialize_variables([v, u])
    sess.run(init_new_vars_op)
    init = tf.initialize_all_variables()
    rand_idx = np.sort(np.random.randint(0, R_dim_1, batch_size1))
    rand_idx2 = np.sort(np.random.randint(0, R_dim_2, batch_size2))
    sess.run(init, feed_dict={idx1: rand_idx, idx2: rand_idx2})

    for i in range(no_epochs):
        # NOTE(review): this fresh default graph looks like an attempted
        # workaround for the graph growth below. It appears to have no effect,
        # because tf.assign places its op in the graph of its input `u`.
        with tf.Graph().as_default():
            rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
            # NOTE(review): this overwrites the rand_idx2 that was fed when
            # cur_v was initialized, while rand_idx (rows) is left untouched.
            rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)
            _, this_cost, tmp_u, tmp_v, tmp_cur_u, tmp_cur_v = sess.run(
                [train_step, cost, u, v, cur_u, cur_v],
                feed_dict={idx1: rand_idx1, idx2: rand_idx2})
            print(this_cost)

            # update U and V with computed current U and current V
            tmp_u = np.array(tmp_u)
            tmp_u[rand_idx] = tmp_cur_u
            # NOTE: tf.assign() builds a NEW Assign op (plus a constant holding
            # the whole tmp_u array) on every call, so the graph grows every
            # epoch -- this is the memory growth the question asks about.
            # Rebinding `u` to the op's output also means the next sess.run
            # fetch of `u` is what executes the assignment.
            u = tf.assign(u, tmp_u)
            tmp_v = np.array(tmp_v)
            tmp_v[:, rand_idx2] = tmp_cur_v
            # Same graph growth as for u above.
            v = tf.assign(v, tmp_v)
但我在 u = tf.assign(u, tmp_u) 和 v = tf.assign(v, tmp_v) 这两处出现了内存泄漏。
我尝试了this中的做法,但没有任何效果。
另一种方案是只对 U 和 V 的子集应用更新(例如this),但那样做我遇到了许多其他错误,所以请只关注如何解决我的内存泄漏问题。
抱歉问题这么长,感谢您的阅读。
答案 0 :(得分:1)
我通过把 U 和 V 的更新值作为占位符传入解决了这个问题:把 U 和 V 赋值为这些传入值的操作只在构图时创建一次,这样计算图在各次迭代中保持不变。
这是代码:
# Batched factorization with a fixed graph: the new values of u / v are fed
# through placeholders into assign ops that are created once, so nothing is
# added to the graph inside the training loop.
# define variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')
idx1 = tf.placeholder(tf.int32, shape=batch_size1, name='idx1')
idx2 = tf.placeholder(tf.int32, shape=batch_size2, name='idx2')

# define new place holder for changed values of U and V
last_u = tf.placeholder(tf.float32, shape=[R_dim_1, output_dim], name='last_u')
last_v = tf.placeholder(tf.float32, shape=[output_dim, R_dim_2], name='last_v')

# set U and V to updated ones
# These two ops are built exactly once; each epoch only feeds new data to them.
change_u = tf.assign(u, last_u)
change_v = tf.assign(v, last_v)

# get current U and current V by slicing U and V
# NOTE(review): cur_u / cur_v are separate Variables whose *initial value* is
# the gathered slice. They appear to be filled only when their initializer
# runs (with the idx1 / idx2 fed at that moment) -- confirm this is intended.
cur_u = tf.Variable(tf.gather(u, idx1), dtype=tf.float32, name='cur_u')
cur_v = tf.transpose(v)
cur_v = tf.gather(cur_v, idx2)
cur_v = tf.Variable(tf.transpose(cur_v), dtype=tf.float32, name='cur_v')

# predict rate by multiplication (both operands are already float32)
predicted_R = tf.matmul(cur_u, cur_v)

# get needed rate from rate matrix by slicing it:
# rows idx1 first, then (via transpose) columns idx2
cur_rate = tf.gather(R, idx1)
cur_rate = tf.transpose(cur_rate)
cur_rate = tf.gather(cur_rate, idx2)
cur_rate = tf.transpose(cur_rate)

# cost function and train step
# Sum of absolute errors on the selected sub-block. A single reduce_sum
# already yields a scalar; `-` replaces tf.sub, which was renamed in TF 1.0.
cost = tf.reduce_sum(tf.abs(predicted_R - cur_rate))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    # initial_u / initial_v are defined outside this snippet -- presumably
    # numpy arrays shaped like last_u / last_v; verify against the caller.
    tmp_u = initial_u
    tmp_v = initial_v

    # initialize variables
    # u and v are initialized first because the initial values of
    # cur_u / cur_v read them.
    init_new_vars_op = tf.initialize_variables([v, u])
    sess.run(init_new_vars_op, feed_dict={last_u: tmp_u, last_v: tmp_v})
    init = tf.initialize_all_variables()
    rand_idx = np.sort(np.random.randint(0, R_dim_1, batch_size1))
    rand_idx2 = np.sort(np.random.randint(0, R_dim_2, batch_size2))
    sess.run(init, feed_dict={idx1: rand_idx, idx2: rand_idx2})

    # No ops are built inside the loop, so no per-iteration default graph is
    # needed and the graph stays the same size for the whole run.
    for i in range(no_epochs):
        rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
        # NOTE(review): this overwrites the rand_idx2 that was fed when cur_v
        # was initialized, while rand_idx (rows) is left untouched.
        rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)

        # change_u / change_v push the previous epoch's tmp_u / tmp_v into the
        # variables. NOTE(review): the relative order of the assigns, the
        # train step and the reads of u / v within one run() call is not
        # pinned down by this code -- confirm it does not matter here.
        _, this_cost, tmp_u, tmp_v, tmp_cur_u, tmp_cur_v, _, _ = sess.run(
            [train_step, cost, u, v, cur_u, cur_v, change_u, change_v],
            feed_dict={idx1: rand_idx1, idx2: rand_idx2,
                       last_u: tmp_u, last_v: tmp_v})
        print(this_cost)

        # find new values of U and current V but don't assign to them
        # (they are fed back through last_u / last_v on the next epoch)
        tmp_u = np.array(tmp_u)
        tmp_u[rand_idx] = tmp_cur_u
        tmp_v = np.array(tmp_v)
        tmp_v[:, rand_idx2] = tmp_cur_v