I have been reading a number of questions about TensorFlow being slow, but most of them deal with slightly different issues: they compare GPU against CPU speed, only cover inference, or simply never got an answer.
As far as I understand, TensorFlow will not shine with a simple single-layer model such as logistic regression on a large sparse dataset. Certainly not on a GPU, where the overhead would probably be too high. But I would expect it to run fine on a CPU, since I only want to use TensorFlow as a library to define my own optimization problems symbolically and solve them reasonably fast. So, before I start experimenting with my own models and ideas, I first want to get plain logistic regression to run in a reasonable amount of time, which is currently not the case.
I have a dataset with more than 10 million samples and 5 million features (very sparse, roughly 10 non-zero features per sample), and it is heavily imbalanced, with only about 0.01-0.1% positive samples. Below you can find the code that runs it in TensorFlow and compares it against scikit-learn's logistic SGD solver. The difference between the two is staggering: scikit-learn finds a decent solution within seconds, whereas TensorFlow needs about four hours to finish a single epoch!
Question: why? What am I doing wrong?
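For reference, a stand-in for the data with roughly the same shape can be generated with scipy.sparse (the sizes below are scaled down and purely illustrative, not my actual data):

import numpy as np
import scipy.sparse as sp

# Hypothetical stand-in: sparse CSR matrix with ~10 non-zeros per row and a
# heavily imbalanced binary label column (~0.05% positives). Scaled down from
# the real 10M x 5M data so it fits comfortably in memory.
n_samples, n_features, nnz_per_row = 1000000, 5000000, 10
rng = np.random.RandomState(0)
rows = np.repeat(np.arange(n_samples), nnz_per_row)
cols = rng.randint(0, n_features, size=n_samples * nnz_per_row)
vals = np.ones(n_samples * nnz_per_row)
data = sp.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_features))
labels = (rng.rand(n_samples, 1) < 0.0005).astype(np.float64)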
import logging
import time

import numpy as np
import tensorflow as tf


def tf_fit(data, labels, learning_rate=0.0005, lmbd=0.001, nepochs=100, minibatch_size=32, tol=0.0001):
    X_train = data
    Y_train = labels
    m = X_train.shape[0]
    n_x = X_train.shape[1]
    n_y = Y_train.shape[1]
    num_minibatches = int(m / minibatch_size)
    epoch_checkpoints = set(np.array(np.linspace(0, 1, 101) * nepochs, dtype=int))
    batch_checkpoints = set(np.array(np.linspace(0, 1, 101) * num_minibatches, dtype=int))

    # build the graph: placeholders, parameters, forward pass and loss
    X, Y = tf_create_input_placeholders(n_x, n_y)
    W, b = tf_initialize_parameters(n_x)
    Z, ZSigmoid = tf_add_forward_propagation(X, W, b)
    loss = tf_add_loss(Z, Y, W, lmbd1=lmbd)

    init_network = tf.global_variables_initializer()
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    optimizer = adam.minimize(loss)
    init_optimizer = tf.variables_initializer(adam.variables())

    prev_loss = np.inf
    with tf.Session() as session:
        session.run(init_network)
        session.run(init_optimizer)
        for epoch in range(nepochs):
            epoch_loss = 0.
            start_time = time.time()
            # mini_batch_indices (not shown) returns one index array per minibatch
            minibatch_indices = mini_batch_indices(m, minibatch_size=minibatch_size)
            for batchi, indices in enumerate(minibatch_indices):
                minibatch_X, minibatch_Y = tf_get_sparse_tensor(X_train[indices, :]), Y_train[indices, :]
                _, minibatch_cost = session.run([optimizer, loss], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_loss += minibatch_cost / num_minibatches
                if batchi in batch_checkpoints:
                    logging.info("processed {}% of the batches - time {}".format(100 * float(batchi) / num_minibatches, time.time() - start_time))
            if epoch in epoch_checkpoints:
                logging.info("Loss for epoch {}: {}".format(epoch, epoch_loss))
            if epoch_loss > prev_loss - tol:
                break
            prev_loss = epoch_loss  # remember this epoch's loss for the early-stopping check
        parameters = {
            "W": W.eval(session=session),
            "b": b.eval(session=session)
        }
    return parameters
Sparse matrix indexing probably slows things down a little, but most of the time is spent inside the run method:
   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.036    0.036  304.412  304.412 LogisticRegression.py:1(<module>)
     5471    0.086    0.000    1.241    0.000 LogisticRegression.py:113(tf_get_sparse_tensor)
        1    0.247    0.247  292.775  292.775 LogisticRegression.py:121(tf_fit)
        1    0.000    0.000    0.021    0.021 LogisticRegression.py:177(tf_create_input_placeholders)
        1    0.000    0.000    1.067    1.067 LogisticRegression.py:182(tf_initialize_parameters)
        1    0.000    0.000    0.013    0.013 LogisticRegression.py:199(tf_add_forward_propagation)
        1    0.000    0.000    0.049    0.049 LogisticRegression.py:205(tf_add_loss)
        1    0.000    0.000    0.027    0.027 LogisticRegression.py:46(<lambda>)
        1    0.178    0.178    0.567    0.567 LogisticRegression.py:95(mini_batch_indices)
     5473    0.125    0.000   48.280    0.009 csr.py:236(__getitem__)
     5473    0.569    0.000  243.989    0.045 session.py:1052(_run)
    10942    0.047    0.000    0.134    0.000 session.py:1055(_feed_fn)
     5471    0.046    0.000    0.069    0.000 session.py:111(<lambda>)
    10944    0.011    0.000    0.011    0.000 session.py:126(<lambda>)
     5471    0.005    0.000    0.005    0.000 session.py:127(<lambda>)
     5473    0.075    0.000  238.842    0.044 session.py:1277(_do_run)
    27357    0.053    0.000    0.308    0.000 session.py:1305(<genexpr>)
     5473    0.031    0.000  238.318    0.044 session.py:1309(_run_fn)
     5473    0.018    0.000  238.336    0.044 session.py:1326(_do_call)
     5473    0.022    0.000    0.375    0.000 session.py:1343(_extend_graph)
     5473    0.052    0.000    0.240    0.000 session.py:1385(_update_with_movers)
        1    0.001    0.001    0.055    0.055 session.py:14(<module>)
     5473    0.090    0.000  237.912    0.043 session.py:1411(_call_tf_sessionrun)
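A profile of this shape can be reproduced with Python's built-in cProfile module, roughly as follows (the exact invocation used for the numbers above is an assumption):

import cProfile
import pstats

# profile a single training run and show the functions with the largest cumulative time
cProfile.run("tf_fit(data, labels, nepochs=1)", "tf_fit.prof")
pstats.Stats("tf_fit.prof").sort_stats("cumulative").print_stats(30)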
def tf_get_sparse_tensor(data):
    # convert a scipy sparse matrix into a tf.SparseTensorValue that can be
    # fed into the sparse placeholder X
    data = data.tocoo()
    rowindices = data.row.reshape((len(data.row), 1))
    colindices = data.col.reshape((len(data.col), 1))
    indices = np.concatenate((rowindices, colindices), axis=1)
    stv = tf.SparseTensorValue(indices=indices, values=data.data, dense_shape=data.shape)
    return stv
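On a tiny matrix the conversion produces, for example (hypothetical 2x4 input, just to show what gets fed into the sparse placeholder):

import numpy as np
import scipy.sparse

# 2x4 CSR matrix with three non-zero entries
small = scipy.sparse.csr_matrix(np.array([[0., 3., 0., 0.],
                                          [1., 0., 0., 2.]]))
stv = tf_get_sparse_tensor(small)
# stv.indices     -> [[0, 1], [1, 0], [1, 3]]
# stv.values      -> [3., 1., 2.]
# stv.dense_shape -> (2, 4)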
Further down come the definitions of the other methods:
# evaluation_sign is a small helper defined elsewhere in the module (not shown)
loss_types = {
    "sigmoid": lambda labels, predictions: tf.sigmoid(evaluation_sign(labels, predictions)),
    "softplus": lambda labels, predictions: tf.nn.softplus(evaluation_sign(labels, predictions)),
    "relu": lambda labels, predictions: tf.nn.relu(evaluation_sign(labels, predictions)),
    "sigmoid_entropy": lambda labels, predictions: tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=predictions)
}
def tf_create_input_placeholders(X_dim, Y_dim):
    X = tf.sparse_placeholder(tf.float64, shape=(None, X_dim), name="X")
    Y = tf.placeholder(tf.float64, shape=(None, Y_dim), name="Y")
    return X, Y
def tf_initialize_parameters(input_dimension=None, seed=None):
    if seed:
        xavier_initializer = tf.contrib.layers.xavier_initializer(seed=seed)
    else:
        xavier_initializer = tf.contrib.layers.xavier_initializer()
    W1 = tf.get_variable("W1",
                         [input_dimension, 1],
                         dtype=tf.float64,
                         initializer=xavier_initializer)
    b1 = tf.get_variable("b1",
                         [1, 1],
                         dtype=tf.float64,
                         initializer=tf.zeros_initializer())
    return W1, b1
def tf_add_forward_propagation(X, W, b):
    Z = tf.add(tf.sparse_tensor_dense_matmul(X, W, adjoint_a=False, adjoint_b=False), b, name="Z")
    ZSigmoid = tf.sigmoid(Z, name="ZSigmoid")
    return Z, ZSigmoid
def tf_add_loss(Z, Y, W, lmbd1=0.001, loss_type="sigmoid_entropy"):
    regularization_term = lmbd1 * tf.reduce_sum(tf.square(W))
    sample_loss = loss_types[loss_type](Y, Z)
    avg_sample_loss = tf.reduce_mean(sample_loss, keepdims=False)
    return tf.add(avg_sample_loss, regularization_term, name="loss")
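In other words, with the default loss_type the objective being minimized is L2-regularized logistic loss: mean(sigmoid_cross_entropy_with_logits(Y, Z)) + lmbd1 * sum(W**2), with Z = XW + b coming from the forward pass above.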
The scikit-learn code I am comparing against is the following:
from sklearn import linear_model


def sk_fit(data, labels, learning_rate=0.0005, alpha=0.001, nepochs=100, minibatch_size=32, tol=0.0001):
    params = {
        "alpha": alpha,
        "class_weight": "balanced",
        "loss": "log",
        "penalty": "l2",
        "max_iter": nepochs,
        "tol": tol,
        "learning_rate": "constant",
        "eta0": learning_rate,
        "verbose": 10
    }
    classifier = linear_model.SGDClassifier(**params)
    classifier.fit(data, labels)
    return {
        "W": classifier.coef_,
        "b": classifier.intercept_
    }
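For completeness, this is roughly how I invoke and time the two fits against each other (a hedged sketch: only the function names come from the snippets above, the driver code itself is illustrative; note that sk_fit expects a 1-D label vector):

import time

t0 = time.time()
sk_params = sk_fit(data, labels.ravel(), nepochs=5)
print("scikit-learn SGD took {:.1f}s".format(time.time() - t0))

t0 = time.time()
tf_params = tf_fit(data, labels, nepochs=1)
print("tensorflow Adam took {:.1f}s".format(time.time() - t0))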