我花了一段时间才能生成SparseTensor
# dense is an n x m matrix
sparse = coo_matrix(dense) # almost instantaneous
# for legibility
sparse_indicies = list(zip(
sparse.row.astype(np.int64).tolist(),
sparse.col.astype(np.int64).tolist()
)) # almost instantaneous
type_casted = (sparse.data).astype(np.float32) # almost instantaneous
# takes ages
input_tensor = tf.SparseTensor(
indices = sparse_indicies,
values = type_casted,
dense_shape = sparse.shape
)
# save to file so I can load it to memory locally if it exists.
我如何单独保存它?我尝试了泡菜和npy都没有成功。
import pickle, numpy as np
filename = os.path.expanduser('~/tmp/test.tmp')
with open(fn, 'wb') as f:
pickle.dump(tf_sparse, f)
# throws "TypeError: can't pickle _thread.RLock objects"
np.save(fn, tf_sparse)
# throws "TypeError: can't pickle _thread.RLock objects"
import os, tensorflow as tf, numpy as np
def write_tf_sparse(sparse, filename:str=None):
'''
Arguments:
sparse (scipy.sparse coo_matrix)
filename (str): full path to save the file. Default "~/Desktop/tf_sparse.pb"
Returns:
None
'''
filename = os.path.join(os.path.expanduser('~/Desktop'), 'tf_sparse.pb')
with tf.Graph().as_default():
row_indices = sparse.row.astype(np.int64).tolist()
col_indices = sparse.col.astype(np.int64).tolist()
sparse_indicies = list(zip(row_indices, col_indices))
val_cast = (sparse.data).astype(np.float32)
# Make TensorFlow constants
indices = tf.constant(sparse_indicies, name='Indices')
values = tf.constant(val_cast, name='Values')
shape = tf.constant(sparse.shape, dtype=tf.int64, name='Shape')
# Serialize graph
graph_def = tf.get_default_graph().as_graph_def()
with open(filename, 'wb') as f:
f.write(graph_def.SerializeToString())
def load_tf_sparse(filename:str=None):
filename = os.path.join(os.path.expanduser('~/Desktop'), 'tf_sparse.pb') if filename is None else filename
# Read graph
graph_def = tf.GraphDef()
with open(filename, 'rb') as f:
graph_def.MergeFromString(f.read())
# Import tensors
indices, values, shape = tf.import_graph_def(
graph_def, return_elements=['Indices', 'Values', 'Shape'], name='SparseTensorImport')
del graph_def
# print(indices, values, shape)
# Create sparse tensor
input_tensor = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
return input_tensor
然后尝试一下:
[1]: write_tf_sparse(sparse) # takes a while as expected
[2]: tf_sparse = load_tf_sparse()
TypeError Traceback (most recent call last)
<ipython-input-9-f0dee854ed2c> in <module>
----> 1 tf_sparse = load_tf_sparse()
<ipython-input-7-ffbf8b1df08d> in load_tf_sparse(filename)
39 # print(indices, values, shape)
40 # Create sparse tensor
---> 41 input_tensor = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
42 return input_tensor
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/sparse_tensor.py in __init__(self, indices, values, dense_shape)
119 [indices, values, dense_shape]):
120 indices = ops.convert_to_tensor(
--> 121 indices, name="indices", dtype=dtypes.int64)
122 # Always pass as_ref=True because we want to be able to update
123 # values later if it is a VariableOp.
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
996 name=name,
997 preferred_dtype=preferred_dtype,
--> 998 as_ref=False)
999
1000
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
1092
1093 if ret is None:
-> 1094 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1095
1096 if ret is NotImplemented:
/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _operation_conversion_error(op, dtype, name, as_ref)
5913 raise TypeError(("Can't convert Operation '%s' to Tensor "
5914 "(target dtype=%r, name=%r, as_ref=%r)") % (op.name, dtype,
-> 5915 name, as_ref))
5916
5917
TypeError: Can't convert Operation 'SparseTensorImport/Indices' to Tensor (target dtype=tf.int64, name='indices', as_ref=False)
答案 0 :(得分:1)
您可以尝试制作TensorFlow常数并将其存储在GraphDef
文件中,然后将其加载并在需要时将其导入到图形中。我无法确定这是否会比您当前的方法快。
要将常量导出到文件中,可以执行以下操作:
import tensorflow as tf
# In an independent graph to make sure only the data we want is stored
with tf.Graph().as_default():
sparse = coo_matrix(dense)
sparse_indicies = list(zip(
sparse.row.astype(np.int64).tolist(),
sparse.col.astype(np.int64).tolist()
))
type_casted = (sparse.data).astype(np.float32)
# Make TensorFlow constants
indices = tf.constant(sparse_indicies, name='Indices', dtype=tf.int64)
values = tf.constant(type_casted, name='Values')
shape = tf.constant(sparse.shape, dtype=tf.int64, name='Shape')
# Serialize graph
graph_def = tf.get_default_graph().as_graph_def()
with open('sparse_tensor_data.pb', 'wb') as f:
f.write(graph_def.SerializeToString())
您可以从其他地方将其导入:
import tensorflow as tf
# Read graph
graph_def = tf.GraphDef()
with open('sparse_tensor_data.pb', 'rb') as f:
graph_def.MergeFromString(f.read())
# Import tensors
indices, values, shape = tf.import_graph_def(
graph_def, return_elements=['Indices:0', 'Values:0', 'Shape:0'], name='SparseTensorImport')
del graph_def
# Create sparse tensor
input_tensor = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)