TensorFlow:如何单独保存一个SparseTensor

时间:2019-03-11 10:36:37

标签: python python-3.x tensorflow

生成SparseTensor需要花费我很长的时间:

# `dense` is an n x m matrix

sparse = coo_matrix(dense)  # almost instantaneous

# Coordinates of the non-zero entries, paired up as (row, col) tuples
row_ids = sparse.row.astype(np.int64).tolist()
col_ids = sparse.col.astype(np.int64).tolist()
sparse_indicies = list(zip(row_ids, col_ids))  # almost instantaneous

# Non-zero values cast to float32
type_casted = sparse.data.astype(np.float32)  # almost instantaneous

# takes ages
input_tensor = tf.SparseTensor(
    indices=sparse_indicies,
    values=type_casted,
    dense_shape=sparse.shape,
)

# save to file so I can load it to memory locally if it exists.

我如何单独保存它?我尝试了pickle和np.save(.npy),但都没有成功。

import os
import pickle

import numpy as np

# NOTE(review): `tf_sparse` is presumably the tf.SparseTensor built earlier
# (named `input_tensor` in the snippet above) -- confirm.
filename = os.path.expanduser('~/tmp/test.tmp')

# Attempt 1: pickle the SparseTensor directly.
with open(filename, 'wb') as f:
    pickle.dump(tf_sparse, f)
    # throws "TypeError: can't pickle _thread.RLock objects"


# Attempt 2: np.save on the SparseTensor object.
np.save(filename, tf_sparse)
# throws "TypeError: can't pickle _thread.RLock objects"

根据下面的答案,我编写了如下代码:

import os, tensorflow as tf, numpy as np
def write_tf_sparse(sparse, filename:str=None):
    '''
    Store a SciPy COO matrix as three TensorFlow constants in a GraphDef file.

    The constants are named 'Indices' (int64 (row, col) pairs), 'Values'
    (float32) and 'Shape' (int64), so they can later be re-imported with
    ``tf.import_graph_def`` and assembled into a ``tf.SparseTensor``.

    Arguments:
        sparse (scipy.sparse coo_matrix): matrix to serialize; its ``row``,
            ``col``, ``data`` and ``shape`` attributes are read.
        filename (str): full path to save the file. Default "~/Desktop/tf_sparse.pb"

    Returns:
        None
    '''
    # Only fall back to the default path when the caller did not supply one.
    if filename is None:
        filename = os.path.join(os.path.expanduser('~/Desktop'), 'tf_sparse.pb')

    # Build the (nnz, 2) index array directly in NumPy instead of zipping
    # Python lists; this also keeps the 2-D shape when there are no non-zeros.
    index_pairs = np.column_stack((sparse.row, sparse.col)).astype(np.int64)
    val_cast = sparse.data.astype(np.float32)

    # Use an independent graph so that only these three constants are stored.
    with tf.Graph().as_default() as graph:
        # The constants are registered in `graph` by name; the returned
        # tensors themselves are not needed here.
        tf.constant(index_pairs, dtype=tf.int64, name='Indices')
        tf.constant(val_cast, name='Values')
        tf.constant(sparse.shape, dtype=tf.int64, name='Shape')

        # Serialize graph
        graph_def = graph.as_graph_def()

    with open(filename, 'wb') as f:
        f.write(graph_def.SerializeToString())

def load_tf_sparse(filename:str=None):
    '''
    Load a GraphDef file holding 'Indices', 'Values' and 'Shape' constants
    and assemble them into a ``tf.SparseTensor`` in the current default graph.

    Arguments:
        filename (str): full path of the file to read. Default "~/Desktop/tf_sparse.pb"

    Returns:
        tf.SparseTensor built from the imported constants.
    '''
    if filename is None:
        filename = os.path.join(os.path.expanduser('~/Desktop'), 'tf_sparse.pb')

    # Read graph
    graph_def = tf.GraphDef()
    with open(filename, 'rb') as f:
        graph_def.MergeFromString(f.read())

    # Import tensors. The ':0' suffix requests each op's first output *Tensor*;
    # bare op names ('Indices') make import_graph_def return Operation objects,
    # which tf.SparseTensor rejects with
    # "TypeError: Can't convert Operation ... to Tensor".
    indices, values, shape = tf.import_graph_def(
        graph_def,
        return_elements=['Indices:0', 'Values:0', 'Shape:0'],
        name='SparseTensorImport')
    # Drop the reference to the parsed protobuf once it has been imported.
    del graph_def

    # Create sparse tensor
    return tf.SparseTensor(indices=indices, values=values, dense_shape=shape)

然后尝试一下:


[1]: write_tf_sparse(sparse) # takes a while as expected

[2]: tf_sparse = load_tf_sparse()
TypeError                                 Traceback (most recent call last)
<ipython-input-9-f0dee854ed2c> in <module>
----> 1 tf_sparse = load_tf_sparse()

<ipython-input-7-ffbf8b1df08d> in load_tf_sparse(filename)
     39     # print(indices, values, shape)
     40     # Create sparse tensor
---> 41     input_tensor = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
     42     return input_tensor

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/sparse_tensor.py in __init__(self, indices, values, dense_shape)
    119                         [indices, values, dense_shape]):
    120       indices = ops.convert_to_tensor(
--> 121           indices, name="indices", dtype=dtypes.int64)
    122       # Always pass as_ref=True because we want to be able to update
    123       # values later if it is a VariableOp.

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
    996       name=name,
    997       preferred_dtype=preferred_dtype,
--> 998       as_ref=False)
    999 
   1000 

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
   1092 
   1093     if ret is None:
-> 1094       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1095 
   1096     if ret is NotImplemented:

/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _operation_conversion_error(op, dtype, name, as_ref)
   5913   raise TypeError(("Can't convert Operation '%s' to Tensor "
   5914                    "(target dtype=%r, name=%r, as_ref=%r)") % (op.name, dtype,
-> 5915                                                                name, as_ref))
   5916 
   5917 

TypeError: Can't convert Operation 'SparseTensorImport/Indices' to Tensor (target dtype=tf.int64, name='indices', as_ref=False)

1 个答案:

答案 0 :(得分:1)

您可以尝试创建TensorFlow常量并将其存储在GraphDef文件中,然后在需要时加载该文件并将其导入到图中。我无法确定这是否会比您当前的方法更快。

要将常量导出到文件中,可以执行以下操作:

import numpy as np
import tensorflow as tf
from scipy.sparse import coo_matrix

# `dense` is the n x m matrix to store (defined elsewhere).

# In an independent graph to make sure only the data we want is stored
with tf.Graph().as_default():
    sparse = coo_matrix(dense)
    # (row, col) coordinates of the non-zero entries
    sparse_indicies = list(zip(
        sparse.row.astype(np.int64).tolist(),
        sparse.col.astype(np.int64).tolist()
    ))
    type_casted = (sparse.data).astype(np.float32)
    # Make TensorFlow constants; the names are what the importing side
    # looks up ('Indices:0', 'Values:0', 'Shape:0').
    indices = tf.constant(sparse_indicies, name='Indices', dtype=tf.int64)
    values = tf.constant(type_casted, name='Values')
    shape = tf.constant(sparse.shape, dtype=tf.int64, name='Shape')
    # Serialize graph
    graph_def = tf.get_default_graph().as_graph_def()
    with open('sparse_tensor_data.pb', 'wb') as f:
        f.write(graph_def.SerializeToString())

您可以从其他地方将其导入:

import tensorflow as tf

# Parse the serialized GraphDef back from disk
graph_def = tf.GraphDef()
with open('sparse_tensor_data.pb', 'rb') as f:
    graph_def.MergeFromString(f.read())

# Pull the three stored constants into the current graph as tensors
indices, values, shape = tf.import_graph_def(
    graph_def,
    name='SparseTensorImport',
    return_elements=['Indices:0', 'Values:0', 'Shape:0'],
)
# The parsed protobuf is no longer needed once imported
del graph_def

# Assemble the sparse tensor from its components
input_tensor = tf.SparseTensor(
    indices=indices,
    values=values,
    dense_shape=shape,
)