假设我正在使用 Dataset 和 feature_columns 生成特征张量,
我想实现下面注释中描述的操作:根据另一个标量张量 indicator 的值,
从解析出的多个特征张量中选出其中一个:
def _parse_function(example_proto):
    """Parse one serialized tf.train.Example into a dict of scalar tensors.

    Args:
        example_proto: A serialized Example, as passed by ``Dataset.map``.

    Returns:
        A dict with the keys "f_0", "f_1", "f_2" and "indicator", each
        mapped to a scalar int64 tensor (0 when the key is missing).
    """
    # Integer features must be declared as tf.int64: Example parsing only
    # supports float32, int64 and string, so tf.int32 is rejected.
    features = {
        "f_0": tf.FixedLenFeature((), tf.int64, default_value=0),
        "f_1": tf.FixedLenFeature((), tf.int64, default_value=0),
        "f_2": tf.FixedLenFeature((), tf.int64, default_value=0),
        "indicator": tf.FixedLenFeature((), tf.int64, default_value=0),
    }
    parsed_features = tf.parse_single_example(example_proto, features)
    # What I want to do (pseudo-code; `i` is a tensor here, so plain Python
    # string formatting cannot see its runtime value):
    # i = parsed_features['indicator']
    # parsed_features["f"] = parsed_features["f_{}".format(i)]
    return parsed_features
# Read serialized records from `filenames` and parse each one.
dataset = tf.data.TFRecordDataset(filenames).map(_parse_function)

# "f" is the feature selected from [f_0, f_1, f_2].
feature_column = tf.feature_column.numeric_column(key="f")
我想知道如何在tensorflow中实现它。
答案 0 :(得分:0)
有两种方法可以做到这一点:使用查找表,或者重组输入数据。下面分别给出示例。
使用查找表:用解析出的特征创建一个 HashTable,然后使用 table.lookup
取得所需的特征。
可运行的示例:
import tensorflow as tf
# Create example proto
def _int_feature(value):
    """Wrap a list of ints in a tf.train.Feature backed by an Int64List."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)
# Three candidate features plus the indicator that picks one of them.
_example_features = {
    "f_0": _int_feature([10]),
    "f_1": _int_feature([20]),
    "f_2": _int_feature([30]),
    "indicator": _int_feature([1]),
}
e = tf.train.Example(features=tf.train.Features(feature=_example_features))
# Serialized form, as it would be stored in a TFRecord file.
e_proto = e.SerializeToString()
def _parse_function(example_proto):
    """Parse a serialized Example and add the indicator-selected value as "f".

    The candidate features "f_0", "f_1" and "f_2" are loaded into a
    HashTable keyed by their names; the key "f_<indicator>" is then looked
    up in that table.

    Args:
        example_proto: A serialized tf.train.Example.

    Returns:
        The parsed feature dict with an extra "f" entry. The table's default
        value of -1 is returned when the built key is not in the table.
    """
    candidate_keys = ["f_0", "f_1", "f_2"]
    spec = {
        name: tf.FixedLenFeature((), tf.int64, default_value=0)
        for name in candidate_keys + ["indicator"]
    }
    parsed_features = tf.parse_single_example(example_proto, spec)

    # Key/value table: feature name -> parsed scalar value.
    initializer = tf.contrib.lookup.KeyValueTensorInitializer(
        candidate_keys,
        [parsed_features[name] for name in candidate_keys])
    table = tf.contrib.lookup.HashTable(initializer, default_value=-1)

    # Build the string key "f_<indicator>" as a tensor and look it up.
    selected_key = "f_" + tf.as_string(parsed_features["indicator"])
    parsed_features["f"] = table.lookup(selected_key)
    return parsed_features
# Build the parsing graph for the single serialized example, then run it.
features = _parse_function(e_proto)
with tf.Session() as sess:
    # Lookup tables must be initialized before use. tf.tables_initializer()
    # is the replacement for the deprecated tf.initialize_all_tables().
    sess.run(tf.tables_initializer())
    print(sess.run(features))
输出:
{'indicator': 1, 'f_0': 10, 'f_1': 20, 'f_2': 30, 'f': 20}
重组输入数据:将所有特征合并存入同一个特征张量中,然后用 indicator 作为下标对该张量进行索引。
可运行的示例:
import tensorflow as tf
# Create example proto
def _int_feature(value):
    """Build a tf.train.Feature whose int64_list holds the given ints."""
    packed = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=packed)
# All candidate values live in one feature; the indicator is the index.
_example_features = {
    "all_features": _int_feature([10, 20, 30]),
    "indicator": _int_feature([1]),
}
e = tf.train.Example(features=tf.train.Features(feature=_example_features))
# Serialized form, as it would be stored in a TFRecord file.
e_proto = e.SerializeToString()
def _parse_function(example_proto):
    """Parse a serialized Example and pick one element of "all_features".

    Args:
        example_proto: A serialized tf.train.Example containing a length-3
            int64 feature "all_features" and a scalar int64 "indicator"
            (0 when missing).

    Returns:
        The parsed feature dict with an extra "f" entry holding
        ``all_features[indicator]``.
    """
    spec = {
        "all_features": tf.FixedLenFeature([3], tf.int64),
        "indicator": tf.FixedLenFeature((), tf.int64, default_value=0),
    }
    parsed_features = tf.parse_single_example(example_proto, spec)

    # Use the indicator tensor directly as the index into the vector.
    position = parsed_features["indicator"]
    candidates = parsed_features["all_features"]
    parsed_features["f"] = candidates[position]
    return parsed_features
# Build the parsing graph for the single serialized example and evaluate it.
features = _parse_function(e_proto)
with tf.Session() as sess:
    evaluated = sess.run(features)
    print(evaluated)
输出:
{'indicator': 1, 'all_features': array([10, 20, 30]), 'f': 20}