向估算器(Estimator)提供由 SparseTensor 组成的 Tensor

时间:2017-10-15 05:39:56

标签: tensorflow

为了入门 TensorFlow,我想训练一个预测游戏比赛结果的模型。它有三个特征:队伍0的5个英雄、队伍1的5个英雄,以及地图。获胜方是标签,取值为0或1。我想把队伍和地图表示为 SparseTensor:每支队伍从71个可选英雄中选出5个;同样,地图是从13张可选地图中选出1张。

import tensorflow as tf
import packunpack as source
import tempfile
from collections import namedtuple

# One game: the hero lists of both teams, the map name, and the winning team (0 or 1).
GameRecord = namedtuple('GameRecord', 'team_0 team_1 game_map winner')


def parse(line):
    """Turn one tab-separated text line into a ``GameRecord``.

    Column 0 is ignored, column 1 is the map name, columns 2 and 3 are the
    comma-separated hero lists of team 0 and team 1, and column 4 is the
    winner as an integer.

    Raises:
        IndexError: if the line has fewer than five tab-separated columns.
        ValueError: if the winner column is not an integer.
    """
    fields = line.rstrip().split("\t")
    map_name = fields[1]
    first_team = fields[2].split(",")
    second_team = fields[3].split(",")
    outcome = int(fields[4])
    return GameRecord(first_team, second_team, map_name, outcome)

def conjugate(record):
    """Return the mirror image of *record*: teams swapped and the winner flipped.

    A winner of 1 becomes 0; any other winner value becomes 1.
    """
    if record.winner == 1:
        flipped_winner = 0
    else:
        flipped_winner = 1
    # Positional order follows the GameRecord fields: team_0, team_1, game_map, winner.
    return GameRecord(record.team_1, record.team_0, record.game_map, flipped_winner)

def sparse_team(team):
    """Encode a team as an n-hot ``tf.SparseTensor`` over the hero vocabulary.

    Args:
        team: iterable of hero names; each one is passed to ``source.encode_hero``.

    Returns:
        A rank-1 ``tf.SparseTensor`` whose ``dense_shape`` is
        ``[len(source.heroes_array)]`` and which holds a ``1`` at every
        encoded hero position.
    """
    # tf.SparseTensor expects its indices in row-major (ascending) order; the
    # constructor does not check this but most sparse ops assume it, so sort
    # instead of relying on the order in which heroes were listed.
    # NOTE(review): assumes source.encode_hero returns a plain integer index - confirm.
    hero_ids = sorted(source.encode_hero(hero) for hero in team)
    indices = [[hero_id] for hero_id in hero_ids]
    return tf.SparseTensor(
        indices=indices,
        values=[1] * len(indices),
        dense_shape=[len(source.heroes_array)])

def sparse_map(map_name):
    """Encode a single map as a one-hot ``tf.SparseTensor`` of length ``len(source.maps_array)``."""
    # NOTE(review): the index is produced by source.encode_hero even though the
    # dense shape comes from source.maps_array - a map-specific encoder may have
    # been intended; confirm against the packunpack module.
    map_index = source.encode_hero(map_name)
    return tf.SparseTensor(
        indices=[[map_index]],
        values=[1],
        dense_shape=[len(source.maps_array)])

def make_input_fn(filename, shuffle = True, add_conjugate_games = True):
    """Return an input function that loads every game record from *filename*.

    Args:
        filename: path of the game file; each line is decoded with ``parse``.
        shuffle: not referenced anywhere in this function.
        add_conjugate_games: when true, each record is followed by its
            ``conjugate`` (teams swapped, winner flipped).

    Returns:
        ``_fn``, a zero-argument callable intended to return a
        ``(features, labels)`` pair for ``m.train``.
    """
    def _fn():
        records = []
        with open(filename, "r") as raw:
            i = 0  # never read afterwards
            for line in raw:
                record = parse(line)
                records.append(record)
                if add_conjugate_games:
                    # Since 0 and 1 are arbitrary team labels, learn and test the
                    # conjugate game whenever learning the original inference.
                    records.append(conjugate(record))

        print("Making team 0")
        # NOTE: this is the statement that fails - tf.constant raises TypeError
        # because it cannot convert a list of SparseTensor objects to a Tensor.
        team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))
        print("Making team 1")
        team_1s = tf.constant(list(map(lambda r: sparse_team(r.team_1), records)))
        print("making maps")
        maps = tf.constant(list(map(lambda r: sparse_map(r.game_map), records)))
        print("Making winners")
        # One single-element label tensor per record, wrapped again by tf.constant.
        winners = tf.constant(list(map(lambda r: tf.constant([r.winner]), records)))

        # Feature keys match the names given to the feature columns.
        return {
                    "team_0": team_0s,
                    "team_1": team_1s,
                    "game_map": maps,
                }, winners
        # TODO: unfinished - help wanted to complete this function.

    return _fn

def _embedded_vocabulary_column(key, vocabulary):
    """Embed the string feature *key* in as many dimensions as *vocabulary* has entries."""
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(key, vocabulary)
    return tf.feature_column.embedding_column(categorical, dimension=len(vocabulary))


# One embedded column per input feature; the keys are the ones the input
# function uses in its feature dictionary.
team_0 = _embedded_vocabulary_column("team_0", source.heroes_array)
team_1 = _embedded_vocabulary_column("team_1", source.heroes_array)
game_map = _embedded_vocabulary_column("game_map", source.maps_array)

# model_dir is a fresh temporary directory created on every run.
model_dir = tempfile.mkdtemp()
m = tf.estimator.DNNClassifier(
    hidden_units=[1024, 512, 256],
    feature_columns=[team_0, team_1, game_map],
    model_dir=model_dir)

def main():
    """Train the module-level classifier ``m`` for 100 steps on ``tiny.txt``."""
    train_input_fn = make_input_fn("tiny.txt")
    m.train(input_fn=train_input_fn, steps=100)


if __name__ == "__main__":
    main()

team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))

上面这一行执行时失败。

我很难弄清楚 input_fn 究竟应该返回什么,因为我在文档中找到的所有示例最终都会调用某个 pandas 或 numpy 的辅助函数,而我并不熟悉这些框架。我的理解是,字典中的每个值都应该是一个 Tensor,包含某一特征在所有样本上的取值。我的每个样本都是一个 SparseTensor;为了配合 DNNClassifier,我只想把它们嵌入(embed)为稠密表示。

我确信我目前的理解(心智模型)存在严重偏差,非常感谢任何直接的帮助。

错误输出:

python3 estimator.py
Making team 0
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in make_tensor_proto
    str_values = [compat.as_bytes(x) for x in proto_values]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in <listcomp>
    str_values = [compat.as_bytes(x) for x in proto_values]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/compat.py", line 65, in as_bytes
    (bytes_or_text,))
TypeError: Expected binary or unicode string, got <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8
b4d7aef0>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "estimator.py", line 79, in <module>
    main()
  File "estimator.py", line 76, in main
    m.train(input_fn=make_input_fn("tiny.txt"), steps = 100)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 302, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 709, in _train_model
    input_fn, model_fn_lib.ModeKeys.TRAIN)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 577, in _get_features_and_l
abels_from_input_fn
    result = self._call_input_fn(input_fn, mode)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/estimator/estimator.py", line 663, in _call_input_fn
    return input_fn(**kwargs)
  File "estimator.py", line 44, in _fn
    team_0s = tf.constant(list(map(lambda r: sparse_team(r.team_0), records)))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 208, in constant
    value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 472, in make_tensor_proto
    "supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [<tensorflow.python.framework.sparse_tenso
r.SparseTensor object at 0x7fe8b4d7aef0>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8b4d7af28
>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe8b4d7af60>, <tensorflow.python.framework.sparse_
tensor.SparseTensor object at 0x7fe8b4d7aeb8> ... ]

1 个答案:

答案 0 :(得分:2)

最终我发现,在 input_fn 中把文本表示转换为稀疏向量是不必要的。相反,我只需告诉模型输入是一个字符串数组,它自己知道如何将其转换为“one-hot”或 n-hot 向量,以及如何再嵌入为稠密向量。

import os
import tempfile
from collections import namedtuple

import tensorflow as tf

import packunpack as source

# A single match: both hero line-ups, the map played, and the winning team (0 or 1).
GameRecord = namedtuple('GameRecord', 'team_0 team_1 game_map winner')


def parse(line):
    """Build a ``GameRecord`` from one tab-separated line of the game file.

    The first column is skipped; the following columns are the map name, the
    comma-separated heroes of team 0, the comma-separated heroes of team 1,
    and the integer winner.

    Raises:
        IndexError: if fewer than five tab-separated columns are present.
        ValueError: if the winner column cannot be converted to ``int``.
    """
    columns = line.rstrip().split("\t")
    map_played = columns[1]
    heroes_0 = columns[2].split(",")
    heroes_1 = columns[3].split(",")
    winning_team = int(columns[4])
    return GameRecord(heroes_0, heroes_1, map_played, winning_team)

def conjugate(record):
    """Return *record* seen from the other side: teams exchanged, winner inverted.

    The map is unchanged. A winner of 1 maps to 0; every other value maps to 1.
    """
    inverted_winner = 1
    if record.winner == 1:
        inverted_winner = 0
    # Arguments follow the GameRecord field order: team_0, team_1, game_map, winner.
    return GameRecord(record.team_1, record.team_0, record.game_map, inverted_winner)

def make_input_fn(filename, batch_size=128, shuffle=True, add_conjugate_games=True, epochs=1):
    """Build an Estimator ``input_fn`` that feeds every game in *filename* at once.

    Args:
        filename: path of the tab-separated game file; each line is read with
            ``parse``.
        batch_size: accepted for interface compatibility; not used (no batching
            is implemented).
        shuffle: accepted for interface compatibility; not used.
        add_conjugate_games: when true, every game is followed by its
            ``conjugate`` (teams swapped, winner flipped), because the 0/1 team
            labels are arbitrary.
        epochs: accepted for interface compatibility; not used.

    Returns:
        A zero-argument function returning ``(features, labels)``. ``features``
        maps ``"team_0"``, ``"team_1"`` and ``"game_map"`` to constant tensors
        built from the parsed strings; ``labels`` is a constant tensor with one
        ``[winner]`` row per record.
    """
    def _fn():
        records = []
        with open(filename, "r") as raw:
            for line in raw:
                record = parse(line)
                records.append(record)
                if add_conjugate_games:
                    records.append(conjugate(record))

        # The raw strings are passed through unchanged: the vocabulary-list
        # feature columns perform the string -> id lookup and the embedding,
        # so no SparseTensor has to be built here.
        features = {
            "team_0": tf.constant([r.team_0 for r in records]),
            "team_1": tf.constant([r.team_1 for r in records]),
            "game_map": tf.constant([r.game_map for r in records]),
        }
        labels = tf.constant([[r.winner] for r in records])
        return features, labels

    return _fn

# Each string feature is looked up in its vocabulary list and embedded in as
# many dimensions as the vocabulary has entries.
team_0 = tf.feature_column.embedding_column(
    tf.feature_column.categorical_column_with_vocabulary_list("team_0", source.heroes_array),
    dimension=len(source.heroes_array))
team_1 = tf.feature_column.embedding_column(
    tf.feature_column.categorical_column_with_vocabulary_list("team_1", source.heroes_array),
    dimension=len(source.heroes_array))
game_map = tf.feature_column.embedding_column(
    tf.feature_column.categorical_column_with_vocabulary_list("game_map", source.maps_array),
    dimension=len(source.maps_array))

model_dir = "DNNClassifierModel_00"
# os.mkdir raises FileExistsError on every run after the first; exist_ok lets
# the script be re-run against the same model directory.
os.makedirs(model_dir, exist_ok=True)
m = tf.estimator.DNNClassifier(
    model_dir=model_dir,
    hidden_units=[1024, 512, 256],
    feature_columns=[team_0, team_1, game_map])

def main(train_steps=100, eval_steps=1):
    """Train the classifier, evaluate it, and print the resulting metrics.

    Args:
        train_steps: number of training steps. The input functions return
            constant tensors that never signal end-of-input, so ``train``
            needs an explicit bound to terminate.
        eval_steps: number of evaluation steps. Every step sees the whole
            validation file, so a single step covers it; without a bound
            ``evaluate`` would not terminate either.
    """
    m.train(input_fn=make_input_fn("training.txt"), steps=train_steps)
    results = m.evaluate(input_fn=make_input_fn("validation.txt"), steps=eval_steps)
    print("model directory = %s" % model_dir)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))


if __name__ == "__main__":
    main()

请注意,此代码尚不完美。我需要添加批处理。