Question

我正在尝试将一个退出函数应用于TensorFlow数据集，但是在引用功能列的正确方法方面遇到了一些问题。如果只有一个输入，则该功能将按预期工作。

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences


text = ["I played it a while but it was alright. The steam was a bit of trouble."
        " The more they move these game to steam the more of a hard time I have"
        " activating and playing a game. But in spite of that it was fun, I "
        "liked it. Now I am looking forward to anno 2205 I really want to "
        "play my way to the moon.",
        "This game is a bit hard to get the hang of, but when you do it's great."]


df = pd.DataFrame({"text": text})

dataset = (
    tf.data.Dataset.from_tensor_slices(
        tf.cast(df.text.values, tf.string)))

tokenizer = tfds.features.text.Tokenizer()

lowercase = True
vocabulary = Counter()
for text in dataset:
    if lowercase:
        text = tf.strings.lower(text)
    tokens = tokenizer.tokenize(text.numpy())
    vocabulary.update(tokens)

vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))

max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
                                              lowercase=True,
                                              tokenizer=tokenizer)

def encode(text):
    sent_list = []
    sents = tf.strings.split(text, sep=". ").numpy()
    if max_sent:
        sents = sents[:max_sent]
    for sent in sents:
        text_encoded = encoder.encode(sent.decode())
        if max_len:
            text_encoded = text_encoded[:max_len]
            sent_list.append(pad_sequences([text_encoded], max_len))
    if len(sent_list) < 5:
        sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
    return tf.concat(sent_list, axis=0)

def encode_pyfn(text):
    [text_encoded] = tf.py_function(encode, inp=[text], Tout=[tf.int32])
    return text_encoded

dataset = dataset.map(encode_pyfn).batch(batch_size=2)

next(iter(dataset))

但是当我尝试将同一功能应用于由make_csv_dataset产生的单个功能列时：

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

text = ["I played it a while but it was alright. The steam was a bit of trouble."
        " The more they move these game to steam the more of a hard time I have"
        " activating and playing a game. But in spite of that it was fun, I "
        "liked it. Now I am looking forward to anno 2205 I really want to "
        "play my way to the moon.",
        "This game is a bit hard to get the hang of, but when you do it's great."]
target = [0, 1]
gender = [1, 0]
age = [45, 35]



df = pd.DataFrame({"text": text,
                   "target": target,
                   "gender": gender,
                   "age": age})

df.to_csv('test.csv', index=False)

dataset = tf.data.experimental.make_csv_dataset(
    'test.csv',
    batch_size=2,
    label_name='target',
    num_epochs=1)

tokenizer = tfds.features.text.Tokenizer()

lowercase = True
vocabulary = Counter()
for features, _ in dataset:
    text = features['text']
    if lowercase:
        text = tf.strings.lower(text)
    for t in text:
        tokens = tokenizer.tokenize(t.numpy())
        vocabulary.update(tokens)
        
vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))

max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
                                              lowercase=True,
                                              tokenizer=tokenizer)

def encode(text):
    sent_list = []
    sents = tf.strings.split(text, sep=". ").numpy()
    if max_sent:
        sents = sents[:max_sent]
    for sent in sents:
        text_encoded = encoder.encode(sent.decode())
        if max_len:
            text_encoded = text_encoded[:max_len]
            sent_list.append(pad_sequences([text_encoded], max_len, padding='post'))
    if len(sent_list) < 5:
        sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
    return tf.concat(sent_list, axis=0)

def encode_pyfn(features, targets):
    features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
    return features, targets

dataset = dataset.map(encode_pyfn)

next(iter(dataset))

它引发以下内容：

TypeError: in user code:

    <ipython-input-9-30172a796c2e>:69 encode_pyfn  *
        features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
    /Users/username/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:823 __hash__
        raise TypeError("Tensor is unhashable. "

    TypeError: Tensor is unhashable. Instead, use tensor.ref() as the key.

将功能应用于单个功能的正确方法是什么？

将功能应用于功能后，Tensor无法散列。而是使用tensor.ref（）作为键

0 个答案: