我正在尝试将一个退出函数应用于TensorFlow数据集,但是在引用功能列的正确方法方面遇到了一些问题。如果只有一个输入,则该功能将按预期工作。
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
text = ["I played it a while but it was alright. The steam was a bit of trouble."
" The more they move these game to steam the more of a hard time I have"
" activating and playing a game. But in spite of that it was fun, I "
"liked it. Now I am looking forward to anno 2205 I really want to "
"play my way to the moon.",
"This game is a bit hard to get the hang of, but when you do it's great."]
df = pd.DataFrame({"text": text})
dataset = (
tf.data.Dataset.from_tensor_slices(
tf.cast(df.text.values, tf.string)))
tokenizer = tfds.features.text.Tokenizer()
lowercase = True
vocabulary = Counter()
for text in dataset:
if lowercase:
text = tf.strings.lower(text)
tokens = tokenizer.tokenize(text.numpy())
vocabulary.update(tokens)
vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))
max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
lowercase=True,
tokenizer=tokenizer)
def encode(text):
sent_list = []
sents = tf.strings.split(text, sep=". ").numpy()
if max_sent:
sents = sents[:max_sent]
for sent in sents:
text_encoded = encoder.encode(sent.decode())
if max_len:
text_encoded = text_encoded[:max_len]
sent_list.append(pad_sequences([text_encoded], max_len))
if len(sent_list) < 5:
sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
return tf.concat(sent_list, axis=0)
def encode_pyfn(text):
[text_encoded] = tf.py_function(encode, inp=[text], Tout=[tf.int32])
return text_encoded
dataset = dataset.map(encode_pyfn).batch(batch_size=2)
next(iter(dataset))
但是当我尝试将同一功能应用于由make_csv_dataset
产生的单个功能列时:
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
text = ["I played it a while but it was alright. The steam was a bit of trouble."
" The more they move these game to steam the more of a hard time I have"
" activating and playing a game. But in spite of that it was fun, I "
"liked it. Now I am looking forward to anno 2205 I really want to "
"play my way to the moon.",
"This game is a bit hard to get the hang of, but when you do it's great."]
target = [0, 1]
gender = [1, 0]
age = [45, 35]
df = pd.DataFrame({"text": text,
"target": target,
"gender": gender,
"age": age})
df.to_csv('test.csv', index=False)
dataset = tf.data.experimental.make_csv_dataset(
'test.csv',
batch_size=2,
label_name='target',
num_epochs=1)
tokenizer = tfds.features.text.Tokenizer()
lowercase = True
vocabulary = Counter()
for features, _ in dataset:
text = features['text']
if lowercase:
text = tf.strings.lower(text)
for t in text:
tokens = tokenizer.tokenize(t.numpy())
vocabulary.update(tokens)
vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))
max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
lowercase=True,
tokenizer=tokenizer)
def encode(text):
sent_list = []
sents = tf.strings.split(text, sep=". ").numpy()
if max_sent:
sents = sents[:max_sent]
for sent in sents:
text_encoded = encoder.encode(sent.decode())
if max_len:
text_encoded = text_encoded[:max_len]
sent_list.append(pad_sequences([text_encoded], max_len, padding='post'))
if len(sent_list) < 5:
sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
return tf.concat(sent_list, axis=0)
def encode_pyfn(features, targets):
features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
return features, targets
dataset = dataset.map(encode_pyfn)
next(iter(dataset))
它引发以下内容:
TypeError: in user code:
<ipython-input-9-30172a796c2e>:69 encode_pyfn *
features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
/Users/username/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:823 __hash__
raise TypeError("Tensor is unhashable. "
TypeError: Tensor is unhashable. Instead, use tensor.ref() as the key.
将功能应用于单个功能的正确方法是什么?