我已经按照TensorFlow linear classifier tutorial搭建了一个玩具示例。在此示例中,调用fit
方法时使用的是参数input_fn
,我通过它传入train_input_fn
。这是TensorFlow偏好的数据传入方式。但是,我真正想做的是按小批量(mini-batch)运行。幸运的是,fit
有一个batch_size
参数,但我需要放弃使用input_fn
并转而使用x
和y
。我已尝试传递ndarray
和DataFrames
以及train_input_fn
函数的输出。什么都行不通。我需要一个使用batch_size
参数的工作示例。
下面的设置代码分为两部分:先是我没有遇到问题的部分,随后是出问题的部分。
import pandas as pd
import numpy as np
import tensorflow as tf
import tempfile
# Seed NumPy's global RNG so the toy data set is identical on every run.
np.random.seed([3, 1415])

# 100 rows: one Yes/No categorical column, three random value columns and a
# binary label.  The columns are drawn in exactly the order listed here.
df = pd.DataFrame({
    "cat1": np.random.choice(("Yes", "No"), size=(100,)),
    "val1": np.random.rand(100),
    "val2": np.random.rand(100),
    "val3": np.random.rand(100),
    "label": np.random.choice((0, 1), size=(100,)),
})

# Column roles consumed by input_fn.
LABEL_COLUMN = "label"
CONTINUOUS_COLUMNS = ["val1", "val2", "val3"]
CATEGORICAL_COLUMNS = ["cat1"]

# Rows [0, 80) are used for training, rows [80, 100) for evaluation.
trainBegin, trainEnd = 0, 80
testBegin, testEnd = 80, 100
df_train = df.iloc[trainBegin:trainEnd]
df_test = df.iloc[testBegin:testEnd]
def input_fn(df):
    """Turn a pandas DataFrame into the (features, label) pair of tensors.

    Args:
        df: DataFrame containing every column named in CONTINUOUS_COLUMNS,
            CATEGORICAL_COLUMNS and LABEL_COLUMN.

    Returns:
        A tuple ``(feature_cols, label)``.  ``feature_cols`` maps each column
        name to a ``tf.constant`` (continuous columns) or a
        ``tf.SparseTensor`` (categorical columns); ``label`` is a
        ``tf.constant`` built from the LABEL_COLUMN values.
    """
    # Each continuous column becomes a constant Tensor of its values.
    continuous_cols = {k: tf.constant(df[k].values)
                       for k in CONTINUOUS_COLUMNS}

    # Each categorical column becomes an (n_rows, 1) SparseTensor with one
    # entry per row.
    # NOTE(review): newer TensorFlow releases appear to name the `shape`
    # argument `dense_shape` -- confirm against the installed version.
    categorical_cols = {
        k: tf.SparseTensor(
            indices=[[i, 0] for i in range(df[k].size)],
            values=df[k].values,
            shape=[df[k].size, 1])
        for k in CATEGORICAL_COLUMNS}

    # Merge with copy + update instead of `a.items() + b.items()`: the latter
    # only works on Python 2, where items() returns lists.  As before,
    # categorical entries win on a name clash.
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)

    # The label column is fed as a constant Tensor as well.
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label
def train_input_fn():
    """Return the (features, label) tensors built from the training split."""
    training_frame = df_train
    return input_fn(training_frame)
def eval_input_fn():
    """Return the (features, label) tensors built from the evaluation split."""
    evaluation_frame = df_test
    return input_fn(evaluation_frame)
# Declare the three continuous inputs as real-valued feature columns.
val1, val2, val3 = (
    tf.contrib.layers.real_valued_column(column_name)
    for column_name in ("val1", "val2", "val3")
)
wide_columns = [val1, val2, val3]

# Train a linear classifier into a fresh temporary model directory, feeding
# the data through input_fn.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(
    feature_columns=wide_columns,
    model_dir=model_dir,
)
m.fit(input_fn=train_input_fn, steps=200)

# Evaluate on the held-out rows and print every metric, sorted by name.
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))
accuracy: 0.45
eval_auc: 0.459596
loss: 0.771354
# Same training run as before, in a new temporary model directory.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(
    feature_columns=wide_columns,
    model_dir=model_dir,
)
m.fit(input_fn=train_input_fn, steps=200)

# --- the only two lines that differ from the working run ----------------
# NOTE: this is the variant the question reports as failing; it passes the
# output of train_input_fn as x / y together with batch_size.
x, y = train_input_fn()
results = m.evaluate(x=x, y=y, batch_size=100, steps=1)
# -------------------------------------------------------------------------

for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))
以下是我得到的错误;不过根据我尝试的方式不同,得到的错误也各不相同。文档说x应该是一个矩阵,这种方式我也试过了。
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-135-5b53add19aac> in <module>()
12 # p.fit(input_fn=train_input_fn, steps=10)
13 x, y = train_input_fn()
---> 14 p.fit(x=df_train, y=df_train, steps=10, batch_size=100)
15 results = p.evaluate(input_fn=eval_input_fn, steps=1)
16 for key in sorted(results):
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in fit(self, x, y, input_fn, steps, batch_size, monitors)
171 if x is None:
172 raise ValueError('Either x or input_fn must be provided.')
--> 173 input_fn, feed_fn = _get_input_fn(x, y, batch_size)
174 elif (x is not None) or (y is not None):
175 raise ValueError('Can not provide both input_fn and either of x and y.')
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.pyc in _get_input_fn(x, y, batch_size)
65 def _get_input_fn(x, y, batch_size):
66 df = data_feeder.setup_train_data_feeder(
---> 67 x, y, n_classes=None, batch_size=batch_size)
68 return df.input_builder, df.get_feed_dict_fn()
69
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/data_feeder.pyc in setup_train_data_feeder(X, y, n_classes, batch_size, shuffle, epochs)
97 ValueError: if one of `X` and `y` is iterable and the other is not.
98 """
---> 99 X, y = _data_type_filter(X, y)
100 if HAS_DASK:
101 # pylint: disable=g-import-not-at-top
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/data_feeder.pyc in _data_type_filter(X, y)
65 y = extract_dask_labels(y)
66 if HAS_PANDAS:
---> 67 X = extract_pandas_data(X)
68 if y is not None:
69 y = extract_pandas_labels(y)
/Users/sean/anaconda/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/io/pandas_io.pyc in extract_pandas_data(data)
51 return data.values.astype('float')
52 else:
---> 53 raise ValueError('Data types for data must be int, float, or bool.')
54
55
ValueError: Data types for data must be int, float, or bool.
答案 0 :(得分:3)
如果您传递x
和y
,则格式似乎与input_fn
不同。引自fit
的文档:
x:形状为[n_samples, n_features, ...]的矩阵或张量。也可以是返回特征数组的迭代器。它是用于拟合模型的训练输入样本。如果设置了x,则input_fn必须为None。
以下示例有效。注意
我不得不用布尔值替换'Yes'
/ 'No'
(这可能并不等价,但足以说明问题),因为似乎无法以这种方式输入稀疏数据。
我使用infer_real_valued_columns_from_input
来获取列。
修订版:
import pandas as pd
import numpy as np
import tensorflow as tf
import tempfile
# Seed NumPy's global RNG so the toy data set is identical on every run.
np.random.seed([3, 1415])

# Features and labels live in separate frames so they can be passed as x / y.
# cat1 is boolean here rather than 'Yes' / 'No'.  Draw order: cat1, val1,
# val2, val3, then label.
_x_df = pd.DataFrame({
    "cat1": np.random.choice((True, False), size=(100,)),
    "val1": np.random.rand(100),
    "val2": np.random.rand(100),
    "val3": np.random.rand(100),
})
_y_df = pd.DataFrame({"label": np.random.choice((0, 1), size=(100,))})

# Rows [0, 80) are used for training, rows [80, 100) for evaluation.
trainBegin, trainEnd = 0, 80
testBegin, testEnd = 80, 100

x_df_train = _x_df.iloc[trainBegin:trainEnd]
y_df_train = _y_df.iloc[trainBegin:trainEnd]
x_df_test = _x_df.iloc[testBegin:testEnd]
y_df_test = _y_df.iloc[testBegin:testEnd]
# Let tf.contrib.learn derive the feature columns from the training frame.
wide_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
    x_df_train)

# Train in mini-batches of 5 rows by passing x / y instead of an input_fn.
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.LinearClassifier(
    feature_columns=wide_columns,
    model_dir=model_dir,
)
m.fit(x=x_df_train, y=y_df_train, batch_size=5, steps=200)

# Evaluate on the held-out rows and print every metric, sorted by name.
results = m.evaluate(x_df_test, y_df_test, batch_size=5, steps=1)
for key, value in sorted(results.items()):
    print("%s: %s" % (key, value))