TensorFlow input function with batch size and shuffle

Time: 2017-07-10 17:45:52

Tags: python pandas input tensorflow batch-processing

I am trying to build a TensorFlow input function with tf.train.batch(). I have DataFrames for train, eval and prediction, so input_fn should take df and batch_size as arguments. The df contains both continuous and categorical columns.

The modified code:

import pandas as pd
import tensorflow as tf

COLUMNS = ['atemp', 'holiday', 'humidity', 'season', 'temp', 'weather', 'windspeed', 'workingday', 'hour', 'weekday', 'month', 'label']

CONTINUOUS_COLUMNS = ['atemp', 'humidity', 'temp', 'windspeed']
CATEGORICAL_COLUMNS = ['holiday', 'season', 'weather',
                       'workingday', 'weekday', 'month', 'hour']

LEARNING_RATE = 0.1
LABEL_COLUMN = 'label'
batch_size = 128

data_set =  pd.read_excel('bike_str.xlsx')

# Split the data into a training set, an eval set and a pred set.
train_set = data_set[:9500]
eval_set = data_set[9500:10800]
pred_set = data_set[10800:]

## Eval and Prediction labels:

eval_label = eval_set['label']
pred_label = pred_set['label']

Input_fn:

def batch_input_fn(df, batch_size, shuffle):
    def input_fn():
        """Input builder function."""
        # Creates a dictionary mapping from each continuous feature column name (k) to
        # the values of that column stored in a constant Tensor.
        continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
        # Creates a dictionary mapping from each categorical feature column name (k)
        # to the values of that column stored in a tf.SparseTensor.
        categorical_cols = {
            k: tf.SparseTensor(
                indices=[[i, 0] for i in range(df[k].size)],
                values=df[k].values,
                dense_shape=[df[k].size, 1])
            for k in CATEGORICAL_COLUMNS}
        # Merges the two dictionaries into one.
        x = dict(continuous_cols)
        x.update(categorical_cols)
        # Converts the label column into a constant Tensor.
        y = tf.constant(df[LABEL_COLUMN].values)
        # Slices the features and the label, then returns them in batches.
        # I'm trying to shuffle the data for train and not shuffle it for eval and pred
        # (see the usage sketch after this function).
        sliced_input = tf.train.slice_input_producer([x, y], shuffle=shuffle)
        return tf.train.batch(sliced_input, batch_size=batch_size, num_threads=3)
    return input_fn
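
For context, this is how I intend to call the factory for the three splits (assuming the shuffle argument in the signature above), shuffling only the training data:

train_input_fn = batch_input_fn(train_set, batch_size, shuffle=True)   # shuffle for training
eval_input_fn = batch_input_fn(eval_set, batch_size, shuffle=False)    # keep order for eval
pred_input_fn = batch_input_fn(pred_set, batch_size, shuffle=False)    # keep order for prediction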

## Continuous base columns.

atemp = tf.contrib.layers.real_valued_column('atemp')
humidity = tf.contrib.layers.real_valued_column('humidity')
temp = tf.contrib.layers.real_valued_column('temp')
windspeed = tf.contrib.layers.real_valued_column('windspeed')


## Categoric base columns:
### To define a feature column for a categorical feature, we can create a SparseColumn

holiday = tf.contrib.layers.sparse_column_with_keys(column_name="holiday", keys=["no", "yes"])
season = tf.contrib.layers.sparse_column_with_keys(column_name="season", keys=["spring", "summer", "fall","winter"])

feat_dnn = [atemp, humidity, windspeed, temp,
            tf.contrib.layers.embedding_column(holiday, dimension=1)]

dnnregressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feat_dnn,
    hidden_units=[512, 256, 512],
    optimizer=tf.train.FtrlOptimizer(
        learning_rate=0.250, l1_regularization_strength=0.8, l2_regularization_strength=0.8),
    activation_fn=tf.nn.relu, dropout=0.08)

dnnregressor.fit(input_fn= lambda: batch_input_fn(train_set, batch_size, shuffle = True), steps=1000 )

When batch_input_fn is called directly, the error is:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-9c356159093d> in <module>()
----> 1 dnnregressor.fit(input_fn= lambda: batch_input_fn(train_set, batch_size), steps=15000 )

C:\Python\Anaconda\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
    287             'in a future version' if date is None else ('after %s' % date),
    288             instructions)
--> 289       return func(*args, **kwargs)
    290     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    291                                        _add_deprecated_arg_notice_to_docstring(

C:\Python\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    453       hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps))
    454 
--> 455     loss = self._train_model(input_fn=input_fn, hooks=hooks)
    456     logging.info('Loss for final step: %s.', loss)
    457     return self

C:\Python\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py in _train_model(self, input_fn, hooks)
    951       random_seed.set_random_seed(self._config.tf_random_seed)
    952       global_step = contrib_framework.create_global_step(g)
--> 953       features, labels = input_fn()
    954       self._check_inputs(features, labels)
    955       model_fn_ops = self._get_train_ops(features, labels)

TypeError: 'function' object is not iterable
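
Reading the traceback, it looks like batch_input_fn(train_set, batch_size) already returns the inner input_fn closure, so wrapping that call in a lambda means the estimator's own input_fn() call gets back a function instead of a (features, labels) pair, which it then fails to unpack. A minimal sketch of passing the factory result directly (assuming the shuffle parameter from the code above):

# Pass the closure returned by the factory; fit() itself will call it to build the graph.
dnnregressor.fit(input_fn=batch_input_fn(train_set, batch_size, shuffle=True), steps=1000)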

Judging from the code below, this approach seems to work, but there the tensors are plain tensors rather than a dict of feature columns:

def batched_input_fn(dataset_x, dataset_y, batch_size):
    def _input_fn():
        all_x = tf.constant(dataset_x, shape=dataset_x.shape, dtype=tf.float32)
        all_y = tf.constant(dataset_y, shape=dataset_y.shape, dtype=tf.float32)
        sliced_input = tf.train.slice_input_producer([all_x, all_y])
        return tf.train.batch(sliced_input, batch_size=batch_size)
    return _input_fn
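
If I understand the TF 1.x queue API correctly, tf.train.batch and tf.train.shuffle_batch also accept a dictionary of tensors, so one untested sketch would keep the features as a dict and enqueue the whole DataFrame with enqueue_many=True, avoiding slice_input_producer entirely. The name dict_batch_input_fn is just illustrative, and the categorical columns are passed here as dense string tensors, so the SparseTensor conversion they may still need is left open:

def dict_batch_input_fn(df, batch_size, shuffle):
    def input_fn():
        # One dense tensor per feature column; categorical values stay as strings here
        # and may still need to be converted to tf.SparseTensor for the sparse columns.
        features = {k: tf.constant(df[k].values)
                    for k in CONTINUOUS_COLUMNS + CATEGORICAL_COLUMNS}
        features[LABEL_COLUMN] = tf.constant(df[LABEL_COLUMN].values)
        if shuffle:
            batched = tf.train.shuffle_batch(
                features, batch_size=batch_size, capacity=4 * batch_size,
                min_after_dequeue=2 * batch_size, enqueue_many=True)
        else:
            batched = tf.train.batch(
                features, batch_size=batch_size, capacity=4 * batch_size,
                enqueue_many=True)
        # Split the label back out of the batched dictionary.
        label = batched.pop(LABEL_COLUMN)
        return batched, label
    return input_fn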

0 Answers:

No answers yet