tensorflow:报告给协调器的错误:feature_columns的项必须为_FeatureColumn。给定(类型<class'collections.NumericColumn'>)

时间:2019-09-10 09:58:06

标签: python tensorflow tensorflow-estimator

我试图使用tf估计器(tf.estimator)来建立逻辑回归模型。我使用了鸢尾花(iris)数据集,代码在我的本地计算机上可以成功运行。但是,当我尝试在集群中运行该模型(使用train_and_evaluate而不是classifier.train)时,遇到了这个问题。

python版本:3.6.8 tensorflow版本:1.13.1

以下是在本地运行的代码:

iris数据集仅包含数值型数据,因此feature_columns是NumericColumn的列表。

# Column names of the iris table; one numeric feature column is built per name.
# NOTE(review): 'Species' is normally the iris label column -- confirm it is
# really meant to be fed to the model as a feature.
FUTURES = ['SepalLength', 'SepalWidth','PetalLength', 'PetalWidth', 'Species']
feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in FUTURES
]

定义估计器,并通过params参数传入feature_columns。

# Custom Estimator driven by my_model_fn. The entries of `params` are the
# values my_model_fn looks up as params['feature_columns'] / params['n_classes'].
classifier = tf.estimator.Estimator(
    model_fn=my_model_fn,
    model_dir=models_path,
    params={
        'feature_columns': feature_columns,
        'n_classes': 3,
    },
)

定义model_fn。

def my_model_fn(features, labels, mode, params):
    """Model function of a linear softmax classifier for a custom Estimator.

    Args:
        features: Input features; converted to one dense tensor using the
            feature columns stored in ``params['feature_columns']``.
        labels: Class labels, used for the loss and the accuracy metric.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict providing ``'feature_columns'`` and ``'n_classes'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL mode.
        Any other mode falls off the end and implicitly returns ``None``.
    """
    mode_keys = tf.estimator.ModeKeys

    # One dense layer without activation: a raw logit per class.
    dense_input = tf.feature_column.input_layer(features, params['feature_columns'])
    logits = tf.layers.dense(dense_input, params['n_classes'], activation=None)
    predicted_classes = tf.argmax(logits, 1)

    # Prediction only needs the logits; labels are not touched.
    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={'logits': logits})

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == mode_keys.TRAIN:
        adagrad = tf.train.AdagradOptimizer(learning_rate=0.1)
        train_op = adagrad.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # Reached only when the mode is neither PREDICT nor TRAIN.
    accuracy = tf.metrics.accuracy(
        labels=labels,
        predictions=predicted_classes,
        name='acc_op',
    )
    # accuracy is indexable; element 1 is what gets logged as a scalar summary.
    tf.summary.scalar('accuracy', accuracy[1])
    metrics = {'accuracy': accuracy}
    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

此代码可以很好地工作并产生一些结果。

-------------------------------------------------------------

然后,我想在集群上训练它。my_model_fn与上一个相同,并且self._feature_numeric_col仍然是NumericColumn的列表。

class LogisticReg():
    """Softmax (logistic-regression) classifier wrapped in a tf Estimator.

    NOTE(review): ``x``, ``self.model_path``, ``self.config`` and
    ``self.n_class`` are read below but never assigned in this snippet --
    presumably they are defined elsewhere; confirm before running.
    """

    def __init__(self):
        """Build one numeric feature column per column of ``x`` and the Estimator."""
        self._feature_col = x.columns.tolist()
        self._feature_numeric_col = [
            tf.feature_column.numeric_column(key=key)
            for key in self._feature_col
        ]
        self.estimator = tf.estimator.Estimator(
            model_fn=self.my_model_fn,
            model_dir=self.model_path,
            config=self.config,
            params={'feature_columns': self._feature_numeric_col})

    def my_model_fn(self, features, labels, mode, params):
        """Model function: a single dense layer producing class logits.

        Args:
            features: Input features; converted to a dense tensor with
                ``params['feature_columns']``.
            labels: Class labels, used for the loss and the accuracy metric.
            mode: A ``tf.estimator.ModeKeys`` value.
            params: Dict providing ``'feature_columns'``.

        Returns:
            A ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL mode.
            Any other mode falls off the end and implicitly returns ``None``.
        """
        net = tf.feature_column.input_layer(features, params['feature_columns'])
        logits = tf.layers.dense(net, self.n_class, activation=None)

        predicted_classes = tf.argmax(logits, 1)
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'logits': logits}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
            train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

        # Reached only when the mode is neither PREDICT nor TRAIN.
        accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes)

        metrics = {'accuracy': accuracy}
        tf.summary.scalar('accuracy', accuracy[1])
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

使用train_and_evaluate函数代替train / eval / predict

# input_fn
def input_fn(self, X, y, mode, batch_size):
    """Build a batched ``tf.data.Dataset`` from in-memory features and labels.

    Args:
        X: Feature table (pandas, per the original author's note); cast to
            float32 and converted with ``dict(X)``.
        y: Labels (pandas, per the original author's note); cast to int32.
        mode: ``'train'`` shuffles with a buffer of 500 and repeats the data;
            any other value leaves the data as a single unshuffled pass.
        batch_size: Batch size passed to ``Dataset.batch``.

    Returns:
        The batched dataset built from ``(dict(X), y)`` slices.
    """
    labels = y.astype(np.int32)
    feature_table = X.astype(np.float32)

    ds = tf.data.Dataset.from_tensor_slices((dict(feature_table), labels))
    if mode == 'train':
        ds = ds.shuffle(500).repeat()
    return ds.batch(batch_size)

# Training spec: input comes from self.input_fn in 'train' mode.
# NOTE(review): max_steps is a number of training steps, not epochs --
# confirm that n_epochs really holds the intended step budget.
def _train_input_fn():
    return self.input_fn(x_train, y_train, 'train', batch_size)


train_spec = tf.estimator.TrainSpec(
    input_fn=_train_input_fn,
    max_steps=n_epochs,
)


# Evaluation spec: input comes from self.input_fn in 'valid' mode, which
# neither shuffles nor repeats the validation data.
def _eval_input_fn():
    return self.input_fn(x_valid, y_valid, 'valid', batch_size)


eval_spec = tf.estimator.EvalSpec(
    input_fn=_eval_input_fn,
    start_delay_secs=30,
    throttle_secs=30,
    steps=None,
)

# Run training and evaluation with the specs above.
tf.estimator.train_and_evaluate(self.estimator, train_spec, eval_spec)

我希望集群版本可以生成与本地版本相似的输出。但是,出现此错误。

Traceback (most recent call last):
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py", line 297, in stop_on_exception
    yield
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow/python/distribute/mirrored_strategy.py", line 852, in run
    self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "/mnt/glusterfs/model-center/train/classify.py", line 51, in my_model_fn
    net = tf.feature_column.input_layer(features, params['feature_columns'])
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column.py", line 302, in input_layer
    cols_to_output_tensors=cols_to_output_tensors)
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column.py", line 181, in _internal_input_layer
    feature_columns = _normalize_feature_columns(feature_columns)
  File "/usr/local/bin/python3/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column.py", line 2263, in _normalize_feature_columns
    'Given (type {}): {}.'.format(type(column), column))
ValueError: Items of feature_columns must be a _FeatureColumn. Given (type <class 'collections.NumericColumn'>): NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None).

0 个答案:

没有答案