I have a program that tries to use Keras layers in a custom Estimator. The training run seems to work, but it fails during the evaluation step with:

Attempted to convert 'tensor' to a tensor and failed. Error: None values not supported

I am not sure why the labels are not coming through. Please see the full code below.
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from google.datalab.ml import TensorBoard
import shutil
import tensorflow as tf
import numpy as np
tf.logging.set_verbosity(tf.logging.INFO)
hidden_layers = [256, 128, 16, 4]
dropout = 0.2
# In the CSV, the label (APPT_STATUS_C) is the last column, after the features
CSV_COLUMNS = ['proc_code','SAME_DAY_YN','WALK_IN_YN','Lead_date','NEW_TO_SYS_YN','NEW_TO_PROV_YN','NEW_TO_DEP_YN','NEW_TO_SPEC_YN','NEW_TO_SERV_AREA_YN','WEEKEND_YN','DAY_OF_WEEK','HOUR_OF_DAY','auto_confirmed','PCP','SPECIALTY_DEP','fin_class','age','mychart_active','sex_c','cancellation_rate','confirmation_rate','Keep_appt_rate','no_show_rate','PRCP','SNOW','TAVG','destination_mi','duration_mins','APPT_STATUS_C']
FEATURES = CSV_COLUMNS[:-1]
LABEL_COLUMN = CSV_COLUMNS[-1]  # 'APPT_STATUS_C'
DEFAULTS = [['*Not Scheduled From Orderi'],[1.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],['Monday'],[0],[0.0],[0.0],['Cardiology'],['Medicare'],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0]]
nbuckets = 10
# These are the raw input columns, and will be provided for prediction also
INPUT_COLUMNS = [
    # Define features
    tf.feature_column.categorical_column_with_vocabulary_file(key='proc_code', vocabulary_file='/u01/appt/proc_code.csv'),
    # Numeric columns
    tf.feature_column.numeric_column('SAME_DAY_YN'),
    tf.feature_column.numeric_column('WALK_IN_YN'),
    tf.feature_column.numeric_column('Lead_date'),
    tf.feature_column.numeric_column('NEW_TO_SYS_YN'),
    tf.feature_column.numeric_column('NEW_TO_PROV_YN'),
    tf.feature_column.numeric_column('NEW_TO_DEP_YN'),
    tf.feature_column.numeric_column('NEW_TO_SPEC_YN'),
    tf.feature_column.numeric_column('NEW_TO_SERV_AREA_YN'),
    tf.feature_column.numeric_column('WEEKEND_YN'),
    tf.feature_column.categorical_column_with_vocabulary_list('DAY_OF_WEEK', vocabulary_list=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']),
    # Categorical identity column
    tf.feature_column.categorical_column_with_identity('HOUR_OF_DAY', num_buckets=24),
    # Numeric columns
    tf.feature_column.numeric_column('auto_confirmed'),
    tf.feature_column.numeric_column('PCP'),
    tf.feature_column.categorical_column_with_vocabulary_file(key='SPECIALTY_DEP', vocabulary_file='/u01/appt/specialty_dep.csv'),
    tf.feature_column.categorical_column_with_vocabulary_file(key='fin_class', vocabulary_file='/u01/appt/fin_class.csv'),
    # Numeric columns
    tf.feature_column.numeric_column('age'),
    tf.feature_column.numeric_column('mychart_active'),
    tf.feature_column.numeric_column('sex_c'),
    tf.feature_column.numeric_column('cancellation_rate'),
    tf.feature_column.numeric_column('confirmation_rate'),
    tf.feature_column.numeric_column('Keep_appt_rate'),
    tf.feature_column.numeric_column('no_show_rate'),
    tf.feature_column.numeric_column('PRCP'),
    tf.feature_column.numeric_column('SNOW'),
    tf.feature_column.numeric_column('TAVG'),
    tf.feature_column.numeric_column('destination_mi'),
    tf.feature_column.numeric_column('duration_mins'),
    # Engineered features that are created in the input_fn
]
def feature_columns(nbuckets):
    # Input columns
    (proc_code, SAME_DAY_YN, WALK_IN_YN, Lead_date, NEW_TO_SYS_YN, NEW_TO_PROV_YN, NEW_TO_DEP_YN, NEW_TO_SPEC_YN, NEW_TO_SERV_AREA_YN, WEEKEND_YN, DAY_OF_WEEK, HOUR_OF_DAY, auto_confirmed, PCP, SPECIALTY_DEP, fin_class, age, mychart_active, sex_c, cancellation_rate, confirmation_rate, Keep_appt_rate, no_show_rate, PRCP, SNOW, TAVG, destination_mi, duration_mins) = INPUT_COLUMNS
    # Bucketize the continuous columns
    age_buckets = np.linspace(0, 120, nbuckets).tolist()
    leaddate_buckets = np.linspace(0, 100, nbuckets).tolist()
    dest_buckets = np.linspace(0, 100, nbuckets).tolist()
    duration_buckets = np.linspace(0, 180, nbuckets).tolist()
    TAVG_buckets = np.linspace(0, 100, nbuckets).tolist()
    # Feature cross
    day_hr = tf.feature_column.crossed_column([DAY_OF_WEEK, HOUR_OF_DAY], 7 * 24)
    b_age = tf.feature_column.bucketized_column(age, age_buckets)
    b_leaddate = tf.feature_column.bucketized_column(Lead_date, leaddate_buckets)
    b_destination = tf.feature_column.bucketized_column(destination_mi, dest_buckets)
    b_duration = tf.feature_column.bucketized_column(duration_mins, duration_buckets)
    b_TAVG = tf.feature_column.bucketized_column(TAVG, TAVG_buckets)
    deep_columns = [
        # Embedding_column to "group" together ...
        tf.feature_column.embedding_column(day_hr, 10),
        tf.feature_column.embedding_column(proc_code, 10),
        tf.feature_column.embedding_column(SPECIALTY_DEP, 10),
        tf.feature_column.embedding_column(fin_class, 10),
        # Numeric columns
        SAME_DAY_YN,
        WALK_IN_YN,
        b_leaddate,
        NEW_TO_SYS_YN,
        NEW_TO_PROV_YN,
        NEW_TO_DEP_YN,
        NEW_TO_SPEC_YN,
        NEW_TO_SERV_AREA_YN,
        WEEKEND_YN,
        auto_confirmed,
        PCP,
        b_age,
        mychart_active,
        sex_c,
        cancellation_rate,
        confirmation_rate,
        Keep_appt_rate,
        no_show_rate,
        PRCP,
        SNOW,
        b_TAVG,
        b_destination,
        b_duration
    ]
    return deep_columns

he_init = tf.keras.initializers.he_normal()

def build_fully_connected(X, n_units=100, activation=tf.keras.activations.relu,
                          initialization=he_init, batch_normalization=False,
                          training=False, name=None):
    layer = tf.keras.layers.Dense(n_units,
                                  activation=None,
                                  kernel_initializer=initialization,
                                  name=name)(X)
    if batch_normalization:
        bn = tf.keras.layers.BatchNormalization(momentum=0.90)
        layer = bn(layer, training=training)
    return activation(layer)

def output_layer(h, n_units, initialization=he_init,
                 batch_normalization=False, training=False):
    logits = tf.keras.layers.Dense(n_units, activation=None)(h)
    if batch_normalization:
        bn = tf.keras.layers.BatchNormalization(momentum=0.90)
        logits = bn(logits, training=training)
    return logits

# build model
ACTIVATION = tf.keras.activations.relu
BATCH_SIZE = 550
HIDDEN_UNITS = [256, 128, 16, 1]
LEARNING_RATE = 0.01
NUM_STEPS = 10
USE_BATCH_NORMALIZATION = False
def dnn_custom_estimator(features, labels, mode, params):
    in_training = mode == tf.estimator.ModeKeys.TRAIN
    use_batch_norm = params['batch_norm']
    net = tf.feature_column.input_layer(features, params['features'])
    for i, n_units in enumerate(params['hidden_units']):
        net = build_fully_connected(net, n_units=n_units, training=in_training,
                                    batch_normalization=use_batch_norm,
                                    activation=params['activation'],
                                    name='hidden_layer' + str(i))
    logits = output_layer(net, 1, batch_normalization=use_batch_norm,
                          training=in_training)
    # With a single sigmoid output unit, threshold the logit; tf.argmax over a
    # size-1 axis would always return 0.
    predicted_classes = tf.cast(logits > 0.0, tf.int64)
    labels = tf.reshape(labels, [-1, 1])
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name='acc_op')
    true_positives = tf.metrics.true_positives(labels=labels,
                                               predictions=predicted_classes)
    # tf.summary.scalar('accuracy', accuracy[1])  # for visualizing in TensorBoard
    # if mode == tf.estimator.ModeKeys.EVAL:
    #     return tf.estimator.EstimatorSpec(mode, loss=loss,
    #                                       eval_metric_ops={'accuracy': accuracy})
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops={'true_positives': true_positives})
    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    extra_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    with tf.control_dependencies(extra_ops):
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
# Build the estimator
def build_estimator(model_dir, nbuckets, hidden_units):
    """
    Build an estimator starting from INPUT_COLUMNS.
    These include feature transformations and synthetic features.
    The model is a deep neural network classifier.
    """
    # estimator = tf.estimator.DNNRegressor(model_dir=model_dir,
    #                                       feature_columns=deep_columns,
    #                                       label_dimension=1,
    #                                       hidden_units=hidden_units,
    #                                       dropout=dropout)
    estimator = tf.estimator.DNNClassifier(model_dir=model_dir,
                                           # feature_columns=deep_columns,
                                           feature_columns=feature_columns(nbuckets),
                                           n_classes=2,
                                           hidden_units=hidden_units,
                                           dropout=dropout)
    # add extra evaluation metrics for hyperparameter tuning
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics1)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics2)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics3)
    return estimator

def add_eval_metrics(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'false_negatives': tf.metrics.false_negatives(labels, pred_values)
    }

def add_eval_metrics1(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'true_negatives': tf.metrics.true_negatives(labels, pred_values)
    }

def add_eval_metrics2(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'false_positives': tf.metrics.false_positives(labels, pred_values)
    }

def add_eval_metrics3(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'true_positives': tf.metrics.true_positives(labels, pred_values)
    }

def serving_input_fn():
    feature_placeholders = {
        'proc_code': tf.placeholder(tf.string, [None]),
        'SAME_DAY_YN': tf.placeholder(tf.float32, [None]),
        'WALK_IN_YN': tf.placeholder(tf.float32, [None]),
        'Lead_date': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SYS_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_PROV_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_DEP_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SPEC_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SERV_AREA_YN': tf.placeholder(tf.float32, [None]),
        'WEEKEND_YN': tf.placeholder(tf.float32, [None]),
        'DAY_OF_WEEK': tf.placeholder(tf.string, [None]),
        'HOUR_OF_DAY': tf.placeholder(tf.int32, [None]),
        'auto_confirmed': tf.placeholder(tf.float32, [None]),
        'PCP': tf.placeholder(tf.float32, [None]),
        'SPECIALTY_DEP': tf.placeholder(tf.string, [None]),
        'fin_class': tf.placeholder(tf.string, [None]),
        'age': tf.placeholder(tf.float32, [None]),
        'mychart_active': tf.placeholder(tf.float32, [None]),
        'sex_c': tf.placeholder(tf.float32, [None]),
        'cancellation_rate': tf.placeholder(tf.float32, [None]),
        'confirmation_rate': tf.placeholder(tf.float32, [None]),
        'Keep_appt_rate': tf.placeholder(tf.float32, [None]),
        'no_show_rate': tf.placeholder(tf.float32, [None]),
        'PRCP': tf.placeholder(tf.float32, [None]),
        'SNOW': tf.placeholder(tf.float32, [None]),
        'TAVG': tf.placeholder(tf.float32, [None]),
        'destination_mi': tf.placeholder(tf.float32, [None]),
        'duration_mins': tf.placeholder(tf.float32, [None]),
    }
    # You can transform data here from the input format to the format expected by your model.
    # features = feature_placeholders  # no transformation needed
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features_in = dict(zip(CSV_COLUMNS, columns))
            label_in = features_in.pop(LABEL_COLUMN)
            return features_in, label_in
        # Create a list of file names that match the "glob" pattern (i.e. data_file_*.csv)
        filenames_dataset = tf.data.Dataset.list_files(filename)
        # Read lines from the text files, skipping the header row
        textlines_dataset = filenames_dataset.flat_map(
            lambda fname: tf.data.TextLineDataset(fname).skip(1))
        # Parse text lines as comma-separated values (CSV)
        dataset = textlines_dataset.map(decode_csv)
        # Note:
        # use tf.data.Dataset.flat_map to apply one-to-many transformations (here: filename -> text lines)
        # use tf.data.Dataset.map to apply one-to-one transformations (here: text line -> feature dict)
        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # repeat indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end of input after one pass
        dataset = dataset.repeat(num_epochs).batch(batch_size)
        features, labels = dataset.make_one_shot_iterator().get_next()
        return features, labels
    return _input_fn

def add_eval_metrics_2(labels, predictions):
    pred_values = predictions['predictions']
    return {
        'recall': tf.metrics.recall(labels, pred_values)
    }

# Create estimator train and evaluate function
def train_and_evaluate(args):
    # estimator = build_estimator('/u01/appt/appt_ts_5', 10, hidden_layers)
    estimator = tf.estimator.Estimator(model_fn=dnn_custom_estimator,
                                       model_dir='/u01/appt/appt_ts_5',
                                       params={'features': feature_columns(10),
                                               'batch_norm': USE_BATCH_NORMALIZATION,
                                               'activation': ACTIVATION,
                                               'hidden_units': HIDDEN_UNITS,
                                               'learning_rate': LEARNING_RATE})
    # estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics_2)
    train_spec = tf.estimator.TrainSpec(
        input_fn=read_dataset(
            filename='/u01/appt/appt_ts_train.csv',
            mode=tf.estimator.ModeKeys.TRAIN,
            batch_size=256),
        max_steps=50000000)
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=read_dataset(
            filename='/u01/appt/appt_ts_test.csv',
            mode=tf.estimator.ModeKeys.EVAL,
            batch_size=256),
        steps=100,
        exporters=exporter,
        throttle_secs=50)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
The error message is below:
--> 196 labels = tf.reshape(labels, [-1, 1])
197
198 loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)
ValueError: Attempted to convert 'tensor' to a tensor and failed. Error: None values not supported.
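My current guess, which I have not been able to verify, is that the LatestExporter attached to the EvalSpec invokes the model_fn in PREDICT mode, where the Estimator passes labels=None, and because dnn_custom_estimator has no PREDICT branch it falls through to the tf.reshape(labels, [-1, 1]) line with a None value. Here is a minimal sketch of the guard I think is missing, placed right after logits and predicted_classes are computed (assuming that diagnosis is right):

if mode == tf.estimator.ModeKeys.PREDICT:
    # In PREDICT mode (e.g. during export) labels is None, so return
    # predictions before any code that touches labels runs.
    predictions = {'logits': logits,
                   'probabilities': tf.sigmoid(logits),
                   'class_ids': predicted_classes}
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

If that is the cause, the reshape, the loss, and the metrics below the guard would then only run in TRAIN and EVAL modes. Is that the right way to handle it?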
Would really appreciate any insight.