I have a program that tries to use Keras layers in a custom Estimator. The training run seems to work, but it fails during the evaluation step with:

Attempted to convert 'tensor' to a tensor and failed. Error: None values not supported

I am not sure why the labels are not coming through. Please see the full code below.
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from google.datalab.ml import TensorBoard
import shutil
import tensorflow as tf
import numpy as np
tf.logging.set_verbosity(tf.logging.INFO)
hidden_layers = [256, 128, 16, 4]
dropout = 0.2
# In the CSV, the label (APPT_STATUS_C) is the last column, after the features
CSV_COLUMNS = ['proc_code','SAME_DAY_YN','WALK_IN_YN','Lead_date','NEW_TO_SYS_YN','NEW_TO_PROV_YN','NEW_TO_DEP_YN','NEW_TO_SPEC_YN','NEW_TO_SERV_AREA_YN','WEEKEND_YN','DAY_OF_WEEK','HOUR_OF_DAY','auto_confirmed','PCP','SPECIALTY_DEP','fin_class','age','mychart_active','sex_c','cancellation_rate','confirmation_rate','Keep_appt_rate','no_show_rate','PRCP','SNOW','TAVG','destination_mi','duration_mins','APPT_STATUS_C']
FEATURES = CSV_COLUMNS[:-1]
LABEL_COLUMN = CSV_COLUMNS[-1]  # 'APPT_STATUS_C'
DEFAULTS = [['*Not Scheduled From Orderi'],[1.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],['Monday'],[0],[0.0],[0.0],['Cardiology'],['Medicare'],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0]]
nbuckets = 10
# These are the raw input columns, and will be provided for prediction also
INPUT_COLUMNS = [
    # Define features
    tf.feature_column.categorical_column_with_vocabulary_file(key='proc_code', vocabulary_file='/u01/appt/proc_code.csv'),
    # Numeric columns
    tf.feature_column.numeric_column('SAME_DAY_YN'),
    tf.feature_column.numeric_column('WALK_IN_YN'),
    tf.feature_column.numeric_column('Lead_date'),
    tf.feature_column.numeric_column('NEW_TO_SYS_YN'),
    tf.feature_column.numeric_column('NEW_TO_PROV_YN'),
    tf.feature_column.numeric_column('NEW_TO_DEP_YN'),
    tf.feature_column.numeric_column('NEW_TO_SPEC_YN'),
    tf.feature_column.numeric_column('NEW_TO_SERV_AREA_YN'),
    tf.feature_column.numeric_column('WEEKEND_YN'),
    tf.feature_column.categorical_column_with_vocabulary_list('DAY_OF_WEEK', vocabulary_list=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']),
    # Categorical identity column
    tf.feature_column.categorical_column_with_identity('HOUR_OF_DAY', num_buckets=24),
    # Numeric columns
    tf.feature_column.numeric_column('auto_confirmed'),
    tf.feature_column.numeric_column('PCP'),
    tf.feature_column.categorical_column_with_vocabulary_file(key='SPECIALTY_DEP', vocabulary_file='/u01/appt/specialty_dep.csv'),
    tf.feature_column.categorical_column_with_vocabulary_file(key='fin_class', vocabulary_file='/u01/appt/fin_class.csv'),
    # Numeric columns
    tf.feature_column.numeric_column('age'),
    tf.feature_column.numeric_column('mychart_active'),
    tf.feature_column.numeric_column('sex_c'),
    tf.feature_column.numeric_column('cancellation_rate'),
    tf.feature_column.numeric_column('confirmation_rate'),
    tf.feature_column.numeric_column('Keep_appt_rate'),
    tf.feature_column.numeric_column('no_show_rate'),
    tf.feature_column.numeric_column('PRCP'),
    tf.feature_column.numeric_column('SNOW'),
    tf.feature_column.numeric_column('TAVG'),
    tf.feature_column.numeric_column('destination_mi'),
    tf.feature_column.numeric_column('duration_mins'),
    # Engineered features that are created in the input_fn
]
def feature_columns(nbuckets):
    # Input columns
    (proc_code, SAME_DAY_YN, WALK_IN_YN, Lead_date, NEW_TO_SYS_YN, NEW_TO_PROV_YN, NEW_TO_DEP_YN, NEW_TO_SPEC_YN, NEW_TO_SERV_AREA_YN, WEEKEND_YN, DAY_OF_WEEK, HOUR_OF_DAY, auto_confirmed, PCP, SPECIALTY_DEP, fin_class, age, mychart_active, sex_c, cancellation_rate, confirmation_rate, Keep_appt_rate, no_show_rate, PRCP, SNOW, TAVG, destination_mi, duration_mins) = INPUT_COLUMNS
    # Bucketize the continuous columns
    age_buckets = np.linspace(0, 120, nbuckets).tolist()
    leaddate_buckets = np.linspace(0, 100, nbuckets).tolist()
    dest_buckets = np.linspace(0, 100, nbuckets).tolist()
    duration_buckets = np.linspace(0, 180, nbuckets).tolist()
    TAVG_buckets = np.linspace(0, 100, nbuckets).tolist()
    # Feature cross
    day_hr = tf.feature_column.crossed_column([DAY_OF_WEEK, HOUR_OF_DAY], 7 * 24)
    b_age = tf.feature_column.bucketized_column(age, age_buckets)
    b_leaddate = tf.feature_column.bucketized_column(Lead_date, leaddate_buckets)
    b_destination = tf.feature_column.bucketized_column(destination_mi, dest_buckets)
    b_duration = tf.feature_column.bucketized_column(duration_mins, duration_buckets)
    b_TAVG = tf.feature_column.bucketized_column(TAVG, TAVG_buckets)
    deep_columns = [
        # Embedding_column to "group" together ...
        tf.feature_column.embedding_column(day_hr, 10),
        tf.feature_column.embedding_column(proc_code, 10),
        tf.feature_column.embedding_column(SPECIALTY_DEP, 10),
        tf.feature_column.embedding_column(fin_class, 10),
        # Numeric columns
        SAME_DAY_YN,
        WALK_IN_YN,
        b_leaddate,
        NEW_TO_SYS_YN,
        NEW_TO_PROV_YN,
        NEW_TO_DEP_YN,
        NEW_TO_SPEC_YN,
        NEW_TO_SERV_AREA_YN,
        WEEKEND_YN,
        auto_confirmed,
        PCP,
        b_age,
        mychart_active,
        sex_c,
        cancellation_rate,
        confirmation_rate,
        Keep_appt_rate,
        no_show_rate,
        PRCP,
        SNOW,
        b_TAVG,
        b_destination,
        b_duration
    ]
    return deep_columns

he_init = tf.keras.initializers.he_normal()

def build_fully_connected(X, n_units=100, activation=tf.keras.activations.relu,
                          initialization=he_init, batch_normalization=False,
                          training=False, name=None):
    layer = tf.keras.layers.Dense(n_units,
                                  activation=None,
                                  kernel_initializer=initialization,
                                  name=name)(X)
    if batch_normalization:
        bn = tf.keras.layers.BatchNormalization(momentum=0.90)
        layer = bn(layer, training=training)
    return activation(layer)

def output_layer(h, n_units, initialization=he_init,
                 batch_normalization=False, training=False):
    logits = tf.keras.layers.Dense(n_units, activation=None)(h)
    if batch_normalization:
        bn = tf.keras.layers.BatchNormalization(momentum=0.90)
        logits = bn(logits, training=training)
    return logits

# build model
ACTIVATION = tf.keras.activations.relu
BATCH_SIZE = 550
HIDDEN_UNITS = [256, 128, 16, 1]
LEARNING_RATE = 0.01
NUM_STEPS = 10
USE_BATCH_NORMALIZATION = False
def dnn_custom_estimator(features, labels, mode, params):
    in_training = mode == tf.estimator.ModeKeys.TRAIN
    use_batch_norm = params['batch_norm']
    net = tf.feature_column.input_layer(features, params['features'])
    for i, n_units in enumerate(params['hidden_units']):
        net = build_fully_connected(net, n_units=n_units, training=in_training,
                                    batch_normalization=use_batch_norm,
                                    activation=params['activation'],
                                    name='hidden_layer' + str(i))
    logits = output_layer(net, 1, batch_normalization=use_batch_norm,
                          training=in_training)
    # With a single sigmoid output unit, threshold the logit; tf.argmax over a
    # size-1 axis would always return 0.
    predicted_classes = tf.cast(logits > 0.0, tf.int64)
    labels = tf.reshape(labels, [-1, 1])
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name='acc_op')
    true_positives = tf.metrics.true_positives(labels=labels,
                                               predictions=predicted_classes)
    # tf.summary.scalar('accuracy', accuracy[1])  # for visualizing in TensorBoard
    # if mode == tf.estimator.ModeKeys.EVAL:
    #     return tf.estimator.EstimatorSpec(mode, loss=loss,
    #                                       eval_metric_ops={'accuracy': accuracy})
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops={'true_positives': true_positives})
    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    extra_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    with tf.control_dependencies(extra_ops):
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
# Build the estimator
def build_estimator(model_dir, nbuckets, hidden_units):
    """
    Build an estimator starting from INPUT_COLUMNS.
    These include feature transformations and synthetic features.
    The model is a deep neural network classifier.
    """
    # estimator = tf.estimator.DNNRegressor(model_dir=model_dir,
    #                                       feature_columns=deep_columns,
    #                                       label_dimension=1,
    #                                       hidden_units=hidden_units,
    #                                       dropout=dropout)
    estimator = tf.estimator.DNNClassifier(model_dir=model_dir,
                                           # feature_columns=deep_columns,
                                           feature_columns=feature_columns(nbuckets),
                                           n_classes=2,
                                           hidden_units=hidden_units,
                                           dropout=dropout)
    # add extra evaluation metrics for hyperparameter tuning
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics1)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics2)
    estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics3)
    return estimator

def add_eval_metrics(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'false_negatives': tf.metrics.false_negatives(labels, pred_values)
    }

def add_eval_metrics1(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'true_negatives': tf.metrics.true_negatives(labels, pred_values)
    }

def add_eval_metrics2(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'false_positives': tf.metrics.false_positives(labels, pred_values)
    }

def add_eval_metrics3(labels, predictions):
    pred_values = predictions['class_ids']
    return {
        # 'rmse': tf.metrics.root_mean_squared_error(labels, pred_values)
        # 'accuracy': tf.metrics.accuracy(labels, pred_values)
        'true_positives': tf.metrics.true_positives(labels, pred_values)
    }

def serving_input_fn():
    feature_placeholders = {
        'proc_code': tf.placeholder(tf.string, [None]),
        'SAME_DAY_YN': tf.placeholder(tf.float32, [None]),
        'WALK_IN_YN': tf.placeholder(tf.float32, [None]),
        'Lead_date': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SYS_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_PROV_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_DEP_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SPEC_YN': tf.placeholder(tf.float32, [None]),
        'NEW_TO_SERV_AREA_YN': tf.placeholder(tf.float32, [None]),
        'WEEKEND_YN': tf.placeholder(tf.float32, [None]),
        'DAY_OF_WEEK': tf.placeholder(tf.string, [None]),
        'HOUR_OF_DAY': tf.placeholder(tf.int32, [None]),
        'auto_confirmed': tf.placeholder(tf.float32, [None]),
        'PCP': tf.placeholder(tf.float32, [None]),
        'SPECIALTY_DEP': tf.placeholder(tf.string, [None]),
        'fin_class': tf.placeholder(tf.string, [None]),
        'age': tf.placeholder(tf.float32, [None]),
        'mychart_active': tf.placeholder(tf.float32, [None]),
        'sex_c': tf.placeholder(tf.float32, [None]),
        'cancellation_rate': tf.placeholder(tf.float32, [None]),
        'confirmation_rate': tf.placeholder(tf.float32, [None]),
        'Keep_appt_rate': tf.placeholder(tf.float32, [None]),
        'no_show_rate': tf.placeholder(tf.float32, [None]),
        'PRCP': tf.placeholder(tf.float32, [None]),
        'SNOW': tf.placeholder(tf.float32, [None]),
        'TAVG': tf.placeholder(tf.float32, [None]),
        'destination_mi': tf.placeholder(tf.float32, [None]),
        'duration_mins': tf.placeholder(tf.float32, [None]),
    }
    # You can transform data here from the input format to the format expected by your model.
    # features = feature_placeholders  # no transformation needed
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
            features_in = dict(zip(CSV_COLUMNS, columns))
            label_in = features_in.pop(LABEL_COLUMN)
            return features_in, label_in
        # Create a list of file names that match the "glob" pattern (i.e. data_file_*.csv)
        filenames_dataset = tf.data.Dataset.list_files(filename)
        # Read lines from the text files, skipping the header row
        textlines_dataset = filenames_dataset.flat_map(
            lambda fname: tf.data.TextLineDataset(fname).skip(1))
        # Parse text lines as comma-separated values (CSV)
        dataset = textlines_dataset.map(decode_csv)
        # Note:
        # use tf.data.Dataset.flat_map to apply one-to-many transformations (here: filename -> text lines)
        # use tf.data.Dataset.map to apply one-to-one transformations (here: text line -> feature dict)
        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # repeat indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end of input after one pass
        dataset = dataset.repeat(num_epochs).batch(batch_size)
        features, labels = dataset.make_one_shot_iterator().get_next()
        return features, labels
    return _input_fn

def add_eval_metrics_2(labels, predictions):
    pred_values = predictions['predictions']
    return {
        'recall': tf.metrics.recall(labels, pred_values)
    }

# Create estimator train and evaluate function
def train_and_evaluate(args):
    # estimator = build_estimator('/u01/appt/appt_ts_5', 10, hidden_layers)
    estimator = tf.estimator.Estimator(model_fn=dnn_custom_estimator,
                                       model_dir='/u01/appt/appt_ts_5',
                                       params={'features': feature_columns(10),
                                               'batch_norm': USE_BATCH_NORMALIZATION,
                                               'activation': ACTIVATION,
                                               'hidden_units': HIDDEN_UNITS,
                                               'learning_rate': LEARNING_RATE})
    # estimator = tf.contrib.estimator.add_metrics(estimator, add_eval_metrics_2)
    train_spec = tf.estimator.TrainSpec(
        input_fn=read_dataset(
            filename='/u01/appt/appt_ts_train.csv',
            mode=tf.estimator.ModeKeys.TRAIN,
            batch_size=256),
        max_steps=50000000)
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=read_dataset(
            filename='/u01/appt/appt_ts_test.csv',
            mode=tf.estimator.ModeKeys.EVAL,
            batch_size=256),
        steps=100,
        exporters=exporter,
        throttle_secs=50)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
The error message is below:
--> 196 labels = tf.reshape(labels, [-1, 1])
197
198 loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)
ValueError: Attempted to convert 'tensor' to a tensor and failed. Error: None values not supported.
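My current guess, which I have not been able to verify, is that the LatestExporter attached to the EvalSpec invokes the model_fn in PREDICT mode, where the Estimator passes labels=None, and because dnn_custom_estimator has no PREDICT branch it falls through to the tf.reshape(labels, [-1, 1]) line with a None value. Here is a minimal sketch of the guard I think is missing, placed right after logits and predicted_classes are computed (assuming that diagnosis is right):

if mode == tf.estimator.ModeKeys.PREDICT:
    # In PREDICT mode (e.g. during export) labels is None, so return
    # predictions before any code that touches labels runs.
    predictions = {'logits': logits,
                   'probabilities': tf.sigmoid(logits),
                   'class_ids': predicted_classes}
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

If that is the cause, the reshape, the loss, and the metrics below the guard would then only run in TRAIN and EVAL modes. Is that the right way to handle it?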
Would really appreciate any insight.