我正在学习 Google 机器学习速成课程,目前正在熟悉 TensorFlow。我尝试按照其中一个教程的做法换用另一个数据集练习,但遇到了错误。数据集来自加州大学欧文分校(UCI)的机器学习资料库,链接:https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data
# Print the name of every thread from new_list that also appears in old_list.
for item in new_list:
    if item not in old_list:
        continue
    print(item['name'])
但是我不断收到如下错误消息:

    line 118, in <module>
        training_predictions = np.array([item['predictions'][0] for item in training_predictions])
    KeyError: 'predictions'

我不太确定为什么预测结果里没有这个键。
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build an input pipeline feeding (features, labels) batches to an Estimator.

    Args:
        features: pandas DataFrame of features.
        targets: pandas DataFrame of targets.
        batch_size: Number of examples per batch.
        shuffle: Whether to shuffle the data before emitting batches.
        num_epochs: How many times to repeat the data; None repeats indefinitely.

    Returns:
        Tuple of (features, labels) tensors for the next data batch.
    """
    # Re-pack the DataFrame columns as a dict of numpy arrays.
    feature_arrays = {name: np.array(column) for name, column in dict(features).items()}

    # Assemble the dataset with batching and repetition configured.
    # warning: 2GB limit
    ds = Dataset.from_tensor_slices((feature_arrays, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(50)

    # get_next() already yields the (features, labels) pair.
    return ds.make_one_shot_iterator().get_next()
##############################################################
def train_model(learning_rate, steps, batch_size, hidden_units, training_examples,
                training_targets, validation_examples, validation_targets):
    """Train a DNNClassifier, reporting RMSE on training/validation data per period.

    Args:
        learning_rate: Learning rate for the Adagrad optimizer.
        steps: Total number of training steps.
        batch_size: Batch size for training.
        hidden_units: List of ints, the number of units in each hidden layer.
        training_examples: pandas DataFrame of training features.
        training_targets: pandas DataFrame of training targets.
        validation_examples: pandas DataFrame of validation features.
        validation_targets: pandas DataFrame of validation targets.

    Returns:
        Tuple of (trained DNNClassifier, list of training RMSEs, list of validation RMSEs).
    """
    periods = 3
    # train() expects an integer step count; Python 3 "/" yields a float.
    steps_per_period = int(steps / periods)
    my_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=construct_feature_columns(),
        hidden_units=hidden_units,
        optimizer=my_optimizer,
        config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
    )
    # Create input functions.
    training_input_fn = lambda: my_input_fn(training_examples,
                                            training_targets,
                                            batch_size=batch_size)
    predict_training_input_fn = lambda: my_input_fn(training_examples,
                                                    training_targets,
                                                    num_epochs=1,
                                                    shuffle=False)
    predict_validation_input_fn = lambda: my_input_fn(validation_examples,
                                                      validation_targets,
                                                      num_epochs=1,
                                                      shuffle=False)
    print("Training model...")
    print("RMSE (on training data):")
    training_rmse = []
    validation_rmse = []
    for period in range(0, periods):
        # Train the model, starting from the prior state.
        dnn_classifier.train(
            input_fn=training_input_fn,
            steps=steps_per_period
        )
        # Take a break and compute predictions.
        # BUG FIX: DNNClassifier.predict yields dicts keyed 'class_ids',
        # 'probabilities', 'logits', ... — there is no 'predictions' key
        # (that key is produced by DNNRegressor), hence the KeyError.
        # Use the predicted class id so it is comparable with integer targets.
        training_predictions = dnn_classifier.predict(input_fn=predict_training_input_fn)
        training_predictions = np.array([item['class_ids'][0] for item in training_predictions])
        validation_predictions = dnn_classifier.predict(input_fn=predict_validation_input_fn)
        validation_predictions = np.array([item['class_ids'][0] for item in validation_predictions])
        # Compute training and validation loss.
        training_root_mean_squared_error = math.sqrt(
            metrics.mean_squared_error(training_predictions, training_targets))
        validation_root_mean_squared_error = math.sqrt(
            metrics.mean_squared_error(validation_predictions, validation_targets))
        # Occasionally print the current loss.
        print("  period %02d : %0.2f" % (period, training_root_mean_squared_error))
        # Add the loss metrics from this period to our list.
        training_rmse.append(training_root_mean_squared_error)
        validation_rmse.append(validation_root_mean_squared_error)
    print("Model training finished.")
    # Output a graph of loss metrics over periods.
    plt.ylabel("RMSE")
    plt.xlabel("Periods")
    plt.title("Root Mean Squared Error vs. Periods")
    plt.tight_layout()
    plt.plot(training_rmse, label="training")
    plt.plot(validation_rmse, label="validation")
    plt.legend()
    # BUG FIX: training_rmse/validation_rmse are lists; formatting a list with
    # %0.2f raises TypeError. Print the final period's value instead.
    print("Final RMSE (on training data):   %0.2f" % training_rmse[-1])
    print("Final RMSE (on validation data): %0.2f" % validation_rmse[-1])
    return dnn_classifier, training_rmse, validation_rmse