When I try to use the AdagradDA optimizer, I get an error about the batch size I pass in. I entered a batch size of 300, since I am training on 60,000 samples.
My code:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import time
start_time = time.time()
data = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = data.load_data()
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot']
train_images = train_images/255.0
test_images = test_images/255.0
optimizer1 = tf.compat.v1.train.AdagradDAOptimizer(0.001,0)
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(100, activation="softsign"),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(optimizer=optimizer1, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.fit(train_images, train_labels, epochs=5)
test_loss, test_acc1 = model.evaluate(test_images, test_labels)
print("Test acc is:", test_acc1)
print("--- %s seconds ---" % (time.time() - start_time))
The error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
527 as_ref=input_arg.is_ref,
--> 528 preferred_dtype=default_dtype)
529 except TypeError as err:
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accepted_result_types)
1272 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
-> 1273 (dtype.name, value.dtype.name, value))
1274 return value
ValueError: Tensor conversion requested dtype int64 for Tensor with dtype int32: <tf.Tensor 'training_16/AdagradDA/update_dense_22/kernel/Identity:0' shape=() dtype=int32>
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-9-41ac628d29d8> in <module>()
29 model.compile(optimizer=optimizer1, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
30
---> 31 model.fit(train_images, train_labels, epochs=5)
32
33 test_loss, test_acc1 = model.evaluate(test_images, test_labels)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
725 max_queue_size=max_queue_size,
726 workers=workers,
--> 727 use_multiprocessing=use_multiprocessing)
728
729 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
673 validation_steps=validation_steps,
674 validation_freq=validation_freq,
--> 675 steps_name='steps_per_epoch')
676
677 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
187 # function we recompile the metrics based on the updated
188 # sample_weight_mode value.
--> 189 f = _make_execution_function(model, mode)
190
191 # Prepare validation data. Hold references to the iterator and the input list
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in _make_execution_function(model, mode)
564 if model._distribution_strategy:
565 return distributed_training_utils._make_execution_function(model, mode)
--> 566 return model._make_execution_function(mode)
567
568
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _make_execution_function(self, mode)
2181 def _make_execution_function(self, mode):
2182 if mode == ModeKeys.TRAIN:
-> 2183 self._make_train_function()
2184 return self.train_function
2185 if mode == ModeKeys.TEST:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _make_train_function(self)
2113 # Training updates
2114 updates = self.optimizer.get_updates(
-> 2115 params=self._collected_trainable_weights, loss=self.total_loss)
2116 # Unconditional updates
2117 updates += self.get_updates_for(None)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizers.py in get_updates(self, loss, params)
751 grads = self.optimizer.compute_gradients(loss, params)
752 opt_update = self.optimizer.apply_gradients(
--> 753 grads, global_step=self.iterations)
754
755 self.updates.append(opt_update)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/optimizer.py in apply_gradients(self, grads_and_vars, global_step, name)
612 scope_name = var.op.name
613 with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
--> 614 update_ops.append(processor.update_op(self, grad))
615 if global_step is None:
616 apply_updates = self._finish(update_ops, name)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/optimizer.py in update_op(self, optimizer, g)
169 return optimizer._resource_apply_sparse_duplicate_indices(
170 g.values, self._v, g.indices)
--> 171 update_op = optimizer._resource_apply_dense(g, self._v)
172 if self._v.constraint is not None:
173 with ops.control_dependencies([update_op]):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/adagrad_da.py in _resource_apply_dense(self, grad, var)
136 math_ops.cast(self._l2_regularization_strength, grad.dtype.base_dtype),
137 global_step,
--> 138 use_locking=self._use_locking)
139
140 def _apply_sparse(self, grad, var):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/gen_training_ops.py in resource_apply_adagrad_da(var, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, use_locking, name)
1351 grad=grad, lr=lr, l1=l1, l2=l2,
1352 global_step=global_step,
-> 1353 use_locking=use_locking, name=name)
1354 return _op
1355 _result = None
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
549 if input_arg.type != types_pb2.DT_INVALID:
550 raise TypeError("%s expected type of %s." %
--> 551 (prefix, dtypes.as_dtype(input_arg.type).name))
552 else:
553 # Update the maps with the default, if needed.
TypeError: Input 'global_step' of 'ResourceApplyAdagradDA' Op has type int32 that does not match expected type of int64.
I tried changing the value to 100, 30, 10, 0, and 1. None of these work, and I get the same error. I'm not sure what will fix this. I'm using Google Colab and don't know exactly how to update the TensorFlow version, since a post mentioned that as a fix.
Answer 0 (score: 1)
Looking at the error message:
TypeError: Input 'global_step' of 'ResourceApplyAdagradDA' Op has type int32 that does not match expected type of int64
It appears the optimizer's second argument should be an int64. Since you are passing a plain Python integer, it gets converted to int32 by default. Try this:
optimizer1 = tf.compat.v1.train.AdagradDAOptimizer(0.001, tf.constant(0, tf.int64))
I'm not sure this is entirely correct; I think the training step may need to be a variable that you increment after each step. I believe it should work as written, but it may behave as if the optimizer were permanently on its first step.
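For what it's worth, here is a minimal, untested sketch of that idea for TF 1.x graph mode. The global step variable from get_or_create_global_step is int64 by default, which is the dtype ResourceApplyAdagradDA expects; the StepIncrement callback and the choice to bump the step once per batch are my own assumptions, since model.fit() will not increment this particular variable for you:
import tensorflow as tf
from tensorflow import keras

# An int64 step variable; ResourceApplyAdagradDA requires int64.
global_step = tf.compat.v1.train.get_or_create_global_step()
optimizer1 = tf.compat.v1.train.AdagradDAOptimizer(0.001, global_step)

# Build the increment op once, outside the training loop.
increment_step = tf.compat.v1.assign_add(global_step, 1)

# Hypothetical callback that advances the step after every batch.
class StepIncrement(keras.callbacks.Callback):
    def on_batch_end(self, batch, logs=None):
        tf.compat.v1.keras.backend.get_session().run(increment_step)

model.fit(train_images, train_labels, epochs=5, callbacks=[StepIncrement()])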