Question: I'm doing multi-class image classification with TensorFlow 2.5 on Google Colab. I'm getting three different values for classification accuracy and I don't know which one to trust, or why they differ from one another.
Demonstration:
When I evaluate on the test set, I get accuracy_1:
29/29 [==============================] - 5s 147ms/step - loss: 1.1036 - accuracy: 0.3186
When I predict on the test set, I get accuracy_2, i.e. 0.22:
              precision    recall  f1-score   support

           0       0.69      0.12      0.21      1305
           1       0.15      0.78      0.26       272
           2       0.14      0.13      0.13       231

    accuracy                           0.22      1808
   macro avg       0.33      0.34      0.20      1808
weighted avg       0.54      0.22      0.20      1808
Here is how I get accuracy_3, whose value is 0.2129424778761062:
import numpy as np
from sklearn.metrics import accuracy_score

prediction = np.argmax(detector.predict(test_dataset), axis=1)
accuracy_3 = accuracy_score(
    np.concatenate([label.numpy() for image, label in test_dataset.take(-1)]),
    prediction
)
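A quick way to check whether test_dataset yields its labels in the same order on every pass (a sketch; the variable names are just illustrative):

labels_pass_1 = np.concatenate([label.numpy() for image, label in test_dataset])
labels_pass_2 = np.concatenate([label.numpy() for image, label in test_dataset])
# if the input pipeline is nondeterministic, I would expect this to print False
print(np.array_equal(labels_pass_1, labels_pass_2))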
I found that if I run the block of code that calculates accuracy_3 multiple times, I get a different result every time, though never far from accuracy_2, i.e. 0.22. Below is the code that calculates accuracy_1 and accuracy_2:
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import Callback

class Peek(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        current_decayed_lr = self.model.optimizer._decayed_lr(tf.float32).numpy()
        print(f"Current learning rate: {current_decayed_lr}")

    def on_epoch_end(self, epoch, logs=None):
        print("Evaluating...")
        self.model.evaluate(test_dataset, verbose=1)  # calculates accuracy_1
        print("Predicting...")
        predictions = np.argmax(self.model.predict(test_dataset), axis=1)
        true_categories = np.array([label.numpy() for image, label in test_dataset.unbatch()])
        print(classification_report(true_categories, predictions))  # calculates accuracy_2
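For reference, an order-safe way to compute the same accuracy would be to pair each batch's predictions with that same batch's labels in a single pass (a sketch, not something I currently run):

# compute accuracy in one pass so predictions and labels are guaranteed
# to come from the same batches, in the same order
correct, total = 0, 0
for image_batch, label_batch in test_dataset:
    batch_preds = np.argmax(detector.predict_on_batch(image_batch), axis=1)
    correct += int(np.sum(batch_preds == label_batch.numpy()))
    total += int(label_batch.shape[0])
print(f"single-pass accuracy: {correct / total}")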
The difference between accuracy_2 and accuracy_3 is plausibly due to random chance, but accuracy_1 is much larger than both. I searched Stack Overflow, and some posts say the discrepancy can be caused by shuffle=True in ImageDataGenerator when creating the test set. My case is different, though, since I'm not using ImageDataGenerator; I'm loading the data from TFRecords (for performance). Here is the full code:
import os
import math
import numpy as np
import tensorflow as tf
from glob import glob
from progressbar import progressbar
from os.path import basename, exists
from tensorflow.sparse import to_dense
from tensorflow.data import Dataset, Options, TFRecordDataset
from tensorflow.image import decode_jpeg, encode_jpeg, resize
from tensorflow.train import Feature, Features, BytesList, Int64List, FloatList, Example
from tensorflow.io import read_file, TFRecordWriter, FixedLenFeature, VarLenFeature, parse_single_example
from tensorflow.data.experimental import AUTOTUNE
class DataLoader:
    def __init__(self, subset_name):
        self.subset_name = subset_name
        self.file_pattern = glob(
            f"./dataset/{self.subset_name}/**/*.jpg",
            recursive=True
        )
        self.target_size = (224, 224)
        self.classes = [b"Negative", b"Positive", b"Unreadable"]
        self.n_images = len(self.file_pattern)
        self.n_shards = 32
        self.write_shard_size = math.ceil(1.0 * self.n_images / self.n_shards)
        self.read_shard_size = 64
        self.output_dir = f"tfrecords-jpeg-{subset_name}-{'x'.join(map(lambda x: str(x), self.target_size))}"

    def fetch_image_and_label(self, filename):
        bits = read_file(filename)
        image = decode_jpeg(bits)
        image = resize(image, self.target_size)
        height = tf.shape(image)[0]
        width = tf.shape(image)[1]
        image = tf.cast(image, tf.uint8)
        image = encode_jpeg(image, optimize_size=True, chroma_downsampling=False)
        label = tf.expand_dims(filename, axis=-1)
        label = tf.strings.split(label, sep="/")
        label = label.values[-2]
        return image, label, height, width

    @staticmethod
    def _bytestring_feature(list_of_bytestrings):
        return Feature(bytes_list=BytesList(value=list_of_bytestrings))

    @staticmethod
    def _int_feature(list_of_ints):
        return Feature(int64_list=Int64List(value=list_of_ints))

    @staticmethod
    def _float_feature(list_of_floats):
        return Feature(float_list=FloatList(value=list_of_floats))

    def to_tfrecord(self, tfrec_filewriter, img_bytes, label, height, width):
        class_num = np.argmax(np.array(self.classes) == label)
        one_hot_class = np.eye(len(self.classes))[class_num]
        feature = {
            "image": self._bytestring_feature([img_bytes]),
            "class": self._int_feature([class_num]),
            "label": self._bytestring_feature([label]),
            "size": self._int_feature([height, width]),
            "one_hot_class": self._float_feature(one_hot_class.tolist())
        }
        return Example(features=Features(feature=feature))

    def write_records(self):
        print(f"{self.n_images} images, {self.n_shards} shards with {self.write_shard_size} images each.")
        filenames = Dataset.list_files(self.file_pattern, seed=35155)
        dataset = filenames.map(self.fetch_image_and_label, num_parallel_calls=AUTOTUNE).batch(self.write_shard_size)
        if not exists(self.output_dir):
            os.mkdir(self.output_dir)
        print("Writing TFRecords...")
        for shard, (image, label, height, width) in enumerate(dataset):
            shard_size = image.numpy().shape[0]
            filename = f"{self.output_dir}/{str(shard).zfill(2)}-{shard_size}.tfrec"
            with TFRecordWriter(filename) as out_file:
                for i in progressbar(range(shard_size)):
                    example = self.to_tfrecord(
                        out_file,
                        image.numpy()[i],
                        label.numpy()[i],
                        height.numpy()[i],
                        width.numpy()[i]
                    )
                    out_file.write(example.SerializeToString())
            print(f"Wrote file {filename} containing {shard_size} records")

    def _read_tfrecord(self, example):
        features = {
            "image": FixedLenFeature([], tf.string),
            "class": FixedLenFeature([], tf.int64),
            "label": FixedLenFeature([], tf.string),
            "size": FixedLenFeature([2], tf.int64),
            "one_hot_class": VarLenFeature(tf.float32)
        }
        example = parse_single_example(example, features)
        image = decode_jpeg(example["image"], channels=3)
        image = tf.reshape(image, [*self.target_size, 3])
        class_num = example["class"]
        label = example["label"]
        height = example["size"][0]
        width = example["size"][1]
        one_hot_class = to_dense(example["one_hot_class"])
        # return image, class_num, label, height, width, one_hot_class
        # return only image and class_num because we're classifying images
        return image, class_num

    def read_records(self):
        from tensorflow.io.gfile import glob
        option_no_order = Options()
        option_no_order.experimental_deterministic = False
        filenames = glob(f"{self.output_dir}/*.tfrec")
        dataset = TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
        dataset = dataset.with_options(option_no_order)
        dataset = dataset.map(self._read_tfrecord, num_parallel_calls=AUTOTUNE)
        dataset = dataset.shuffle(10000)
        dataset = dataset.prefetch(buffer_size=AUTOTUNE)
        dataset = dataset.batch(self.read_shard_size)
        return dataset
train_loader = DataLoader("train")
validation_loader = DataLoader("validation")
test_loader = DataLoader("test")
train_dataset = train_loader.read_records()
validation_dataset = validation_loader.read_records()
test_dataset = test_loader.read_records()
train_dataset = train_dataset.concatenate(validation_dataset)
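Note that read_records also sets option_no_order.experimental_deterministic = False, which lets the shard files interleave in a nondeterministic order even without the shuffle call. For comparison, a fully deterministic read would look something like this (a sketch; read_records_deterministic is a hypothetical helper, not part of my actual code):

def read_records_deterministic(loader):
    # same shards as loader.read_records, but read sequentially with
    # deterministic order and no shuffling, so every pass over the
    # dataset yields the examples in the same order
    from tensorflow.io.gfile import glob
    filenames = sorted(glob(f"{loader.output_dir}/*.tfrec"))
    dataset = TFRecordDataset(filenames)
    dataset = dataset.map(loader._read_tfrecord, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(loader.read_shard_size)
    return dataset.prefetch(buffer_size=AUTOTUNE)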
The difference between accuracy_2 and accuracy_3 persists, and accuracy_3 still changes every time I run the block of code that computes it, even after removing dataset = dataset.shuffle(10000) from def read_records(self) in the DataLoader class.
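For context, this is the reshuffling behavior that deleting that line was meant to rule out: Dataset.shuffle reshuffles on every iteration by default (reshuffle_each_iteration defaults to True). A toy sketch, unrelated to my actual data:

# toy sketch: shuffle() reshuffles each time the dataset is iterated
ds = tf.data.Dataset.range(10).shuffle(buffer_size=10)
print(list(ds.as_numpy_iterator()))  # one order
print(list(ds.as_numpy_iterator()))  # typically a different order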
I'll also paste the code showing how the model is instantiated and compiled, for more context.
from tensorflow.keras import Input, Model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.applications.densenet import preprocess_input

def create_model():
    feature_extractor = DenseNet201(
        weights="imagenet",
        input_shape=(224, 224, 3),
        include_top=False
    )
    feature_extractor.trainable = True
    inputs = Input([224, 224, 3])
    x = preprocess_input(inputs)
    x = feature_extractor(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(32, activation="elu")(x)
    x = Dropout(0.8)(x)
    outputs = Dense(3, activation="softmax")(x)
    detector = Model(inputs, outputs)
    detector.compile(
        optimizer=SGD(learning_rate=0.001, momentum=0.9),
        loss=["sparse_categorical_crossentropy"],
        metrics=["sparse_categorical_accuracy"]
    )
    return detector

detector = create_model()
peek = Peek()
detector.fit(
    train_dataset,
    epochs=1,
    validation_data=test_dataset,
    class_weight=class_weight,  # class_weight is defined elsewhere (not shown)
    callbacks=[peek],
)
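class_weight is defined earlier in the notebook (not shown here). For context, a typical way to derive it from per-class example counts looks something like this (purely illustrative; the counts are placeholders, not my real data):

import numpy as np

# placeholder counts, not the real dataset statistics
counts = np.array([1000, 300, 200])
weights = counts.sum() / (len(counts) * counts)  # inverse-frequency weighting
class_weight = dict(enumerate(weights))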