我正在研究一个多标签图像分类问题。我知道应该使用 F1 分数(f1score)作为评估指标,因为在这种情况下准确率(accuracy)可能具有误导性。问题是我不知道如何定义 f1score:我找到的所有资料都是基于 y_pred 和 y_test 来计算的,而我不知道如何把它们与我的模型联系起来。
这是我的代码:
# Locations of the space-delimited label files for each split, and of the
# file that lists the color names used as label columns.
TRAIN_LABELS_FILE = "datasets/dresses/train/labels.txt"
VAL_LABELS_FILE = "datasets/dresses/val/labels.txt"
TEST_LABELS_FILE = "datasets/dresses/test/labels.txt"
COLOR_FILE = "datasets/dresses/names.txt"

# Build the column names for the label files: "path" followed by the colors.
# read_csv (default header handling) consumes the first line of COLOR_FILE as
# a header, so only the remaining names land in the data; 'beige' is put back
# by hand (presumably it is that first line -- TODO confirm against names.txt).
color = pd.read_csv(COLOR_FILE)
color = color.T
color_list = list(color.iloc[0])
color_list.insert(0, 'beige')
color_list.insert(0, 'path')

# `sep` and `delimiter` are aliases in pandas.read_csv. Passing both is
# redundant and is rejected with a ValueError by recent pandas versions, so
# only `sep` is given here.
train = pd.read_csv(TRAIN_LABELS_FILE, sep=" ", names=color_list, dtype="str")
val = pd.read_csv(VAL_LABELS_FILE, sep=" ", names=color_list, dtype="str")
test = pd.read_csv(TEST_LABELS_FILE, sep=" ", names=color_list, dtype="str")
def get_generator(filename, number=None):
    """Create the shuffled image/label batch generator for a label file.

    Args:
        filename: Path to a space-delimited label file whose columns are
            named by the module-level ``color_list`` ("path" first).
        number: If truthy, only the first ``number`` rows are used.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``,
        reading images relative to the directory that contains ``filename``.
    """
    frame = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        frame = frame[:number]

    # Pixel values are multiplied by 1/255 by the generator.
    rescaler = ImageDataGenerator(rescale=1./255.)
    image_root = os.path.dirname(filename)

    flow_options = dict(
        y_col=color_list[1:],  # every column except "path" is a label
        target_size=(128, 128),
        batch_size=4,
        shuffle=True,
        seed=42,
        interpolation="bilinear",
        class_mode="other",
    )
    return rescaler.flow_from_dataframe(frame, image_root, "path", **flow_options)
def get_generator2(filename, number=None):
    """Create the shuffled image/label batch generator used for validation.

    Args:
        filename: Path to a space-delimited label file whose columns are
            named by the module-level ``color_list`` ("path" first).
        number: If truthy, only the first ``number`` rows are used.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``,
        reading images relative to the directory that contains ``filename``.
    """
    labels = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        labels = labels[:number]

    # Create the generator (pixel values are multiplied by 1/255).
    rescaler = ImageDataGenerator(rescale=1./255.)
    image_root = os.path.dirname(filename)

    flow_options = dict(
        y_col=color_list[1:],  # every column except "path" is a label
        target_size=(128, 128),
        batch_size=4,
        shuffle=True,
        seed=42,
        interpolation="bilinear",
        class_mode="other",
    )
    return rescaler.flow_from_dataframe(labels, image_root, "path", **flow_options)
def get_generator3(filename, number=None):
    """Create the unshuffled, one-image-per-batch generator used for testing.

    Args:
        filename: Path to a space-delimited label file whose columns are
            named by the module-level ``color_list`` ("path" first).
        number: If truthy, only the first ``number`` rows are used.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``
        with ``class_mode=None``, reading images relative to the directory
        that contains ``filename``.
    """
    samples = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        samples = samples[:number]

    # Pixel values are multiplied by 1/255 by the generator.
    rescaler = ImageDataGenerator(rescale=1./255.)
    image_root = os.path.dirname(filename)

    flow_options = dict(
        y_col=color_list[1:],
        target_size=(128, 128),
        batch_size=1,
        shuffle=False,  # keep the file order of the samples
        seed=42,
        interpolation="bilinear",
        class_mode=None,
    )
    return rescaler.flow_from_dataframe(samples, image_root, "path", **flow_options)
# Build the three data generators from their label files. This runs as soon
# as the module is executed or imported.
train_generator=get_generator(TRAIN_LABELS_FILE)
valid_generator=get_generator2(VAL_LABELS_FILE)
test_generator=get_generator3(TEST_LABELS_FILE)
# Alternative EfficientNet package that was tried earlier (left disabled):
#from efficientnet import EfficientNetB5
import ssl
# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, which turns off TLS certificate verification for every
# default-context HTTPS request made by this process. Presumably a workaround
# for certificate errors while downloading the pretrained weights below --
# confirm, and prefer fixing the local CA bundle over keeping this.
ssl._create_default_https_context = ssl._create_unverified_context
#from efficientnet.keras import EfficientNetB5
from keras_efficientnets import EfficientNetB5
# Shared EfficientNetB5 backbone without its classification head
# (include_top=False), initialised with the 'imagenet' weights.
# NOTE(review): IMG_WIDTH, IMG_HEIGHT and CHANNELS are not defined in the
# visible part of the file; confirm they agree with the generators'
# target_size=(128, 128).
effnet = EfficientNetB5(input_shape=(IMG_WIDTH, IMG_HEIGHT, CHANNELS),
weights='imagenet',
include_top=False)
# F-score instead of accuracy (classification report)
def create_model(datagen):
    """Build and compile the classifier on top of the shared ``effnet`` backbone.

    The stack is: ``effnet`` -> global average pooling -> dropout (0.5) ->
    Dense(5, relu) -> Dense(14, sigmoid). It is compiled with binary
    cross-entropy loss, the Adam optimizer and ``keras_metrics.f1_score()``
    as the reported metric.

    Args:
        datagen: Currently unused; kept so existing callers keep working.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(effnet)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(5, activation='relu'))
    # One sigmoid unit per output, so each is scored independently.
    model.add(Dense(14, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[keras_metrics.f1_score()])
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model
def main():
    """Train the model on the module-level train/validation generators.

    Builds the callbacks and the model, derives the number of steps per epoch
    from each generator's sample count and batch size, and fits for 10 epochs.
    """
    callbacks = get_callbacks()
    model = create_model(train_generator)

    # Whole batches only: a trailing partial batch is not counted.
    steps_train = train_generator.n // train_generator.batch_size
    steps_valid = valid_generator.n // valid_generator.batch_size

    model.fit_generator(generator=train_generator,
                        steps_per_epoch=steps_train,
                        validation_data=valid_generator,
                        validation_steps=steps_valid,
                        epochs=10,
                        # The callbacks were built but never handed to the
                        # training loop, so checkpointing and early stopping
                        # never ran; pass them in.
                        callbacks=callbacks)
def get_callbacks():
    """Return the training callbacks: a model checkpoint, then early stopping.

    Both callbacks monitor ``val_loss``. The checkpoint writes to
    ``weights.hdf5`` with ``save_best_only=True``; early stopping uses a
    patience of 2.
    """
    checkpoint = ModelCheckpoint("weights.hdf5", monitor="val_loss", save_best_only=True, verbose=1)
    early_stop = EarlyStopping(monitor="val_loss", patience=2, verbose=1)
    return [checkpoint, early_stop]
# Start training. There is no __main__ guard, so this also runs on import.
main()
如何定义 f1score,才能在每个 epoch(训练轮次)结束后显示其结果?