Keras multi-label classification and F1 score problem

Posted: 2019-10-13 10:42:21

Tags: python keras multilabel-classification

I am working on a multi-label image classification problem. I know I should use the F1 score as my metric, because in this setting accuracy can be confusing and misleading. The problem is that I cannot work out how to define the F1 score. Everything I have found operates on y_pred and y_test, and I do not know how to connect that to my model.
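
The kind of definition I keep finding is a batch-wise F1 built from the y_true and y_pred tensors, roughly like the sketch below (my adaptation; it assumes the sigmoid outputs are thresholded at 0.5 and averages F1 over the labels in the batch):

from keras import backend as K

def f1(y_true, y_pred):
    # Sketch: per-label F1 on the current batch, averaged over the labels.
    # Assumes y_pred are sigmoid probabilities and 0.5 is a sensible threshold.
    y_true = K.cast(y_true, 'float32')
    y_pred = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    return K.mean(2 * precision * recall / (precision + recall + K.epsilon()))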

Here is my code:

import os

import pandas as pd
import keras_metrics  # pip install keras-metrics
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping

# input size fed to the network (matches the generators' target_size; 3 = RGB)
IMG_WIDTH, IMG_HEIGHT, CHANNELS = 128, 128, 3

TRAIN_LABELS_FILE = "datasets/dresses/train/labels.txt"
VAL_LABELS_FILE = "datasets/dresses/val/labels.txt"
TEST_LABELS_FILE = "datasets/dresses/test/labels.txt"
COLOR_FILE = "datasets/dresses/names.txt"

color = pd.read_csv(COLOR_FILE)
color = color.T
color_list = list(color.iloc[0])
color_list.insert(0, 'beige')
color_list.insert(0, 'path')
# each labels.txt row: image path followed by one binary column per colour, space separated
train = pd.read_csv(TRAIN_LABELS_FILE, sep=" ", names=color_list, dtype="str")
val = pd.read_csv(VAL_LABELS_FILE, sep=" ", names=color_list, dtype="str")
test = pd.read_csv(TEST_LABELS_FILE, sep=" ", names=color_list, dtype="str")

def get_generator(filename, number=None):
    train = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        train = train[:number]
    datagen=ImageDataGenerator(rescale=1./255.)
    directory = os.path.dirname(filename)
    return datagen.flow_from_dataframe(train, 
                                       directory, 
                                       "path", 
                                       y_col = color_list[1:], 
                                       target_size=(128, 128),
                                       batch_size=4,
                                       shuffle=True,
                                       seed=42,
                                       interpolation="bilinear",
                                       class_mode="other")
def get_generator2(filename, number=None):
    val = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        val = val[:number]
    # create the validation data generator
    valid_datagen=ImageDataGenerator(rescale=1./255.)
    directory = os.path.dirname(filename)
    return valid_datagen.flow_from_dataframe(val, 
                                       directory, 
                                       "path", 
                                       y_col = color_list[1:], 
                                       target_size=(128, 128),
                                       batch_size=4,
                                       shuffle=True,
                                       seed=42,
                                       interpolation="bilinear",
                                       class_mode="other")
def get_generator3(filename, number=None):
    test = pd.read_csv(filename, delimiter=' ', names=color_list, dtype="str")
    if number:
        test = test[:number]
    test_datagen=ImageDataGenerator(rescale=1./255.)
    directory = os.path.dirname(filename)
    return test_datagen.flow_from_dataframe(test, 
                                       directory,
                                       "path", 
                                       y_col = color_list[1:], 
                                       target_size=(128, 128),
                                       batch_size=1,
                                       shuffle=False,
                                       seed=42,
                                       interpolation="bilinear",
                                       class_mode=None)
train_generator=get_generator(TRAIN_LABELS_FILE)
valid_generator=get_generator2(VAL_LABELS_FILE)
test_generator=get_generator3(TEST_LABELS_FILE)
#from efficientnet import EfficientNetB5
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

#from efficientnet.keras import EfficientNetB5
from keras_efficientnets import EfficientNetB5

effnet = EfficientNetB5(input_shape=(IMG_WIDTH, IMG_HEIGHT, CHANNELS),
                          weights='imagenet',
                          include_top=False)
# F1 score instead of accuracy (classification report)
def create_model(datagen):
    model = Sequential()
    model.add(effnet)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(5, activation='relu'))
    model.add(Dense(14, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam', 
                  metrics=[keras_metrics.f1_score()])
    model.summary()  # summary() already prints; wrapping it in print() just adds "None"
    return model
def main():
    #train_generator
    #valid_generator
    callbacks = get_callbacks()

    model = create_model(train_generator)


    STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
    STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
    STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
    model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    callbacks=callbacks,
                    epochs=10)
def get_callbacks():

    callbacks = []

    mc = ModelCheckpoint("weights.hdf5", monitor="val_loss", save_best_only=True, verbose=1)
    es = EarlyStopping(monitor="val_loss", patience=2, verbose=1)

    callbacks.append(mc)
    callbacks.append(es)

    return callbacks
main()
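
On the classification-report idea from the comment above: after training I was going to check F1 offline on the test set with scikit-learn, roughly like this (only a sketch; it assumes model and STEP_SIZE_TEST are made available outside main(), and it thresholds the sigmoid outputs at 0.5):

import numpy as np
from sklearn.metrics import classification_report

test_generator.reset()
probs = model.predict_generator(test_generator, steps=STEP_SIZE_TEST)
y_pred = (probs > 0.5).astype(int)                      # 0.5 cut-off per label
y_true = test[color_list[1:]].astype(int).values[:len(y_pred)]
print(classification_report(y_true, y_pred, target_names=color_list[1:]))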

How can I define the F1 score so that the result is shown after each epoch?
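
One workaround I have seen (I am not sure it is the right approach) is to compute F1 over the whole validation set in a callback at the end of every epoch instead of as a compiled metric. A rough sketch, with made-up names and assuming scikit-learn is available:

import numpy as np
from sklearn.metrics import f1_score
from keras.callbacks import Callback

class ValF1Callback(Callback):
    # Hypothetical callback: run the validation generator through the model
    # after each epoch and print a macro-averaged F1 over the 14 labels.
    def __init__(self, generator, steps):
        super(ValF1Callback, self).__init__()
        self.generator = generator
        self.steps = steps

    def on_epoch_end(self, epoch, logs=None):
        y_true, y_pred = [], []
        for _ in range(self.steps):
            x, y = next(self.generator)
            y_true.append(y)
            y_pred.append((self.model.predict(x) > 0.5).astype(int))
        y_true = np.vstack(y_true).astype(int)
        y_pred = np.vstack(y_pred)
        print(" - val_f1 (macro): %.4f" % f1_score(y_true, y_pred, average='macro'))

Presumably this would then be appended to the list returned by get_callbacks(), e.g. callbacks.append(ValF1Callback(valid_generator, STEP_SIZE_VALID)), but I have not been able to verify it.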

0 Answers:

No answers yet.