我正在使用InceptionResNetV2网络进行迁移学习。 数据是医学图像集。 我在训练后保存了模型,然后将其用于测试数据,该数据是2类数据。问题在于,当我使用keras预报_生成器进行预测时,我获得了不同的准确性,甚至概率都不匹配。
这是我的代码段的一部分:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import class_weight
from keras.preprocessing.image import ImageDataGenerator, image,load_img
from keras import backend as K
from keras import models
from keras import layers
from keras import optimizers
from keras.models import load_model
from keras.models import model_from_json
# set image dimension for Conv layer etc based on tensor flow or theano
K.set_image_dim_ordering('tf')
from keras.applications import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input
from keras.callbacks import ModelCheckpoint
import os
import sys
import glob
import argparse
W = 299
H = 299
nc = 3
nclass = 2
load_mod = 1
if load_mod:
#model = load_model('diabetic_v9.h5')
json_file = open("diabetic_v9.json", 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights("diabetic_v9_weights.h5")
model.summary()
else:
def Resnet_finetune_model():
#IF Not LOADING ..MODEL is BUILD here
bs_pth = './no_resize'
train_dir = bs_pth + '/train'
validation_dir = bs_pth + '/valid'
test_dir = bs_pth + '/test'
batch_size = 8
datagen_tr = ImageDataGenerator(preprocessing_function=preprocess_input,rotation_range=40,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.2,zoom_range=0.2,channel_shift_range=10,horizontal_flip=True,fill_mode='nearest')
datagen_vd = ImageDataGenerator(preprocessing_function=preprocess_input)
train_gen = datagen_tr.flow_from_directory(train_dir,target_size=(W, H),batch_size=batch_size,class_mode='categorical',shuffle=True,interpolation="bilinear")
vd_gen = datagen_vd.flow_from_directory(validation_dir,target_size=(W, H),batch_size=batch_size,class_mode='categorical',shuffle=False,interpolation="bilinear")
nTrain = np.size(train_gen.classes)
nVal = np.size(vd_gen.classes)
epochs = 40
steps_per_epoch_tr = int(nTrain/ batch_size)
steps_per_epoch_val = int(nVal/batch_size)
def get_files(path):
if os.path.isdir(path):
files = glob.glob(os.path.join(path, '*'))
elif path.find('*') > 0:
files = glob.glob(path)
else:
files = [path]
files = [f for f in files if f.endswith('png') or f.endswith('png')]
if not len(files):
sys.exit('No images found by the given path!')
return files
print("***********Test data :predict class 0*************************************")
files = get_files(test_dir + '/0')
cls_list = ['a','b']
print(cls_list)
# 2-d numpy arrray of probabibility of each class for each file
pred_c0 = np.empty((0, nclass))
sum_true_class = np.size(files)
for f in files:
img = image.load_img(f, target_size=(W,H))
if img is None:
continue
x = image.img_to_array(img)
x = preprocess_input(x)
x = np.expand_dims(x, axis=0)
pred = model.predict(x)[0] # [ [a,b]] so needs .x[0]
pred_c0 = np.append(pred_c0,[pred],axis=0)
# index of max prob
indxmx = np.argmax(pred)
if indxmx != 0:
sum_true_class = sum_true_class - 1
top_inds = pred.argsort()[::-1][:5]
print(f)
for i in top_inds:
print(' {:.3f} {}'.format(pred[i], cls_list[i]))
np.savetxt('./diabeticRetino/diabetic_v9_predict_c0.txt', pred_c0)
print("calss 0 accuracy = " + str( (sum_true_class/np.size(files))* 100 ) + '%')
print("***********Test data :predict class 1*************************************")
files = get_files(test_dir + '/1')
pred_c1 = np.empty((0, nclass))
sum_true_class = np.size(files)
for f in files:
img = image.load_img(f, target_size=(W,H))
if img is None:
continue
x = image.img_to_array(img)
x = preprocess_input(x)
x = np.expand_dims(x, axis=0)
pred = model.predict(x)[0] # net_final or net\n",
# y = 0
# ev = model.evaluate(x,y,batch_size= 1)
pred_c1 = np.append(pred_c1, [pred], axis=0)
# index of max prob
indxmx = np.argmax(pred)
if indxmx != 1:
sum_true_class = sum_true_class - 1
top_inds = pred.argsort()[::-1][:5] # gives indices [0,1]
print(f)
# print probability and corresponding class name
for i in top_inds:
print(' {:.3f} {}'.format(pred[i], cls_list[i]))
np.savetxt('./diabeticRetino/diabetic_v9_predict_c1.txt', pred_c1)
print("calss 1 accuracy = " + str( (sum_true_class/np.size(files))* 100 ) + '%')
test_gen = datagen_vd.flow_from_directory(test_dir,target_size=(W, H),batch_size=1,class_mode='categorical',shuffle=False,interpolation="bicubic")
# predict on test data, and save differences
filenames = test_gen.filenames
nTest = len(filenames)
tst_pred = model.predict_generator(test_gen,steps=nTest)
test_pred = np.argmax(tst_pred,axis=1)
tst_lbls = test_gen.classes
plt.plot(range(nTest),tst_lbls,'b-',label = 'True Class Labels:Test')
plt.plot(range(nTest),test_pred,'g-',label = 'Predicted Class Lables"Test')
plt.title('Test Prediction')
plt.legend(loc='center right')
plt.savefig('./diabeticRetino/diabetic_v9_predict.png')
tst_stat = np.vstack((tst_lbls, test_pred)).T
np.savetxt('./diabeticRetino/diabetic_v9_predict.txt', tst_stat)
dif = tst_lbls-test_pred
# count number of zeros( where true class matches predicted class)
print('test_acc_again:' + str(np.count_nonzero(dif==0)/nTest))
现在, pred_c0 (使用keras预测的类0的预测)和 pred_c1 (使用keras的预测类1的预测)应该匹配 tst_pred (对于使用predict_generator的两个类)。但是,如屏幕截图所示,它们之间的匹配不紧密,并且在两个类中 tst_pred 的测试准确度均为100%,对于class 0而言,其93%和class 1使用 pred_c0时其100% 和 pred_c1 。
我不知道为什么会这样。
请帮忙。
赛迪