My current approach is as follows. First, I load the images into memory like this:
import os
import gc

import numpy as np
from skimage.io import imread
from skimage.transform import resize


def load_data_from_directory(root_dir, image_height, image_format='jpg', mask_format='png'):
    """
    Loads train images and the corresponding masks, resized to the given size.
    Masks must have the same file name as their image.
    Pixel values are divided by 255 so the output lies in [0, 1].
    Expected folder layout under root_dir:
        > images        (jpg format)
        > segmentation  (png format)
    Example usage:
        from common_blocks.data_loaders import load_data_from_directory
        data_dir = './data_objects'
        x_train, y_train = load_data_from_directory(data_dir, image_height=256)
    """
    data = []
    for stage in ['train']:  # a 'test' stage can be added here
        directory = os.path.join(root_dir, 'images')
        # masks share the image's file name but use the mask extension
        file_names = [filename.replace(image_format, mask_format) for filename in os.listdir(directory)]
        fps = [os.path.join(directory, filename) for filename in os.listdir(directory)]
        for content in ['images', 'segmentation']:
            # construct the path to each file
            directory = os.path.join(root_dir, content)
            if content != 'images':
                fps = [os.path.join(directory, filename) for filename in file_names]
            # read files and scale pixel values to [0, 1]
            images = [imread(filepath) / 255 for filepath in fps]
            # if images have different sizes, they must be resized first
            images = [resize(image, (image_height, image_height)) for image in images]
            # stack into one np.array
            np_images = np.stack(images, axis=0)
            data.append(np_images)
        del images, file_names
        gc.collect()
    return data
x_train, y_train = load_data_from_directory('./train', image_height, 'jpg', 'png')
These images are then fed to a DataGenerator:
from keras.utils import Sequence


class DataGenerator(Sequence):
    '''
    Sample usage:
        test_generator = DataGenerator(x_train, y_train, 1,
                                       image_sizes, image_sizes, 1, True)
        Xtest, ytest = test_generator.__getitem__(1)
        plt.imshow(Xtest[0])
        plt.show()
        plt.imshow(ytest[0, :, :, 0])
        plt.show()
    '''
    def __init__(self, X, y, batch_size, height, width, nb_y_features, augmentation=True):
        'Initialization'
        self.batch_size = batch_size
        self.X = X
        self.y = y
        self.indexes = None
        self.currentIndex = 0
        self.augmentation = augmentation
        self.height = height
        self.width = width
        self.nb_y_features = nb_y_features
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # indexes of the samples belonging to this batch
        data_index_min = int(index * self.batch_size)
        data_index_max = int(min((index + 1) * self.batch_size, len(self.indexes)))
        indexes = self.indexes[data_index_min:data_index_max]
        this_batch_size = len(indexes)  # the last batch can be smaller than the others

        X = np.empty((this_batch_size, self.width, self.height, 3))
        y = np.empty((this_batch_size, self.width, self.height, self.nb_y_features), dtype=int)

        for i, sample_index in enumerate(indexes):
            X_sample, y_sample = self.X[sample_index].copy(), self.y[sample_index].copy()
            if self.augmentation:
                # aug() builds the augmentation pipeline; a sketch follows this class
                augmented = aug()(image=X_sample, mask=y_sample)
                X[i, ...] = augmented['image']
                y[i, ...] = augmented['mask']
            else:
                X[i, ...] = X_sample
                y[i, ...] = y_sample
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = list(range(len(self.X)))
        np.random.shuffle(self.indexes)
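The `aug()` call above is not shown in the question; it presumably builds an albumentations pipeline that transforms image and mask together. A minimal sketch of what such a function might look like, assuming the albumentations package (the transforms chosen here are placeholders, not the question's actual pipeline):

import albumentations as A

def aug():
    # Hypothetical stand-in for the undefined aug(). Spatial transforms are
    # applied to image and mask jointly via the image=/mask= call above.
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
    ])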
This generator is then used to train the model:
from keras.optimizers import Adam
from segmentation_models import Unet
from segmentation_models.losses import bce_jaccard_loss
from segmentation_models.metrics import iou_score

# BACKBONE, batch_size, image_width and image_height are defined elsewhere
training_generator = DataGenerator(x_train, y_train, batch_size,
                                   height=image_height, width=image_width,
                                   nb_y_features=1, augmentation=True)
model = Unet(BACKBONE, encoder_weights='imagenet', encoder_freeze=False)
model.compile(optimizer=Adam(),
              loss=bce_jaccard_loss, metrics=[iou_score])
history = model.fit_generator(training_generator, shuffle=True,
                              epochs=10)
The problem is the size of the data. If it is small enough to fit in memory, everything works; as soon as it gets larger, training fails with an out-of-memory error. How can I read files randomly, directly from the folder?
Answer 0 (score: 0)
Something like this should work:
import os

import numpy as np
from skimage.io import imread
from sklearn.utils import shuffle
from keras.utils import Sequence


class DataGeneratorFolder(Sequence):
    '''
    Sample usage:
        if to_debug:
            test_generator = DataGeneratorFolder(image_names,
                                                 masks_names,
                                                 batch_size=2,
                                                 image_size=256,
                                                 nb_y_features=1, augmentation=True)
            Xtest, ytest = test_generator.__getitem__(0)
            plt.imshow(Xtest[0])
            plt.show()
            plt.imshow(ytest[0, :, :, 0])
            plt.show()
    '''
    def __init__(self, image_filenames, mask_names, batch_size,
                 image_size=768, nb_y_features=1, augmentation=True,
                 center_crop_prop=0.5):
        self.image_filenames, self.mask_names = image_filenames, mask_names
        self.batch_size = batch_size
        self.currentIndex = 0
        self.augmentation = augmentation
        self.image_size = image_size
        self.nb_y_features = nb_y_features
        self.center_crop_prop = center_crop_prop
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Keras expects an int; np.ceil alone would return a float
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))
    def on_epoch_end(self):
        'Reshuffles the two file lists (in unison) after each epoch'
        self.image_filenames, self.mask_names = shuffle(self.image_filenames, self.mask_names)

    def read_image_mask(self, image_name, mask_name):
        # images are scaled to [0, 1]; masks are binarized to {0, 1}
        return (imread(image_name) / 255).astype(np.float32), \
               (imread(mask_name, as_gray=True) > 0).astype(np.int8)
    def __getitem__(self, index):
        'Generate one batch of data'
        # file names belonging to this batch
        data_index_min = int(index * self.batch_size)
        data_index_max = int(min((index + 1) * self.batch_size, len(self.image_filenames)))
        indexes = self.image_filenames[data_index_min:data_index_max]
        this_batch_size = len(indexes)  # the last batch can be smaller than the others

        X = np.empty((this_batch_size, self.image_size, self.image_size, 3), dtype=np.float32)
        y = np.empty((this_batch_size, self.image_size, self.image_size, self.nb_y_features), dtype=np.uint8)

        for i, sample_index in enumerate(indexes):
            X_sample, y_sample = self.read_image_mask(self.image_filenames[index * self.batch_size + i],
                                                      self.mask_names[index * self.batch_size + i])
            random_crop_prob = 1
            if self.augmentation:
                if np.sum(y_sample) > 0:  # mask is not empty
                    # with probability center_crop_prop, crop around the object
                    # instead of cropping at a random position
                    if np.random.choice(['crop_with_object', 'crop_random'],
                                        p=[self.center_crop_prop, 1 - self.center_crop_prop]) == 'crop_with_object':
                        X_sample, y_sample = random_crop_box_center(X_sample, y_sample,
                                                                    self.image_size, self.image_size)
                        random_crop_prob = 0  # already cropped, skip the random crop
                augmented = aug_with_crop(self.image_size, random_crop_prob)(image=X_sample, mask=y_sample)
                X[i, ...] = augmented['image']
                y[i, ...] = augmented['mask'].reshape(self.image_size, self.image_size, self.nb_y_features)
            else:
                X[i, ...] = X_sample
                y[i, ...] = y_sample
        return X, y
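The answer calls two helpers, `aug_with_crop` and `random_crop_box_center`, without defining them. Minimal sketches of what they might look like, assuming the albumentations package and 2-D binary masks at least as large as the crop window (both are assumptions, not the answerer's actual code):

import albumentations as A

def aug_with_crop(image_size, crop_prob=1):
    # Hypothetical pipeline: random crop to the target size (skipped when
    # crop_prob=0, i.e. the sample was already center-cropped) plus a flip.
    return A.Compose([
        A.RandomCrop(height=image_size, width=image_size, p=crop_prob),
        A.HorizontalFlip(p=0.5),
    ])

def random_crop_box_center(image, mask, height, width):
    # Hypothetical helper: crop a (height, width) window centered on the
    # mask's nonzero pixels, clipped so the window stays inside the image.
    ys, xs = np.nonzero(mask)
    cy, cx = int(ys.mean()), int(xs.mean())
    top = min(max(cy - height // 2, 0), image.shape[0] - height)
    left = min(max(cx - width // 2, 0), image.shape[1] - width)
    return (image[top:top + height, left:left + width],
            mask[top:top + height, left:left + width])

Hooking it up end to end might look like the following; the directory layout and the batch size are assumptions based on the question:

images_dir = './train/images'
masks_dir = './train/segmentation'
image_names = sorted(os.path.join(images_dir, f) for f in os.listdir(images_dir))
masks_names = sorted(os.path.join(masks_dir, f) for f in os.listdir(masks_dir))

training_generator = DataGeneratorFolder(image_names, masks_names, batch_size=8,
                                         image_size=256, nb_y_features=1, augmentation=True)
history = model.fit_generator(training_generator, epochs=10)

Since only the file names live in memory and each batch is read from disk inside __getitem__, the dataset size is no longer bounded by RAM.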