I am using Python, training on a dataset of about 9000 images with the pretrained VGG16 network. The problem now is a memory error: unable to allocate an array with shape (500, 500, 3) and data type float32.
I have tried float64, 32, 16 and 8-bit dtypes, but the memory error stays the same. How should I handle it? I am trying to use Keras's data-generator facility to feed the data in batches, but I seem to be doing something wrong.
Libraries:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
from keras.applications import VGG16
import numpy as np
import glob
import os
import keras
from keras import backend as K
from PIL import Image
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras import optimizers
import matplotlib.pyplot as plt
img_rows, img_cols = 500,500
channels=3
def load_labels(myDir):
    labels = []
    fileList = glob.glob(myDir)
    for fname in fileList:
        fileName = os.path.basename(fname)
        curLabel = fileName.split("_")[0]
        labels.append(curLabel)
    return np.asarray(labels)
def load_dataThreeChannel(myDir):
    images = []
    fileList = glob.glob(myDir)
    # x = np.array([np.array(Image.open(fname)).flatten() for fname in fileList])
    # x = np.array([np.array(Image.open(fname)) for fname in fileList])
    for fname in fileList:
        #print(fname)
        img = Image.open(fname)
        output = np.array(img.resize((img_rows, img_cols), Image.ANTIALIAS))
        #output = np.stack((output,)*3, -1)
        images.append(output)
    x = np.asarray(images)
    print(x.shape)
    return x
myDir ="train_patches/*.png"
labels = load_labels(myDir)
data = load_dataThreeChannel(myDir)
#data = load_data(myDir)
# Data gen to avoid memory error
# create a data generator
datagen = ImageDataGenerator()
# load and iterate training dataset
train_it = datagen.flow_from_directory("train_patches/*.png", class_mode='binary', batch_size=64)
# confirm the iterator works
batchX, batchy = train_it.next()
print('Batch shape=%s, min=%.3f, max=%.3f' % (batchX.shape, batchX.min(), batchX.max()))
#Include_top=False, Does not load the last two fully connected layers which act as the classifier.
#We are just loading the convolutional layers.
vgg_conv = VGG16(weights='imagenet',include_top=False,input_shape=(img_rows,img_cols,3))
# freeze all layers except the last 4
for layer in vgg_conv.layers[:-4]:
    layer.trainable = False
num_classes=10
model = Sequential()
# Add the vgg convolutional base model
model.add(vgg_conv)
# Add new layers
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
# Show a summary of the model. Check the number of trainable parameters
model.summary()
epochs = 3
X_train = data
Y_train =labels
#X_train,X_test,Y_train,Y_test = train_test_split(data,labels, test_size=0.20, random_state=42)
X_train = X_train.astype('float32', copy= False)
#X_test = X_test.astype('float32', copy= False)
X_train /= 255
#X_test /= 255
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
#print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = keras.utils.to_categorical(Y_train, num_classes)
#Y_test = keras.utils.to_categorical(Y_test, num_classes)
# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])
# Train the model
history = model.fit(
    X_train, Y_train,
    epochs=10,
    verbose=1)
acc = history.history['acc']
loss = history.history['loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'b', label='Training acc')
plt.title('Training accuracy')
plt.legend()
plt.show()
model_json = model.to_json()
open('imdata.json','w').write(model_json)
model.save_weights('imdata.h5',overwrite=True)
Please help me resolve this error. And please explain what the imdata.h5 and imdata.json files are.
Answer 0 (score: 0)
I think you are trying to load your entire dataset into memory at once, as marked below:
def load_dataThreeChannel(myDir):
    images = []
    fileList = glob.glob(myDir)
    # x = np.array([np.array(Image.open(fname)).flatten() for fname in fileList])
    # x = np.array([np.array(Image.open(fname)) for fname in fileList])
    for fname in fileList:
        #print(fname)
        img = Image.open(fname)  # <-- Load image into memory
        output = np.array(img.resize((img_rows, img_cols), Image.ANTIALIAS))
        #output = np.stack((output,)*3, -1)
        images.append(output)  # <-- Append the image to the list
    x = np.asarray(images)
    print(x.shape)
    return x
myDir ="train_patches/*.png"
labels = load_labels(myDir)
data = load_dataThreeChannel(myDir) # <-- HERE
At that size it is of course going to fail. The solution is to load the images on demand through a Keras data generator; see: ImageDataGenerator
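For this particular script, a minimal sketch might look like the following. It assumes your images are arranged one subdirectory per class under train_patches/ (flow_from_directory infers labels from folder names, so the flat glob pattern "train_patches/*.png" in your code will not work), and it reuses the model, img_rows and img_cols defined in your script; treat it as a starting point rather than tested code:

from keras.preprocessing.image import ImageDataGenerator

# Rescale per batch instead of dividing one giant float array by 255
datagen = ImageDataGenerator(rescale=1.0/255)
train_it = datagen.flow_from_directory("train_patches/",
                                       target_size=(img_rows, img_cols),
                                       class_mode='categorical',
                                       batch_size=32)
# Only one batch of (500, 500, 3) images is held in memory at a time
history = model.fit_generator(train_it,
                              steps_per_epoch=train_it.n // train_it.batch_size,
                              epochs=10,
                              verbose=1)

With this setup the full X_train array never exists, which is exactly what avoids the allocation error.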
On the second question: thanks to HDF5 compression, an .h5 file can be thought of as a large compressed directory, much like a ZIP archive (though it is not one); it is typically read and written via the h5py module. In your script, imdata.h5 holds the trained weights written by model.save_weights(). The JSON file is not a dataset index; it is the model architecture produced by model.to_json(), a "tree" of layers and their configuration. You can load it with the json module (the loads function) and print it to see its contents.
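As a small illustration (a sketch that assumes the two files were written exactly as by the last three lines of your script), you can inspect the JSON and rebuild the trained model like this:

import json
from keras.models import model_from_json

# Print the top of the architecture 'tree' stored in imdata.json
with open('imdata.json') as f:
    arch = json.loads(f.read())
print(arch['class_name'])  # e.g. 'Sequential'

# Rebuild the model from the JSON architecture and restore the HDF5 weights
model = model_from_json(open('imdata.json').read())
model.load_weights('imdata.h5')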
Below is an example of loading ImageNet for training a different network:
import os
import math
import numpy as np
import cv2 as cv
import keras
import tensorflow as tf
from keras.applications import mobilenet
from keras.applications.mobilenet import MobileNet
from keras.applications.mobilenet import preprocess_input, decode_predictions
from keras import optimizers
from keras.preprocessing import image
from keras.utils import to_categorical
import tensorflow_datasets as tfds
# Load MobileNet model
model = MobileNet(weights='imagenet')
opt = optimizers.Adam(lr=0.001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# Fetch the dataset directly
imagenet = tfds.image.Imagenet2012()
## or by string name
#imagenet = tfds.builder('imagenet2012')
# Describe the dataset with DatasetInfo
C = imagenet.info.features['label'].num_classes
Ntrain = imagenet.info.splits['train'].num_examples
Nvalidation = imagenet.info.splits['validation'].num_examples
Nbatch = 32
assert C == 1000
assert Ntrain == 1281167
assert Nvalidation == 50000
# Download the data, prepare it, and write it to disk
imagenet.download_and_prepare()
# Load data from disk as tf.data.Datasets
datasets = imagenet.as_dataset()
train_dataset, validation_dataset = datasets['train'], datasets['validation']
assert isinstance(train_dataset, tf.data.Dataset)
assert isinstance(validation_dataset, tf.data.Dataset)
def imagenet_generator(dataset, batch_size=32, num_classes=1000, is_training=False):
    images = np.zeros((batch_size, 224, 224, 3))
    labels = np.zeros((batch_size, num_classes))
    while True:
        count = 0
        for sample in tfds.as_numpy(dataset):
            image = sample["image"]
            label = sample["label"]
            images[count % batch_size] = mobilenet.preprocess_input(np.expand_dims(cv.resize(image, (224, 224)), 0))
            labels[count % batch_size] = np.expand_dims(to_categorical(label, num_classes=num_classes), 0)
            count += 1
            if (count % batch_size == 0):
                yield images, labels
# Infer on ImageNet
labels = np.zeros((Nvalidation))
pred_labels = np.zeros((Nvalidation, C))
pred_labels_new = np.zeros((Nvalidation, C))
score = model.evaluate_generator(imagenet_generator(validation_dataset, batch_size=32),
                                 steps=Nvalidation // Nbatch,
                                 verbose=1)
print("Evaluation Result of Original Model on ImageNet2012: " + str(score))
# Train on ImageNet
checkpoint_path = "Mobilenet/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
os.makedirs(checkpoint_dir, exist_ok=True)
cp_callback = keras.callbacks.ModelCheckpoint(
    checkpoint_path, verbose=1, save_weights_only=True,
    # Save weights every epoch
    period=1)
csv_logger = keras.callbacks.CSVLogger('MobileNet_training.csv')
print("Starting to train Modified MobileNet...")
epochs = 5
model.fit_generator(imagenet_generator(train_dataset, batch_size=Nbatch, is_training=True),
                    steps_per_epoch=Ntrain // Nbatch,
                    epochs=epochs,
                    validation_data=imagenet_generator(validation_dataset, batch_size=Nbatch),
                    validation_steps=Nvalidation // Nbatch,
                    verbose=1,
                    callbacks=[cp_callback, csv_logger])
model.save("MobileNet.h5")
Answer 1 (score: 0)
# -*- coding: utf-8 -*-
"""
Please kindly check whether this is correct; I am still getting issues with the 3 channels.
"""
from keras.models import model_from_json
from keras.applications import VGG16
import numpy as np
import glob
import os
import keras
from keras import backend as K
from PIL import Image
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras import optimizers
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
img_rows, img_cols = 500,500
channels=3
# def load_labels(myDir):
#     labels = []
#     fileList = glob.glob(myDir)
#     for fname in fileList:
#         fileName = os.path.basename(fname)
#         curLabel = fileName.split("_")[0]
#         labels.append(curLabel)
#     return np.asarray(labels)
def load_dataThreeChannel(myDir):
    images = []
    fileList = glob.glob(myDir)
    # x = np.array([np.array(Image.open(fname)).flatten() for fname in fileList])
    # x = np.array([np.array(Image.open(fname)) for fname in fileList])
    for fname in fileList:
        #print(fname)
        img = Image.open(fname)
        output = np.array(img.resize((img_rows, img_cols), Image.ANTIALIAS))
        #output = np.stack((output,)*3, -1)
        images.append(output)
    x = np.asarray(images)
    print(x.shape)
    return x
myDir = ImageDataGenerator(rescale=1.0/255)
# included in our dependencies
myDir = myDir.flow_from_directory("C:/Users/iohan/.spyder-py3/IAM/train_patches/",
                                  target_size=(224, 224), color_mode='rgb',
                                  batch_size=32, class_mode='categorical', shuffle=True)
Xbatch, Ybatch = myDir.next()
# myDir ="C:/Users/iohan/.spyder-py3/IAM/train_patches/*.png"
# labels = load_labels(myDir)
data = load_dataThreeChannel(myDir)
#data = load_data(myDir)
#Include_top=False, Does not load the last two fully connected layers which act as the classifier.
#We are just loading the convolutional layers.
vgg_conv = VGG16(weights='imagenet',include_top=False,input_shape=(img_rows,img_cols,3))
# freeze all layers except the last 4
for layer in vgg_conv.layers[:-4]:
    layer.trainable = False
num_classes=10
model = Sequential()
# Add the vgg convolutional base model
model.add(vgg_conv)
# Add new layers
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
# Show a summary of the model. Check the number of trainable parameters
model.summary()
epochs = 3
X_train = data
Y_train =labels
#X_train,X_test,Y_train,Y_test = train_test_split(data,labels, test_size=0.20, random_state=42)
X_train = X_train.astype('float32', copy= False)
#X_test = X_test.astype('float32', copy= False)
X_train /= 255
#X_test /= 255
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
#print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = keras.utils.to_categorical(Y_train, num_classes)
#Y_test = keras.utils.to_categorical(Y_test, num_classes)
# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])
# Train the model
# history = model.fit(
#     X_train, Y_train,
#     epochs=10,
#     verbose=1)
step_size_train = myDir.n // myDir.batch_size
history = model.fit_generator(generator=myDir,
                              steps_per_epoch=step_size_train,
                              epochs=10)
acc = history.history['acc']
loss = history.history['loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'b', label='Training acc')
plt.title('Training accuracy')
plt.legend()
plt.show()
model_json = model.to_json()
open('imdata.json','w').write(model_json)
model.save_weights("C:/Users/iohan/.spyder-py3/IAM/mdata.h5",overwrite=True)