我想玩转用遗传算法训练神经网络。我在网上找到了一些代码,这些代码利用Keras和Keras中的数据集来优化具有GA的神经网络,但是我却花了很多时间来弄清楚如何加载和配置自己的数据集以用于Keras。这是可以调用MNIST数据集的代码:
def get_mnist():
"""Retrieve the MNIST dataset and process the data."""
# Set defaults.
nb_classes = 10
batch_size = 128
input_shape = (784,)
# Get the data.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# convert class vectors to binary class matrices
y_train = to_categorical(y_train, nb_classes)
y_test = to_categorical(y_test, nb_classes)
return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
我已经从我想使用的Kaggle(花朵图像预测集)中提取了一个数据集。我试图以一种可以创建类似于该函数的函数的方式加载数据集。我要加载到数据集中的代码是:
#%%
# import packages needed for neural network
import numpy as np #shows images
import matplotlib.pyplot as plt #array operations
import os #iterate through directories and move through paths
import cv2 #image operations
#%%
# creates directory and categories
DATADIR = 'pathtodata'
CATEGORIES = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
# iterates through all images in directory
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
for img in os.listdir(path):
# converts image to array and greyscale
img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
plt.imshow(img_array, cmap="gray")
plt.show() #one image should print
break
break
print(img_array) #print array to check img_array
print(img_array.shape)
# reshape images
IMG_SIZE = 100 #pixel size
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
plt.imshow(new_array, cmap = 'gray')
plt.show()
# training data
training_data = []
def create_training_data():
# iterates through all images in directory
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
try:
# converts image to array and greyscale
img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
training_data.append([new_array, class_num])
except Exception:
#any images that are broken will not be included
pass
create_training_data()
# print training length
print(len(training_data))
#%%
# shuffle training data
import random
random.shuffle(training_data)
for sample in training_data[:10]:
print(sample[1])
#%%
X=[] #feature set
y=[] #labels
for features, label in training_data:
X.append(features)
y.append(label)
# shape needed for keras to except image
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
# save the data
import pickle
pickle_out = open("fX.pickle", "wb")
pickle.dump(X, pickle_out)
pickle_out.close()
pickle_out = open("fy.pickle", "wb")
pickle.dump(X, pickle_out)
pickle_out.close()
然后我创建并调用该函数:
def get_flower():
nb_classes = 4
batch_size = 32
input_shape = X.shape[1:]
# Loads in data
x=pickle.load(open("fX.pickle","rb"))
y=pickle.load(open("fy.pickle","rb"))
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
x_train = x_train.astype('float32')
x_train /= 255
# convert class vectors to binary class matrices
y_train = to_categorical(y_train, 1)
y_test = to_categorical(y_test, 1)
return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
我遇到很多索引错误。任何见识将不胜感激。
我收到的错误如下:
IndexError Traceback (most recent call last)
<ipython-input-15-ebadce6a7460> in <module>
103
104 if __name__ == '__main__':
--> 105 main()
<ipython-input-15-ebadce6a7460> in main()
100 (generations, population))
101
--> 102 generate(generations, population, nn_param_choices, dataset)
103
104 if __name__ == '__main__':
<ipython-input-15-ebadce6a7460> in generate(generations, population, nn_param_choices, dataset)
54
55 # Train and get accuracy for networks.
---> 56 train_networks(networks, dataset)
57
58 # Get the average accuracy for this generation.
<ipython-input-15-ebadce6a7460> in train_networks(networks, dataset)
20 pbar = tqdm(total=len(networks))
21 for network in networks:
---> 22 network.train(dataset)
23 pbar.update(1)
24 pbar.close()
<ipython-input-3-bb87f4a033ec> in train(self, dataset)
40 """
41 if self.accuracy == 0.:
---> 42 self.accuracy = train_and_score(self.network, dataset)
43
44 def print_network(self):
<ipython-input-14-f15ae9ed57cd> in train_and_score(network, dataset)
146 elif dataset == 'flower':
147 nb_classes, batch_size, input_shape, x_train, \
--> 148 x_test, y_train, y_test = get_flower()
149
150 model = compile_model(network, nb_classes, input_shape)
<ipython-input-14-f15ae9ed57cd> in get_flower()
90
91 # convert class vectors to binary class matrices
---> 92 y_train = to_categorical(y_train, 1)
93 y_test = to_categorical(y_test, 1)
94
~/anaconda3/lib/python3.7/site-packages/keras/utils/np_utils.py in to_categorical(y, num_classes)
27 n = y.shape[0]
28 categorical = np.zeros((n, num_classes))
---> 29 categorical[np.arange(n), y] = 1
30 output_shape = input_shape + (num_classes,)
31 categorical = np.reshape(categorical, output_shape)
IndexError: index 9 is out of bounds for axis 1 with size 1