将您自己的数据集应用于Keras

时间:2019-04-11 07:08:46

标签: python keras

我想玩转用遗传算法训练神经网络。我在网上找到了一些代码,这些代码利用Keras和Keras中的数据集来优化具有GA的神经网络,但是我却花了很多时间来弄清楚如何加载和配置自己的数据集以用于Keras。这是可以调用MNIST数据集的代码:

    def get_mnist():
        """Retrieve the MNIST dataset and process the data."""
        # Set defaults.
        nb_classes = 10
        batch_size = 128
        input_shape = (784,)

        # Get the data.
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train = x_train.reshape(60000, 784)
        x_test = x_test.reshape(10000, 784)
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        # convert class vectors to binary class matrices
        y_train = to_categorical(y_train, nb_classes)
        y_test = to_categorical(y_test, nb_classes)

        return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)

我已经从我想使用的Kaggle(花朵图像预测集)中提取了一个数据集。我试图以一种可以创建类似于该函数的函数的方式加载数据集。我要加载到数据集中的代码是:

#%%
# import packages needed for neural network
import numpy as np #shows images
import matplotlib.pyplot as plt #array operations
import os #iterate through directories and move through paths
import cv2 #image operations

#%%
# creates directory and categories
DATADIR = 'pathtodata'
CATEGORIES = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

# iterates through all images in directory
for category in CATEGORIES:
    path = os.path.join(DATADIR, category)
    for img in os.listdir(path):
        # converts image to array and greyscale
        img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
        plt.imshow(img_array, cmap="gray")
        plt.show() #one image should print
        break
    break

print(img_array) #print array to check img_array
print(img_array.shape)

# reshape images
IMG_SIZE = 100 #pixel size
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
plt.imshow(new_array, cmap = 'gray')
plt.show()

# training data
training_data = []
def create_training_data():
    # iterates through all images in directory
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category)
        class_num = CATEGORIES.index(category)
        for img in os.listdir(path):
            try:
                # converts image to array and greyscale
                img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                training_data.append([new_array, class_num])
            except Exception:
                #any images that are broken will not be included
                pass

create_training_data()

# print training length
print(len(training_data))


#%%
# shuffle training data
import random
random.shuffle(training_data)
for sample in training_data[:10]:
    print(sample[1])

#%%
X=[] #feature set
y=[] #labels

for features, label in training_data:
    X.append(features)
    y.append(label)

# shape needed for keras to except image
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

# save the data
import pickle
pickle_out = open("fX.pickle", "wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open("fy.pickle", "wb")
pickle.dump(X, pickle_out)
pickle_out.close()

然后我创建并调用该函数:

def get_flower():
    nb_classes = 4
    batch_size = 32
    input_shape = X.shape[1:]

    # Loads in data
    x=pickle.load(open("fX.pickle","rb"))
    y=pickle.load(open("fy.pickle","rb"))
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
    x_train = x_train.astype('float32')
    x_train /= 255

    # convert class vectors to binary class matrices
    y_train = to_categorical(y_train, 1)
    y_test = to_categorical(y_test, 1)

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)

我遇到很多索引错误。任何见识将不胜感激。

我收到的错误如下:

IndexError                                Traceback (most recent call last)
<ipython-input-15-ebadce6a7460> in <module>
    103 
    104 if __name__ == '__main__':
--> 105     main()

<ipython-input-15-ebadce6a7460> in main()
    100                  (generations, population))
    101 
--> 102     generate(generations, population, nn_param_choices, dataset)
    103 
    104 if __name__ == '__main__':

<ipython-input-15-ebadce6a7460> in generate(generations, population, nn_param_choices, dataset)
     54 
     55         # Train and get accuracy for networks.
---> 56         train_networks(networks, dataset)
     57 
     58         # Get the average accuracy for this generation.

<ipython-input-15-ebadce6a7460> in train_networks(networks, dataset)
     20     pbar = tqdm(total=len(networks))
     21     for network in networks:
---> 22         network.train(dataset)
     23         pbar.update(1)
     24     pbar.close()

<ipython-input-3-bb87f4a033ec> in train(self, dataset)
     40         """
     41         if self.accuracy == 0.:
---> 42             self.accuracy = train_and_score(self.network, dataset)
     43 
     44     def print_network(self):

<ipython-input-14-f15ae9ed57cd> in train_and_score(network, dataset)
    146     elif dataset == 'flower':
    147         nb_classes, batch_size, input_shape, x_train, \
--> 148             x_test, y_train, y_test = get_flower()
    149 
    150     model = compile_model(network, nb_classes, input_shape)

<ipython-input-14-f15ae9ed57cd> in get_flower()
     90 
     91     # convert class vectors to binary class matrices
---> 92     y_train = to_categorical(y_train, 1)
     93     y_test = to_categorical(y_test, 1)
     94 

~/anaconda3/lib/python3.7/site-packages/keras/utils/np_utils.py in to_categorical(y, num_classes)
     27     n = y.shape[0]
     28     categorical = np.zeros((n, num_classes))
---> 29     categorical[np.arange(n), y] = 1
     30     output_shape = input_shape + (num_classes,)
     31     categorical = np.reshape(categorical, output_shape)

IndexError: index 9 is out of bounds for axis 1 with size 1

0 个答案:

没有答案