内存错误:训练大型数据集之前

时间:2019-12-21 14:10:44

标签: machine-learning dataset

num_classes = 122

labels_name={'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '11': 11, '12': 12,
              '13': 13, '14': 14, '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23,
              '24': 24, '25': 25,'26': 26, '27': 27, '28': 28, '29': 29, '30': 30, '31': 31, '32': 32, '33': 33, '34': 34, 
              '35': 35, '36': 36, '37': 37, '38': 38, '39': 39, '40': 40, '41': 41, '42': 42, '43': 43, '44': 44, '45': 45,
              '46': 46, '47': 47, '48': 48, '49': 49, '50': 50, '51': 51, '52': 52, '53': 53, '54': 54, '55': 55, '56': 56,
              '57':57, '58': 58, '59': 59, '60': 60, '61': 61, '62': 62, '63': 63, '64': 64, '65': 65, '66': 66, '67': 67, 
              '68': 68, '69': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78,
              '79': 79, '80': 80, '81': 81, '82': 82, '83': 83, '84': 84, '85': 85, '86': 86, '87': 87, '88': 88,'89': 89, 
              '90': 90, '91': 91, '92': 92, '93': 93, '94': 94, '95': 95, '96': 96, '97': 97, '98': 98, '99': 99, '100': 100,
              '101': 101, '102': 102, '103': 103, '104': 104, '105': 105, '106': 106, '107': 107, '108': 108, '109': 109,
              '110': 110, '111': 111, '112': 112, '113': 113, '114': 114, '115': 115,'116': 116, '117': 117, '118': 118, 
             '119': 119, '120': 120, '121': 121}

img_data_list=[]
labels_list = []

for dataset in data_dir_list:
    img_list=os.listdir(data_path+'/'+ dataset)
    print ('Loading the images of dataset-'+'{}\n'.format(dataset))
    label = labels_name[dataset]
    for img in img_list:
        input_img=cv2.imread(data_path + '/'+ dataset + '/'+ img )
        input_img=cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
        input_img_resize=cv2.resize(input_img,(128,128))
        img_data_list.append(input_img_resize)
        labels_list.append(label)


n = 300000

img_data = np.array(img_data_list)

d1 = img_data[:, :int(n/2)].astype('float')

d2 = img_data[:, int(n/2):].astype('float')

img_data = np.hstack((d1, d2))

img_data /= 255
print (img_data.shape)

labels = np.array(labels_list)
# print the count of number of samples for different classes
print(np.unique(labels,return_counts=True))
# convert class labels to on-hot encoding
Y = np_utils.to_categorical(labels, num_classes)

#Shuffle the dataset
x,y = shuffle(img_data,Y, random_state=2)
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
  

MemoryError跟踪(最近的调用)   最后)在()        33 img_data = np.array(img_data_list)        34   ---> 35 d1 = img_data [:,:int(n / 2)]。astype('float')        36        37 d2 = img_data [:, int(n / 2):]。astype('float')

0 个答案:

没有答案