Question

我正在建立一个CNN，以便对LFW数据集进行图像分类。

为此，我具有以下数据集：

class LFW(object):
def __init__(self, root, specific_folder, img_extension, preprocessing_method=None, crop_size=(96, 112)):
    """
    Dataloader of the LFW dataset.

    root: path to the dataset to be used.
    specific_folder: specific folder inside the same dataset.
    img_extension: extension of the dataset images.
    preprocessing_method: string with the name of the preprocessing method.
    crop_size: retrieval network specific crop size.
    """

    self.preprocessing_method = preprocessing_method
    self.crop_size = crop_size
    self.imgl_list = []
    self.classes = []
    self.people = []
    self.model_align = None

    # read the file with the names and the number of images of each people in the data set
    with open(os.path.join(root, 'people.txt')) as f:
        people = f.read().splitlines()[1:]

    # get only the people that have more than 20 images
    for p in people:
        p = p.split('\t')
        if len(p) > 1:
            if int(p[1]) >= 20:
                for num_img in range(1, int(p[1]) + 1):
                    self.imgl_list.append(os.path.join(root, specific_folder, p[0], p[0] + '_' +
                                                       '{:04}'.format(num_img) + '.' + img_extension))
                    self.classes.append(p[0])
                    self.people.append(p[0])

    le = preprocessing.LabelEncoder()
    self.classes = le.fit_transform(self.classes)

    print(len(self.imgl_list), len(self.classes), len(self.people))

def __getitem__(self, index):
    imgl = imageio.imread(self.imgl_list[index])
    cl = self.classes[index]

    # if image is grayscale, transform into rgb by repeating the image 3 times
    if len(imgl.shape) == 2:
        imgl = np.stack([imgl] * 3, 2)

    imgl, bb = preprocess(imgl, self.preprocessing_method, crop_size=self.crop_size,
                          is_processing_dataset=True, return_only_largest_bb=True, execute_default=True)

    # append image with its reverse
    imglist = [imgl, imgl[:, ::-1, :]]

    # normalization
    for i in range(len(imglist)):
        imglist[i] = (imglist[i] - 127.5) / 128.0
        imglist[i] = imglist[i].transpose(2, 0, 1)
    imgs = [torch.from_numpy(i).float() for i in imglist]

    return imgs, cl, imgl, bb, self.imgl_list[index], self.people[index]

def __len__(self):
    return len(self.imgl_list)

为了训练我的模块，我使用以下代码

if __name__ == '__main__':

#set_trace()
args = edict({
    'operation' : 'train',
    'feature_file' : None,
     'result_sample_path' : None,
     'gpu' : 'GPU',
     'path_image_query' : None,
     'query_label' : 'Query label',
     'dataset' : None,
     'specific_dataset_folder_name' : 'lfw',
     'img_extension' : 'jpg',
     'preprocessing_method' : 'sphereface',
     'model_name' : 'mobiface',
     'batch_size' : 3,
     'image_query':'/content/drive/My Drive/recfaces13/recfaces/datasets/LFW',
     'train':True,
     'device':'cuda'

}）打印（参数）

# selecting the size of the crop based on the network
if args.model_name == 'mobilefacenet' or args.model_name == 'sphereface':
    crop_size = (96, 112)
elif args.model_name == 'mobiface' or args.model_name == 'shufflefacenet':
    crop_size = (112, 112)
elif args.model_name == 'openface':
    crop_size = (96, 96)
elif args.model_name == 'facenet':
    crop_size = (160, 160)
else:
    raise NotImplementedError("Model " + args.model_name + " not implemented")

if args.dataset is not None:
    # process whole dataset
    assert args.specific_dataset_folder_name is not None, 'To process a dataset, ' \
                                                          'the flag --specific_dataset_folder_name is required.'
    process_dataset(args.operation, args.model_name, args.batch_size,
                    args.dataset, args.specific_dataset_folder_name,
                    args.img_extension, args.preprocessing_method, crop_size,
                    args.result_sample_path, args.feature_file)
#elif args.image_query is not None:
    # process unique image
  #  dataset = ImageDataLoader(args.image_query, args.preprocessing_method,
  #                            crop_size, args.operation == 'extract_features')
  #  dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)
  #  features = None
elif args.operation == 'train':
  ##########set_trace()

  net = load_net('mobilefacenet', 'gpu')
  net = net.cuda()
  model_name=args.model_name
  
  dataset = LFW(args.image_query,args.specific_dataset_folder_name, args.img_extension, args.preprocessing_method, crop_size)
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)

  
#  data_counter_per_class = np.zeros((len(dataloader)))
#  for i in range(len(dataloader)):
 #   path = os.path.join('image_query', dataloader[i])
#    files = get_files_from_folder(path)
#    data_counter_per_class[i] = len(files)
#    test_counter = np.round(data_counter_per_class * (1 - train_ratio))
  #dataloader1=dataloader.split(',')
  #train,test=train_test_split(dataloader,test_size=0.2)
  #trainloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, drop_last=False)
 # testloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=False, num_workers=2, drop_last=False) //create path//
 #create array of data path split that data path and 
  features = None

  if args.feature_file is not None and os.path.isfile(args.feature_file):
        features = scipy.io.loadmat(args.feature_file)      
  epoch = 2
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
  train_loss = list()
  #set_trace()
  
  for i, data in enumerate(dataloader):
    
    inpss, labs = data
    for inps in inpss:
      #set_trace()
      inps, labs = inps.cuda(args['device']), labs.cuda(args['device'])
      inps.squeeze_(0)
      labs.squeeze_(0)
      inps = Variable(inps).cuda(args['device'])
      labs = Variable(labs).cuda(args['device'])
      optimizer.zero_grad()
      #set_trace()
      outs = net(inps)
      soft_outs = F.softmax(outs, dim=1)
      prds = soft_outs.data.max(1)[1]
      loss = criterion(outs, labs)
      loss.backward()
      optimizer.step()
      prds = prds.squeeze_(1).squeeze_(0).cpu().numpy()
      inps_np = inps.detach().squeeze(0).cpu().numpy()
      labs_np = labs.detach().squeeze(0).cpu().numpy()
      train_loss.append(loss.data.item())
      print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (epoch, i + 1, len(train_loader), np.asarray(train_loss).mean()))

错误弹出，我有3D但需要4D我不确定如何解决。我现在是否需要更改数据负载或模型或训练非常困难，现在有人可以帮忙

RuntimeError：预期的4维4维输入，但改为3维

0 个答案: