我正在尝试在自定义数据集上使用 ImageClassifierData.from_csv()。
数据集包含一个数据框(train.csv,其中有一列是文件名)和一个包含图像的zip压缩包。图像每4张为一组,组内的每张图像以4种不同方式之一呈现同一幅图像,组合在一起即构成完整的图像。由于某些原因,df中的文件名并不是图像文件的完整文件名。
例如,
# File names as stored in the dataframe column: no "_typeN" part and no extension.
df["col1"] = ["0Image1","1Image1","2Image1","3Image1"]
# Actual image file names: same stem plus a "_typeN" part and the ".png" extension.
image_file_names = ["0Image1_type1.png","1Image1_type2.png","2Image1_type3.png","3Image1_type4.png"]
以下是用于获取数据的fast.ai代码:
# Base path; train.csv is read from here and it is also passed to from_csv.
PATH = "../filepath/"
# Model architecture handed to tfms_from_model in get_data.
f_model = resnet34
label_csv = f'{PATH}train.csv'
# Count the lines of the CSV minus one (presumably the header row — confirm).
# The context manager closes the file handle promptly, and the generator
# avoids building a list of every line just to take its length.
with open(label_csv) as csv_file:
    n = sum(1 for _ in csv_file) - 1
# Validation indices produced by get_cv_idxs for n rows.
val_idxs = get_cv_idxs(n)
def get_data(sz):
    """Return the ImageClassifierData built from train.csv for size `sz`.

    The transforms come from tfms_from_model for f_model, using the
    transforms_top_down augmentations and max_zoom=1.05. The data object is
    created by ImageClassifierData.from_csv with the 'train.zip' folder name,
    the '.png' suffix, the module-level val_idxs and the "test" test set name.
    """
    transforms = tfms_from_model(
        f_model,
        sz,
        aug_tfms=transforms_top_down,
        max_zoom=1.05,
    )
    return ImageClassifierData.from_csv(
        PATH,
        'train.zip',
        label_csv,
        tfms=transforms,
        suffix='.png',
        val_idxs=val_idxs,
        test_name="test",
    )
# Build the data object with sz=299.
data = get_data(299)
# Fetch the first batch from the validation data loader.
x,y = next(iter(data.val_dl))
运行 next(iter(data.val_dl)) 时出现以下错误:
/opt/conda/lib/python3.6/site-packages/fastai/dataloader.py in __iter__(self)
86 # avoid py3.6 issue where queue is infinite and can result in memory exhaustion
87 for c in chunk_iter(iter(self.batch_sampler), self.num_workers*10):
---> 88 for batch in e.map(self.get_batch, c):
89 yield get_tensor(batch, self.pin_memory, self.half)
90
/opt/conda/lib/python3.6/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.time())
/opt/conda/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
427 self._condition.wait(timeout)
/opt/conda/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
/opt/conda/lib/python3.6/concurrent/futures/thread.py in run(self)
54
55 try:
---> 56 result = self.fn(*self.args, **self.kwargs)
57 except BaseException as exc:
58 self.future.set_exception(exc)
/opt/conda/lib/python3.6/site-packages/fastai/dataloader.py in get_batch(self, indices)
73
74 def get_batch(self, indices):
---> 75 res = self.np_collate([self.dataset[i] for i in indices])
76 if self.transpose: res[0] = res[0].T
77 if self.transpose_y: res[1] = res[1].T
/opt/conda/lib/python3.6/site-packages/fastai/dataloader.py in <listcomp>(.0)
73
74 def get_batch(self, indices):
---> 75 res = self.np_collate([self.dataset[i] for i in indices])
76 if self.transpose: res[0] = res[0].T
77 if self.transpose_y: res[1] = res[1].T
/opt/conda/lib/python3.6/site-packages/fastai/dataset.py in __getitem__(self, idx)
165 xs,ys = zip(*[self.get1item(i) for i in range(*idx.indices(self.n))])
166 return np.stack(xs),ys
--> 167 return self.get1item(idx)
168
169 def __len__(self): return self.n
/opt/conda/lib/python3.6/site-packages/fastai/dataset.py in get1item(self, idx)
158
159 def get1item(self, idx):
--> 160 x,y = self.get_x(idx),self.get_y(idx)
161 return self.get(self.transform, x, y)
162
/opt/conda/lib/python3.6/site-packages/fastai/dataset.py in get_x(self, i)
237 super().__init__(transform)
238 def get_sz(self): return self.transform.sz
--> 239 def get_x(self, i): return open_image(os.path.join(self.path, self.fnames[i]))
240 def get_n(self): return len(self.fnames)
241
/opt/conda/lib/python3.6/site-packages/fastai/dataset.py in open_image(fn)
218 flags = cv2.IMREAD_UNCHANGED+cv2.IMREAD_ANYDEPTH+cv2.IMREAD_ANYCOLOR
219 if not os.path.exists(fn):
--> 220 raise OSError('No such file or directory: {}'.format(fn))
221 elif os.path.isdir(fn):
222 raise OSError('Is a directory: {}'.format(fn))
OSError: No such file or directory: ../filepath/train.zip/0Image1.png
在我看来,ImageClassifierData 正在使用csv文件中的文件名来查找图像文件,但这些名称与实际的图像文件名不匹配。处理此问题的最有效方法是什么?
P.S. 我需要将当前文件名保留在dataframe列中,以便以后提交。