I am using a DataLoader to read from a custom Dataset object based on a numpy memmap. Everything works fine as long as I read the data without shuffling, but as soon as I set shuffle=True, the runtime crashes.
I tried implementing the shuffling mechanism in the Dataset class myself, using a permutation vector and setting shuffle=False in the DataLoader (roughly as sketched below), but the problem persists.
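This is a minimal sketch of that workaround; the ShuffledDataset wrapper is illustrative, not my exact code:

import numpy as np
from torch.utils.data import Dataset

class ShuffledDataset(Dataset):
    # Applies a fixed random permutation to the indices so that the
    # DataLoader itself can keep shuffle=False.
    def __init__(self, base_dataset):
        self.base = base_dataset
        self.perm = np.random.permutation(len(base_dataset))

    def __len__(self):
        return len(self.base)

    def __getitem__(self, index):
        # Remap the sequential index through the permutation vector.
        return self.base[self.perm[index]]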
I also noticed that, while shuffling, the Dataset object's __getitem__() function is called n times per batch, where n is the batch_size.
Here is the Dataset code:
import os

import numpy as np
import torch
from torch.utils import data
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    num_pattern = 60112
    base_folder = 'dataset'

    def __init__(self, root):
        self.root = os.path.expanduser(root)
        # All four arrays are opened read-only as numpy memmaps.
        self.output_ = np.memmap('{0}/output'.format(root), 'int64', 'r', shape=(60112, 62))
        self.out_len = np.memmap('{0}/output-lengths'.format(root), 'int32', 'r', shape=(60112))
        self.input_ = np.memmap('{0}/input'.format(root), 'float32', 'r', shape=(60112, 512, 1024))
        self.in_len = np.memmap('{0}/input-lengths'.format(root), 'int32', 'r', shape=(60112))

    def __len__(self):
        return self.num_pattern

    def __getitem__(self, index):
        return (self.in_len[index], torch.from_numpy(self.input_[index])), \
               (self.out_len[index], torch.from_numpy(self.output_[index]))


if __name__ == '__main__':
    dataset = CustomDataset(root='/content/')
    data_loader = data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
    for i, data in enumerate(data_loader, 0):
        pass  # training
The error stack trace is as follows:
RuntimeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _try_get_batch(self, timeout)
510 try:
--> 511 data = self.data_queue.get(timeout=timeout)
512 return (True, data)
/usr/lib/python3.6/multiprocessing/queues.py in get(self, block, timeout)
103 timeout = deadline - time.monotonic()
--> 104 if not self._poll(timeout):
105 raise Empty
/usr/lib/python3.6/multiprocessing/connection.py in poll(self, timeout)
256 self._check_readable()
--> 257 return self._poll(timeout)
258
/usr/lib/python3.6/multiprocessing/connection.py in _poll(self, timeout)
413 def _poll(self, timeout):
--> 414 r = wait([self], timeout)
415 return bool(r)
/usr/lib/python3.6/multiprocessing/connection.py in wait(object_list, timeout)
910 while True:
--> 911 ready = selector.select(timeout)
912 if ready:
/usr/lib/python3.6/selectors.py in select(self, timeout)
375 try:
--> 376 fd_event_list = self._poll.poll(timeout)
377 except InterruptedError:
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/signal_handling.py in handler(signum, frame)
62 # Python can still get and update the process status successfully.
---> 63 _error_if_any_worker_fails()
64 if previous_handler is not None:
RuntimeError: DataLoader worker (pid 3978) is killed by signal: Bus error.
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
<ipython-input-8-b407a8532808> in <module>()
5 data_loader = data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=1)
6
----> 7 for i, data in enumerate(data_loader, 0):
8 print(i)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
574 while True:
575 assert (not self.shutdown and self.batches_outstanding > 0)
--> 576 idx, batch = self._get_batch()
577 self.batches_outstanding -= 1
578 if idx != self.rcvd_idx:
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _get_batch(self)
551 else:
552 while True:
--> 553 success, data = self._try_get_batch()
554 if success:
555 return data
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _try_get_batch(self, timeout)
517 if not all(w.is_alive() for w in self.workers):
518 pids_str = ', '.join(str(w.pid) for w in self.workers if not w.is_alive())
--> 519 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str))
520 if isinstance(e, queue.Empty):
521 return (False, None)
RuntimeError: DataLoader worker (pid(s) 3978) exited unexpectedly
Answer 0 (score: 0)
This is a shared-memory error: a DataLoader worker killed by a Bus error almost always means the worker ran out of shared memory (/dev/shm). Your data loading probably needs more shared memory than is available for this particular task.
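If that is the case, one thing worth trying (a suggestion, not a guaranteed fix) is telling PyTorch to pass worker results through the filesystem instead of shared memory; you can also check how much shared memory is available with df -h /dev/shm:

import torch.multiprocessing

# Route inter-process tensor transfers through temporary files
# instead of shared memory, so a small /dev/shm is not exhausted.
# This is slower but avoids the Bus error from a full /dev/shm.
torch.multiprocessing.set_sharing_strategy('file_system')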
Answer 1 (score: 0)
RuntimeError: DataLoader worker (pid(s) 3978) exited unexpectedly

This error comes from the worker subprocesses spawned by data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1). Make num_workers=0, so the data is loaded in the main process and no worker subprocess is created.
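For example, with the DataLoader from the question:

# With num_workers=0 the batches are loaded in the main process,
# so no worker subprocess can crash or be killed.
data_loader = data.DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)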