我正在尝试遍历初始化如下的pytorch数据加载器:
trainDL = torch.utils.data.DataLoader(X_train,batch_size = BATCH_SIZE,shuffle = True,** kwargs)
但是,我无法执行以下语句,因为在调用 `enumerate` 的那一行遇到了 KeyError:
for batch_idx, (data, _) in enumerate(trainDL):
{stuff}
有人知道发生了什么吗?
编辑:
我得到的错误是:
KeyError Traceback (most recent call last)
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 40592
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-63-95142e0748bb> in <module>
----> 1 for batch_idx, (data, _) in enumerate(trainDL):
2 print(".")
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
344 def __next__(self):
345 index = self._next_index() # may raise StopIteration
--> 346 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
347 if self._pin_memory:
348 data = _utils.pin_memory.pin_memory(data)
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~/.local/share/virtualenvs/Pipenv-l_wD1rT4/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 40592
答案 0 :(得分:1)
您必须创建一个 torch.utils.data.Dataset 的子类,
用它来包装您的数据集(即 pandas DataFrame)。
例如:
from torch.utils.data import Dataset
class PandasDataset(Dataset):
    """Map-style dataset that exposes the rows of a pandas DataFrame.

    Passing a DataFrame directly to ``DataLoader`` fails because
    ``dataframe[i]`` looks up a *column* labelled ``i`` (hence the
    ``KeyError``).  This wrapper indexes by row position instead.

    Args:
        dataframe: The pandas DataFrame whose rows are the samples.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows (samples) in the wrapped DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the row at integer position ``index`` as a NumPy array.

        ``iloc`` is positional, so this works regardless of the DataFrame's
        index labels.  The row is converted with ``to_numpy()`` because the
        DataLoader's default collate function does not accept pandas
        objects; NumPy arrays are stacked into a tensor per batch.

        NOTE(review): rows with mixed/non-numeric columns yield an
        object-dtype array, which presumably still needs a custom
        ``collate_fn`` -- confirm against the actual data.
        """
        return self.dataframe.iloc[index].to_numpy()
用您的 pandas DataFrame 实例化这个类,再把得到的对象传给 DataLoader,
这样应该就没问题了。