我正在使用 StratifiedKFold 创建用于拆分数据的索引,但它给出了索引超出范围的错误。X_dev 的索引范围是 0 到 47503,而 cv_index 变量的最小和最大索引分别为 0 和 10452,那么为什么会出现此错误?
# Show the dimensions of the feature table and of the label vector.
print(X_dev.shape)
print(Y_dev.shape)

# Build 5 stratified folds from the labels. The splitter is iterated
# directly and yields (train_index, cv_index) pairs of integer arrays.
skff = StratifiedKFold(Y_dev, 5)
for train_index, cv_index in skff:
    # Diagnostics: range, contents and size of each index array.
    print(train_index.min())
    print(train_index.max())
    print(cv_index.min())
    print(cv_index.max())
    print(train_index)
    print(cv_index)
    print(train_index.shape)
    print(cv_index.shape)
    # X_dev is a pandas DataFrame (the traceback goes through
    # pandas/core/frame.py). Plain ``X_dev[int_array]`` takes along axis=1,
    # i.e. it selects COLUMNS, so row positions up to 10452 are out of
    # bounds for a 128-column frame. ``.iloc`` selects rows by integer
    # position, which is what the fold indices are meant to address.
    X_train = X_dev.iloc[cv_index]
输出
(47504, 128)
(47504,)
9049
47503
0
10452
[ 9049 9051 9054 ..., 47501 47502 47503]
[ 0 1 2 ..., 10177 10242 10452]
(38000,)
(9504,)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-18-c911fae5d719> in <module>()
10 print train_index.shape
11 print cv_index.shape
---> 12 X_train = X_dev[cv_index]
/home/saurabh/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
1789 if isinstance(key, (Series, np.ndarray, Index, list)):
1790 # either boolean or fancy integer index
-> 1791 return self._getitem_array(key)
1792 elif isinstance(key, DataFrame):
1793 return self._getitem_frame(key)
/home/saurabh/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_array(self, key)
1834 else:
1835 indexer = self.ix._convert_to_indexer(key, axis=1)
-> 1836 return self.take(indexer, axis=1, convert=True)
1837
1838 def _getitem_multilevel(self, key):
/home/saurabh/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in take(self, indices, axis, convert, is_copy)
1356 new_data = self._data.take(indices,
1357 axis=self._get_block_manager_axis(axis),
-> 1358 convert=True, verify=True)
1359 result = self._constructor(new_data).__finalize__(self)
1360
/home/saurabh/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in take(self, indexer, axis, verify, convert)
3264 n = self.shape[axis]
3265 if convert:
-> 3266 indexer = maybe_convert_indices(indexer, n)
3267
3268 if verify:
/home/saurabh/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in maybe_convert_indices(indices, n)
1709 mask = (indices >= n) | (indices < 0)
1710 if mask.any():
-> 1711 raise IndexError("indices are out-of-bounds")
1712 return indices
1713
IndexError: indices are out-of-bounds
我添加了我认为相关的信息。如果需要更多信息来解决这个问题,请告诉我。
答案 0 :(得分:2)
此示例适用于较低维度的数据,但我不知道为什么它不适用于此维度的数据。所以我修改了这一行:
X_train = X_dev[cv_index]
改为
X_train = X_dev.ix[cv_index]
这样就可以正常运行了。