# Stream the EMNIST "byclass" training CSV in fixed-size chunks so the whole
# file never has to be held in memory at once.
# NOTE(review): header=0 uses the first line of the file as column names. The
# column names printed for this file ("0", "0.1", "0.2", ...) look like a data
# row that was consumed as a header -- confirm, and use header=None if so.
trainchunks = pd.read_csv(
    'emnist-byclass-train.csv',
    header=0,
    delimiter=",",
    engine='c',
    na_filter=False,
    dtype=np.int64,
    chunksize=50000,
)

for chk in trainchunks:
    # Rename the first column through the public API. Writing into
    # ``chk.columns.values[0]`` only mutates the underlying array: the name
    # prints as "labels", but the Index lookup table can stay stale, so
    # ``chk["labels"]`` raises KeyError.
    chk = chk.rename(columns={chk.columns[0]: "labels"})
    print(chk.columns)

    # Drop every row whose label is in 9..62, in one vectorized pass instead
    # of re-filtering the frame once per label.
    # NOTE(review): this keeps labels 0..8 -- verify against the EMNIST
    # byclass label mapping that these are the classes actually wanted.
    chk = chk[~chk["labels"].isin(list(range(9, 63)))]

    # Split labels from pixels *before* scaling/reshaping: the label column
    # must not be divided by 255, and 785 columns cannot reshape to 28x28.
    Y_trainchunk = chk["labels"]
    pixels = chk.drop("labels", axis=1)

    # Normalize the pixel data
    pixels = pixels / 255.0

    # Reshape image in 3 dimensions (height = 28px, width = 28px, canal = 1)
    chk = pixels.values.reshape(-1, 28, 28, 1)

    # Encode labels to one hot vectors of length num_classes
    # (ex : 2 -> [0, 0, 1, 0, ..., 0]). Uses this chunk's labels; the
    # previous ``Y_train`` name was never defined.
    Y_trainchunk = to_categorical(Y_trainchunk, num_classes=26)
我正在尝试分批处理数据,因为一次性处理整个 EMNIST 数据集时会遇到内存错误。我只想要小写字母 a、b、c、d、e、f、g、h(我认为是前 8 个类?),所以想去掉其后的所有类别。下面是我遇到的问题,即使不分批加载也会出现。这是那次运行的输出:
labels 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 ... 0.466 \
0 36 0 0 0 0 0 0 0 0 0 ... 0
1 6 0 0 0 0 0 0 0 0 0 ... 0
2 3 0 0 0 0 0 0 0 0 0 ... 0
3 22 0 0 0 0 0 0 0 0 0 ... 0
4 38 0 0 0 0 0 0 0 0 0 ... 0
5 5 0 0 0 0 0 0 0 0 0 ... 0
6 9 0 0 0 0 0 0 0 0 0 ... 0
7 47 0 0 0 0 0 0 0 0 0 ... 0
8 4 0 0 0 0 0 0 0 0 0 ... 0
9 7 0 0 0 0 0 0 0 0 0 ... 0
10 56 0 0 0 0 0 0 0 0 0 ... 0
11 29 0 0 0 0 0 0 0 0 0 ... 0
12 40 0 0 0 0 0 0 0 0 0 ... 0
13 55 0 0 0 0 0 0 0 0 0 ... 0
14 6 0 0 0 0 0 0 0 0 0 ... 0
15 55 0 0 0 0 0 0 0 0 0 ... 0
16 57 0 0 0 0 0 0 0 0 0 ... 0
17 43 0 0 0 0 0 0 0 0 0 ... 0
18 6 0 0 0 0 0 0 0 0 0 ... 0
19 3 0 0 0 0 0 0 0 0 0 ... 0
20 7 0 0 0 0 0 0 0 0 0 ... 0
21 32 0 0 0 0 0 0 0 0 0 ... 0
22 3 0 0 0 0 0 0 0 0 0 ... 0
23 40 0 0 0 0 0 0 0 0 0 ... 0
24 9 0 0 0 0 0 0 0 0 0 ... 0
25 22 0 0 0 0 0 0 0 0 0 ... 0
26 15 0 0 0 0 0 0 0 0 0 ... 0
27 26 0 0 0 0 0 0 0 0 0 ... 0
28 2 0 0 0 0 0 0 0 0 0 ... 0
29 5 0 0 0 0 0 0 0 0 0 ... 0
... ... .. ... ... ... ... ... ... ... ... ... ...
697901 55 0 0 0 0 0 0 0 0 0 ... 0
697902 49 0 0 0 0 0 0 0 0 0 ... 0
697903 24 0 0 0 0 0 0 0 0 0 ... 0
697904 3 0 0 0 0 0 0 0 0 0 ... 0
697905 1 0 0 0 0 0 0 0 0 0 ... 0
697906 5 0 0 0 0 0 0 0 0 0 ... 0
697907 6 0 0 0 0 0 0 0 0 0 ... 0
697908 40 0 0 0 0 0 0 0 0 0 ... 0
697909 24 0 0 0 0 0 0 0 0 0 ... 0
697910 9 0 0 0 0 0 0 0 0 0 ... 0
697911 8 0 0 0 0 0 0 0 0 0 ... 0
697912 43 0 0 0 0 0 0 0 0 0 ... 0
697913 4 0 0 0 0 0 0 0 0 0 ... 0
697914 9 0 0 0 0 0 0 0 0 0 ... 0
697915 43 0 0 0 0 0 0 0 0 0 ... 0
697916 7 0 0 0 0 0 0 0 0 0 ... 0
697917 2 0 0 0 0 0 0 0 0 0 ... 0
697918 32 0 0 0 0 0 0 0 0 0 ... 0
697919 40 0 0 0 0 0 0 0 0 0 ... 0
697920 53 0 0 0 0 0 0 0 0 0 ... 0
697921 8 0 0 0 0 0 0 0 0 0 ... 0
697922 5 0 0 0 0 0 0 0 0 0 ... 0
697923 47 0 0 0 0 0 0 0 0 0 ... 0
697924 1 0 0 0 0 0 0 0 0 0 ... 0
697925 23 0 0 0 0 0 0 0 0 0 ... 0
697926 40 0 0 0 0 0 0 0 0 0 ... 0
697927 47 0 0 0 0 0 0 0 0 0 ... 0
697928 5 0 0 0 0 0 0 0 0 0 ... 0
697929 11 0 0 0 0 0 0 0 0 0 ... 0
697930 22 0 0 0 0 0 0 0 0 0 ... 0
0.467 0.468 0.469 0.470 0.471 0.472 0.473 0.474 0.475
0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0
20 0 0 0 0 0 0 0 0 0
21 0 0 0 0 0 0 0 0 0
22 0 0 0 0 0 0 0 0 0
23 0 0 0 0 0 0 0 0 0
24 0 0 0 0 0 0 0 0 0
25 0 0 0 0 0 0 0 0 0
26 0 0 0 0 0 0 0 0 0
27 0 0 0 0 0 0 0 0 0
28 0 0 0 0 0 0 0 0 0
29 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ...
697901 0 0 0 0 0 0 0 0 0
697902 0 0 0 0 0 0 0 0 0
697903 0 0 0 0 0 0 0 0 0
697904 0 0 0 0 0 0 0 0 0
697905 0 0 0 0 0 0 0 0 0
697906 0 0 0 0 0 0 0 0 0
697907 0 0 0 0 0 0 0 0 0
697908 0 0 0 0 0 0 0 0 0
697909 0 0 0 0 0 0 0 0 0
697910 0 0 0 0 0 0 0 0 0
697911 0 0 0 0 0 0 0 0 0
697912 0 0 0 0 0 0 0 0 0
697913 0 0 0 0 0 0 0 0 0
697914 0 0 0 0 0 0 0 0 0
697915 0 0 0 0 0 0 0 0 0
697916 0 0 0 0 0 0 0 0 0
697917 0 0 0 0 0 0 0 0 0
697918 0 0 0 0 0 0 0 0 0
697919 0 0 0 0 0 0 0 0 0
697920 0 0 0 0 0 0 0 0 0
697921 0 0 0 0 0 0 0 0 0
697922 0 0 0 0 0 0 0 0 0
697923 0 0 0 0 0 0 0 0 0
697924 0 0 0 0 0 0 0 0 0
697925 0 0 0 0 0 0 0 0 0
697926 0 0 0 0 0 0 0 0 0
697927 0 0 0 0 0 0 0 0 0
697928 0 0 0 0 0 0 0 0 0
697929 0 0 0 0 0 0 0 0 0
697930 0 0 0 0 0 0 0 0 0
[697931 rows x 785 columns]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'labels'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-9-8934d31c2365> in <module>()
4 print(train)
5 for i in range(27,63):
----> 6 train=train[train["labels"]!=i]
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
2137 return self._getitem_multilevel(key)
2138 else:
-> 2139 return self._getitem_column(key)
2140
2141 def _getitem_column(self, key):
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _getitem_column(self, key)
2144 # get column
2145 if self.columns.is_unique:
-> 2146 return self._get_item_cache(key)
2147
2148 # duplicate columns & possible reduce dimensionality
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in _get_item_cache(self, item)
1840 res = cache.get(item)
1841 if res is None:
-> 1842 values = self._data.get(item)
1843 res = self._box_item_values(item, values)
1844 cache[item] = res
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in get(self, item, fastpath)
3841
3842 if not isna(item):
-> 3843 loc = self.items.get_loc(item)
3844 else:
3845 indexer = np.arange(len(self.items))[isna(self.items)]
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'labels'
这里的格式有些乱,但在控制台中,“labels”标题正好位于第一列的上方。谢谢。
答案 0 :(得分:0)
我自己也是 pandas 新手,但如果您请求的标签(列名)不存在,就会出现 KeyError。
此代码显示名为“ labelz”的列有效,而名为“ labels”的列给出KeyError。重命名该列后,如果仍然看到KeyError,则它必须与其他内容有关。
# Demonstrates filtering rows by a renamed column, once with the column
# called "labelz" (df2) and once with it called "labels" (df3).
# "labels" is NOT a reserved word in pandas; any column name that actually
# exists on the frame being indexed works the same way.
df2 = df2.rename(columns={'Col1': 'labelz'})
df3 = df2.rename(columns={'labelz': 'labels'})  # copy of df2 with the column renamed

print("df2")
print(df2)
for i in range(5, 7):
    df2 = df2[df2["labelz"] != i]
print("df2")
print(df2)
#
print("df3")
print(df3)
for i in range(5, 7):
    # Build the mask from df3 itself. The earlier version indexed
    # df2["labels"] here; df2 only has "labelz", and that typo -- not the
    # column name -- is what raised KeyError: 'labels'.
    df3 = df3[df3["labels"] != i]
print("df3")
print(df3)
Output:
df2
labelz Col2 Col3
Row4 4 6 1
Row5 5 1 1
Row6 6 2 0
Row7 7 4 -1
Row8 8 6 -2
df2
labelz Col2 Col3
Row4 4 6 1
Row7 7 4 -1
Row8 8 6 -2
df3
labels Col2 Col3
Row4 4 6 1
Row5 5 1 1
Row6 6 2 0
Row7 7 4 -1
Row8 8 6 -2
KeyError: 'labels'