当我使用以下代码加载MNIST数据时发生错误。(我已安装Anaconda,并在Jupyter Notebook中编写代码。)
# fetch_mldata builds a download URL from the dataset name and opens it over
# HTTP (see the urlopen / http_open frames in the traceback below), so this
# call only succeeds when the remote data server responds.
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
出现了TimeoutError,我不知道自己哪里做错了。我已经关闭了VPN代理,但仍然没有用。请帮帮我!
TimeoutError Traceback (most recent call last)
<ipython-input-1-3ba7b9c02a3b> in <module>()
1 from sklearn.datasets import fetch_mldata
----> 2 mnist = fetch_mldata('MNIST original')
~\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
152 urlname = MLDATA_BASE_URL % quote(dataname)
153 try:
--> 154 mldata_url = urlopen(urlname)
155 except HTTPError as e:
156 if e.code == 404:
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
~\Anaconda3\lib\urllib\request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
~\Anaconda3\lib\urllib\request.py in http_open(self, req)
1344
1345 def http_open(self, req):
-> 1346 return self.do_open(http.client.HTTPConnection, req)
1347
1348 http_request = AbstractHTTPHandler.do_request_
~\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1319 except OSError as err: # timeout error
1320 raise URLError(err)
-> 1321 r = h.getresponse()
1322 except:
1323 h.close()
~\Anaconda3\lib\http\client.py in getresponse(self)
1329 try:
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
1333 self.close()
~\Anaconda3\lib\http\client.py in begin(self)
295 # read until we get a non-100 response
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
299 break
~\Anaconda3\lib\http\client.py in _read_status(self)
256
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
260 raise LineTooLong("status line")
~\Anaconda3\lib\socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
我下载了MNIST数据集,并尝试自己加载数据。我从网上复制了一段用于加载MNIST的代码,但仍然无法加载数据。我想我可能需要修改其中的一些代码,而不是完全照搬,但我不知道应该改哪里。(我只是Python初学者。)下面是我用来加载已下载的MNIST数据的代码。出错是因为我把数据放在了错误的位置吗?
def _open_idx(path):
    """Open an IDX file for binary reading, decompressing ``.gz`` files."""
    if str(path).lower().endswith('.gz'):
        import gzip  # local import: only needed for compressed inputs
        return gzip.open(path, 'rb')
    return open(path, 'rb')


def loadmnist(imagefile, labelfile):
    """Load MNIST images and labels from a pair of IDX files.

    Both files may be either the raw IDX files or their gzip-compressed
    ``.gz`` versions; the format is chosen from the file extension.

    Args:
        imagefile: Path to the IDX3 image file (e.g. ``train-images-idx3-ubyte``).
        labelfile: Path to the IDX1 label file (e.g. ``train-labels-idx1-ubyte``).

    Returns:
        A tuple ``(x, y)`` where ``x`` is a ``uint8`` array of shape
        ``(N, rows * cols)`` holding one flattened image per row and ``y`` is
        a ``uint8`` array of shape ``(N,)`` holding the labels. ``N`` is the
        item count declared in the label file header.

    Raises:
        FileNotFoundError: If either path does not exist.
        struct.error: If a file is too short to contain its header.
        ValueError: If a file contains fewer data bytes than its header
            declares.
    """
    # ``with`` guarantees both files are closed even if parsing fails.
    with _open_idx(imagefile) as images, _open_idx(labelfile) as labels:
        # Image header: magic number, image count, rows, cols, each stored as
        # a big-endian unsigned 32-bit integer.
        _magic, _number_of_images, rows, cols = unpack('>IIII', images.read(16))
        # Label header: magic number, label count.
        _magic, N = unpack('>II', labels.read(8))

        # Read all payload bytes in one call instead of one byte at a time.
        pixel_bytes = images.read(N * rows * cols)
        label_bytes = labels.read(N)

    if len(pixel_bytes) != N * rows * cols:
        raise ValueError(
            'image file is truncated: expected %d pixel bytes, got %d'
            % (N * rows * cols, len(pixel_bytes))
        )
    if len(label_bytes) != N:
        raise ValueError(
            'label file is truncated: expected %d labels, got %d'
            % (N, len(label_bytes))
        )

    # frombuffer yields read-only views over the bytes objects; copy so the
    # returned arrays are writable, like the arrays built with np.zeros before.
    x = np.frombuffer(pixel_bytes, dtype=np.uint8).reshape(N, rows * cols).copy()
    y = np.frombuffer(label_bytes, dtype=np.uint8).copy()
    return (x, y)
上面这部分代码可以正常运行。
# Load the training and test sets. The 'data/...' paths are relative, so they
# are resolved against the current working directory of the running kernel.
train_img, train_lbl = loadmnist('data/train-images-idx3-ubyte'
, 'data/train-labels-idx1-ubyte')
test_img, test_lbl = loadmnist('data/t10k-images-idx3-ubyte'
, 'data/t10k-labels-idx1-ubyte')
错误是这样的。
FileNotFoundError Traceback (most recent call last)
<ipython-input-5-b23a5078b5bb> in <module>()
1 train_img, train_lbl = loadmnist('data/train-images-idx3-ubyte'
----> 2 , 'data/train-labels-idx1-ubyte')
3 test_img, test_lbl = loadmnist('data/t10k-images-idx3-ubyte'
4 , 'data/t10k-labels-idx1-ubyte')
<ipython-input-4-967098b85f28> in loadmnist(imagefile, labelfile)
2
3 # Open the images with gzip in read binary mode
----> 4 images = open(imagefile, 'rb')
5 labels = open(labelfile, 'rb')
6
FileNotFoundError: [Errno 2] No such file or directory: 'data/train-images-idx3-ubyte'
我下载的数据放在我刚刚创建的文件夹中。 enter image description here
答案 0 :(得分:1)
如果您想直接从某个库中加载数据集而不是先下载然后加载,请从Keras加载。
可以这样做
# Load MNIST through Keras; load_data() is unpacked into a training pair and a
# test pair, each made of (inputs, labels).
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
如果您是机器学习和Python的初学者,想进一步了解相关内容,建议您阅读这篇优秀的博客文章。
此外,将文件传递给函数时,也需要文件扩展名。也就是说,您必须像这样调用该函数。
# Call loadmnist with the full file names, including the extension.
# NOTE(review): loadmnist as shown in the question opens files with plain
# open(), not gzip.open(); presumably the .gz archives must be decompressed
# first (or the function changed to use gzip) for this to parse correctly.
train_img, train_lbl = loadmnist('mnist//train-images-idx3-ubyte.gz'
, 'mnist//train-labels-idx1-ubyte.gz')
test_img, test_lbl = loadmnist('mnist//t10k-images-idx3-ubyte.gz'
, 'mnist//t10k-labels-idx1-ubyte.gz')
在用于从本地磁盘加载数据的代码中,由于文件不在给定的位置,因此会引发错误。请确保Jupyter笔记本(.ipynb文件)所在的文件夹中存在mnist文件夹。
答案 1 :(得分:0)
该服务器已经关闭了一段时间,请参考GitHub讨论帖中的一些解决方案,包括从Tensorflow导入,或直接从其他来源下载。
答案 2 :(得分:0)
您可以直接从sklearn自带的数据集中加载手写数字数据(注意:load_digits返回的是8×8像素的小型digits数据集,并非完整的28×28 MNIST)。
# Load the digits dataset through sklearn.datasets.load_digits().
from sklearn import datasets
digits = datasets.load_digits()
或者您可以使用Keras加载它。
# Load MNIST through Keras and unpack it into training and test pairs.
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
另一种选择是直接下载数据集,然后用pandas之类的工具加载。
# Assumes `import pandas as pd` has already been run; 'filename.csv' is a
# placeholder for the path of the downloaded CSV file.
df = pd.read_csv('filename.csv')
答案 3 :(得分:0)
我在本地Anaconda自带的Spyder(Python 3.7)上编写代码时遇到了这个错误。我尝试了很多答案,最后只有在下载MNIST数据集之后显式指定该文件的位置,才解决了这个错误。
from scipy.io import loadmat

# Absolute path of the manually downloaded mnist-original.mat file.
mnist_path = r"C:\Users\duppa\Desktop\mnist-original.mat"
mnist_raw = loadmat(mnist_path)

# Repackage the loaded .mat contents into a dictionary: the "data" entry is
# transposed and the first row of the "label" entry becomes the target.
mnist = dict(
    data=mnist_raw["data"].T,
    target=mnist_raw["label"][0],
    COL_NAMES=["label", "data"],
    DESCR="mldata.org dataset: mnist-original",
)
mnist