在 Python 中加载数据集时,我遇到了一个简单的问题。我想定义一个名为 load_dataset() 的函数,
以便在训练自动编码器时使用。
我的代码是
import matplotlib
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from urllib import urlretrieve
import cPickle as pickle
import os
import gzip
rom urllib import urlretrieve
import cPickle as pickle
import os
import gzip
import matplotlib.cm as cm
import theano
import lasagne
from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet
from nolearn.lasagne import visualize
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
#############################I tried to load data from open source
def load_dataset():
    """Download the dataset archive if it is missing and load its splits.

    The archive is cached in the current working directory; the download is
    skipped when the file already exists.

    Returns:
        A 6-tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``. The
        ``X_*`` arrays are reshaped to ``(-1, 1, 28, 28)`` and the ``y_*``
        arrays are cast to ``np.uint8``.

    Raises:
        IOError: if the download fails (any partially written file is removed
            first so the next call retries instead of reading a truncated
            archive).
    """
    # The host name must be 'ftp.nrg.wustl.edu'. It was previously written
    # with a space ('ftp nrg.wustl.edu'), which cannot be resolved and made
    # urlretrieve fail with "nodename nor servname provided, or not known".
    url = 'ftp://ftp.nrg.wustl.edu/data/oasis_cross-sectional_disc2.tar.gz'
    filename = 'oasis_cross-sectional_disc2.tar.gz'
    if not os.path.exists(filename):
        # NOTE(review): the message says MNIST but the URL points at an OASIS
        # archive; kept as-is so the printed output does not change.
        print("Downloading MNIST dataset...")
        try:
            urlretrieve(url, filename)
        except (IOError, OSError):
            # Do not leave a truncated file behind: it would be mistaken for
            # a valid cached download on the next call.
            if os.path.exists(filename):
                os.remove(filename)
            raise
    # NOTE(review): the file name ends in .tar.gz, which suggests a tar
    # archive rather than a gzip-compressed pickle. If so, pickle.load will
    # fail here and the archive needs to be extracted with `tarfile` and
    # converted to arrays first -- confirm against the actual download.
    # SECURITY: unpickling executes arbitrary code; only load trusted files.
    with gzip.open(filename, 'rb') as f:
        data = pickle.load(f)
    X_train, y_train = data[0]
    X_val, y_val = data[1]
    X_test, y_test = data[2]
    # (-1, 1, 28, 28) assumes 28x28 single-channel images (the MNIST layout)
    # -- TODO confirm this matches the data actually stored in the file.
    X_train = X_train.reshape((-1, 1, 28, 28))
    X_val = X_val.reshape((-1, 1, 28, 28))
    X_test = X_test.reshape((-1, 1, 28, 28))
    y_train = y_train.astype(np.uint8)
    y_val = y_val.astype(np.uint8)
    y_test = y_test.astype(np.uint8)
    return X_train, y_train, X_val, y_val, X_test, y_test
# Unpack the six arrays returned by load_dataset(); the call downloads the
# archive first when it is not already present in the working directory.
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
Downloading MNIST dataset...
Traceback (most recent call last):
File "<pyshell#46>", line 1, in <module>
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
File "<pyshell#45>", line 6, in load_dataset
urlretrieve(url, filename)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 98, in urlretrieve
return opener.retrieve(url, filename, reporthook, data)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 245, in retrieve
fp = self.open(url, data)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 213, in open
return getattr(self, name)(url)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 526, in open_ftp
host = socket.gethostbyname(host)
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known
出现此错误
我还尝试使用下面的代码从桌面加载数据:
for path, dirs, files in os.walk(pat):
    for filename in files:
        fullpath = os.path.join(path, filename)
        with open(fullpath, 'r') as f:
            S = np.load(f)
            data = f.read()
            print data
但我无法将数据加载为 X_train、y_train、X_val、y_val、X_test、y_test 这些值。我不知道是应该把数据集压缩成 .pkl.gz 格式,还是应该使用其他函数来加载数据。你能帮帮我吗?
答案 0 :(得分:0)
如果可以使用keras构建网络,则可以使用以下方法加载mnist数据集
import keras
from keras.datasets import mnist
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
# Unpack the (train, test) pairs returned by mnist.load_data().
(x_train,y_train),(x_test,y_test)= mnist.load_data()
如果您在下载数据集时遇到任何错误,请从 https://s3.amazonaws.com/img-datasets/mnist.npz 下载数据集,并将其放在 ~/.keras/datasets 文件夹中。