Question

我正在尝试在 Caltech101 数据集上运行 AdaBoost 算法，并希望在 Python 中使用 sklearn 来实现。为了从 mldata.org 把数据集导入 Python，sklearn 提供了 sklearn.datasets.fetch_mldata() 函数；但是调用它时我得到了 404 错误，尽管该数据集确实存在于 mldata 站点上。

我尝试了以下两种写法，但都遇到了 404 错误。

from sklearn.datasets import fetch_mldata
dataDict = fetch_mldata('caltech101-30')

from sklearn.datasets import fetch_mldata
dataDict = fetch_mldata('caltech101 30')

错误：

HTTPError                                 Traceback (most recent call last)
<ipython-input-46-939c88ab9518> in <module>()
      2 
      3 
----> 4 dataDict = fetch_mldata('caltech101 30')

C:\Anaconda\lib\site-packages\sklearn\datasets\mldata.pyc in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
    140         urlname = MLDATA_BASE_URL % quote(dataname)
    141         try:
--> 142             mldata_url = urlopen(urlname)
    143         except HTTPError as e:
    144             if e.code == 404:

C:\Anaconda\lib\urllib2.pyc in urlopen(url, data, timeout)
    125     if _opener is None:
    126         _opener = build_opener()
--> 127     return _opener.open(url, data, timeout)
    128 
    129 def install_opener(opener):

C:\Anaconda\lib\urllib2.pyc in open(self, fullurl, data, timeout)
    408         for processor in self.process_response.get(protocol, []):
    409             meth = getattr(processor, meth_name)
--> 410             response = meth(req, response)
    411 
    412         return response

C:\Anaconda\lib\urllib2.pyc in http_response(self, request, response)
    521         if not (200 <= code < 300):
    522             response = self.parent.error(
--> 523                 'http', request, response, code, msg, hdrs)
    524 
    525         return response

C:\Anaconda\lib\urllib2.pyc in error(self, proto, *args)
    440             http_err = 0
    441         args = (dict, proto, meth_name) + args
--> 442         result = self._call_chain(*args)
    443         if result:
    444             return result

C:\Anaconda\lib\urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    380             func = getattr(handler, meth_name)
    381 
--> 382             result = func(*args)
    383             if result is not None:
    384                 return result

C:\Anaconda\lib\urllib2.pyc in http_error_302(self, req, fp, code, msg, headers)
    627         fp.close()
    628 
--> 629         return self.parent.open(new, timeout=req.timeout)
    630 
    631     http_error_301 = http_error_303 = http_error_307 = http_error_302

C:\Anaconda\lib\urllib2.pyc in open(self, fullurl, data, timeout)
    408         for processor in self.process_response.get(protocol, []):
    409             meth = getattr(processor, meth_name)
--> 410             response = meth(req, response)
    411 
    412         return response

C:\Anaconda\lib\urllib2.pyc in http_response(self, request, response)
    521         if not (200 <= code < 300):
    522             response = self.parent.error(
--> 523                 'http', request, response, code, msg, hdrs)
    524 
    525         return response

C:\Anaconda\lib\urllib2.pyc in error(self, proto, *args)
    446         if http_err:
    447             args = (dict, 'default', 'http_error_default') + orig_args
--> 448             return self._call_chain(*args)
    449 
    450 # XXX probably also want an abstract factory that knows when it makes

C:\Anaconda\lib\urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    380             func = getattr(handler, meth_name)
    381 
--> 382             result = func(*args)
    383             if result is not None:
    384                 return result

C:\Anaconda\lib\urllib2.pyc in http_error_default(self, req, fp, code, msg, hdrs)
    529 class HTTPDefaultErrorHandler(BaseHandler):
    530     def http_error_default(self, req, fp, code, msg, hdrs):
--> 531         raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
    532 
    533 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 404: Dataset 'caltech101-30' not found on mldata.org.

无法通过 sklearn.datasets.fetch_mldata 导入数据集

0 个答案: