我正在尝试在 Caltech101 数据集上运行 AdaBoost 算法,并希望在 Python 中使用 sklearn 来实现。为了从 mldata.org 将数据集导入 Python,sklearn 提供了 sklearn.datasets.fetch_mldata() 函数,但我调用它时得到了 404 错误,尽管该数据集确实存在于 mldata 站点上。
我尝试了以下两种写法,但都遇到了 404 错误。
from sklearn.datasets import fetch_mldata
dataDict = fetch_mldata('caltech101-30')
from sklearn.datasets import fetch_mldata
dataDict = fetch_mldata('caltech101 30')
错误:
HTTPError Traceback (most recent call last)
<ipython-input-46-939c88ab9518> in <module>()
2
3
----> 4 dataDict = fetch_mldata('caltech101 30')
C:\Anaconda\lib\site-packages\sklearn\datasets\mldata.pyc in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
140 urlname = MLDATA_BASE_URL % quote(dataname)
141 try:
--> 142 mldata_url = urlopen(urlname)
143 except HTTPError as e:
144 if e.code == 404:
C:\Anaconda\lib\urllib2.pyc in urlopen(url, data, timeout)
125 if _opener is None:
126 _opener = build_opener()
--> 127 return _opener.open(url, data, timeout)
128
129 def install_opener(opener):
C:\Anaconda\lib\urllib2.pyc in open(self, fullurl, data, timeout)
408 for processor in self.process_response.get(protocol, []):
409 meth = getattr(processor, meth_name)
--> 410 response = meth(req, response)
411
412 return response
C:\Anaconda\lib\urllib2.pyc in http_response(self, request, response)
521 if not (200 <= code < 300):
522 response = self.parent.error(
--> 523 'http', request, response, code, msg, hdrs)
524
525 return response
C:\Anaconda\lib\urllib2.pyc in error(self, proto, *args)
440 http_err = 0
441 args = (dict, proto, meth_name) + args
--> 442 result = self._call_chain(*args)
443 if result:
444 return result
C:\Anaconda\lib\urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
380 func = getattr(handler, meth_name)
381
--> 382 result = func(*args)
383 if result is not None:
384 return result
C:\Anaconda\lib\urllib2.pyc in http_error_302(self, req, fp, code, msg, headers)
627 fp.close()
628
--> 629 return self.parent.open(new, timeout=req.timeout)
630
631 http_error_301 = http_error_303 = http_error_307 = http_error_302
C:\Anaconda\lib\urllib2.pyc in open(self, fullurl, data, timeout)
408 for processor in self.process_response.get(protocol, []):
409 meth = getattr(processor, meth_name)
--> 410 response = meth(req, response)
411
412 return response
C:\Anaconda\lib\urllib2.pyc in http_response(self, request, response)
521 if not (200 <= code < 300):
522 response = self.parent.error(
--> 523 'http', request, response, code, msg, hdrs)
524
525 return response
C:\Anaconda\lib\urllib2.pyc in error(self, proto, *args)
446 if http_err:
447 args = (dict, 'default', 'http_error_default') + orig_args
--> 448 return self._call_chain(*args)
449
450 # XXX probably also want an abstract factory that knows when it makes
C:\Anaconda\lib\urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
380 func = getattr(handler, meth_name)
381
--> 382 result = func(*args)
383 if result is not None:
384 return result
C:\Anaconda\lib\urllib2.pyc in http_error_default(self, req, fp, code, msg, hdrs)
529 class HTTPDefaultErrorHandler(BaseHandler):
530 def http_error_default(self, req, fp, code, msg, hdrs):
--> 531 raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
532
533 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 404: Dataset 'caltech101-30' not found on mldata.org.