在 Python 2.7 中使用 multipart POST 上传时内存不足

时间:2014-11-05 13:15:11

标签: python python-2.7 urllib2 urllib multipartform-data

我有一个 300 MB 的文件需要上传,而我当前的代码无法胜任这个任务。

#----------------------------------------------------------------------------------
    def _post_multipart(self, host, selector,
                        fields, files,
                        ssl=False,port=80,
                        proxy_url=None,proxy_port=None):
        """ Performs a multipart POST to AGOL, Portal, or AGS.

            The complete request body is produced by
            self._encode_multipart_formdata() and held in memory before it is
            sent, so memory use grows with the size of the uploaded files.

            Inputs:
               host - string - root url (no http:// or https://)
                   ex: www.arcgis.com
               selector - string - everything after the host
                   ex: /PWJUSsdoJDp7SgLj/arcgis/rest/services/GridIndexFeatures/FeatureServer/0/1/addAttachment
               fields - dictionary - additional parameters like token and format information
               files - tuple array - one (form field name, full path, file name)
                   tuple per file, in the order unpacked by
                   _encode_multipart_formdata
               ssl - option to use SSL
               port - integer - port used when connecting directly to host
               proxy_url - string - host of the proxy server
               proxy_port - integer - port of the proxy server

            Output:
               JSON response as dictionary, or None when the response body
               is not valid JSON
            Usage:
               import urlparse
               url = "http://sampleserver3.arcgisonline.com/ArcGIS/rest/services/SanFrancisco/311Incidents/FeatureServer/0/10261291"
               parsed_url = urlparse.urlparse(url)
               params = {"f":"json"}
               print _post_multipart(host=parsed_url.hostname,
                               selector=parsed_url.path,
                               files=files,
                               fields=params
                               )
        """
        content_type, body = self._encode_multipart_formdata(fields, files)

        headers = {
            'content-type': content_type,
            'content-length': str(len(body))
        }

        if proxy_url:
            # Through a proxy the connection goes to the proxy itself and the
            # request line carries the absolute target URL.
            conn_host, conn_port = proxy_url, proxy_port
            scheme = 'https://' if ssl else 'http://'
            target = scheme + host + selector
        else:
            conn_host, conn_port = host, port
            target = selector

        if ssl:
            h = httplib.HTTPSConnection(conn_host, conn_port)
        else:
            h = httplib.HTTPConnection(conn_host, conn_port)

        try:
            h.request('POST', target, body, headers)
            resp_data = h.getresponse().read()
        finally:
            # Release the socket whether or not the request succeeded.
            h.close()

        try:
            result = json.loads(resp_data)
        except ValueError:
            # Response body was not JSON.
            return None

        error = result.get('error') if isinstance(result, dict) else None
        if (isinstance(error, dict)
                and error.get('message') == 'Request not made over ssl'
                and not ssl):
            # Retry once over SSL; the `not ssl` guard prevents endless
            # recursion if the server repeats the same error.
            return self._post_multipart(host=host, selector=selector, fields=fields,
                                        files=files, ssl=True,port=port,
                                        proxy_url=proxy_url,proxy_port=proxy_port)
        return result

def _encode_multipart_formdata(self, fields, files):
    """Build a multipart/form-data request body in memory.

    Inputs:
       fields - dictionary - form field name -> value; each value is
           converted with self._tostr()
       files - iterable of (form field name, full path, file name) tuples;
           entries whose path is not an existing regular file are skipped

    Output:
       (content_type, body) - the Content-Type header value carrying the
       boundary, and the encoded body returned by the buffer's getvalue()
    """
    chunk_size = 1024 * 1024
    boundary = mimetools.choose_boundary()
    buf = StringIO()
    for (key, value) in fields.iteritems():
        buf.write('--%s\r\n' % boundary)
        buf.write('Content-Disposition: form-data; name="%s"' % key)
        buf.write('\r\n\r\n' + self._tostr(value) + '\r\n')
    for (key, filepath, filename) in files:
        if not os.path.isfile(filepath):
            continue
        buf.write('--%s\r\n' % boundary)
        buf.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename))
        buf.write('Content-Type: %s\r\n' % (self._get_content_type3(filename)))
        buf.write('\r\n')
        # Copy the file in bounded chunks rather than concatenating
        # '\r\n' + whole_file + '\r\n', which would create additional
        # full-size temporary strings for every file.
        with open(filepath, "rb") as src:
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                buf.write(chunk)
        buf.write('\r\n')
    buf.write('--' + boundary + '--\r\n\r\n')
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, buf.getvalue()

我不能使用 requests 模块,只能使用 urllib2、urllib 等 Python 2.7.x 标准库来实现。

有没有办法把 300 MB 的文件上传到站点,而不必把整个文件内容一次性读入内存?

更新

后来我改用了 requests 模块,现在得到如下错误:MissingSchema: Invalid URL u'www.arcgis.com/sharing/rest/content/users//addItem?': No schema supplied. Perhaps you meant http://www.arcgis.com/sharing/rest/content/users//addItem??(即:URL 无效,没有提供协议前缀,例如 http://。)

这是什么意思?

我是通过 requests.post() 提交这些字段的:

    #----------------------------------------------------------------------------------
def _post_big_files(self, host, selector,
                    fields, files,
                    ssl=False,port=80,
                    proxy_url=None,proxy_port=None):
    """Send a multipart/form-data POST using requests + requests_toolbelt.

    The body is handed to requests as a MultipartEncoder built from open
    file objects instead of a pre-assembled string.

    Inputs:
       host - string - server host; "http://" or "https://" (chosen by
           `ssl`) is prepended when host carries no scheme
       selector - string - everything after the host
       fields - dictionary - form fields; each value is JSON-encoded; the
           caller's dictionary is not modified
       files - iterable of (form field name, full path, file name) tuples
       ssl - use https:// when host has no scheme
       port - accepted for signature parity with _post_multipart; not used
       proxy_url - string - proxy host or URL
       proxy_port - integer - proxy port, appended when given

    Output:
       JSON response as dictionary, or None when the response body is not
       valid JSON
    """
    import sys
    here = os.path.dirname(__file__)
    if here not in sys.path:
        # Make packages located next to this module importable.
        sys.path.insert(1, here)
    from requests_toolbelt import MultipartEncoder
    import requests

    proxyDict = {}
    if proxy_url is not None:
        proxy = proxy_url if '://' in proxy_url else 'http://' + proxy_url
        if proxy_port is not None:
            proxy = "%s:%s" % (proxy, proxy_port)
        proxyDict = {"http": proxy, "https": proxy}

    # requests raises MissingSchema for URLs without a scheme.
    if '://' in host:
        url = host + selector
    else:
        url = ('https://' if ssl else 'http://') + host + selector

    form = {}
    for k, v in fields.items():
        # Debug trace of each form field.
        print("%s %s" % (k, v))
        # NOTE(review): json.dumps wraps plain strings in double quotes;
        # confirm the server expects JSON-encoded values for string fields.
        form[k] = json.dumps(v)

    handles = []
    try:
        for key, filepath, filename in files:
            fh = open(filepath, 'rb')
            handles.append(fh)
            form[key] = (filename, fh, self._get_content_type3(filepath))
        m = MultipartEncoder(fields=form)
        print(url)
        r = requests.post(url, data=m,
                          headers={'Content-Type': m.content_type},
                          proxies=proxyDict)
        print(r)
        try:
            return r.json()
        except ValueError:
            # Response body was not JSON.
            return None
    finally:
        # Close every file that was opened, even if the request failed.
        for fh in handles:
            fh.close()

我是按照 requests 和 requests_toolbelt 帮助文档中的示例来写的。有人知道为什么会出错吗?

谢谢,

0 个答案:

没有答案