python中的代理处理程序

时间:2014-03-20 06:51:09

标签: python python-2.7 urllib2 urllib

当我在Web浏览器设置中禁用代理服务器并注释代码处理程序编码时,代码工作正常。

import urllib2
import urllib2_file
import urllib
import random
import mimetypes
import string
from os import listdir
import time
from google.refine import refine
from google.refine import facet
proxy = urllib2.ProxyHandler({'http': '10.200.1.26'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)

def encode_multipart(fields, files, boundary=None):

    def escape_quote(s):
        return s.replace('"', '\\"')

    if boundary is None:
        boundary = ''.join(random.choice(_BOUNDARY_CHARS) for i in range(30))
    lines = []

    for name, value in fields.items():
        lines.extend((
            '--{0}'.format(boundary),
            'Content-Disposition: form-data; name="upload"', #.format(escape_quote(name)),
            '',
            str(value),
        ))

    for name, value in files.items():
        filename = value['filename']
        if 'mimetype' in value:
            mimetype = value['mimetype']
        else:
            mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
        lines.extend((
            '--{0}'.format(boundary),
            'Content-Disposition: form-data; name="upload"; filename="{0}"'.format(escape_quote(filename)),
            'Content-Type: {0}'.format(mimetype),
            '',
            value['content'],
        ))

    lines.extend((
        '--{0}--'.format(boundary),
        '',
    ))
    body = '\r\n'.join(lines)

    headers = {
        'Content-Type': 'multipart/form-data; boundary={0}'.format(boundary),
        'Content-Length': str(len(body)),
    }

    return (body, headers)


_BOUNDARY_CHARS = string.digits + string.ascii_letters

u = urllib2.urlopen("http://127.0.0.1:3333/command/core/create-importing-job",data=urllib.urlencode({"test":""}))
a=u.read()
id=""
for i in a:
    if(i.isdigit()):
        id+=str(i)
# sample output '{ "jobID" : 1393566803991 }'

files = {}
pathtoXML = r"C:\75"
#pathtoXML = r"C:\AM\trial"
for i in listdir(pathtoXML):
    files[i] = {'filename': i, 'content': open(pathtoXML + "\\"+ i).read()}

#load raw data using the job id found in
url = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobI="+id+"&subCommand=load-raw-data"

data,headers = encode_multipart({}, files)
#print len(data)
#print headers
req = urllib2.Request(url, data=data, headers=headers)
f = urllib2.urlopen(req)
f.read()
# get job status
u=urllib2.urlopen("http://127.0.0.1:3333/command/core/get-importing-job-status?jobID="+id+"", "test")
u.read() 
#from fileSelection update file selection
u=urllib2.urlopen("http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&subCommand=update-file-selection&jobID="+id+"", "fileSelection=%5B0%2C1%2C2%2C3%5D")
u.read()



#init parser format text
u=urllib2.urlopen("http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID="+id+"&subCommand=initialize-parser-ui&format=text%2Fxml")
u.read()
#update format and options
updateformatoptionurl = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID="+id+"&subCommand=update-format-and-options"
d=urllib.urlencode({"format":"text/xml","options":{"recordPath":["ArrayOfAfiles","Afiles"],"limit":-1,"includeFileSources":"false","guessCellValueTypes":"false"}})
u=urllib2.urlopen(updateformatoptionurl,d)
u.read()
'{"status":"ok"}'
#get-models
u=urllib2.urlopen("http://127.0.0.1:3333/command/core/get-models?importingJobID="+id)
u.read()

# create project from import job
createfromimporturl = "http://127.0.0.1:3333/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID="+id+"&subCommand=create-project"
d=urllib.urlencode({"format":"text/xml","options":{"recordPath":["ArrayOfAfiles","Afiles"],"limit":-1,"includeFileSources":"false","projectName":time.ctime()}})
u=urllib2.urlopen(createfromimporturl, d)
r=u.read()

嵌入代理处理程序编码后,当我运行代码抱怨时它不起作用:

Traceback (most recent call last):
  File "C:\hari\trial.py", line 87, in <module>
    u = urllib2.urlopen("http://127.0.0.1:3333/command/core/create-importing-job",data=urllib.urlencode({"test":""}))
  File "C:\Python27\lib\urllib2.py", line 126, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python27\lib\urllib2.py", line 391, in open
    response = self._open(req, data)
  File "C:\Python27\lib\urllib2.py", line 409, in _open
    '_open', req)
  File "C:\Python27\lib\urllib2.py", line 369, in _call_chain
    result = func(*args)
  File "C:\Python27\urllib2_file.py", line 207, in http_open
    return self.do_open(httplib.HTTP, req)
  File "C:\Python27\urllib2_file.py", line 298, in do_open
    return self.parent.error('http', req, fp, code, msg, hdrs)
  File "C:\Python27\lib\urllib2.py", line 435, in error
    return self._call_chain(*args)
  File "C:\Python27\lib\urllib2.py", line 369, in _call_chain
    result = func(*args)
  File "C:\Python27\lib\urllib2.py", line 518, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 404: Not Found

0 个答案:

没有答案