我有以下问题。
我使用了BaseHttpServer。
class ReqHandler( BaseHTTPServer.BaseHTTPRequestHandler):
def __init__(self, request, client_address, server):
BaseHTTPServer.BaseHTTPRequestHandler.__init__( self, request, client_address, server )
def do_GET(self ):
self.performReq(self.path.decode('utf-8'))
def performReq (self, req ):
curDir = os.getcwd()
fname = curDir + '/' + self.path[1:]
try:
self.send_response(200,"Ok!")
ext = os.path.splitext(self.path)[1]
self.send_header('Content', 'text/xml; charset=UTF-8' )
self.end_headers()
f = open(fname, 'rb')
for l in f:
self.wfile.write(l)
f.close()
print 'file '+fname+" Ok"
except IOError:
print 'no file '+fname
self.send_error(404)
if __name__=='__main__':
server = BaseHTTPServer.HTTPServer( ('',8081), ReqHandler )
print('server ok!')
server.serve_forever()
如果文件的路径包含西里尔文。
http://localhost:8081/ТРА/Понедельник/Пн.doc)
我得到代码404。
谢谢。
答案 0 :(得分:0)
网址不仅编码为UTF-8;它们也是URL编码的。使用urllib.urlunquote()
function:
from urllib import urlunquote
self.performReq(unlunquote(self.path).decode('utf-8'))
演示:
>>> from urllib import unquote
>>> path = '/%D0%A2%D0%A0%D0%90/%D0%9F%D0%BE%D0%BD%D0%B5%D0%B4%D0%B5%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA/%D0%9F%D0%BD.doc'
>>> unquote(path).decode('utf8')
u'/\u0422\u0420\u0410/\u041f\u043e\u043d\u0435\u0434\u0435\u043b\u044c\u043d\u0438\u043a/\u041f\u043d.doc'
>>> print unquote(path).decode('utf8')
/ТРА/Понедельник/Пн.doc