基于HTTP套接字的简单代理脚本出了什么问题?

时间:2009-09-07 12:48:41

标签: python sockets http-proxy

我为代理功能编写了一个简单的Python脚本。但是,如果请求的网页有许多其他HTTP请求,例如,它可以正常工作。谷歌地图,页面渲染速度很慢。

关于我的代码中可能存在瓶颈的任何提示,以及我如何改进?

#!/usr/bin/python
import socket,select,re
from threading import Thread

class ProxyServer():
    def __init__(self, host, port):
        self.host=host
        self.port=port 
        self.sk1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def startServer(self):
        self.sk1.bind((self.host,self.port))
        self.sk1.listen(256)
        print "proxy is ready for connections..."
        while(1):
            conn,clientAddr = self.sk1.accept()
           # print "new request coming in from " + str(clientAddr)
            handler = RequestHandler(conn)
            handler.start()


class RequestHandler(Thread):

    def __init__(self, sk1):
        Thread.__init__(self)
        self.clientSK = sk1
        self.buffer = ''
        self.header = {}





    def run(self):
        sk1 = self.clientSK 
        sk2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        while 1:
            self.buffer += sk1.recv(8192)
            if self.buffer.find('\n') != -1:
                break;

        self.header = self.processHeader(self.buffer)
        if len(self.header)>0: #header got processed
            hostString = self.header['Host']
            host=port=''
            if hostString.__contains__(':'): # with port number
                host,port = hostString.split(':')
            else:
                host,port = hostString,"80"
            sk2.connect((host,int(port)))

        else:
            sk1.send('bad request')
            sk1.close();
            return
        inputs=[sk1,sk2]
        sk2.send(self.buffer)
        #counter
        count = 0
        while 1:
            count+=1
            rl, wl, xl = select.select(inputs, [], [], 3)
            if xl:
                break
            if rl:
                for x in rl:
                    data = x.recv(8192)
                    if x is sk1:
                        output = sk2
                    else:
                        output = sk1
                    if data:
                        output.send(data)
                        count = 0
            if count == 20:
                 break


        sk1.close()
        sk2.close()



    def processHeader(self,header):
        header = header.replace("\r\n","\n")
        lines = header.split('\n')    
        result = {}
        uLine = lines[0] # url line
        if len(uLine) == 0: return result # if url line empty return empty dict
        vl = uLine.split(' ')
        result['method'] = vl[0]
        result['url'] = vl[1]
        result['protocol'] = vl[2]
        for line in lines[1: - 1]:
            if len(line)>3: # if line is not empty
                exp = re.compile(': ')
                nvp = exp.split(line, 1)
                if(len(nvp)>1):
                    result[nvp[0]] = nvp[1]
        return result




if __name__ == "__main__":
    HOST, PORT = "0.0.0.0", 8088
    proxy = ProxyServer(HOST,PORT)
    proxy.startServer()

1 个答案:

答案 0 :(得分:0)

我不确定你的速度问题是什么,但我发现这里有一些其他的东西:

result['protocal'] = vl[2]

应该是

result['protocol'] = vl[2]

此代码缩进了一个级别:

sk2.connect((host,int(port)))

您可以使用this装饰器按行分析各个方法。