通过套接字服务代理应用Tornado 4.4

时间:2017-04-10 08:45:13

标签: python sockets proxy network-programming tornado

我有一个小问题。 我是Tornado框架中的整个套接字理论的新手。最近我使用Tornado框架编写了代理服务器。我的应用程序站在连接客户端(浏览器)的中间< ===>远程地址。所以连接看起来像这样:

client(my web browser) <=== PROXY(my app) ===> Remote address (stackoverflow.com)

如果我使用标准的“主要”功能,一切正常。但是我想要更低级别,我的意思是我想创建套接字并通过该连接提供我的代理应用程序。

我的代理商网址:

# coding: utf-8
"""URL's for proxy app."""
from settings import settings
from tornado.web import (
    StaticFileHandler,
    url,
)
from handlers import (
    mainHandlers,
    myProxy,
)

urls = [
    url(r"/admin/$", mainHandlers.MainHandler),
    url(r"/admin/delete_filter/", mainHandlers.DataDeleteHandler),
    url(r"/admin/filters/$", mainHandlers.DataGetter),
    url(r"/admin/new_filter/$", mainHandlers.FormHandler),
    url(r"/admin/stats/$", mainHandlers.StatsTableHandler),
    url(r"/admin/stats/query/$", mainHandlers.AjaxStatsGetHandler),
    url(r"/static/", StaticFileHandler, dict(path=settings['static_path'])),

    url(r'.*', myProxy.ProxyHandler),
    ]

我的ProxyHandler:

class ProxyHandler(tornado.web.RequestHandler):

    SUPPORTED_METHODS = ['GET', 'POST']

    def data_received(self, chunk):
        pass

    def compute_etag(self):
        return None  # disable tornado Etag

    def handle_response(self, response):
        if response.error and not isinstance(response.error, tornado.httpclient.HTTPError):
            self.set_status(500)
            self.write('Internal server error:\n' + str(response.error))
        else:
            self.set_status(response.code, response.reason)
            self._headers = tornado.httputil.HTTPHeaders()  # clear tornado default header

            for header, v in response.headers.get_all():
                 if header not in ('Content-Length', 'Transfer-Encoding', 'Content-Encoding', 'Connection'):
                    self.add_header(header, v)  # some header appear multiple times, eg 'Set-Cookie'

            secured_page = False
            for page in secure_pages:
                if page in self.request.uri:
                    secured_page = True
                    self.set_header('Content-Length', len(response.body))
                    self.write(response.body)
                    break
            if response.body and not secured_page:
                c.execute('SELECT filter_name FROM filters WHERE filter_type=1')
                tags = c.fetchall()
                soup = BeautifulSoup(response.body, 'html.parser')
                for row in tags:
                    catched_tags = soup.find_all(str(row[0]))
                    if catched_tags:
                        print 'catched: %s of <%s> tags' % (len(catched_tags), str(row[0]))
                    for tag in catched_tags:
                        tag.extract()
                new_body = str(soup)
                self.set_header('Content-Length', len(new_body))
                self.write(new_body)
        self.finish()

    @tornado.web.asynchronous
    def get(self):
        logger.debug('Handle %s request to %s', self.request.method, self.request.uri)

        body = self.request.body
        if not body:
            body = None

        try:
            if 'Proxy-Connection' in self.request.headers:
                del self.request.headers['Proxy-Connection']

            c.execute('SELECT filter_name FROM filters WHERE filter_type=2')
            urls = c.fetchall()
            for url in urls:
                if url[0] in self.request.path:
                    self.set_status(403)
                    self.finish()
                    return

            fetch_request(self.request.uri, self.handle_response,
                      method=self.request.method, body=body, headers=self.request.headers, follow_redirects=False,
                      allow_nonstandard_methods=True)
        except tornado.httpclient.HTTPError as e:
            if hasattr(e, 'response') and e.response:
                self.handle_response(e.response)
            else:
                self.set_status(500)
                self.write('Internal server error:\n' + str(e))
                self.finish()

    @tornado.web.asynchronous
    def post(self):
        return self.get()

简单的主要功能:

# coding: utf-8
import sys
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
from proxy.urls import proxy_urls

def make_app():
    """Create my application with my settings and urls."""
    return MyApplication(proxy_urls)


if __name__ == "__main__":
    u"""Main loop."""

    app = make_app()
    port = options.port
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
    app.listen(port)
    print 'tornado working on port %s' % port
    tornado.ioloop.IOLoop.current().start()

所以我想根据文档更改简单的低级方式:

import errno
import functools
import tornado.ioloop
import socket

def connection_ready(sock, fd, events):
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] not in (errno.EWOULDBLOCK, errno.EAGAIN):
                raise
            return
        connection.setblocking(0)
        handle_connection(connection, address)

if __name__ == '__main__':
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(0)
    sock.bind(("", port))
    sock.listen(128)

    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    io_loop.start()

为了实现这一点,我使用套接字(https://www.tutorialspoint.com/python/python_networking.htm)阅读了一些关于整个网络编程的内容。 本教程中的示例运行良好,因此我尝试将教程与Tornado文档中的示例相关联:

# coding: utf-8
import errno
import functools
import socket
import sys
import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process
import tornado.web
from tornado.options import options
from configuration.application import MyApplication

def make_app():
    u"""Create my application with my settings and urls."""
    return MyApplication()

def connection_ready(sock, fd, events):
    u"""Function to handle an incoming connection."""
    proxy_app = make_app()
    server = tornado.httpserver.HTTPServer(proxy_app)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] not in (errno.EWOULDBLOCK, errno.EAGAIN):
                raise
            return
        print 'Got connection from', address
        # connection.setblocking(False)
        connection.send(server)
        connection.close()

if __name__ == "__main__":
    u"""Main loop."""

    port = options.port
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(('', port))
    sock.listen(5)

    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    print 'Tornado Proxy working on port: %s' % port
    io_loop.start()

但是当我尝试连接到我的代理时(例如添加过滤器:http://127.0.0.1:8000/admin/filters/ - 我有一个处理这个url的处理程序) 我得到了特定的错误:

  

错误:tornado.application:回调中的异常(3,)

     

追踪(最近一次呼叫最后一次):

     

文件“/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/ioloop.py”,第887行,开始       handler_func(fd_obj,events)

     

文件“/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/stack_context.py”,第275行,null_wrapper       return fn(* args,** kwargs)

     

在connection_ready中的第35行文件“manage.py”       connection.send(服务器)

     

TypeError:send()参数1必须可转换为缓冲区,而不是HTTPServer

据我所知,我不能通过连接发送HTTPServer(从一端到另一端),它必须是一个缓冲区。 我的第一个想法是从处理URL的处理程序发送buffor(例如类ProxyHandler(tornado.web.RequestHandler)), 但是我如何处理不同的网址和不同的处理程序?

1 个答案:

答案 0 :(得分:0)

经过一些方法后,我将我作为Tornado应用程序编写的代理应用程序更改为纯Python代码,该代码处理来自远程地址的响应并执行过滤器操作。我认为这是我能做的最好,最快的事情。