Monkey patch of _ssl_wrap_socket not executing with the Python requests library

Asked: 2017-10-23 05:06:39

Tags: python python-3.x python-requests vhosts urllib3

We are trying to add HTTPS support to a web server virtual host scanning tool. The tool uses the Python 3 requests library, which in turn uses urllib3.

We need a way to supply our own SNI hostname, so we are trying to patch urllib3's ssl_wrap_socket function in order to control server_hostname, but without much success.
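For context, server_hostname is the same argument Python's standard ssl module uses to set SNI on a TLS connection; a minimal illustration outside of requests (the host name here is only a placeholder):

import socket
import ssl

ctx = ssl.create_default_context()
raw = socket.create_connection(('example.org', 443))
# server_hostname is sent as the SNI value and is also used for certificate
# hostname checking; this is the value we want to control per request.
tls = ctx.wrap_socket(raw, server_hostname='example.org')
print(tls.version())
tls.close()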

Here is the full code:

from urllib3.util import ssl_
_target_host = None
_orig_wrap_socket = ssl_.ssl_wrap_socket

def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                     ca_certs=None, server_hostname=None,
                     ssl_version=None, ciphers=None, ssl_context=None,
                     ca_cert_dir=None):
    _orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
                      cert_reqs=cert_reqs, ca_certs=ca_certs,
                      server_hostname=_target_host, ssl_version=ssl_version,
                      ciphers=ciphers, ssl_context=ssl_context,
                      ca_cert_dir=ca_cert_dir)

ssl_.ssl_wrap_socket = _ssl_wrap_socket

We then call requests.get() further down in the code. The full context can be found on GitHub (here).

Unfortunately this does not work: our code never seems to be reached, and we are not sure why. Is there something obvious we are missing, or a better way to work around this?

Further notes

Here is the full class:

import os
import random

import requests
import hashlib
import pandas as pd
import time
from lib.core.discovered_host import *
import urllib3

DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '\
                     'AppleWebKit/537.36 (KHTML, like Gecko) '\
                     'Chrome/61.0.3163.100 Safari/537.36'

urllib3.disable_warnings()

from urllib3.util import ssl_



class virtual_host_scanner(object):
    """Virtual host scanning class

    Virtual host scanner has the following properties:

    Attributes:
        wordlist: location to a wordlist file to use with scans
        target: the target for scanning
        port: the port to scan. Defaults to 80
        ignore_http_codes: comma separated list of http codes to ignore
        ignore_content_length: integer value of content length to ignore
        output: folder to write output file to
    """
    def __init__(self, target, wordlist, **kwargs):
        self.target = target
        self.wordlist = wordlist
        self.base_host = kwargs.get('base_host')
        self.rate_limit = int(kwargs.get('rate_limit', 0))
        self.port = int(kwargs.get('port', 80))
        self.real_port = int(kwargs.get('real_port', 80))
        self.ssl = kwargs.get('ssl', False)
        self.fuzzy_logic = kwargs.get('fuzzy_logic', False)
        self.unique_depth = int(kwargs.get('unique_depth', 1))
        self.ignore_http_codes = kwargs.get('ignore_http_codes', '404')
        self.first_hit = kwargs.get('first_hit')

        self.ignore_content_length = int(
            kwargs.get('ignore_content_length', 0)
        )

        self.add_waf_bypass_headers = kwargs.get(
            'add_waf_bypass_headers',
            False
        )

        # this can be made redundant in future with better exceptions
        self.completed_scan = False

        # this is maintained until likely-matches is refactored to use
        # new class
        self.results = []

        # store associated data for discovered hosts
        # in array for oN, oJ, etc'
        self.hosts = []

        # available user-agents
        self.user_agents = list(kwargs.get('user_agents')) \
            or [DEFAULT_USER_AGENT]

    @property
    def ignore_http_codes(self):
        return self._ignore_http_codes

    @ignore_http_codes.setter
    def ignore_http_codes(self, codes):
        self._ignore_http_codes = [
            int(code) for code in codes.replace(' ', '').split(',')
        ]

    _target_host = None
    _orig_wrap_socket = ssl_.ssl_wrap_socket

    def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                         ca_certs=None, server_hostname=None,
                         ssl_version=None, ciphers=None, ssl_context=None,
                         ca_cert_dir=None):
        print('SHOULD BE PRINTED')
        _orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
                          cert_reqs=cert_reqs, ca_certs=ca_certs,
                          server_hostname=_target_host, ssl_version=ssl_version,
                          ciphers=ciphers, ssl_context=ssl_context,
                          ca_cert_dir=ca_cert_dir)

    def scan(self):
        print('fdsa')
        ssl_.ssl_wrap_socket = self._ssl_wrap_socket

        if not self.base_host:
            self.base_host = self.target

        if not self.real_port:
            self.real_port = self.port

        for virtual_host in self.wordlist:
            hostname = virtual_host.replace('%s', self.base_host)

            if self.real_port == 80:
                host_header = hostname
            else:
                host_header = '{}:{}'.format(hostname, self.real_port)

            headers = {
                'User-Agent': random.choice(self.user_agents),
                'Host': host_header,
                'Accept': '*/*'
            }

            if self.add_waf_bypass_headers:
                headers.update({
                    'X-Originating-IP': '127.0.0.1',
                    'X-Forwarded-For': '127.0.0.1',
                    'X-Remote-IP': '127.0.0.1',
                    'X-Remote-Addr': '127.0.0.1'
                })

            dest_url = '{}://{}:{}/'.format(
                'https' if self.ssl else 'http',
                self.target,
                self.port
            )

            _target_host = hostname

            try:
                res = requests.get(dest_url, headers=headers, verify=False)
            except requests.exceptions.RequestException:
                continue

            if res.status_code in self.ignore_http_codes:
                continue

            response_length = int(res.headers.get('content-length', 0))
            if self.ignore_content_length and \
               self.ignore_content_length == response_length:
                continue

            # hash the page results to aid in identifying unique content
            page_hash = hashlib.sha256(res.text.encode('utf-8')).hexdigest()

            self.hosts.append(self.create_host(res, hostname, page_hash))

            # add url and hash into array for likely matches
            self.results.append(hostname + ',' + page_hash)

            if len(self.hosts) >= 1 and self.first_hit:
                break

            # rate limit the connection, if the int is 0 it is ignored
            time.sleep(self.rate_limit)

        self.completed_scan = True

    def likely_matches(self):
        if self.completed_scan is False:
            print("[!] Likely matches cannot be printed "
                  "as a scan has not yet been run.")
            return

        # segment results from previous scan into usable results
        segmented_data = {}
        for item in self.results:
            result = item.split(",")
            segmented_data[result[0]] = result[1]

        dataframe = pd.DataFrame([
            [key, value] for key, value in segmented_data.items()],
            columns=["key_col", "val_col"]
        )

        segmented_data = dataframe.groupby("val_col").filter(
            lambda x: len(x) <= self.unique_depth
        )

        return segmented_data["key_col"].values.tolist()

    def create_host(self, response, hostname, page_hash):
        """
        Creates a host using the response and the hash.
        Prints current result in real time.
        """
        output = '[#] Found: {} (code: {}, length: {}, hash: {})\n'.format(
            hostname,
            response.status_code,
            response.headers.get('content-length'),
            page_hash
        )

        host = discovered_host()
        host.hostname = hostname
        host.response_code = response.status_code
        host.hash = page_hash
        host.contnet = response.content

        for key, val in response.headers.items():
            output += '  {}: {}\n'.format(key, val)
            host.keys.append('{}: {}'.format(key, val))

        print(output)

        return host

In this case, the following line is never hit:

print('SHOULD BE PRINTED')

It also results in the following log entry on the web server:

[Wed Oct 25 16:37:23.654321 2017] [ssl:error] [pid 1355] AH02032: Hostname provided via SNI and hostname test.test provided via HTTP are different

This suggests that the code is never executed at all.

2 Answers:

Answer 0 (score: 3):

Edit 1: no reload needed

Thanks to @MartijnPieters for helping me improve this answer. If we patch urllib3.connection directly, no reload is needed. However, the requests package has changed in recent versions, which makes the original answer below not work for some versions of requests.

Here is an updated version of the code that handles all of these cases:

import requests

try:
    assert requests.__version__ != "2.18.0"
    import requests.packages.urllib3.util.ssl_ as ssl_
    import requests.packages.urllib3.connection as connection
except (ImportError,AssertionError,AttributeError):
    import urllib3.util.ssl_ as ssl_
    import urllib3.connection as connection

print("Using " + requests.__version__)

def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                     ca_certs=None, server_hostname=None,
                     ssl_version=None, ciphers=None, ssl_context=None,
                     ca_cert_dir=None):
    print('SHOULD BE PRINTED')
    return ssl_.ssl_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
                      cert_reqs=cert_reqs, ca_certs=ca_certs,
                      server_hostname=server_hostname, ssl_version=ssl_version,
                      ciphers=ciphers, ssl_context=ssl_context,
                      ca_cert_dir=ca_cert_dir)

connection.ssl_wrap_socket = _ssl_wrap_socket

res = requests.get("https://www.google.com", verify=True)

The code is also available in the repository linked at the end of this answer.
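Applied to the question's original goal, the same connection-level patch can also force a custom SNI name. A hedged sketch, reusing the ssl_ and connection imports from the snippet above; _target_host, the IP address 203.0.113.10 and the vhost name vhost.example.com are placeholders:

_target_host = None  # set this to the desired SNI name before each request

def _ssl_wrap_socket_sni(sock, *args, server_hostname=None, **kwargs):
    # Pass everything through unchanged except the SNI hostname.
    return ssl_.ssl_wrap_socket(sock, *args,
                                server_hostname=_target_host or server_hostname,
                                **kwargs)

connection.ssl_wrap_socket = _ssl_wrap_socket_sni

_target_host = 'vhost.example.com'
res = requests.get('https://203.0.113.10/',
                   headers={'Host': _target_host},
                   verify=False)  # the certificate will not match the forced name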

Original answer

There are two problems in the code.

requests does not actually import urllib3 directly. It uses the bundled copy under requests.packages, which carries its own module context.

So the function to override is:

requests.packages.urllib3.util.ssl_.ssl_wrap_socket

Next, if you look at urllib3/connection.py:
from .util.ssl_ import (
    resolve_cert_reqs,
    resolve_ssl_version,
    ssl_wrap_socket,
    assert_fingerprint,
)

This is a module-level from-import: the name ssl_wrap_socket inside connection.py is bound when that module is loaded, which happens as soon as we do import requests, so our first attempt cannot override it. You can easily confirm this by setting a breakpoint there and inspecting the stack trace back to the parent file.
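You can also see this without a debugger; a small check along these lines (this assumes the bundled copy under requests.packages, as in this answer; with newer versions you may need to use the standalone urllib3 modules instead):

import requests

ssl_module = requests.packages.urllib3.util.ssl_
connection_module = requests.packages.urllib3.connection

def fake_wrap_socket(*args, **kwargs):
    raise AssertionError('patched function was called')

# Patch only the ssl_ module, exactly as the question does.
ssl_module.ssl_wrap_socket = fake_wrap_socket

# connection.py keeps the reference it bound at import time, so the two
# names no longer point to the same function and the patch is invisible to it.
print(ssl_module.ssl_wrap_socket is connection_module.ssl_wrap_socket)  # False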

So for the monkey patch to work, we need to reload the module after the patch has been applied, so that it picks up our patched function.

Below is minimal code showing that the interception works this way:

try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3

def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                     ca_certs=None, server_hostname=None,
                     ssl_version=None, ciphers=None, ssl_context=None,
                     ca_cert_dir=None):
    print('SHOULD BE PRINTED')
    return _orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
                             cert_reqs=cert_reqs, ca_certs=ca_certs,
                             server_hostname=server_hostname,
                             ssl_version=ssl_version, ciphers=ciphers,
                             ssl_context=ssl_context, ca_cert_dir=ca_cert_dir)

import requests
_orig_wrap_socket = requests.packages.urllib3.util.ssl_.ssl_wrap_socket

requests.packages.urllib3.util.ssl_.ssl_wrap_socket = _ssl_wrap_socket
reload(requests.packages.urllib3.connection)

res = requests.get("https://www.google.com", verify=True)

https://github.com/tarunlalwani/monkey-patch-ssl_wrap_socket

Answer 1 (score: -3):

The monkey patch should be applied first; move the following relevant code to the top of the file:

from urllib3.util import ssl_

#...

_orig_wrap_socket = ssl_.ssl_wrap_socket
ssl_.ssl_wrap_socket = _ssl_wrap_socket

#...

and then move import requests below it.