研究

Question

来自Python标准json.loads模块的json是否容易受到任意代码执行或任何其他安全问题的影响？

我的应用程序可以从不值得信任的来源接收JSON消息。

Answer 1

请注意，以下答案与Windows 10 64位的默认Python3.4安装有关。另请注意，此答案仅关注py扫描仪，而不是c扫描仪。

对于源文件，请参阅https://hg.python.org/cpython/file/tip/Lib/json或在本地python安装中找到它们。

研究

_{与本研究一起查看本文底部的参考实现}

json.loads(s)调用的解析函数在\Lib\json\scanner.py：

中定义

parse_object = context.parse_object
parse_array = context.parse_array
parse_string = context.parse_string
parse_float = context.parse_float
parse_int = context.parse_int
parse_constant = context.parse_constant

context是JSONDecoder类的一个实例，它在\Lib\json\decoder.py中定义并使用以下解析器：

self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.parse_string = scanstring
self.parse_object = JSONObject
self.parse_array = JSONArray

从这里我们可以查看每个单独的解析器，以确定它是否容易受到任意代码的执行：

<小时/> 的 parse_float

这使用默认的float功能，因此是安全的。
<小时/> 的 parse_int

这使用默认的int功能，因此是安全的。
<小时/> 的 parse_constant

_CONSTANTS在同一文件中定义：

_CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN, }

因此正在执行简单的查找，因此它是安全的。
<小时/> parse_string，JSONObject，JSONArray

通过查看本文末尾的实现可以看出，唯一可以执行的外部代码是：

来自JSONObject：

object_pairs_hook

object_hook

来自JSONArray：

scan_once

<小时/> object_pairs_hook，object_hook

默认 object_pairs_hook和object_hook从解码器初始化程序定义为None：

def __init__(self, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None)

<小时/> 的 scan_once

scan_once定义为：

self.scan_once = scanner.make_scanner(self)

可以在\Lib\json\scanner.py中找到源代码，我们可以从中看到scan_once只为JSON对象的每个部分调用适当的解析器。

结论

从上面和参考实现中可以看出只要JSON解码器使用的扫描器是默认的，就不会执行任意代码，可能通过使用自定义解码器通过使用其__init__参数来改为执行任意代码，但除此之外，我不这么认为。

履行

<强> BACKSLASH

BACKSLASH = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', }

<强> STRINGCHUNK

STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

<强> scanstring

def py_scanstring(s, end, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. Returns a tuple of the decoded string and the index of the character in s after the end quote.""" chunks = [] _append = chunks.append begin = end - 1 while 1: chunk = _m(s, end) if chunk is None: raise ValueError( errmsg("Unterminated string starting at", s, begin)) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows if terminator == '"': break elif terminator != '\\': if strict: #msg = "Invalid control character %r at" % (terminator,) msg = "Invalid control character {0!r} at".format(terminator) raise ValueError(errmsg(msg, s, end)) else: _append(terminator) continue try: esc = s[end] except IndexError: raise ValueError( errmsg("Unterminated string starting at", s, begin)) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: msg = "Invalid \\escape: {0!r}".format(esc) raise ValueError(errmsg(msg, s, end)) end += 1 else: uni = _decode_uXXXX(s, end) end += 5 if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': uni2 = _decode_uXXXX(s, end + 1) if 0xdc00 <= uni2 <= 0xdfff: uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) end += 6 char = chr(uni) _append(char) return ''.join(chunks), end scanstring = c_scanstring or py_scanstring

<强> WHITESPACE

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

<强> WHITESPACE_STR

WHITESPACE_STR = ' \t\n\r'

<强>的JSONObject

def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end pairs = [] pairs_append = pairs.append # Backwards compatibility if memo is None: memo = {} memo_get = memo.setdefault # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] # Normally we expect nextchar == '"' if nextchar != '"': if nextchar in _ws: end = _w(s, end).end() nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': if object_pairs_hook is not None: result = object_pairs_hook(pairs) return result, end + 1 pairs = {} if object_hook is not None: pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': raise ValueError(errmsg( "Expecting property name enclosed in double quotes", s, end)) end += 1 while True: key, end = scanstring(s, end, strict) key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': raise ValueError(errmsg("Expecting ':' delimiter", s, end)) end += 1 try: if s[end] in _ws: end += 1 if s[end] in _ws: end = _w(s, end + 1).end() except IndexError: pass try: value, end = scan_once(s, end) except StopIteration as err: raise ValueError(errmsg("Expecting value", s, err.value)) from None pairs_append((key, value)) try: nextchar = s[end] if nextchar in _ws: end = _w(s, end + 1).end() nextchar = s[end] except IndexError: nextchar = '' end += 1 if nextchar == '}': break elif nextchar != ',': raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) end = _w(s, end).end() nextchar = s[end:end + 1] end += 1 if nextchar != '"': raise ValueError(errmsg( "Expecting property name enclosed in double quotes", s, end - 1)) if object_pairs_hook is not None: result = object_pairs_hook(pairs) return result, end pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end

<强> JSONArray

def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end values = [] nextchar = s[end:end + 1] if nextchar in _ws: end = _w(s, end + 1).end() nextchar = s[end:end + 1] # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 _append = values.append while True: try: value, end = scan_once(s, end) except StopIteration as err: raise ValueError(errmsg("Expecting value", s, err.value)) from None _append(value) nextchar = s[end:end + 1] if nextchar in _ws: end = _w(s, end + 1).end() nextchar = s[end:end + 1] end += 1 if nextchar == ']': break elif nextchar != ',': raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) try: if s[end] in _ws: end += 1 if s[end] in _ws: end = _w(s, end + 1).end() except IndexError: pass return values, end

<强> scanner.make_scanner

def py_make_scanner(context): parse_object = context.parse_object parse_array = context.parse_array parse_string = context.parse_string match_number = NUMBER_RE.match strict = context.strict parse_float = context.parse_float parse_int = context.parse_int parse_constant = context.parse_constant object_hook = context.object_hook object_pairs_hook = context.object_pairs_hook memo = context.memo def _scan_once(string, idx): try: nextchar = string[idx] except IndexError: raise StopIteration(idx) if nextchar == '"': return parse_string(string, idx + 1, strict) elif nextchar == '{': return parse_object((string, idx + 1), strict, _scan_once, object_hook, object_pairs_hook, memo) elif nextchar == '[': return parse_array((string, idx + 1), _scan_once) elif nextchar == 'n' and string[idx:idx + 4] == 'null': return None, idx + 4 elif nextchar == 't' and string[idx:idx + 4] == 'true': return True, idx + 4 elif nextchar == 'f' and string[idx:idx + 5] == 'false': return False, idx + 5 m = match_number(string, idx) if m is not None: integer, frac, exp = m.groups() if frac or exp: res = parse_float(integer + (frac or '') + (exp or '')) else: res = parse_int(integer) return res, m.end() elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': return parse_constant('NaN'), idx + 3 elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': return parse_constant('Infinity'), idx + 8 elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: raise StopIteration(idx) def scan_once(string, idx): try: return _scan_once(string, idx) finally: memo.clear() return _scan_once make_scanner = c_make_scanner or py_make_scanner

json.loads（）容易受到任意代码执行的影响吗？

1 个答案:

研究

结论

履行