在做一个小项目时,我遇到了一个问题:在 Python 中找不到高效解析数学表达式的方法。
简要介绍一下背景:该程序需要遍历数量非常庞大(约 1,000 万个)的逆波兰表示法(RPN)代数表达式,并对每个表达式求值,判断其结果是否与期望值相符。不出所料,其中最慢的部分就是对每个表达式求值,因此我很想优化这一步。
下面是我迄今为止的调查结果摘要,同时也欢迎大家就现有方法提出改进建议。
我测试了以下方法(实现见下方代码):RPN_Classic_Stack(我自己的实现)、RPN_Generalised_Operators 和 RPN_List_Slicing。
我还测试了“邪恶的”eval,作为 Python 内置解析器的参照基准,并测试了直接以“内联”(inline)方式执行计算的情况。
测试方法是:将以下每个表达式各解析 100 万次,并记录每个函数所花费的时间:
2 2 +
2 2 2 + +
2 2 2 2 + + +
调查结果如下:(表格:耗时,单位为秒)尽管存在一些细微差别,这些方法的表现大致相近;由于它们都是用列表实现的栈,我猜耗时主要来自对列表的修改操作。
相比之下,“邪恶的”eval 明显更慢,而内联计算显然快如闪电。
据我所知,除了计算本身之外,解析还需要执行大量的识别操作,但我仍希望有一种方法能比我在这里看到的这些方法(显著)更高效。也许有一些我不知道的 Python 魔法……欢迎各位 Python 高手提出任何建议;否则,这篇文章也可以作为一份 RPN 解析器的汇总,供后来者参考……
import itertools as it
import random
import time
import operator
# Tokens of the four basic arithmetic operators, kept as a list of strings.
operators = "+ - / *".split()
# Module-level counter, initialised to zero.
count = 0
def RPN_Classic_Stack(expression): #Mine
    """Evaluate a whitespace-separated RPN expression with an explicit stack.

    Operands are parsed with ``int``; the supported operators are ``+``,
    ``-``, ``*`` and ``/`` (true division, so a division yields a float).

    Args:
        expression: RPN string such as ``"2 2 +"``. Leading, trailing and
            repeated whitespace is ignored, so the trailing-space form
            ``"2 2 + "`` is accepted as well.

    Returns:
        The value left on top of the stack, or the list ``[-1]`` when a
        division fails (for example division by zero).

    Raises:
        IndexError: if an operator finds fewer than two operands, or the
            expression contains no tokens.
        ValueError: if a token is neither an operator nor an integer.
    """
    stack = []
    # split() without arguments drops empty tokens, so the final token no
    # longer has to be discarded blindly (which lost the last operator
    # whenever the input had no trailing space).
    for token in expression.split():
        if token == "+":
            num1 = stack.pop()
            num2 = stack.pop()
            stack.append(num1 + num2)
        elif token == "-":
            num1 = stack.pop()
            num2 = stack.pop()
            stack.append(num2 - num1)
        elif token == "*":
            num1 = stack.pop()
            num2 = stack.pop()
            stack.append(num1 * num2)
        elif token == "/":
            divisor = stack.pop()
            dividend = stack.pop()
            try:
                quotient = dividend / divisor
            except ArithmeticError:
                # Covers ZeroDivisionError and OverflowError; the list
                # sentinel is kept for backward compatibility.
                return [-1]
            stack.append(quotient)
        else:
            stack.append(int(token))
    return stack.pop()
def safe_divide(darg1, darg2):
    """Return ``darg1 / darg2``, or ``-1`` if that raises ZeroDivisionError."""
    quotient = -1  # fallback value reported when the division is undefined
    try:
        quotient = darg1 / darg2
    except ZeroDivisionError:
        pass
    return quotient
def RPN_Generalised_Operators(expression): #https://stackoverflow.com/a/37770871/5482177
    """Evaluate an RPN expression using a token -> binary function table.

    Operands are parsed with ``float``. Division goes through
    ``safe_divide``, so a division by zero pushes ``-1`` and evaluation
    continues instead of raising.

    Args:
        expression: RPN string such as ``"2 2 +"``. Leading, trailing and
            repeated whitespace is ignored.

    Returns:
        The value left on top of the stack after all tokens are processed.

    Raises:
        IndexError: if an operator finds fewer than two operands, or the
            expression contains no tokens.
        ValueError: if a token is neither an operator nor a number.
    """
    function_twoargs = {
        '*': operator.mul,
        '/': safe_divide,
        '+': operator.add,
        '-': operator.sub,
    }
    stack = []
    # split() without arguments skips empty tokens; split(" ") produced ""
    # for trailing or doubled spaces, which float() then rejected.
    for val in expression.split():
        if val in function_twoargs:
            # The right-hand operand is on top of the stack.
            arg2 = stack.pop()
            arg1 = stack.pop()
            stack.append(function_twoargs[val](arg1, arg2))
        else:
            stack.append(float(val))
    return stack.pop()
def RPN_List_Slicing(expression): #https://stackoverflow.com/a/3866502/5482177
    """Evaluate an RPN expression, folding operands via list slicing.

    Tokens are separated by arbitrary whitespace. Operands are parsed with
    ``int``; each operator replaces the last two stack entries with the
    result of applying it to them.

    Args:
        expression: RPN string such as ``"2 2 +"``.

    Returns:
        The value left on top of the stack after all tokens are processed.
    """
    binary_ops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
        '%': operator.mod,
        '**': operator.pow,
        '//': operator.floordiv,
    }
    pending = []
    for token in expression.split():
        handler = binary_ops.get(token)
        if handler is None:
            # Not an operator: treat the token as an integer operand.
            pending.append(int(token))
            continue
        # Take (up to) the last two entries and replace them by the result.
        operands = pending[-2:]
        pending[-2:] = [handler(*operands)]
    return pending.pop()
def not_RPN_Evil_Eval(expression):
    """Evaluate an infix expression string with the built-in ``eval``.

    WARNING: ``eval`` executes arbitrary Python code. Only pass trusted,
    hard-coded expressions; never call this on external input.

    Args:
        expression: Python expression source, e.g. ``"2 + 2"``.

    Returns:
        Whatever value the expression evaluates to.
    """
    outcome = eval(expression)
    return outcome
# Benchmark every RPN parser on every expression. Timings are collected as
# {parser name: {expression: elapsed seconds}} and printed once at the end.
expressions = ["2 2 +", "2 2 2 + +", "2 2 2 2 + + +"]
results = {}
parsers = [RPN_Classic_Stack, RPN_Generalised_Operators, RPN_List_Slicing]
for expression in expressions:
    for parser in parsers:
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that can jump when the system time is adjusted.
        start = time.perf_counter()
        for _ in range(1000000):
            parser(expression)
        end = time.perf_counter()
        results.setdefault(parser.__name__, {})[expression] = end - start
print(results)
# Reference point: the same sums written in infix notation and evaluated
# through eval(); one "expression seconds" line is printed per expression.
non_RPN_expressions = ["2 + 2", "2 + 2 + 2", "2 + 2 + 2 + 2"]
for expression in non_RPN_expressions:
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump when the system time is adjusted.
    start = time.perf_counter()
    for _ in range(1000000):
        not_RPN_Evil_Eval(expression)
    end = time.perf_counter()
    print(expression, end - start)
############ Inline calculations ###################
# Each timing is printed with the label of the expression that was actually
# timed, rather than a loop variable left over from an earlier benchmark.
# NOTE: the loop bodies consist only of integer literals, which CPython
# folds at compile time, so these figures are essentially loop overhead.
start = time.perf_counter()
for _ in range(1000000):
    2 + 2
end = time.perf_counter()
print("2 + 2", end - start)

start = time.perf_counter()
for _ in range(1000000):
    2 + 2 + 2
end = time.perf_counter()
print("2 + 2 + 2", end - start)

start = time.perf_counter()
for _ in range(1000000):
    2 + 2 + 2 + 2
end = time.perf_counter()
print("2 + 2 + 2 + 2", end - start)
##############################