假设您有一个程序,该程序会在用户提供的文本中搜索用户提供的RegEx。
使用 Python 的 re 模块时,如何限制正则表达式引擎在匹配过程中执行的步骤数,以防止灾难性回溯(catastrophic backtracking)?
理想情况下,下面这个最小示例不应挂起:
# Minimal reproduction of the problem described above: a pattern/input pair
# chosen to provoke catastrophic backtracking in the `re` module.
#
# Pattern: anchored at line start, eleven repetitions of a lazy "any characters
# followed by a comma" group, then a literal "P".
regex = r"^(.*?,){11}P"
# Input: a long comma-separated run of numbers whose only "P" is the final
# character, so the literal "P" in the pattern never follows the 11th comma
# and the engine has many ways to redistribute the lazy groups before failing.
test_str = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21P" # Example stolen from the previous link.
# NOTE(review): this is the call the surrounding text expects to hang; no step
# or time limit is passed here.
matches = list(re.finditer(regex, test_str, re.MULTILINE)) # finditer returns a lazy iterator, so list() is what actually forces the matching work.