有一些不错的方法可以处理python中的同时多字符串替换。但是,我在创建一个既能做到这一点、又支持反向引用的高效函数时遇到了麻烦。


例如(注意 \1):

# Example rule set from the question. The last replacement must be a raw
# string: in a normal literal '\1' is the control character chr(1), not a
# regex backreference.
repdict = {'&&': 'and', '||': 'or', '!([a-zA-Z_])': r'not \1'}


def replaceAll(repdict, text):
    """Replace every key of *repdict* found in *text* with its value, in one pass.

    Keys are treated as literal text (they are passed through ``re.escape``),
    so regex syntax and backreferences in keys/values are NOT interpreted.

    Args:
        repdict: mapping of literal search string -> replacement string.
        text: the string to process.

    Returns:
        A new string with all replacements applied simultaneously.
    """
    if not repdict:
        # An empty alternation would match the empty string everywhere and
        # then fail the dictionary lookup.
        return text
    pattern = re.compile("|".join(re.escape(k) for k in repdict))
    # The matched text is exactly one of the literal keys, so it can be
    # looked up directly.
    return pattern.sub(lambda m: repdict[m.group(0)], text)


>>> replaceAll(repdict, "!newData.exists() || newData.val().length == 1")
'!newData.exists() or newData.val().length == 1'

如果我手动操作,它可以正常工作。 e.g:

# Compile the negation rule by itself; the group captures the first
# identifier character after the '!'.
pattern = re.compile(r"!([a-zA-Z_])")
# Applied on its own, the backreference in the replacement is honoured.
pattern.sub(r"not \1", "!newData.exists()")


'not newData.exists()'



def replaceAll(repPat, text):
    """Apply all regex rules in *repPat* to *text* in a single pass.

    The keys of *repPat* are joined into one alternation. Matches that start
    with '!' are rewritten by hand to 'not <rest>'; every other match is
    mapped back to its dictionary key with ``naive_escaper`` (defined
    elsewhere in this file) and replaced by the corresponding value.

    Args:
        repPat: mapping of regex pattern -> replacement string.
        text: the string to process.

    Returns:
        The processed string.
    """
    def replacer(obj):
        # full text of the current match
        match = obj.group(0)
        # manually deal with exclamation mark match..
        if match[:1] == "!":
            return 'not ' + match[1:]
        # here we naively escape the matched text into
        # the format of our dictionary key
        return repPat[naive_escaper(match)]

    pattern = re.compile("|".join(repPat.keys()))
    return pattern.sub(replacer, text)

def naive_escaper(string):
    r"""Backslash-escape every '=' and '|' in *string*.

    Used to turn matched text back into the hand-escaped form used for the
    dictionary keys (e.g. '||' -> '\|\|', '===' -> '\=\=\=').

    Both characters are escaped even when they occur together, and raw
    strings are used so the replacements are not invalid escape sequences.

    Args:
        string: the matched text.

    Returns:
        The escaped string; unchanged if it has neither character.
    """
    return string.replace('=', r'\=').replace('|', r'\|')

# manually escaping | and = works fine
# (raw strings keep the backslashes without relying on invalid escape
# sequences; the value for the '!' rule is unused because matches starting
# with '!' are rewritten by hand inside replaceAll)
repPat = {'!([a-zA-Z_])': '', '&&': 'and', r'\|\|': 'or', r'\=\=\=': '=='}
replaceAll(repPat, "(!this && !that) || !this && foo === bar")


'(not this and not that) or not this and foo == bar'


更新:有关更好的选择,请参阅Angus Hollands' answer


但是,有一些困难。让我们假设repldict = {r'(a)': r'\1a', r'(b)': r'\1b'} 像这样:


如果我们将这些结合到一个正则表达式中,我们会得到(a)|(b) - 所以现在(b)不再是第1组,这意味着它的反向引用无法正常工作。

另一个问题是我们无法判断使用哪个替代品。如果正则表达式与文本b匹配,我们怎样才能发现\1b是合适的替换?这是不可能的;我们没有足够的信息。

这些问题的解决方案是将每个字典键包含在一个命名组中,像这样:(?P<group1>(a))|(?P<group2>(b))



# With every key wrapped in a named group, the key that matched is easy to
# identify, and backreferences can be recomputed relative to that group:
# r'\1b' then means "the first group after group2".
def replaceAll(repldict, text):
    r"""Apply all regex rules in *repldict* to *text* simultaneously.

    Each key is a regex and each value a replacement that may contain
    numeric backreferences (``\1``, ``\2``, ...) relative to its own key.

    Caveats: named references are not supported, and a backreference that
    exceeds the key's own group count is not validated.

    Args:
        repldict: mapping of regex pattern -> replacement text.
        text: the string to process.

    Returns:
        The processed string.
    """
    if not repldict:
        # nothing to substitute; unpacking an empty zip() would fail
        return text

    # split the dict into two sequences because we need the order to be
    # reliable
    keys, repls = zip(*repldict.items())

    # generate a regex pattern from the keys, putting each key in a named
    # group so that we can find out which one of them matched.
    # groups are named "_<idx>" where <idx> is the index of the corresponding
    # replacement text in the sequence above
    pattern = re.compile(
        '|'.join('(?P<_{}>{})'.format(i, k) for i, k in enumerate(keys))
    )
    backref = re.compile(r'\\(\d+)')

    def repl(match):
        # The wrapper group is the outermost group of the alternative that
        # matched, so it is the last one to close.
        group_name = match.lastgroup
        repl_text = repls[int(group_name[1:])]

        # Absolute group number of the wrapper. Using the key's list index
        # instead would be wrong as soon as an earlier key has sub-groups.
        base = match.re.groupindex[group_name]

        # now we manually search for backreferences in the replacement text
        # and substitute them
        def repl_backreference(m):
            value = match.group(base + int(m.group(1)))
            # a group that did not participate is replaced by ''
            return value if value is not None else ''

        return backref.sub(repl_backreference, repl_text)

    return pattern.sub(repl, text)


# First example: plain operators plus a negation rule with a backreference.
repldict = {
    '&&': 'and',
    r'\|\|': 'or',
    r'!([a-zA-Z_])': r'not \1',
}
print(replaceAll(repldict, "!newData.exists() || newData.val().length == 1"))
# output: not newData.exists() or newData.val().length == 1

# Second example: the negation rule comes first and '===' is added.
repldict = {
    '!([a-zA-Z_])': r'not \1',
    '&&': 'and',
    r'\|\|': 'or',
    r'\=\=\=': '==',
}
print(replaceAll(repldict, "(!this && !that) || !this && foo === bar"))
# output: (not this and not that) or not this and foo == bar


注意事项:

  • 仅支持数字反向引用;不支持命名引用。
  • 默默接受 {r'(a)': r'\2'} 之类的无效反向引用。(这些有时会抛出错误,但并非总是如此。)

import re
from collections import OrderedDict
from functools import partial

# Regex pattern -> replacement template; backreferences in a template are
# relative to that template's own pattern.
pattern_to_replacement = {
    '&&': 'and',
    '!([a-zA-Z_]+)': r'not \1',
}

def build_replacer(cases):
    """Build a callable that applies all regex rules in *cases* in one pass.

    Every pattern is wrapped in its own capturing group and the wrapped
    patterns are joined into one alternation. Each replacement template is
    rewritten with ``absolute_backreference`` (defined elsewhere in this
    file) so that its backreferences point at the right groups of the
    combined pattern.

    Args:
        cases: mapping of regex pattern -> replacement template.

    Returns:
        A ``functools.partial`` of the combined pattern's ``sub``; call it
        as ``replacer(text)``. With no cases it returns the text unchanged.
    """
    # Snapshot the items once so group numbering and alternation order
    # cannot diverge.
    ordered_cases = list(cases.items())
    replacements = {}

    # Absolute index of the wrapper group of the case being processed.
    group_index = 1
    for pattern, replacement in ordered_cases:
        # back-references should be relative to the wrapper group
        replacements[group_index] = absolute_backreference(replacement, group_index)

        # Skip the wrapper itself plus the N subgroups this pattern contains
        # (determined by compiling the pattern).
        group_index += 1 + re.compile(pattern).groups

    if ordered_cases:
        catch_all = "|".join("({})".format(p) for p, _ in ordered_cases)
    else:
        # An empty alternation would match the empty string everywhere and
        # the replacement lookup would then fail; "(?!)" never matches.
        catch_all = r"(?!)"
    combined = re.compile(catch_all)

    def replacer(match):
        # The wrapper group closes last, so lastindex identifies the case.
        return match.expand(replacements[match.lastindex])

    return partial(combined.sub, replacer)

def absolute_backreference(text, n):
    r"""Shift the numeric group references in replacement template *text* by *n*.

    ``\1`` (one or two digits) and ``\g<1>`` are rewritten to ``\g<1+n>``, so
    a reference relative to a pattern's wrapper group *n* becomes absolute.
    The ``\g<...>`` output form stays unambiguous for any group number.
    Other escapes, including an escaped backslash and ``\g<name>``, are left
    untouched.

    Args:
        text: replacement template as accepted by ``re.sub``.
        n: absolute index of the wrapper group the references are relative to.

    Returns:
        The template with shifted group references.
    """
    # Consume escapes pairwise so a literal backslash followed by a digit is
    # not mistaken for a group reference.
    ref_pat = re.compile(r"\\(?:([1-9][0-9]?)|g<([0-9]+)>|.)", re.DOTALL)

    def replacer(match):
        ref = match.group(1) or match.group(2)
        if ref is None:
            # not a numeric group reference: keep the escape as written
            return match.group(0)
        return "\\g<{}>".format(int(ref) + n)

    return ref_pat.sub(replacer, text)

# Build the substitution callable for the rules in pattern_to_replacement;
# build_replacer returns a partial of the combined pattern's sub, so it is
# called as replacer(text).
replacer = build_replacer(pattern_to_replacement)

# Rules for the sequential approach: each key is a regex built from an
# escaped literal operator, each value a replacement template.
repdict = {
    # swallow the whitespace around the operator and re-insert single spaces
    r'\s*' + re.escape('&&') + r'\s*': ' and ',
    r'\s*' + re.escape('||') + r'\s*': ' or ',
    # '!' directly followed by the first identifier character
    re.escape('!') + r'([a-zA-Z_])': r'not \1',
}
def replaceAll(repdict, text):
    """Apply each regex rule of *repdict* to *text*, one after another.

    Rules run sequentially in the dictionary's iteration order, so a later
    rule sees the output of the earlier ones.

    Args:
        repdict: mapping of regex pattern -> replacement template.
        text: the string to process.

    Returns:
        The string after all substitutions.
    """
    result = text
    for regex, template in repdict.items():
        result = re.compile(regex).sub(template, result)
    return result