考虑下面的mcve:
import re
import textwrap
import traceback
import unittest
def replace_words(content, replacements):
    """Replace whole words in ``content`` using a case-sensitive lookup table.

    A "word" is an identifier-like token: a letter or underscore followed by
    any number of word characters. Each token is looked up verbatim in
    ``replacements``; tokens without an entry are left untouched, so a key
    never matches inside a longer word (``BAR`` does not touch
    ``BARycentric``).

    Args:
        content: Text to process.
        replacements: Mapping from exact word to its replacement string.

    Returns:
        ``content`` with every mapped word substituted.
    """
    word_re = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # The third positional parameter of a compiled pattern's ``sub`` is
    # ``count``, not ``flags``. Passing ``re.IGNORECASE | re.MULTILINE`` there
    # silently capped the number of substitutions at 10. Flags belong in
    # ``re.compile``; none are needed here because the character class already
    # covers both cases and the dictionary lookup is what must stay
    # case-sensitive.
    return word_re.sub(translate, content)
class class_name(unittest.TestCase):
    """Check a word-replacement function against fixed text/expected pairs.

    ``setUp`` builds three parallel lists (``replacements``, ``texts`` and
    ``expected_results``); entry *i* of each list belongs to the same case.
    """

    def setUp(self):
        """Create the replacement tables, input texts and expected outputs."""
        self.replacements = [
            {
                'PLUS': '"+"',
                'DASH': '"-"',
                'BANG': '"!"',
                'TILDE': '"~"',
                'STAR': '"*"',
                'SLASH': '"/"',
                'PERCENT': '"%"',
                'LEFT_PAREN': '"("',
                'RIGHT_PAREN': '")"',
            },
            {
                "IF": "fi",
                "FOO": "oof",
                "BAR": "rab",
                "OP_FOO": "oof_op",
            },
        ]
        self.texts = [
            # No trailing newline on this text: the literal is closed on its
            # last content line.
            textwrap.dedent("""\
                variable_identifier :
                IDENTIFIER
                primary_expression :
                foo1
                foo2
                foo3
                LEFT_PAREN expression RIGHT_PAREN
                unary_operator :
                PLUS
                DASH
                BANG
                TILDE
                multiplicative_expression :
                unary_expression
                multiplicative_expression STAR unary_expression
                multiplicative_expression SLASH unary_expression
                multiplicative_expression PERCENT unary_expression"""),
            textwrap.dedent("""\
                IF identifier IDENTIFIER FOO BAR BARycentric
                OP_FOO
                """),
        ]
        self.expected_results = [
            textwrap.dedent("""\
                variable_identifier :
                IDENTIFIER
                primary_expression :
                foo1
                foo2
                foo3
                "(" expression ")"
                unary_operator :
                "+"
                "-"
                "!"
                "~"
                multiplicative_expression :
                unary_expression
                multiplicative_expression "*" unary_expression
                multiplicative_expression "/" unary_expression
                multiplicative_expression "%" unary_expression"""),
            textwrap.dedent("""\
                fi identifier IDENTIFIER oof rab BARycentric
                oof_op
                """),
        ]

    def _tester(self, f):
        """Assert that ``f`` produces the expected output for every case.

        Args:
            f: Callable taking ``(content, replacements)`` and returning the
                processed text.
        """
        # Walk the parallel lists together instead of hard-coding indexes, so
        # adding a case to setUp is enough to have it checked.
        cases = zip(self.texts, self.replacements, self.expected_results)
        for index, (text, mapping, expected) in enumerate(cases):
            self.assertEqual(
                f(text, mapping), expected, msg="case {}".format(index)
            )

    def test_replace_words(self):
        """Run the shared cases against ``replace_words``."""
        self._tester(replace_words)
if __name__ == "__main__":
    # Run the test cases only when the file is executed as a script.
    unittest.main()
`replace_words` 函数尝试使用上面代码中的替换词典,在给定文本中搜索并替换区分大小写的整个单词,但它在 `self.assertEqual(f(texts[0], replacements[0]), expected_results[0])` 这一行失败,而我不知道原因。
所以问题是:如何在 Python 中使用替换词典查找并替换区分大小写的整个单词?
答案 0 :(得分:3)
您可以使用 `re.sub` 和 `re.findall`:
import re
def regex_string(d, to_lower=False):
    """Build a regex alternation that matches each key of ``d`` as a whole word.

    Every key is wrapped in ``\\b`` word-boundary anchors and the alternatives
    are joined with ``|`` in the mapping's iteration order.

    Args:
        d: Mapping whose keys are the words to match.
        to_lower: When true, each key contributes two alternatives: its
            lower-cased form followed by the key as written.

    Returns:
        The pattern source as a string. An empty mapping yields an empty
        string, which as a regex matches at every position, so callers
        should guard against empty mappings.
    """
    def whole_word(word):
        # Escape the key so regex metacharacters in it are matched literally
        # instead of being interpreted (or raising) when compiled.
        return r'\b{}\b'.format(re.escape(word))

    if not to_lower:
        return '|'.join(whole_word(key) for key in d)
    return '|'.join(
        alternative
        for key in d
        for alternative in (whole_word(key.lower()), whole_word(key))
    )
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"',
}

# Build the alternation once; it is needed both to create the template and to
# collect the matched words.
_word_pattern = re.compile(regex_string(replacements, True))

# Double any literal braces in the input so that str.format treats them as
# plain text rather than replacement fields, then mark every matched word with
# a '{}' placeholder. Braces are non-word characters, so doubling them does not
# change where the word-boundary matches fall.
_brace_safe_content = content.replace('{', '{{').replace('}', '}}')
replaced = _word_pattern.sub('{}', _brace_safe_content)

# Fill the placeholders in order with the replacement for each matched word;
# words that matched only through a lower-cased alternative have no entry in
# the mapping and are put back unchanged.
final_result = replaced.format(
    *[replacements.get(word, word) for word in _word_pattern.findall(content)]
)
输出(case 1):
variable_identifier :
IDENTIFIER
primary_expression :
foo1
foo2
foo3
"(" expression ")"
unary_operator :
"+"
"-"
"!"
"~"
multiplicative_expression :
unary_expression
multiplicative_expression "*" unary_expression
multiplicative_expression "/" unary_expression
multiplicative_expression "%" unary_expression
输出(case 2):
fi identifier IDENTIFIER oof rab BARycentric
oof_op
或者,甚至更短:
# Single-pass variant: the callback receives each match and returns the mapped
# replacement, or the matched text itself when the mapping has no entry for it.
def _substitute(match):
    word = match.group()
    return replacements.get(word, word)


replaced = re.sub(regex_string(replacements, True), _substitute, content)