我有下面这段用于查找并替换的代码。我尝试过调试,但没有找到任何有帮助的线索。
import pandas as pd
import re
##### Build test set of sentences to be cleaned
# Load the rows whose `currentTitle` column is going to be cleaned.
dat = pd.read_csv("H:\\Find Replace Tool\\Find_Replace_in_this_v1.csv")

# Drop every character that is not in the allowed set. regex=True is passed
# explicitly: pandas >= 2.0 treats the pattern as a literal string by default,
# which would silently turn this cleaning step into a no-op.
dat.currentTitle = dat.currentTitle.str.replace(
    r"[^a-zA-Z0-9`~!|@#%&_}={:\"\];<>,./. -]", r'', regex=True)
# Collapse each run of whitespace into a single space (raw string so that
# `\s` reaches the regex engine instead of being an invalid string escape).
dat.currentTitle = dat.currentTitle.str.replace(r'\s+', ' ', regex=True)
dat.currentTitle = dat.currentTitle.str.lower()

# Plain Python list of the cleaned titles, one entry per row of `dat`.
test_sentences = dat.currentTitle.tolist()
##### Create search and replace patterns
# Lookup table: the `keyword` column holds the text to search for and the
# `lookupId` column holds what it is replaced with.
dat2 = pd.read_csv("H:\\Find Replace Tool\\Find_Replace_v1.csv")
# One (keyword, replacement) tuple per row, e.g.
# [('word0', 'repl0'), ('word1', 'repl1'), ('word2', 'repl2')]
patterns = list(zip(dat2.keyword, dat2.lookupId))
# keyword -> replacement mapping for direct dictionary lookups
patterns_dict = dict(patterns)
# Whole-word regex for each keyword, paired with its replacement.
# Keywords are literal text, so they must go through re.escape(): a keyword
# containing a regex metacharacter such as "(" or ")" otherwise makes
# re.compile() fail with "unbalanced parenthesis".
patterns_comp = [
    (re.compile(r"\b" + re.escape(search) + r"\b"), repl)
    for search, repl in patterns
]
del dat2
def replace4(sentences, lookup=None, frame=None):
    """Swap known words in each sentence for their lookup value.

    Every run of word characters in a sentence is looked up in ``lookup``;
    a hit is replaced by the mapped value, a miss is left unchanged.

    Args:
        sentences: iterable of strings to process.
        lookup: word -> replacement mapping. Defaults to the module-level
            ``patterns_dict``.
        frame: object that receives the results under the ``'cleanedTitle'``
            key. Defaults to the module-level ``dat``.

    Returns:
        The list of rewritten sentences, in input order.
    """
    if lookup is None:
        lookup = patterns_dict
    if frame is None:
        frame = dat
    # Bind the bound method once; named `find` so it does not shadow the
    # pandas alias `pd`.
    find = lookup.get
    word_re = re.compile(r"\w+")

    def repl(m):
        # m is the match object for one word; group() is the matched text.
        word = m.group()
        # re.sub() requires the callback to return a string, but a lookup
        # value may be a number, so format it as text.
        return "%s" % (find(word, word),)

    cleaned = [word_re.sub(repl, sentence) for sentence in sentences]
    # Assign the whole list in one go: assigning a single sentence would
    # broadcast that one string to every row of the column.
    frame['cleanedTitle'] = cleaned
    return cleaned
# Run the word replacement over the cleaned titles; replace4 writes its
# output into dat['cleanedTitle'].
replace4(test_sentences)
执行这段代码时,我遇到了下面的错误。可能是什么原因?我该如何解决?我目前还没有找到任何解决方案。
error Traceback (most recent call last)
<ipython-input-104-43c08b4ceffa> in <module>()
2 #for value of first and second column in the list: return (first_column with regex) and second_column
3 for search, repl in patterns:
----> 4 a = (re.compile(r'\b'+ search + r'\b'), repl)
5 #if j <5:
6 #print search,repl
c:\python27\lib\re.pyc in compile(pattern, flags)
192 def compile(pattern, flags=0):
193 "Compile a regular expression pattern, returning a pattern object."
--> 194 return _compile(pattern, flags)
195
196 def purge():
c:\python27\lib\re.pyc in _compile(*key)
249 p = sre_compile.compile(pattern, flags)
250 except error, v:
--> 251 raise error, v # invalid expression
252 if not bypass_cache:
253 if len(_cache) >= _MAXCACHE:
error: unbalanced parenthesis