我正在python
中的FuzzyWuzzy
中运行此代码,返回此错误:
TypeError: ('expected string or bytes-like object', 'occurred at index CONCAT')
有没有一种快速简便的方法来避免该错误?我的文件包含一些Int
,例如142 Aberdeen street。我想这就是错误代码的来源。
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import csv
import os
#DEFINE AND CONFIGURE
FULL_MATCHING_THRESHOLD = 80
PARTIAL_MATCHING_THRESHOLD = 100
SORT_MATCHING_THRESHOLD = 100
TOKEN_MATCHING_THRESHOLD = 100
MAX_MATCHES=1
#READ THE CURRENT DATABASE
companies_db = "C://Users//Dell/Desktop//Fuzzy_reconcile//TEST_DUP.csv"
pwd = os.getcwd()
os.chdir(os.path.dirname(companies_db))
current_db_dataframe = pd.read_csv(os.path.basename(companies_db),skiprows=1,index_col=False, names=['CONCAT'])
os.chdir(pwd)
def find_matches(matchThis):
rows = current_db_dataframe['CONCAT'].values.tolist();
rows.remove(matchThis)
matches= process.extractBests(matchThis,rows,scorer=fuzz.ratio,score_cutoff=FULL_MATCHING_THRESHOLD,limit=MAX_MATCHES)
if len(matches)==0:
matches= process.extractBests(matchThis,rows,scorer=fuzz.partial_ratio,score_cutoff=PARTIAL_MATCHING_THRESHOLD,limit=MAX_MATCHES);
if len(matches)==0:
matches= process.extractBests(matchThis,rows,scorer=fuzz.token_set_ratio,score_cutoff=TOKEN_MATCHING_THRESHOLD,limit=MAX_MATCHES);
if len(matches)==0:
matches= process.extractBests(matchThis,rows,scorer=fuzz.token_sort_ratio,score_cutoff=SORT_MATCHING_THRESHOLD,limit=MAX_MATCHES);
return matches[0][0] if len(matches)>0 else None
fn_find_matches = lambda x: find_matches(x)
current_db_dataframe['Duplicate']=current_db_dataframe.applymap(fn_find_matches)
current_db_dataframe.to_csv("results.csv")
错误消息:
File "C:\ProgramData\Anaconda\lib\site-packages\fuzzywuzzy\utils.py", line 95, in full_process
string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s)
File "C:\ProgramData\Anaconda\lib\site-packages\fuzzywuzzy\string_processing.py", line 26, in replace_non_letters_non_numbers_with_whitespace
return cls.regex.sub(" ", a_string)
TypeError: ('expected string or bytes-like object', 'occurred at index CONCAT')
答案 0 :(得分:0)
您可以按照正则表达式删除字符串的字符
number=re.sub("[^a-zA-Z]", # Search for all non-letters
" ", # Replace all non-letters with spaces
str(string))