我想对某一列的数据进行隐藏(脱敏):如果某个值出现在 exceptionList 中,就应跳过它并继续处理下一个值。但不知为何,我的代码没能完成隐藏,反而引发了以下错误:
if(x in exceptionList):
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
这是我的代码
# Sample rows for the question; each row is [Name, Number, Address].
data = [
    ['NISAMANEE ROWELL', '9198762345', '98 Oxford Ave.Elk Grove Village, IL 60007'],
    ['ALICE BAISDEN', '8756342865', '94 Valley Rd.Miami Gardens, FL 33056'],
    ['MARC COGNETTI', '9198762345', '221 Summer CircleGreer, SC 29650'],
    ['JOHNS HOPKINS HEALTHCARE', '9654987642', '8522 Pendergast AvenueVilla Park, IL 60181'],
]
df = pd.DataFrame(data=data, columns=['Name', 'Number', 'Address'])
df  # bare expression: displays the frame when run as a notebook cell
def title_format(inp):
    """Return *inp* with every string element converted to title case.

    Args:
        inp: A pandas Series of strings (any object exposing the ``.str``
            accessor).

    Returns:
        The result of ``inp.str.title()``: a new Series, one title-cased
        string per input element.
    """
    return inp.str.title()
# The asker's original attempt, kept exactly as posted (its indentation was
# lost in extraction): this is the code that raises the ValueError quoted above.
def new(x):
#x = input('Enter your column name')
#x = x.title()
x = title_format(x)
print(x)
exc_list=['Mackesson Inc','Care','Healthcare','Henery Schien','Besse','LLC','CandP','INC','LTD','PHARMACY','PHARMACEUTICAL','HOSPITAL','COMPANY','ELECTRONICS','APP','VOLUNTEERS','SPECIALITIES','APPLIANCE','EXPRESS','MAGAZINE','SUPPLY','ENDOSCOPY','NETWandK','SCHOOL','AT&T','SOLUTIONS','SANITATION','SYSTEMS','COMPOUNDING','CLINIC','UTILITIES','DEPARTMENT','CREATIVE','PIN','employment','consultant','units','label','machine','anesthesia','services','medical','community','plaza','tech','bipolar','brand','commerce','testing','inspection','killer','plus','electric','division','diagnostic','materials','imaging','international','district','chamber','city','products','essentials','life','scissand','leasing','units','health','healthcare','surgical','enterprises','print','radiology','water','screens','telecom']
exceptionList = [z.title() for z in exc_list]
# NOTE: x is a whole pandas Series here (see the call at the bottom), so
# `x in exceptionList` compares the Series with each list entry and then needs
# one bool from an element-wise result -- this is the line named in the
# traceback ("The truth value of a Series is ambiguous").
if(x in exceptionList):
return x
else:
return x.str.replace(x, 'X' * random.randrange(3, 8))
#new(df.Name.astype(str))
# Passes the entire 'Name' column as one Series, not one name at a time.
new(df['Name'].astype(str))
答案 0 :(得分:1)
按照我对问题的理解,我在您的代码中改动了几行:
import pandas as pd
import random
# Sample rows; each row is [Name, Number, Address]. The last name is an exact
# entry of the exception list used further down.
data = [
    ['NISAMANEE ROWELL', '9198762345', '98 Oxford Ave.Elk Grove Village, IL 60007'],
    ['ALICE BAISDEN', '8756342865', '94 Valley Rd.Miami Gardens, FL 33056'],
    ['MARC COGNETTI', '9198762345', '221 Summer CircleGreer, SC 29650'],
    ['Healthcare', '9654987642', '8522 Pendergast AvenueVilla Park, IL 60181'],
]
df = pd.DataFrame(data=data, columns=['Name', 'Number', 'Address'])
def title_format(inp):
    """Title-case every string in a pandas Series.

    Args:
        inp: A pandas Series of strings (any object exposing the ``.str``
            accessor).

    Returns:
        The result of ``inp.str.title()``: a new Series with each element
        title-cased; *inp* itself is not modified.
    """
    return inp.str.title()
def new(x):
    """Mask, in the module-level ``df``, the names that match the exception list.

    *x* is title-cased with ``title_format`` and printed. Every position whose
    title-cased value equals a (title-cased) entry of the exception list gets
    ``df['Name']`` overwritten with a run of 3 to 7 ``'X'`` characters; all
    other rows are left untouched.

    Args:
        x: The name column as a pandas Series of strings. It must have the
            same length and row order as ``df`` because the match mask is
            applied to ``df`` positionally.

    Returns:
        None. The result is the in-place change to the global ``df``.
    """
    x = title_format(x)
    print(x)
    exc_list = [
        'Mackesson Inc', 'Care', 'Healthcare', 'Henery Schien', 'Besse', 'LLC',
        'CandP', 'INC', 'LTD', 'PHARMACY', 'PHARMACEUTICAL', 'HOSPITAL',
        'COMPANY', 'ELECTRONICS', 'APP', 'VOLUNTEERS', 'SPECIALITIES',
        'APPLIANCE', 'EXPRESS', 'MAGAZINE', 'SUPPLY', 'ENDOSCOPY', 'NETWandK',
        'SCHOOL', 'AT&T', 'SOLUTIONS', 'SANITATION', 'SYSTEMS', 'COMPOUNDING',
        'CLINIC', 'UTILITIES', 'DEPARTMENT', 'CREATIVE', 'PIN', 'employment',
        'consultant', 'units', 'label', 'machine', 'anesthesia', 'services',
        'medical', 'community', 'plaza', 'tech', 'bipolar', 'brand', 'commerce',
        'testing', 'inspection', 'killer', 'plus', 'electric', 'division',
        'diagnostic', 'materials', 'imaging', 'international', 'district',
        'chamber', 'city', 'products', 'essentials', 'life', 'scissand',
        'leasing', 'units', 'health', 'healthcare', 'surgical', 'enterprises',
        'print', 'radiology', 'water', 'screens', 'telecom',
    ]
    # A set drops the duplicate entries and makes each membership test O(1).
    exception_names = {entry.title() for entry in exc_list}
    # Plain boolean array so the mask is applied by position, independent of
    # whatever index labels x carries.
    match = x.isin(exception_names).to_numpy()
    # One independently sized mask per matching row.
    df.loc[match, 'Name'] = [
        'X' * random.randrange(3, 8) for _ in range(int(match.sum()))
    ]
# Run the masking over the 'Name' column (cast to str first), then display df.
new(df.loc[:, 'Name'].astype(str))
df
Out[1]:
Name Number Address
0 NISAMANEE ROWELL 9198762345 98 Oxford Ave.Elk Grove Village, IL 60007
1 ALICE BAISDEN 8756342865 94 Valley Rd.Miami Gardens, FL 33056
2 MARC COGNETTI 9198762345 221 Summer CircleGreer, SC 29650
3 XXXXXXX 9654987642 8522 Pendergast AvenueVilla Park, IL 60181
# Title-case both the exception entries and the names so the exact-match
# lookup below does not depend on the original casing.
exc_list = [entry.title() for entry in exc_list]
df['Name'] = df['Name'].map(str.title)
# True for rows whose name is exactly one of the exception entries.
df['match'] = [name in exc_list for name in df['Name']]
# Overwrite each matching name with 3-7 'X' characters (one draw per row).
df.loc[df['match'], 'Name'] = [
    'X' * random.randrange(3, 8) for _ in range(sum(df['match']))
]
# Title-case both the exception entries and the names so the exact-match
# lookup below does not depend on the original casing.
exc_list = [entry.title() for entry in exc_list]
df['Name'] = df['Name'].map(str.title)
# For names that are exactly an exception entry, replace the first three
# characters with 'XXX' (a name of three characters or fewer becomes just
# 'XXX', because the slice [3:] is then empty). Other names are kept as-is.
# Series.where keeps the value where the condition is True, so the condition
# is "NOT an exception entry". No temporary columns are needed.
df['Name'] = df['Name'].where(
    ~df['Name'].isin(exc_list),
    'XXX' + df['Name'].str[3:],
)
df
# Title-case both the exception entries and the names so the exact-match
# lookup below does not depend on the original casing.
exc_list = [entry.title() for entry in exc_list]
df['Name'] = df['Name'].map(str.title)
df['match'] = [name in exc_list for name in df['Name']]
# Replacement row: 'XXX' in every column of df (including 'match').
hide_row = dict.fromkeys(df.columns, 'XXX')
# Non-matching rows, outer-merged with one all-'XXX' row per matching row.
df[df['match'] != True].merge(
    pd.DataFrame(hide_row, index=df[df['match'] == True].index),
    how='outer',
)
简短说明
# Step 1. This selects the rows that do NOT match the exception list
df[df['match'] != True]
Out[3]:
Name Number Address match
0 Nisamanee Rowell 9198762345 98 Oxford Ave.Elk Grove Village, IL 60007 False
1 Alice Baisden 8756342865 94 Valley Rd.Miami Gardens, FL 33056 False
2 Marc Cognetti 9198762345 221 Summer CircleGreer, SC 29650 False
# Step 2. The opposite filter selects the rows that DO match
df[df['match'] == True]
Out[4]:
Name Number Address match
3 Healthcare 9654987642 8522 Pendergast AvenueVilla Park, IL 60181 True
# Step 3. From Step 2 take only the index, and build a new DataFrame on that index with 'XXX' in every column
hide_row = {c:'XXX' for c in df.columns}
pd.DataFrame(hide_row, index = df[df['match'] == True].index)
Out[5]:
Name Number Address match
3 XXX XXX XXX XXX
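# Step 4. Finally, outer-merge the DataFrames from Step 1 and Step 3. No `on=` is given, so merge joins on the columns the two frames share (not on the index); the 'XXX' row matches nothing and is added as an extra row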
df[df['match'] != True].merge(pd.DataFrame(hide_row, index = df[df['match'] == True].index), how = 'outer')
答案 1 :(得分:0)
只需对您的代码做很小的改动即可。请注意,这并不是最优的写法,但可以正常工作。
# Sample rows; each row is [Name, Number, Address].
data = [
    ['NISAMANEE ROWELL', '9198762345', '98 Oxford Ave.Elk Grove Village, IL 60007'],
    ['ALICE BAISDEN', '8756342865', '94 Valley Rd.Miami Gardens, FL 33056'],
    ['MARC COGNETTI', '9198762345', '221 Summer CircleGreer, SC 29650'],
    ['Healthcare', '9654987642', '8522 Pendergast AvenueVilla Park, IL 60181'],
]
df = pd.DataFrame(data=data, columns=['Name', 'Number', 'Address'])
df  # bare expression: displays the frame when run as a notebook cell
def title_format(inp):
    """Return the single string *inp* converted to title case.

    Args:
        inp: One name as a ``str`` (this variant is called once per value,
            not on a whole Series).

    Returns:
        ``inp.title()``.
    """
    return inp.title()
def new(x):
    """Return one name unchanged if it is an exception entry, otherwise masked.

    The name is title-cased with ``title_format`` and printed. If the
    title-cased name is exactly equal to a (title-cased) entry of the
    exception list it is returned as-is; otherwise the whole name is replaced
    by a run of 3 to 7 ``'X'`` characters.

    Args:
        x: A single name as a ``str``; intended for ``Series.apply``.

    Returns:
        The title-cased name, or a string of 3-7 ``'X'`` characters.
    """
    x = title_format(x)
    print(x)
    exc_list = [
        'Mackesson Inc', 'Care', 'Healthcare', 'Henery Schien', 'Besse', 'LLC',
        'CandP', 'INC', 'LTD', 'PHARMACY', 'PHARMACEUTICAL', 'HOSPITAL',
        'COMPANY', 'ELECTRONICS', 'APP', 'VOLUNTEERS', 'SPECIALITIES',
        'APPLIANCE', 'EXPRESS', 'MAGAZINE', 'SUPPLY', 'ENDOSCOPY', 'NETWandK',
        'SCHOOL', 'AT&T', 'SOLUTIONS', 'SANITATION', 'SYSTEMS', 'COMPOUNDING',
        'CLINIC', 'UTILITIES', 'DEPARTMENT', 'CREATIVE', 'PIN', 'employment',
        'consultant', 'units', 'label', 'machine', 'anesthesia', 'services',
        'medical', 'community', 'plaza', 'tech', 'bipolar', 'brand', 'commerce',
        'testing', 'inspection', 'killer', 'plus', 'electric', 'division',
        'diagnostic', 'materials', 'imaging', 'international', 'district',
        'chamber', 'city', 'products', 'essentials', 'life', 'scissand',
        'leasing', 'units', 'health', 'healthcare', 'surgical', 'enterprises',
        'print', 'radiology', 'water', 'screens', 'telecom',
    ]
    # A set drops the duplicate entries and makes the membership test O(1).
    exception_names = {entry.title() for entry in exc_list}
    if x in exception_names:
        return x
    # Replacing x with itself always yielded just the mask, so return the
    # mask directly; its length (3-7) is unrelated to len(x).
    return 'X' * random.randrange(3, 8)
# Call new() once per value of the 'Name' column and store the results back.
df['Name'] = df.loc[:, 'Name'].apply(new)