I am using the pandas `DataFrame.apply` function to create more columns, but I am hitting a `ValueError`. Below is my code:
def find_processes(rule, g_df):
    """Return the processes whose diff list contains every diff named by a rule.

    Args:
        rule: Mapping-like row (e.g. a Series passed by ``DataFrame.apply``
            with ``axis=1``) whose ``'actual_rule'`` entry is a string of
            integer diff ids separated by ``'#'`` and/or ``'=>'``,
            for example ``'48#382=>235'``.
        g_df: Table with a ``'Process_Id'`` column and a ``'Diff_No'`` column,
            where each ``'Diff_No'`` entry is an iterable of integer diff ids.

    Returns:
        A ``(processes, count)`` tuple: the matching process ids joined with
        ``'#'`` (in table order) and how many of them matched.

    Raises:
        ValueError: If a token of ``'actual_rule'`` is not an integer.
    """
    # '=>' and '#' are both plain separators here, so normalise to one of them.
    tokens = rule['actual_rule'].replace('=>', '#').split('#')
    # Build the required-id set once instead of once (twice) per row.
    required = {int(token) for token in tokens}

    # str() keeps the join working when process ids are stored as integers.
    process_list = [
        str(process_id)
        for process_id, diffs in zip(g_df['Process_Id'], g_df['Diff_No'])
        if required.issubset(diffs)
    ]
    return '#'.join(process_list), len(process_list)
I pass the above function to `apply` as follows:
# NOTE: `args=(group_df)` is only a parenthesized DataFrame, not a 1-tuple. pandas
# then evaluates `args or ()`, which asks for the DataFrame's truth value and raises
# the ValueError shown in the traceback. A one-element tuple is spelled `(group_df,)`.
out_df[['processes','num_processes']] = out_df.apply(find_processes, axis=1, result_type="expand", args=(group_df))
and I get the following error
ValueError Traceback (most recent call last) in 2 group_df = diff_df.groupby('Process_Id')['Diff_No'].apply(list).to_frame().reset_index() 3 ----> 4 out_df[['processes','num_processes']] = out_df.apply(find_processes, axis=1, result_type="expand", args=(group_df)) 5 #out_df['processes'], out_df['num_processes'] = zip(*out_df['actual_rule'].map(find_processes)) 6 #find_processes('48#382=>235#195=>387#300=>381=>323#164#166#263#7#298#304#338#115#93')
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds) 6484 result_type=result_type, 6485 args=args, -> 6486 kwds=kwds) 6487 return op.get_result() 6488
~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py in frame_apply(obj, func, axis, broadcast, raw, reduce, result_type, ignore_failures, args, kwds) 29 raw=raw, reduce=reduce, result_type=result_type, 30 ignore_failures=ignore_failures, ---> 31 args=args, kwds=kwds) 32 33
~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py in __init__(self, obj, func, broadcast, raw, reduce, result_type, ignore_failures, args, kwds) 39 self.raw = raw 40 self.ignore_failures = ignore_failures ---> 41 self.args = args or () 42 self.kwds = kwds or {} 43
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in __nonzero__(self) 1476 raise ValueError("The truth value of a {0} is ambiguous. " 1477 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." -> 1478 .format(self.__class__.__name__)) 1479 1480 __bool__ = __nonzero__
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
But if I call it like this:
# Here group_df travels as a keyword argument: apply collects it in **kwds and
# forwards it to find_processes as g_df, so `args` is left untouched.
out_df[['processes','num_processes']] = out_df.apply(find_processes, axis=1, result_type="expand", g_df = group_df)
It works without any problems. What could be the problem with the first approach?