我有一个像这样的数据集:
shop_code pick_tel tel account mail stk
0 A 123456 123456 123456789 sfsdf 123
1 B 234567 234567 9876543210 dfndf 456
2 C 789456 789456 1579624325 sldf 789
3 D 258643 258643 1112299555 mvklsd 234
4 F 123456 123456 1487658794 sfsdf 476
5 Y 159753 159753 2369874568 lsdnfg 123
6 U 753951 753951 123456789 klsdjv 345
7 O 234567 234567 1234587963 dfndf 456
8 P 951357 951357 1579624325 skse 678
9 A 951357 123456 123456789 sfsdf 123
10 B 654798 234567 9876543210 dfndf 456
11 C 121347 789456 1579624325 sldf 789
12 D 123485 258643 1112299555 mvklsd 234
13 A 753951 123456 123456789 sfsdf 123
14 O 657543 234567 1234587963 dfndf 456
15 A 245213 123456 123456789 sfsdf 123
16 O 753951 234567 1234587963 dfndf 456
17 P 124856 951357 1579624325 skse 678
我的代码是:
def check_trung(df):
dict_diachi = {}
dict_mashop = {}
dict_mail = {}
dict_sdt = {}
dict_stk = {}
dict_picktel = {}
dict_total = []
group = {}
group_id = 1
for i, row in df.iterrows():
# print("row:", i, row[0])
diachi, mashop, picktel, mail, sdt, stk = row
if diachi in dict_diachi:
group_id_2 = dict_diachi[diachi]
group[group_id_2].append(i)
elif mashop in dict_mashop:
group_id_2 = dict_mashop[mashop]
group[group_id_2].append(i)
elif picktel in dict_picktel:
group_id_2 = dict_picktel[picktel]
group[group_id_2].append(i)
elif mail in dict_mail:
group_id_2 = dict_mail[mail]
group[group_id_2].append(i)
elif sdt in dict_sdt:
group_id_2 = dict_sdt[sdt]
group[group_id_2].append(i)
elif stk in dict_stk:
group_id_2 = dict_stk[stk]
group[group_id_2].append(i)
else:
group[group_id] = [i]
dict_diachi[diachi] = group_id
dict_mashop[mashop] = group_id
dict_picktel[picktel] = group_id
dict_mail[mail] = group_id
dict_sdt[sdt] = group_id
dict_stk[stk] = group_id
group_id += 1
for i, item in group.items(): ## Heading ##
print("----------")
print("item:", i, item)
check_trung(df)
结果是:
项目:1 [0、4、5、6、9、13、15]
项目:2 [1、7、10、14、16]
项目:3 [2,8,11,17]
项目:4 [3,12]
项目1缺少8,因为8和9具有相同的“ picktel”列值。谁能告诉我为什么他们不在一起。
与另一行至少共享一个其他值的每一行都应分组在一起。