import pandas as pd
# Load the user and bank sheets from Excel.
yod_user = pd.read_excel("C:\\Users\\Desktop\\yod_user.xlsx")
yod_bank = pd.read_excel("C:\\Users\\Desktop\\yod_bank.xlsx")
# Convert each DataFrame into a list of per-row dicts ("records" orientation).
# The default to_dict() orientation is column-oriented
# ({column: {row_index: value}}), which does not match the one-dict-per-row
# shape shown under "Definitions".
userd = yod_user.to_dict("records")
bankd = yod_bank.to_dict("records")
#Definitions
# Sample user records: one dict per user, identified by an integer "id".
userd = [
    {"id": 2, "username": "pk@gmail.in", "password": "YkxJNWNDT"},
    {"id": 4, "username": "test@gmail.com", "password": "VjNUYWh"},
    {"id": 6, "username": "zz113@gmail.com", "password": "dddd"},
    {"id": 8, "username": "faulmike@aol.com", "password": "ssss"},
    {"id": 10, "username": "newr10@gmail.com", "password": "errfs"},
]
# Sample bank-account records. "userid" holds a user's id as a string, and a
# user may own several accounts (userid "2" appears twice).
# Key/value pairs must be comma-separated: without the commas the adjacent
# string literals run together and the dict literal is a syntax error.
bankd = [
    {"userid": "2", "bankid": "99", "acc_number": "4590", "bank_name": "xyz"},
    {"userid": "4", "bankid": "100", "acc_number": "4520", "bank_name": "abc"},
    {"userid": "6", "bankid": "56", "acc_number": "4980", "bank_name": "xyz"},
    {"userid": "8", "bankid": "99", "acc_number": "4570", "bank_name": "ypr"},
    {"userid": "2", "bankid": "17", "acc_number": "4530", "bank_name": "abc"},
]
我想从上面的代码中实现的是这样的:
Result
[{"id": 2, "username":"pk@gmail.in","password":"YkxJNWNDT",
"account": [
{"userid":"2", "bankid": "99", "acc_number": "4590", "bank_name": "xyz"},
{"userid":"2", "bankid": "17", "acc_number": "4530", "bank_name": "abc"}
]}]
基本上，与某个 id 相关的所有信息都应该放在同一个键下。因此，键是来自 'userd' 的 "id"，该 id 对应的所有信息（用户名等以及银行详细信息）都应该放在其中。id = 2 名下的所有账户应该汇集在一起。我怎样才能做到这一点？
将“bankd”嵌入“userd”并引用id。
一旦实现了这一点，我就可以把结果转换为 JSON 并存入 MongoDB，这才是最终目标。非常感谢任何帮助。
答案 0 :(得分:1)
<strong>设置</strong>
df1 # yod_user
id password username
0 2 YkxJNWNDT pk@gmail.in
1 4 VjNUYWh test@gmail.com
2 6 dddd zz113@gmail.com
3 8 ssss faulmike@aol.com
4 10 errfs newr10@gmail.com
df2 # yod_bank
acc_number bank_name bankid userid
0 4590 xyz 99 2
1 4520 abc 100 4
2 4980 xyz 56 6
3 4570 ypr 99 8
4 4530 abc 17 2
首先，处理 df2，
并将其转换为按 userid
分组的字典列表：
# Build one row per user id whose 'account' cell is the list of that user's
# bank records (each record is the dict form of one df2 row).
# The chain is wrapped in parentheses rather than continued with backslashes:
# a comment after a trailing backslash is a syntax error.
df3 = (
    df2.set_index('userid', drop=False)  # drop=False keeps 'userid' inside each record
    .rename_axis('id')
    .apply(dict, axis=1)  # one dict per row
    .groupby(level=0)  # group the dicts by the 'id' index
    .apply(lambda x: x.tolist())  # alternative: x.values.T.tolist()
    .to_frame('account')
)
# Cast the index to int so that it has the same type as the integer df1.id.
df3.index = df3.index.astype(int)
df3
account
id
2 [{'bankid': '99', 'userid': '2', 'bank_name': ...
4 [{'bankid': '100', 'userid': '4', 'bank_name':...
6 [{'bankid': '56', 'userid': '6', 'bank_name': ...
8 [{'bankid': '99', 'userid': '8', 'bank_name': ...
请注意,我将df3.index
转换为整数类型,因为df1.id
也是整数类型。这将有助于下一步。
现在,执行merge
:
# Join every user row to its account list by matching df1's 'id' column
# against df3's index. With the default how='inner', users that have no
# entry in df3 do not appear in the result.
df = pd.merge(df1, df3, left_on='id', right_index=True)
df
id password username \
0 2 YkxJNWNDT pk@gmail.in
1 4 VjNUYWh test@gmail.com
2 6 dddd zz113@gmail.com
3 8 ssss faulmike@aol.com
account
0 [{'bankid': '99', 'userid': '2', 'bank_name': ...
1 [{'bankid': '100', 'userid': '4', 'bank_name':...
2 [{'bankid': '56', 'userid': '6', 'bank_name': ...
3 [{'bankid': '99', 'userid': '8', 'bank_name': ...
(可选)转换为记录:
import pprint

# 'records' yields one dict per row. The one-letter alias 'r' was deprecated
# and is rejected by pandas 2.0 and later, so the full name is spelled out.
pprint.pprint(df.to_dict('records'))
[{'id': 2,
'password': 'YkxJNWNDT',
'account': [{'acc_number': '4590',
'bank_name': 'xyz',
'bankid': '99',
'userid': '2'},
{'acc_number': '4530',
'bank_name': 'abc',
'bankid': '17',
'userid': '2'}],
'username': 'pk@gmail.in'},
{'id': 4,
'password': 'VjNUYWh',
'account': [{'acc_number': '4520',
'bank_name': 'abc',
'bankid': '100',
'userid': '4'}],
'username': 'test@gmail.com'},
{'id': 6,
'password': 'dddd',
'account': [{'acc_number': '4980',
'bank_name': 'xyz',
'bankid': '56',
'userid': '6'}],
'username': 'zz113@gmail.com'},
{'id': 8,
'password': 'ssss',
'account': [{'acc_number': '4570',
'bank_name': 'ypr',
'bankid': '99',
'userid': '8'}],
'username': 'faulmike@aol.com'}]
答案 1 :(得分:0)
简单,不优雅的方法是使用嵌套的for循环,并匹配id
/ userid
:
# Attach to every user record the list of bank records that belong to it.
for u_entry in userd:
    # Every user gets an "account" list, even when no bank record matches.
    u_entry["account"] = []
    for b_entry in bankd:
        # bankd stores "userid" as a string while userd stores "id" as an
        # int, so the comparison is done on the string form.
        if b_entry["userid"] == str(u_entry["id"]):
            u_entry["account"].append(b_entry)
输出:
[{'account': [{'acc_number': '4590',
'bank_name': 'xyz',
'bankid': '99',
'userid': '2'},
{'acc_number': '4530', 'bank_name': 'abc', 'bankid': '17', 'userid': '2'}],
'id': 2,
'password': 'YkxJNWNDT',
'username': 'pk@gmail.in'},
{'account': [{'acc_number': '4520',
'bank_name': 'abc',
'bankid': '100',
'userid': '4'}],
'id': 4,
'password': 'VjNUYWh',
'username': 'test@gmail.com'},
{'account': [{'acc_number': '4980',
'bank_name': 'xyz',
'bankid': '56',
'userid': '6'}],
'id': 6,
'password': 'dddd',
'username': 'zz113@gmail.com'},
{'account': [{'acc_number': '4570',
'bank_name': 'ypr',
'bankid': '99',
'userid': '8'}],
'id': 8,
'password': 'ssss',
'username': 'faulmike@aol.com'},
{'account': [],
'id': 10,
'password': 'errfs',
'username': 'newr10@gmail.com'}]
答案 2 :(得分:0)
把数据保留为 pandas DataFrame（即直接在 yod_user、yod_bank
上执行操作）可能会更轻松。无论如何，这里沿用您的变量名：
# User records as a DataFrame; "id" is an integer column.
userd = pd.DataFrame([
    {"id":2, "username":"pk@gmail.in","password":"YkxJNWNDT"},
    {"id":4, "username":"test@gmail.com", "password":"VjNUYWh"},
    {"id":6, "username":"zz113@gmail.com", "password":"dddd"},
    {"id":8, "username":"faulmike@aol.com", "password":"ssss"},
    {"id":10, "username":"newr10@gmail.com", "password":"errfs"}
])
# Bank records as a DataFrame; every value, including "userid", starts as a string.
bankd = pd.DataFrame([
    {"userid":"2", "bankid": "99", "acc_number": "4590", "bank_name":"xyz"},
    {"userid":"4", "bankid": "100", "acc_number": "4520", "bank_name": "abc"},
    {"userid":"6", "bankid": "56", "acc_number": "4980", "bank_name": "xyz"},
    {"userid":"8", "bankid": "99", "acc_number": "4570" ,"bank_name": "ypr"},
    {"userid":"2", "bankid": "17", "acc_number": "4530", "bank_name": "abc"}
])
# Your data was not in the same format between bankd and userd.
# Make bankd.userid numeric so it can be compared with the integer userd.id.
bankd['userid'] = pd.to_numeric(bankd['userid'])


def append_bankd(df_in):
    """Attach the matching bank rows to one user record.

    Despite its name, ``df_in`` is a single user record (a dict with an
    ``'id'`` key), not a DataFrame. The record is modified in place: a
    ``'bankd'`` key is added holding the rows of the module-level ``bankd``
    frame whose ``userid`` equals the record's ``id``, as a list of dicts
    (empty when nothing matches). The same dict is returned.
    """
    userid = df_in['id']
    # 'records' is spelled out: the short alias 'r' is rejected by pandas >= 2.0.
    df_in['bankd'] = bankd[bankd['userid'] == userid].to_dict('records')
    return df_in


# Iterate through the user records and append the results of searching bankd.
result = [append_bankd(m) for m in userd.to_dict('records')]
# Sample result
[
{
'bankd': [
{'acc_number': '4590',
'bank_name': 'xyz',
'bankid': '99',
'userid': 2},
{
'acc_number': '4530', 'bank_name': 'abc', 'bankid': '17', 'userid': 2}],
'id': 2,
'password': 'YkxJNWNDT',
'username': 'pk@gmail.in'}, ...
]