给出一个包含数字(浮点数)序列和分类ID(df
)的数据框。如何创建'key': []
形式的字典,其中键是数据框中的ID,并且列表包含各个数据框中的数字之间的差异?
尽管我正在寻找一种更多的熊猫方式来做到这一点,但我已经使用循环对其进行了管理。
import pandas as pd
from collections import defaultdict
df = pd.DataFrame({'a': [0.75435, 0.74897, 0.60949,
0.87438, 0.90885, 0.28547,
0.27327, 0.31078, 0.15576,
0.58139],
'id': list('aaaxxbbyyy')})
rl = pd.DataFrame({'b': [0.51, 0.30], 'id': ['aaa', 'bbb']})
interval = 0.1
d = defaultdict(list)
for index, row in rl.iterrows():
before = df[df['a'].between(row['b'] - interval, row['b'], inclusive=False)]
after = df[df['a'].between(row['b'], row['b'] + interval, inclusive=True)]
for x, b_row in before.iterrows():
d[b_row['id']].append((b_row['a'] - row['b']))
for x, a_row in after.iterrows():
d[a_row['id']].append((a_row['a'] - row['b']))
for k, v in d.items():
print('{k}\t{v}'.format(k=k, v=len(v)))
a 1
y 2
b 2
d
defaultdict(list,
{'a': [0.09948],
'b': [-0.01452, -0.02672],
'y': [0.07138, 0.01078]})