我需要从循环创建一个字典,该循环遍历2列数字的数组。下面是数组的一小部分:
array([[ 0, 1],
[ 1, 0],
[ 1, 2],
[ 2, 3],
[ 2, 1]])
我想创建一个字典,将第一列的唯一编号作为键(例如本例中为{0,1,2}),将第二列中的相应数字作为值。
对于此示例,字典将如下所示:
dict = {0:[1], 1:[0,2], 2:[3,1]}
我的数组很长(370,000 x 2),所以我希望用一种高效的循环来完成这件事。任何建议都将不胜感激!
答案 0 :(得分:5)
您可以使用 defaultdict 来完成此操作。
from collections import defaultdict

import numpy as np

# Sample input: column 0 holds the keys, column 1 the values.
a = np.array([[ 0, 1],[ 1, 0],[ 1, 2],[ 2, 3], [ 2, 1]])

# A missing key starts out as an empty list, so every row can be appended
# without first checking whether its key has been seen before.
d = defaultdict(list)
for x, y in a:
    d[x].append(y)
答案 1 :(得分:1)
假设您的数组已按第一列排序,您可以使用 groupby :
from itertools import groupby

import numpy as np

# Sample input, already sorted by its first column.  groupby only merges
# *consecutive* rows with equal keys, so on unsorted input the same key would
# be emitted more than once and a later group would overwrite an earlier one.
arr = np.array([[ 0, 1],
                [ 1, 0],
                [ 1, 2],
                [ 2, 3],
                [ 2, 1]])

# Key each run of rows by its first element and keep the second elements.
result = {k: [v for _, v in g] for k, g in groupby(arr, lambda x: x[0])}
# {0: [1], 1: [0, 2], 2: [3, 1]}
答案 2 :(得分:1)
可以用简洁的一行代码来实现:
import itertools


def _first(row):
    """Return the key (first element) of a ``[key, value]`` row."""
    return row[0]


array = [
    [0, 1],
    [1, 0],
    [1, 2],
    [2, 3],
    [2, 1],
]

# sorted() orders whole rows, which makes equal keys adjacent (as groupby
# requires) and also leaves the values under each key in ascending order.
_ordered = sorted(array)
d = {
    key: [row[1] for row in rows]
    for key, rows in itertools.groupby(_ordered, _first)
}
结果:
{0: [1], 1: [0, 2], 2: [1, 3]}
答案 3 :(得分:1)
如果第一列是已排序的“重复 range”(键 0、1、2…… 依次出现,每个键可连续重复多次):
# Assumes column 0 of `a` is sorted ascending and holds integer keys counted
# from 0 -- TODO confirm against the caller's data.
first_col = a[:, 0]
# For every candidate key 0..first_col[-1], find the first row that holds it.
steps_at = np.searchsorted(first_col, np.arange(first_col[-1] + 1))
# Cut the value column at each group start after the first one and pair every
# chunk with the key found at its starting row.
chunks = np.split(a[:, 1], steps_at[1:])
result = dict(zip(first_col[steps_at], chunks))
如果第一列中相同的键彼此相邻(已聚成组),但各组之间未排序:
# Assumes equal keys sit next to each other in column 0; the groups themselves
# may appear in any order -- TODO confirm against the caller's data.
# Prepending NaN makes the first difference NaN, which counts as non-zero, so
# row 0 is always reported as the start of a group.
steps_at = np.where(np.diff(np.r_[np.nan, a[:,0]]))[0]
# Bound to a name because a bare `return` is a syntax error outside a function.
result = {k:v for k,v in zip(a[steps_at,0], np.split(a[:,1], steps_at[1:]))}
一般情况
# Stable sort of the row indices by key: mergesort keeps rows with equal keys
# in their original relative order, so each group's values stay in input order.
ind = np.argsort(a[:, 0], kind='mergesort')
aa = a[ind, 0]
# Prepending NaN makes the first difference non-zero, so index 0 always
# starts a group; every later non-zero difference marks a key change.
steps_at = np.where(np.diff(np.r_[np.nan, aa]))[0]
# Bound to a name because a bare `return` is a syntax error outside a function.
result = {k:v for k,v in zip(aa[steps_at], np.split(a[ind,1], steps_at[1:]))}
各方案对比测试(shootout):
(19, 2) correctness
Psidom {0: [0, 28, 38, 97, 99, 65, 73], 1: [64, 91, 70, 40, 9], 2: [94, 96, 69, 46], 3: [85, 15, 65]}
Daniel_Jimenez defaultdict(<class 'list'>, {0: [0, 28, 38, 97, 99, 65, 73], 1: [64, 91, 70, 40, 9], 2: [94, 96, 69, 46], 3: [85, 15, 65]})
Jean_Francois_Fabre {0: [0, 28, 38, 97, 99, 65, 73], 1: [64, 91, 70, 40, 9], 2: [94, 96, 69, 46], 3: [85, 15, 65]}
Alexandre_Kempf {0: array([ 0, 28, 38, 97, 99, 65, 73]), 1: array([64, 91, 70, 40, 9]), 2: array([94, 96, 69, 46]), 3: array([85, 15, 65])}
Or_Duan {0: [0, 28, 38, 97, 99, 65, 73], 1: [64, 91, 70, 40, 9], 2: [94, 96, 69, 46], 3: [85, 15, 65]}
Paul_Panzer_sorted {0: array([ 0, 28, 38, 97, 99, 65, 73]), 1: array([64, 91, 70, 40, 9]), 2: array([94, 96, 69, 46]), 3: array([85, 15, 65])}
Paul_Panzer_grouped {0: array([ 0, 28, 38, 97, 99, 65, 73]), 1: array([64, 91, 70, 40, 9]), 2: array([94, 96, 69, 46]), 3: array([85, 15, 65])}
Paul_Panzer_general {0: array([ 0, 28, 38, 97, 99, 65, 73]), 1: array([64, 91, 70, 40, 9]), 2: array([94, 96, 69, 46]), 3: array([85, 15, 65])}
B_M_sorted {0: array([ 0, 28, 38, 97, 99, 65, 73]), 1: array([64, 91, 70, 40, 9]), 2: array([94, 96, 69, 46]), 3: array([85, 15, 65])}
B_M_general {0: array([ 0, 28, 38, 65, 73, 97, 99]), 1: array([ 9, 40, 64, 70, 91]), 2: array([46, 69, 94, 96]), 3: array([15, 65, 85])}
(40194, 2) speed (seconds used for 10 repeats)
Psidom 0.4336233548820019
Daniel_Jimenez 0.3609276609495282
Jean_Francois_Fabre 0.17962428089231253
Alexandre_Kempf 3.5392782238777727
Or_Duan 0.1873011060524732
Paul_Panzer_sorted 0.08001555898226798
Paul_Panzer_grouped 0.08144942414946854
Paul_Panzer_general 0.10183193604461849
B_M_sorted 0.09192353091202676
B_M_general 0.16612185980193317
(400771, 2) speed (seconds used for 10 repeats)
Psidom 3.968917251098901
Daniel_Jimenez 3.619185874937102
Jean_Francois_Fabre 1.7871235068887472
Or_Duan 1.9176530800759792
Paul_Panzer_sorted 0.8291062880307436
Paul_Panzer_grouped 0.8662846579682082
Paul_Panzer_general 1.0812653130851686
B_M_sorted 1.031000167131424
B_M_general 2.16174431797117
Alexandre_Kempf 513.2718367418274
代码:
from collections import defaultdict
from itertools import groupby
import numpy as np
import timeit
# Groups consecutive rows by their first element; each key maps to the list of
# second elements in its run.  Every row must unpack into exactly two items.
# Deliberately left as a lambda: the benchmark driver labels candidates by
# checking for ``__name__ == '<lambda>'``.
Psidom = lambda a: {
    key: [second for _first, second in run]
    for key, run in groupby(a, lambda row: row[0])
}
def Daniel_Jimenez(a):
    """Group the second element of each row under the row's first element.

    Args:
        a: Iterable of two-item rows ``(key, value)``.  The keys need not be
            sorted or contiguous.

    Returns:
        A ``defaultdict(list)`` mapping each key to its values in input order.
    """
    d = defaultdict(list)
    for x, y in a:
        # A missing key materialises as an empty list on first access.
        d[x].append(y)
    return d
# Groups consecutive rows by their first element and collects element 1 of
# each row in the run.  Unlike the stand-alone answer, the input is not sorted
# here, so rows with equal keys must already be adjacent.
# Deliberately left as a lambda: the benchmark driver labels candidates by
# checking for ``__name__ == '<lambda>'``.
Jean_Francois_Fabre = lambda a: {
    key: [row[1] for row in run]
    for key, run in groupby(a, lambda row: row[0])
}
def Alexandre_Kempf(a):
    """Map each unique key to its values using one boolean mask per key.

    Args:
        a: Two-column NumPy array; column 0 holds the keys and column 1 the
            values.  The keys need not be sorted.

    Returns:
        A dict mapping each unique key (in ascending order) to a NumPy array
        of that key's values, in their original row order.
    """
    keys = a[:, 0]
    items = a[:, 1]
    # Each mask compares the whole key column against one key, so the total
    # work grows with (number of rows) x (number of unique keys).
    return {key: items[keys == key] for key in np.unique(keys)}
def Or_Duan(a):
    """Group element 1 of each row under element 0 using try/except (EAFP).

    Args:
        a: Iterable of indexable rows with at least two elements.  The keys
            need not be sorted or contiguous.

    Returns:
        A plain dict mapping each key to the list of its values in input order.
    """
    default = {}
    for elm in a:
        try:
            default[elm[0]].append(elm[1])
        except KeyError:
            # First time this key is seen: start its list.
            default[elm[0]] = [elm[1]]
    return default
def Paul_Panzer_sorted(a):
    """Split the value column at the first occurrence of each key.

    Args:
        a: Non-empty two-column NumPy array.  Assumes column 0 is sorted
            ascending and holds integer keys counted from 0 -- the benchmark
            inputs are built that way.

    Returns:
        A dict mapping each key to a NumPy array of its values, in row order.
    """
    # For every candidate key 0..a[-1, 0], find the first row that holds it.
    steps_at = np.searchsorted(a[:,0], np.arange(a[-1,0]+1))
    # Cut the values at each group start after the first one and pair every
    # chunk with the key found at its starting row.
    return {k:v for k,v in zip(a[steps_at,0], np.split(a[:,1], steps_at[1:]))}
def Paul_Panzer_grouped(a):
    """Split the value column wherever the key column changes.

    Args:
        a: Two-column NumPy array in which rows with equal keys are adjacent.
            The groups themselves may appear in any order.

    Returns:
        A dict mapping each key to a NumPy array of its values, in row order.
    """
    # Prepending NaN makes the first difference NaN, which counts as non-zero,
    # so row 0 is always reported as the start of a group.
    steps_at = np.where(np.diff(np.r_[np.nan, a[:,0]]))[0]
    return {k:v for k,v in zip(a[steps_at,0], np.split(a[:,1], steps_at[1:]))}
def Paul_Panzer_general(a):
    """Group values by key for rows given in arbitrary order.

    Args:
        a: Two-column NumPy array; column 0 holds the keys and column 1 the
            values.  No ordering is required.

    Returns:
        A dict mapping each key (ascending) to a NumPy array of its values,
        kept in their original relative order.
    """
    # Stable sort of the row indices by key: mergesort keeps rows with equal
    # keys in input order, so the values inside each group are not reshuffled.
    ind = np.argsort(a[:, 0], kind='mergesort')
    aa = a[ind, 0]
    # Prepending NaN makes the first difference non-zero, so index 0 always
    # starts a group; every later non-zero difference marks a key change.
    steps_at = np.where(np.diff(np.r_[np.nan, aa]))[0]
    return {k:v for k,v in zip(aa[steps_at], np.split(a[ind,1], steps_at[1:]))}
def B_M_sorted(b):
    """Group values by key for an array already sorted by its key column.

    Args:
        b: Two-column NumPy array; column 0 holds the keys and column 1 the
            values.  Assumes the rows are sorted by key, so that the first
            occurrence of each key is the start of its group.

    Returns:
        A dict mapping each unique key to a NumPy array of its values, in row
        order.
    """
    # Unpacking the transpose yields the two columns.
    keys,values=b.T
    # `steps` holds the index of the first occurrence of each unique key.
    uniq,steps=np.unique(keys,return_index =True)
    # Cut the values at every group start except the first.
    bins=np.split(values,steps[1:])
    return dict(zip(uniq,bins))
def B_M_general(a):
    """Group values by key for rows given in arbitrary order.

    Args:
        a: Two-column NumPy array; column 0 holds the keys and column 1 the
            values.  No ordering is required.

    Returns:
        A dict mapping each unique key (ascending) to a NumPy array of its
        values.  Because the rows are sorted by key and then by value, the
        values under each key come out in ascending order rather than in
        input order.
    """
    # lexsort treats its LAST key as the primary one, so reversing the columns
    # sorts the rows by column 0 first and column 1 second.
    b=a[np.lexsort(a.T[::-1])]
    keys,values=b.T
    # `steps` holds the index of the first occurrence of each unique key.
    uniq,steps=np.unique(keys,return_index =True)
    # Cut the values at every group start except the first.
    bins=np.split(values,steps[1:])
    return dict(zip(uniq,bins))
# Test inputs of three sizes.  Column 0 is a sorted run of integer keys, each
# repeated a random 1-9 times; column 1 holds random values in [0, 100).
c = np.arange(4).repeat(np.random.randint(1,10,(4)))
d = np.random.randint(100, size=c.shape)
t = np.c_[c, d]

c = np.arange(8000).repeat(np.random.randint(1,10,(8000)))
d = np.random.randint(100, size=c.shape)
a = np.c_[c, d]

c = np.arange(80000).repeat(np.random.randint(1,10,(80000)))
d = np.random.randint(100, size=c.shape)
b = np.c_[c, d]

# Both lambdas report ``__name__ == '<lambda>'``; map them to readable labels
# explicitly instead of relying on the order in which they are visited.
_LAMBDA_LABELS = {Psidom: 'Psidom', Jean_Francois_Fabre: 'Jean_Francois_Fabre'}


def _label(f):
    """Return the display name of candidate *f*, left-justified to 20 columns."""
    return _LAMBDA_LABELS.get(f, f.__name__).ljust(20)


# 1) Correctness: show what every candidate returns for the tiny input.
print(t.shape, 'correctness\n')
for f in (Psidom, Daniel_Jimenez, Jean_Francois_Fabre, Alexandre_Kempf,
          Or_Duan, Paul_Panzer_sorted, Paul_Panzer_grouped,
          Paul_Panzer_general, B_M_sorted, B_M_general):
    print(_label(f), f(t))

# 2) Speed on the medium input: total seconds for 10 calls per candidate.
print('\n', a.shape, 'speed (seconds used for 10 repeats)\n')
for f in (Psidom, Daniel_Jimenez, Jean_Francois_Fabre, Alexandre_Kempf,
          Or_Duan, Paul_Panzer_sorted, Paul_Panzer_grouped,
          Paul_Panzer_general, B_M_sorted, B_M_general):
    print(_label(f), timeit.timeit("f(a)",number=10,
                                   globals={'f':f, 'a':a}))

# 3) Speed on the large input.  Alexandre_Kempf runs last here; the timed
#    statement still refers to `a`, which is bound to the large array `b`.
print('\n', b.shape, 'speed (seconds used for 10 repeats)\n')
for f in (Psidom, Daniel_Jimenez, Jean_Francois_Fabre,
          Or_Duan, Paul_Panzer_sorted, Paul_Panzer_grouped,
          Paul_Panzer_general, B_M_sorted, B_M_general, Alexandre_Kempf):
    print(_label(f), timeit.timeit("f(a)",number=10,
                                   globals={'f':f, 'a':b}))
答案 4 :(得分:1)
一个纯粹的numpy解决方案:
# Sort the rows by column 0, then by column 1 (lexsort treats its LAST key as
# the primary one, hence the reversed transpose).  Only needed when `a` is not
# already sorted by key; otherwise `b = a` will do.
b=a[np.lexsort(a.T[::-1])] # if necessary.
# Unpacking the transpose yields the two columns.
keys,values=b.T
# `steps` holds the index of the first occurrence of each unique key.
uniq,steps=np.unique(keys,return_index =True)
# Cut the values at every group start except the first.
bins=np.split(values,steps[1:])
如果 uniq == range(len(uniq))(即键恰好是从 0 开始的连续整数),则无需构建字典,直接使用 bins[key] 即可,这是最快的方法。
否则:
# Pair each unique key with the array of values from its group.
d=dict(zip(uniq,bins))
# Result for the question's sample array (after the lexsort step):
#{0: array([1]), 1: array([0, 2]), 2: array([1, 3])}
这样即可构建出所需的字典。
答案 5 :(得分:0)
一种解决方案如下(假设 a 是你的数组):
# Column 0 holds the keys, column 1 the values.
keys = a[:,0]
items = a[:,1]
uniqkey = np.unique(keys)
# One boolean mask per unique key; each mask scans the whole key column, so
# this gets slow when there are many distinct keys.
prelist = [items[keys==i] for i in uniqkey]
# Pair every unique key with its array of values.
dico = dict(zip(uniqkey, prelist))
答案 6 :(得分:0)
除了 defaultdict 之外,另一种方法是使用 try/except,即采用“请求原谅,而非请求许可”(EAFP,"ask forgiveness not permission")的做法。
# Rows are [key, value] pairs.
array = [[ 0, 1],
         [ 1, 0],
         [ 1, 2],
         [ 2, 3],
         [ 2, 1]]

default = {}
for elm in array:
    try:
        # Common case: the key already has a list to extend.
        default[elm[0]].append(elm[1])
    except KeyError:
        # First time this key is seen: start its list.
        default[elm[0]] = [elm[1]]