我正在创建一个模型,以查看元组的元素如何迭代到列表中的其他元组。
例如:
employerEmployeeEdges = [(12,'a'), (12,'c'), (12,'d'), (14,'e'), (14,'a'), (13,'a'), (13,'b'), (13,'d'), (13,'c'), (16,'b'), (16,'b')]
这里的目标是将(例如)元组 1 中的 12 与元组 2 中的 12 进行匹配,如果匹配则计数一次。每一次匹配被视为一个“链接”。我需要将这些链接的数量放入矩阵中。
例如:
a b c d e
a 0 1 2 2
b 0
c 1 0 0
d 0 0
e 1 0
我有以下代码
from collections import defaultdict
import pandas as pd
import numpy as np
from itertools import combinations
from collections import Counter
import numpy as np
import scipy.sparse as ss
np.seterr(divide='ignore', invalid='ignore')
# Build (employee, employer) edges per year, then count "links" between
# employers: two distinct edges that share the same employee form one link.
#
# Raw observations: the k-th entries of the three lists describe one record
# (employee id, employer, year).
# NOTE(review): the lists have different lengths (22 years, 24 individuals,
# 23 employers). zip() stops at the shortest input, so only the first 22
# records are used below -- confirm whether the trailing entries are intended.
year = [2001, 2002, 2002, 2005, 2002, 2004, 2001, 2001, 2002, 2003, 2003, 2002, 2004, 2005, 2003, 2004, 2005, 2004, 2004, 2002, 2001, 2001]
indviduals = [12, 23, 12, 24, 28, 30, 15, 17, 18, 18, 19, 12, 15, 12, 12, 12, 15, 15, 15, 12, 12, 15, 200, 200]
employers = ['a', 'b', 'b', 'c', 'd', 'e', 'a', 'a', 'b', 'b', 'c', 'b', 'a', 'c', 'e', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'b']

# Edges for all years, in year order and, within a year, in order of each
# employer's first appearance in `employers`.
employerEmployeeEdges = []
for j in np.unique(year):
    # Employees seen at each employer during year j (tuple per employer).
    d = dict.fromkeys(employers, ())
    for i, e, y in zip(indviduals, employers, year):
        if y == j:
            d[e] = d[e] + (i,)
    for k, v in d.items():
        # An employer with fewer than two employee records in this year
        # contributes no edges for that year.
        if len(v) > 1:
            for item in v:
                employerEmployeeEdges.append((item, k))

# List of possible links between employee and employer.
print("employees employer edges", employerEmployeeEdges)

# Employer of every edge (duplicates kept, same order as the edges).
employersNew = [i[1] for i in employerEmployeeEdges]

# The matrix is indexed by distinct employer, not by edge: row/column order
# follows the sorted employer labels.
uniqueEmployers = sorted(set(employersNew))
employerIndex = {name: idx for idx, name in enumerate(uniqueEmployers)}
n = len(uniqueEmployers)
Q = np.zeros((n, n), dtype=int)

# Q[from, to] counts ordered pairs of *distinct* edges that share an employee.
# Comparing positions (a != b) is what excludes an edge from matching itself;
# slicing the list with [1:] only dropped the first edge and did not do that.
# Two different edges with the same employee and the same employer still
# count, so the diagonal can be non-zero.
for a, firstLink in enumerate(employerEmployeeEdges):
    for b, secondLink in enumerate(employerEmployeeEdges):
        if a != b and firstLink[0] == secondLink[0]:
            from_node, to_node = firstLink[1], secondLink[1]
            Q[employerIndex[from_node], employerIndex[to_node]] += 1

print(Q)
此打印不会给我想要的输出。如何将链接数放在矩阵上?
此外,我想使用矩阵Q来计算概率,如下所示:
# P=np.empty((n,n))
# #print(P)
# for i in range(n):
# #print(i)
# P[i, :] = Q[i, :] / Q[i, :].sum()
# #print(P)
答案 0 :(得分:0)
您可以执行以下操作:
# Count, for every ordered pair of employers, how many pairs of distinct
# edges share the same employee ("links"), and store the counts in Q.
#
# Mixing ints and strings makes NumPy store every field as a string, so the
# employee ids below are compared as strings ('12', '13', ...).
employerEmployeeEdges = np.array([(12, 'a'), (12, 'c'), (12, 'd'), (14, 'e'), (14, 'a'),
                                  (13, 'a'), (13, 'b'), (13, 'd'), (13, 'c'), (16, 'b'), (16, 'b')])

# Despite its name, this holds the sorted distinct values of column 1,
# i.e. the employer labels; they define the row/column order of Q.
unique_employee = np.unique(employerEmployeeEdges[:, 1])
n_unique = len(unique_employee)
Q = np.zeros([n_unique, n_unique])

# Map each employer label to its row/column position once, instead of
# searching with np.where for every edge.
employer_position = {label: pos for pos, label in enumerate(unique_employee)}

# For every employee, the matrix positions of the employers on its edges
# (one entry per edge, so repeated edges are kept).
positions_by_employee = {}
for employee, employer in employerEmployeeEdges:
    positions_by_employee.setdefault(employee, []).append(employer_position[employer])

# Every ordered pair of distinct edges of one employee is one link. Comparing
# list positions (not values) keeps an edge from matching itself while still
# counting a repeated edge such as the two (16, 'b') entries.
for positions in positions_by_employee.values():
    for first, row in enumerate(positions):
        for second, col in enumerate(positions):
            if first != second:
                Q[row, col] += 1

print(Q)
此代码提供以下答案:
[[0. 1. 2. 2. 1.]
[1. 2. 1. 1. 0.]
[2. 1. 0. 2. 0.]
[2. 1. 2. 0. 0.]
[1. 0. 0. 0. 0.]]
请记住,Q[0,0] 对应 'a:a',而 Q[-1,-1] 对应 'e:e'。