测试数据

Question

我正在创建一个模型，以查看元组的元素如何迭代到列表中的其他元组。

例如：

# Illustrative (employee id, employer label) edges. The employer labels are
# quoted so the example is valid Python; bare a, b, c, ... would be undefined names.
employerEmployeeEdges = [(12, 'a'), (12, 'c'), (12, 'd'), (14, 'e'), (14, 'a'), (13, 'a'), (13, 'b'), (13, 'd'), (13, 'c'), (16, 'b'), (16, 'b')]

这里的目标是将（例如）元组1中的12与元组2中的12进行匹配，如果匹配则计数。每一次匹配被视为一个“链接”。我需要将这些链接的数量放入矩阵中。

例如：

   a  b  c  d  e 
a  0     1  2  2
b    0
c  1    0   0
d    0     0
e  1           0

我有以下代码

from collections import defaultdict

import pandas as pd
import numpy as np
from itertools import combinations
from collections import Counter
import numpy as np
import scipy.sparse as ss
np.seterr(divide='ignore', invalid='ignore')

测试数据

# Parallel record lists: position k of each list describes one employment record.
# NOTE(review): the lists have different lengths (22, 24 and 23 entries); zip()
# below stops at the shortest, so only the first 22 records are used.
year = [
    2001, 2002, 2002, 2005, 2002, 2004, 2001, 2001, 2002, 2003, 2003,
    2002, 2004, 2005, 2003, 2004, 2005, 2004, 2004, 2002, 2001, 2001,
]
indviduals = [
    12, 23, 12, 24, 28, 30, 15, 17, 18, 18, 19, 12,
    15, 12, 12, 12, 15, 15, 15, 12, 12, 15, 200, 200,
]
employers = [
    'a', 'b', 'b', 'c', 'd', 'e', 'a', 'a', 'b', 'b', 'c', 'b',
    'a', 'c', 'e', 'a', 'a', 'a', 'a', 'b', 'a', 'a', 'b',
]

# Flat list of (employee, employer) edges, accumulated over all years.
employerEmployeeEdges = []
for current_year in np.unique(year):
    # Every known employer starts the year with an empty staff tuple; the
    # dict keeps employers in the order they first appear in `employers`.
    staff_by_employer = {name: () for name in employers}
    for person, employer, record_year in zip(indviduals, employers, year):
        if record_year != current_year:
            continue
        staff_by_employer[employer] += (person,)

    for employer, staff in staff_by_employer.items():
        # Only employers with at least two recorded employees in this year
        # contribute edges.
        # NOTE(review): the original remark only talks about skipping employers
        # with zero employees, yet this test also drops employers with exactly
        # one employee -- confirm which is intended.
        if len(staff) <= 1:
            continue
        employerEmployeeEdges.extend((person, employer) for person in staff)


# Dump every collected (employee, employer) edge for inspection.
print("employees employer edges", list(employerEmployeeEdges))

# Employer label of each edge, in edge order (one entry per edge, so labels repeat).
employersNew = [edge[1] for edge in employerEmployeeEdges]
n = len(employersNew)
# Integer n x n matrix; note that n counts edges, not distinct employers.
Q = np.zeros((n, n), dtype=int)

# Candidate second links: every edge except the very first one. The edge list
# is not modified below, so the slice is taken once up front.
# NOTE(review): an edge other than the first is also compared with itself here.
second_links = employerEmployeeEdges[1:]
for first_link in employerEmployeeEdges:
    employee = first_link[0]
    for second_link in second_links:
        if second_link[0] != employee:
            continue
        # Both edges share the employee: show the pair of employers.
        print(first_link[1], second_link[1])

# Unfinished attempt at filling Q from inside the loop above, kept disabled:
#         from_node, to_node = first_link[1], second_link[1]  # check where did the employee go?
#         indx, jdx = employersNew.index(from_node), employersNew[1:].index(to_node)
#         Q[indx, jdx] = 0
#         print(Q)
# print(len(employerEmployeeEdges))
# print(Q)

这段打印代码没有给出我想要的输出。如何才能把链接的数量放入矩阵中？

此外，我想使用矩阵Q来计算概率，如下所示：

# P=np.empty((n,n))
# #print(P)
# for i in range(n):
#     #print(i)
#     P[i, :] = Q[i, :] / Q[i, :].sum()

# #print(P)

Answer 1

您可以执行以下操作：

# Edge list as a 2-D array: column 0 is the employee id, column 1 the employer
# label. NumPy stores the mixed int/str tuples as strings, so ids become '12', ...
employerEmployeeEdges = np.array([(12, 'a'), (12, 'c'), (12, 'd'), (14, 'e'), (14, 'a'),
                                  (13, 'a'), (13, 'b'), (13, 'd'), (13, 'c'), (16, 'b'), (16, 'b')])

# Despite its name, this holds the sorted distinct *employer* labels (column 1);
# they label both axes of Q.
unique_employee = np.unique(employerEmployeeEdges[:, 1])
n_unique = len(unique_employee)

# Q[i, j] counts ordered pairs of distinct edges that share an employee, where
# the first edge's employer is unique_employee[i] and the second's is
# unique_employee[j].
Q = np.zeros([n_unique, n_unique])

# Employer label -> row/column position in Q.
employer_position = {label: pos for pos, label in enumerate(unique_employee)}

# Group the edges by employee, remembering each edge's employer position.
# Grouping once replaces copying and blanking the whole edge array per edge.
positions_by_employee = {}
for employee, employer in employerEmployeeEdges:
    positions_by_employee.setdefault(employee, []).append(employer_position[employer])

# Within one employee's edges, every ordered pair of *different* edges is a
# link. Edges are told apart by their place in the list, not by value, so a
# duplicated edge such as (16, 'b'), (16, 'b') still links 'b' to 'b'.
for held in positions_by_employee.values():
    for first, row in enumerate(held):
        for second, col in enumerate(held):
            if first != second:
                Q[row, col] += 1

print(Q)

此代码提供以下答案：

[[0. 1. 2. 2. 1.]
 [1. 2. 1. 1. 0.]
 [2. 1. 0. 2. 0.]
 [2. 1. 2. 0. 0.]
 [1. 0. 0. 0. 0.]]

请记住，Q[0, 0] 对应 'a:a'，而 Q[-1, -1] 对应 'e:e'。

用链接创建矩阵

测试数据

1 个答案: