Question

我想实现一个部分复制模型（partial duplication model）：从一个具有 s >= 1 个顶点的图 G 开始。然后引入一个新的顶点 v，并在 G 中随机选择一个顶点 u。以概率 q 连接 v 和 u；再彼此独立地将 u 的每个邻居以概率 p 连接到 v。我想根据我的 s 重复这一过程。

我有一个包含三列的文本文件：Protein1、Protein2 和 Combined score。此文件包含 1000 多个此类条目。因此，每一行代表图中从 “protein1” 到 “protein2” 的一条边，其权重为 “combined_score”。我正在使用此文件来实现该算法（如上所述）。我只保留 combined_score 大于 990 的那些行。

4932.Q0010 4932.Q0017 951
4932.Q0010 4932.Q0032 951
4932.Q0010 4932.Q0045 313
4932.Q0010 4932.Q0085 263
4932.Q0010 4932.Q0105 410
4932.Q0010 4932.Q0143 930

代码：

import networkx as nx
import matplotlib.pyplot as plt
import random


# NOTE(review): the line breaks and indentation of this script were lost in
# the paste; the nesting below is a reconstruction -- confirm against the
# original source before relying on it.
def partial_duplication_model(G, p, q, s, max_score):
    """Add up to ``s`` new integer-labelled nodes to ``G`` and return ``G``.

    On each of the ``s`` rounds a node is drawn with ``random.choice``. If
    that node has not been drawn before, a new node ``max_score + i`` is
    added and joined to it with ``weight=q``; every other neighbour of the
    drawn node then gets its edge to the drawn node (re)written with
    ``weight=p``. The node count is printed before returning.
    """
    k = G.number_of_nodes()  # only referenced by the disabled alternatives
    list = []  # nodes drawn so far
    for i in range(s):
        # Disabled alternative: random.randint(1, k)
        node = random.choice(G.nodes())
        if node in list:
            continue
        v = max_score + i
        G.add_node(v)
        list.append(node)
        G.add_edge(v, node, weight=q)
        # Disabled alternative: for j in range(k)
        for j in G.neighbors(node):
            if j != v:
                G.add_edge(j, node, weight=p)
    print(G.number_of_nodes())
    return G


if __name__ == '__main__':
    f = open("4932.protein.links.v10.txt", "r").readlines()
    G = nx.Graph()
    max_score = 0
    # The first line of the file is skipped.
    for x in f[1:]:
        y = x.split(" ")
        score = int(y[2])
        if score >= 990:
            # Only rows scoring at least 990 contribute to the graph.
            G.add_node(y[0])
            G.add_edge(y[0], y[1], weight=score)
        max_score = max(max_score, score)

    p = 0.3
    q = 0.7
    s = 2
    res = partial_duplication_model(G, p, q, s, max_score)
    print("making a plot")
    stuff = nx.degree_histogram(res)
    plt.loglog(stuff)
    plt.show()
    # Disabled in the original paste:
    # print("Average shortest path length : ", nx.average_shortest_path_length(res))

这段代码无法正常工作：当我尝试计算平均最短路径长度时，它报错说图不连通。

Answer 1

我要做的第一件事是摆脱单字母变量名称。
快捷方式：如果添加的边所连接的节点尚不存在，networkx 会自动添加这些节点。

您不应该将 list 用作变量名。（好吧，你可以这样做，但不要指望得到好的结果。）

import networkx as nx
import matplotlib.pyplot as plt
import random

def partial_duplication_model(G,p,q,s,max_score):
    """Grow ``G`` in place with the partial duplication model and return it.

    For each of the ``s`` rounds a vertex ``u`` is picked uniformly at random
    from the current graph and a new vertex ``v`` is introduced. ``v`` is
    joined to ``u`` with probability ``q`` and, independently, to each
    neighbour of ``u`` with probability ``p``.

    Args:
        G: graph object exposing ``nodes()``, ``neighbors()``, ``add_node()``,
            ``add_edge()`` and ``number_of_nodes()`` (e.g. ``networkx.Graph``).
        p: probability of linking the new vertex to each neighbour of ``u``.
        q: probability of linking the new vertex to ``u`` itself.
        s: number of rounds to attempt.
        max_score: integer offset for new vertex labels; round ``i`` creates
            the vertex ``max_score + i``.

    Returns:
        The same graph object ``G``, after modification.

    Raises:
        IndexError: if ``s > 0`` and ``G`` has no vertices to pick from.

    Notes:
        A round whose pick ``u`` was already used in an earlier round is
        skipped, so fewer than ``s`` vertices may be added. Edges created
        here carry no ``weight`` attribute. The final vertex count is
        printed before returning.
    """
    already_picked = set()

    for i in range(s):
        # random.choice needs an indexable sequence; a node view is not one.
        node = random.choice(list(G.nodes()))
        if node in already_picked:
            continue
        already_picked.add(node)

        v = max_score + i
        # Snapshot the neighbourhood before touching the graph so that v can
        # never show up among the neighbours it is being offered.
        neighbours = list(G.neighbors(node))

        # Add v explicitly: it must exist even if no edge is drawn for it.
        G.add_node(v)
        if random.random() < q:
            G.add_edge(v, node)
        for j in neighbours:
            if random.random() < p:
                G.add_edge(j, v)

    print(G.number_of_nodes())
    return G

if __name__ == '__main__':
    # Build the interaction graph from the links file, keeping only edges
    # whose combined score is at least 990.
    G = nx.Graph()
    max_score = 0
    with open('4932.protein.links.v10.txt', 'r') as f:
        next(f, None)  # the first line is skipped (presumably a column header)
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            node1, node2, val = fields
            val = int(val)
            if val >= 990:
                # add_edge creates both endpoints if they do not exist yet.
                G.add_edge(node1, node2, weight=val)
            # Tracked over every row, not just the ones kept as edges.
            max_score = max(max_score, val)

    # Run the model once, after the whole file has been read, rather than
    # once per input line.
    p = 0.3
    q = 0.7
    s = 2
    res = partial_duplication_model(G, p, q, s, max_score)

    print("making a plot")
    stuff = nx.degree_histogram(res)
    plt.loglog(stuff)
    plt.show()

    # nx.average_shortest_path_length raises on a disconnected graph, which
    # the score filter above can easily produce; restrict it to one connected
    # component before calling it.
    #print("Average shortest path length : " , nx.average_shortest_path_length(res))

用Python制作网络

1 个答案: