Question

在以下代码中

import time
import nltk
from nltk import word_tokenize
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import community



########################################################################################################################
# Load the first-name and last-name lists from CSV files in the working directory.
begin = time.clock() #record start time; the elapsed time is printed at the very end of the script



# Each file is parsed with ',' passed positionally as the separator.
male_names= pd.read_csv('male_names.csv', ',')
female_names= pd.read_csv('female_names.csv', ',')
last_names= pd.read_csv('last_names.csv', ',')

# Keep only the values of the 'Names' column of each table; these are used for membership tests below.
male_name=male_names['Names'].values
female_name=female_names['Names'].values
last_name=last_names['Names'].values 
########################################################################################################################
#Book Testing, tokenization, creating a dictionary
# Read the whole book up front; the with-block closes the file handle as soon
# as the text is in memory instead of leaving it to the garbage collector.
with open("HP1.txt", "r") as book_file:
    text_file = book_file.read()
# Paragraphs are taken to be separated by two consecutive carriage returns.
# NOTE(review): this only splits correctly if HP1.txt really uses '\r\r' between paragraphs - confirm.
paragraph=text_file.split('\r\r')             # list of strings (paragraphs)
#print para
lls=[[p] for p in paragraph]                  # list of one-element lists, one per paragraph
tagged=[]
for item in lls:
    for i in item:
        token=word_tokenize(i)           # tokenize inside each paragraph
        tagged.append(nltk.pos_tag(token))
# tagged holds one list per paragraph; each entry is a (token, POS tag) pair.
# Single-argument print(...) prints the same text under Python 2 and Python 3.
print(tagged)

#print tagged[0][0][1]
my_dict={}                               # name -> number of occurrences, filled in by the next section

########################################################################################################################
# Count how often each known first name occurs as a proper noun (tag "NNP").
# When the token right after it is a known last name, the pair "First Last"
# is counted as a single key instead of the first name alone.
for sentence in tagged:
    last_position = len(sentence) - 1
    for position, tagged_token in enumerate(sentence):
        if tagged_token[1] != "NNP":
            continue
        key = tagged_token[0]
        if not ((key in male_name) or (key in female_name)):
            continue
        # Look one token ahead (if there is one) for a surname.
        if position < last_position and (sentence[position + 1][0] in last_name):
            key = key + " " + sentence[position + 1][0]
        my_dict[key] = my_dict.get(key, 0) + 1
print(my_dict)


# ########################################################################################################################                     
# Find top ten keys
# word and count are parallel arrays: word[k] is a name, count[k] how often it was seen.
word = np.array(my_dict.keys())
count = np.array(my_dict.values())

# Fold the counts of stand-alone name parts into the full name that contains them:
# for every entry with a space (e.g. "Harry Potter"), any other entry that equals one of
# its parts ("Harry", "Potter") has its count added to the full name and is then cleared.
for i in range(0,len(word)):
    if " " in word[i]:
        string=word[i]
        l=string.split()
        for item in l: #item 1: Harry item 2: Potter
            for j in range(0,len(word)): 
                if (i != j and item==word[j]):
                    count[i]=count[i]+count[j]
                    # Mark the absorbed entry as empty; a cleared entry can no longer match
                    # a later full name, so the first matching full name wins.
                    count[j]=0
                    word[j]=""

print "hello"
# Remove every entry whose count is zero, applying the same selection to both
# arrays so that word[k] and count[k] keep describing the same name.
nonzero_mask = count != 0
count = count[nonzero_mask]
word = word[nonzero_mask]
print(word)
print(count)




# top / topcount collect the ten most frequent names and their counts, most frequent first.
top = np.array([])
topcount = np.array([])

# Selection by repeated argmax: take the current maximum, record it, then clear it
# so the next iteration finds the next-largest entry. This consumes word and count.
# If fewer than ten entries have a non-zero count, the remaining iterations append
# already-cleared slots (empty name, count 0).
for i in range(10):
    max_index = np.argmax(count)
    top = np.append(top,word[max_index])
    topcount = np.append(topcount,count[max_index])

    word[max_index] = ''
    count[max_index] = 0


# Bare print statements emit blank lines between the two dumps.
print
print top
print
print topcount

########################################################################################################################   
# Initialize a 10x10 adjacency matrix; row/column k corresponds to top[k].
adj = np.zeros((10,10))


for para in tagged: #for each paragraph
    name_index = set() #indices (into top) of the distinct top names seen in this paragraph
    for each in para: #for each (token, tag) pair in the paragraph
        if (each[1]=="NNP"):
            for i in range(0,len(top)): #iterate the top list to find if NNP is a top name
                # Substring test: the token matches if it occurs anywhere inside top[i],
                # so "Harry" matches "Harry Potter". Only the first matching entry is used.
                # NOTE(review): a token that is merely a fragment of a name would match too - confirm this is intended.
                if (each[0] in top[i]):
                    name_index.add(i)  #if found, add index of top list
                    break

    name_index = list(name_index)
    #print name_index


    # Every unordered pair of top names that share the paragraph gets +1 in both
    # directions, which keeps the matrix symmetric.
    for i in range(0,len(name_index)):
        for j in range(i+1, len(name_index)):
            adj[name_index[i]][name_index[j]] +=1
            adj[name_index[j]][name_index[i]] +=1

#add the frequency counts to the adj matrix (on the diagonal)
for i in range(0,len(topcount)):
    print topcount[i]
    adj[i][i] = topcount[i]

print adj
########################################################################################################################   

# Build a directed graph from the matrix; nodes are the integer indices 0..9.
G=nx.DiGraph(adj)
########################################################################################################################   
def pagerank(G, alpha=0.85,max_iter=100, tol=1.0e-6, weight='weight'):
    """Return PageRank scores for the nodes of G, computed by power iteration.

    Parameters
    ----------
    G : graph
        Graph to rank; passed to ``nx.stochastic_graph``. Anything with
        ``len(G) == 0`` yields an empty result.
    alpha : float
        Damping factor: the share of a node's score that is passed along
        its out-edges; the rest is spread uniformly.
    max_iter : int
        Maximum number of power-iteration steps.
    tol : float
        Convergence tolerance; iteration stops once the l1 change between
        two successive score vectors drops below ``N * tol``.
    weight : str
        Name of the edge attribute used as edge weight.

    Returns
    -------
    dict
        Mapping node -> score. If the tolerance is not reached within
        ``max_iter`` steps, the scores of the last step are returned
        (earlier the function fell off the end and returned ``None``,
        which made callers such as ``pagerank(G).values()`` fail).
    """
    if len(G) == 0:
        return {}

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(G, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector
    x = dict.fromkeys(W, 1.0 / N)

    # Uniform personalization vector
    p = dict.fromkeys(W, 1.0 / N)

    # Score held by nodes without out-edges is redistributed using the same
    # uniform vector.
    dangling_weights = p
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]

        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N*tol:
            break

    # Converged or not, hand back the most recent iterate so the caller
    # always receives a dict.
    return x

# Scale every PageRank score by 3000; the scaled values are used as node sizes when drawing.
p_rank = [score * 3000 for score in pagerank(G).values()]
print(p_rank)
######################################################################################################################


# Community detection on the undirected version of the graph, plus the
# modularity of the resulting partition.
H = G.to_undirected()
communities = community.best_partition(H)
global_modularity = community.modularity(communities, H)
print(global_modularity)
# Community id of each node, in node order; used below as the node colours.
values = []
for node in H.nodes():
    values.append(communities.get(node))





# Edges: collect the weight of every edge (one entry per edge, so values can repeat).
all_weights = []
for (node1,node2,data) in G.edges(data=True):
    all_weights.append(data['weight']) #we'll use this when determining edge thickness

print all_weights
# Plot the edges - one weight at a time
pos=nx.spring_layout(G) 
labeldict = {}                      #dictionary of node index -> node name



# Node k is labelled with the k-th top name.
for i in range(0,len(top)):
    labeldict[i] = top[i]
# A weight shared by several edges appears several times in all_weights, so those
# edges are drawn again for each repeat.
for weight in all_weights:
    #Form a filtered list with just the weight you want to draw
    weighted_edges = [(node1,node2) for (node1,node2,edge_attr) in G.edges(data=True) if edge_attr['weight']==weight]
    # Line width is the edge weight scaled down by the fixed divisor 66.
    width = weight/66
    # NOTE(review): the keyword is spelled edges_color; networkx documents edge_color - confirm this argument has any effect.
    nx.draw_networkx_edges(G,pos,edgelist=weighted_edges,width=width,edges_color=values)

# Nodes are coloured by community id and sized by the scaled PageRank value.
# NOTE(review): with_labels is passed here, but labels are drawn separately below - confirm it is needed.
nx.draw_networkx_nodes(G,pos,node_color=values,node_size=p_rank,with_labels = True)

# customize positions of labels and font size
# Each label is placed 0.13 below its node so the text does not sit on the marker.
pos_new = {}
for k, v in pos.items():
    pos_new[k] = (v[0], v[1]-0.13)

# This call must start at column 0: it is a module-level statement, and any
# leading whitespace here is an IndentationError.
nx.draw_networkx_labels(G,pos=pos_new,labels=labeldict,
                        font_size=14,
                        font_family='ubuntu')

#==============================================================================
# for k in range(10):
#     num=np.log(p_rank[k])*7
#     nx.draw_networkx_labels(G,pos=pos_new[k],labels=labeldict[k],
#                             font_size=num,
#                             font_family='ubuntu')          
#==============================================================================   



# Hide the axes and display the figure.
plt.axis('off')

plt.show() 
########################################################################################################################   

# Report the time elapsed since `begin` was recorded at the top of the script.
# NOTE(review): if plt.show() blocks until the window is closed, that waiting time is included - confirm.
end = time.clock()
print end - begin #calculate difference (elapsed time)

我正在尝试使用networkx绘制网络。一切都很顺利。但是在我替换了代码块之后

# Working version: one call draws every label, with pos and labels passed as whole dicts.
nx.draw_networkx_labels(G,pos=pos_new,labels=labeldict,
                        font_size=14, 
                        font_family='ubuntu')

与

# Attempt to give each label its own font size, derived from the node's scaled PageRank value.
for k in range(10):
    num=np.log(p_rank[k])*7
    # pos and labels each receive a single entry (pos_new[k], labeldict[k]) instead of a dict.
    # Per the traceback, draw_networkx_labels iterates labels.items(), so passing the
    # numpy string labeldict[k] is what raises the AttributeError.
    nx.draw_networkx_labels(G,pos=pos_new[k],labels=labeldict[k],
                            font_size=num,
                            font_family='ubuntu')

目的是让各个标签使用不同的字体大小。但程序返回了一个错误：AttributeError: 'numpy.string_' object has no attribute 'items'

Traceback (most recent call last):

  File "<ipython-input-9-06fd195d7391>", line 1, in <module>
    runfile('C:/Users/Irourong/Desktop/PIC 16/Project/Final/Final/final_project.py', wdir='C:/Users/Irourong/Desktop/PIC 16/Project/Final/Final')

  File "C:\Users\Irourong\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
    execfile(filename, namespace)

  File "C:\Users\Irourong\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)

  File "C:/Users/Irourong/Desktop/PIC 16/Project/Final/Final/final_project.py", line 262, in <module>
    font_family='ubuntu')

  File "C:\Users\Irourong\Anaconda2\lib\site-packages\networkx\drawing\nx_pylab.py", line 791, in draw_networkx_labels
    for n, label in labels.items():

我该如何解决这个问题？

编辑：这是我根据 @Joel 的建议修改代码后得到的结果：

https://i.stack.imgur.com/7c5KL.png

Answer 1

此代码

# Code from the question: labels=labeldict[k] is a single numpy string and pos=pos_new[k] a
# single position, but draw_networkx_labels iterates labels.items() and so needs dicts.
for k in range(10):
    num=np.log(p_rank[k])*7
    nx.draw_networkx_labels(G,pos=pos_new[k],labels=labeldict[k],
                            font_size=num,
                            font_family='ubuntu')

应该是

# Draw the labels one node at a time so that each call can use its own font size.
for node in range(10):
    # Font size is derived from the log of the node's scaled PageRank value.
    font_size = np.log(p_rank[node])*7
    # labels must stay a dict (node -> text), even when it holds just one node.
    tmp_labels = {node: labeldict[node]}
    # pos is passed as the complete position dict, not as a single entry.
    nx.draw_networkx_labels(G, pos=pos_new, labels = tmp_labels, 
                            font_size=font_size, font_family='ubuntu')

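这里的 tmp_labels 是一个 dict，它把本次调用中要标注的节点（每次调用只有一个节点）映射到对应的标签。出错的原因是：draw_networkx_labels 内部会调用 labels.items()，因此 labels 必须是一个 dict；而原代码传入的 labeldict[k] 只是一个 numpy 字符串，它没有 items 方法。同样，pos 也应当传入完整的位置字典 pos_new，而不是单个节点的坐标 pos_new[k]。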

AttributeError：'numpy.string_'对象没有属性'items'

1 个答案: