我正在尝试使用一个CSV文件构建一个图,该文件包含各节点的边、职业和年龄信息。我为每个节点分配社区,然后执行链接预测。
import networkx as nx
import csv
# One independent, initially empty list per (profession, numeric suffix) pair.
# Each name is bound to its own list object, never a shared one.
engineers1, engineers2, engineers3, engineers4, engineers5 = (
    [] for _ in range(5)
)
actors1, actors2, actors3, actors4, actors5 = ([] for _ in range(5))
writers1, writers2, writers3, writers4, writers5 = ([] for _ in range(5))
doctors1, doctors2, doctors3, doctors4, doctors5 = ([] for _ in range(5))
drivers1, drivers2, drivers3, drivers4, drivers5 = ([] for _ in range(5))
teachers1, teachers2, teachers3, teachers4, teachers5 = (
    [] for _ in range(5)
)
# NOTE(review): `nodes` is not referenced again in the visible script.
nodes = []
# Build the undirected graph: pre-create nodes 0..4037, then add one edge per
# CSV row from its 'first' and 'second' columns.
g = nx.Graph()
g.add_nodes_from(range(4038))
# NOTE(review): csv.DictReader yields every field as a string, so the edge
# endpoints added below are str labels, distinct from the int nodes created
# above -- confirm which label type the rest of the script expects.
# newline='' is what the csv module documentation asks for when opening files.
with open("asd1.csv", 'r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        g.add_edge(row['first'], row['second'])
# The with-statement has already closed the file; no explicit close() needed.
# Second pass over the CSV: file each person's name into the list that matches
# their profession and age band.

# Lowest age of each band: 13-17, 18-29, 30-49, 50-64, 65 and above.
_AGE_BAND_FLOORS = (13, 18, 30, 50, 65)

# For each profession value handled here, its five age-band lists in
# ascending age order.
_LISTS_BY_PROFESSION = {
    'actor': (actors1, actors2, actors3, actors4, actors5),
    'eng': (engineers1, engineers2, engineers3, engineers4, engineers5),
    'teacher': (teachers1, teachers2, teachers3, teachers4, teachers5),
    'driver': (drivers1, drivers2, drivers3, drivers4, drivers5),
    'doctor': (doctors1, doctors2, doctors3, doctors4, doctors5),
}

with open("asd1.csv", 'r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        band_lists = _LISTS_BY_PROFESSION.get(row['profession'])
        if band_lists is None:
            continue  # profession without a list in this script
        try:
            # Ages must be compared as numbers: compared as strings,
            # '9' >= '65' is true and '100' >= '13' is false.
            age = float(row['age'])
        except ValueError:
            continue  # blank or non-numeric age belongs to no band
        # Number of band floors the age has reached; 0 means younger than 13.
        band = sum(age >= floor for floor in _AGE_BAND_FLOORS)
        if band:
            # Each profession now always lands in its own list (the original
            # 50-64 driver/doctor branches appended to each other's lists).
            band_lists[band - 1].append(row['name'])
# The with-statement has already closed the file; no explicit close() needed.
# Print the collected names: one section per profession, one line per age
# band, and a print('\n') after every section.
_report_sections = (
    (
        ("actors having age between 13 and 17: ", actors1),
        ("actors having age between 18 and 29: ", actors2),
        ("actors having age between 30 and 49: ", actors3),
        ("actors having age between 50 and 64: ", actors4),
        ("actors having age 65 and above: ", actors5),
    ),
    (
        ("engineers having age between 13 and 17: ", engineers1),
        ("engineers having age between 18 and 29: ", engineers2),
        ("engineers having age between 30 and 49: ", engineers3),
        ("engineers having age between 50 and 64: ", engineers4),
        ("engineers having age 65 and above: ", engineers5),
    ),
    (
        ("teachers having age between 13 and 17: ", teachers1),
        ("teachers having age between 18 and 29: ", teachers2),
        ("teachers having age between 30 and 49: ", teachers3),
        ("teachers having age between 50 and 64: ", teachers4),
        ("teachers having age 65 and above: ", teachers5),
    ),
    (
        ("drivers having age between 13 and 17: ", drivers1),
        ("drivers having age between 18 and 29: ", drivers2),
        ("drivers having age between 30 and 49: ", drivers3),
        ("drivers having age between 50 and 64: ", drivers4),
        ("drivers having age 65 and above: ", drivers5),
    ),
    (
        ("doctors having age between 13 and 17: ", doctors1),
        ("doctors having age between 18 and 29: ", doctors2),
        ("doctors having age between 30 and 49: ", doctors3),
        ("doctors having age between 50 and 64: ", doctors4),
        ("doctors having age 65 and above: ", doctors5),
    ),
)
for _section in _report_sections:
    for _caption, _members in _section:
        print(_caption, _members)
    print('\n')
# Attach a 'community' attribute to the graph's nodes.
#
# Every node currently in the graph first gets the default community 0, so
# nodes created implicitly by add_edge() are covered as well as 0..4037.
# `Graph.nodes[...]` is used because the `Graph.node` alias was removed in
# NetworkX 2.4.
for _node in g.nodes:
    g.nodes[_node]['community'] = 0

# Community ids follow this order: actors 0-4, engineers 5-9, teachers 10-14,
# drivers 15-19, doctors 20-24.
# NOTE(review): the default community 0 is also the id given to actors1;
# the ids are left unchanged here so existing results stay comparable.
_COMMUNITY_ORDER = (
    actors1, actors2, actors3, actors4, actors5,
    engineers1, engineers2, engineers3, engineers4, engineers5,
    teachers1, teachers2, teachers3, teachers4, teachers5,
    drivers1, drivers2, drivers3, drivers4, drivers5,
    doctors1, doctors2, doctors3, doctors4, doctors5,
)
for _community_id, _members in enumerate(_COMMUNITY_ORDER):
    for _name in _members:
        # Raises KeyError if a name from the CSV is not a node of the graph.
        g.nodes[_name]['community'] = _community_id
# Show the node labels, then materialise and print everything that
# nx.cn_soundarajan_hopcroft() yields for the graph.
print(g.nodes())
link_scores = list(nx.cn_soundarajan_hopcroft(g))
print(link_scores)
答案 0 :(得分:0)
序言
我高度建议您阅读任何解释算法的优秀编程书籍。您的问题可以用几行代码来解决。
第一幕
看看您的问题。您有多个职业、多个年龄组,以及作为唯一标识符的多个名称,而您想把它们彼此区分开。现在看一下您的代码。为了解决这个问题,您为每个“年龄-职业”组合都创建了一个单独的列表。这是所能创建的最不易修改的结构。如果您需要再添加五个职业(而现实中有成千上万种不同的职业),就必须把代码整整增加一倍。此外,复制粘贴时很容易出错。只要把 merchandiser4 错写成 merchandiser3,就足以让您接下来的两个小时盯得两眼通红。看,您的代码中已经有错误了!
# Excerpt of the 'doctor' branches from the question's script, quoted unchanged
# to point out the copy-paste slip in the 50-64 branch.
if (line['profession'] == 'doctor' and line['age'] >= '13' and line['age'] <= '17'):
doctors1.append(line['name'])
if (line['profession'] == 'doctor' and line['age'] >= '18' and line['age'] <= '29'):
doctors2.append(line['name'])
if (line['profession'] == 'doctor' and line['age'] >= '30' and line['age'] <= '49'):
doctors3.append(line['name'])
if (line['profession'] == 'doctor' and line['age'] >= '50' and line['age'] <= '64'):
# Hello, guys! I am ready to torture his brain and eyes for hours!!
# (this 'doctor' branch appends to drivers4, unlike its sibling branches)
drivers4.append(line['name'])
if (line['profession'] == 'doctor' and line['age'] >= '65'):
doctors5.append(line['name'])
而且,作为最后的镜头,您实际上并不需要所有这些列表。例如,您可以为每个专业创建一个字典。或者是其他东西。但是您可以注意到,您的数据对每个人都有非常频繁的重复模式。名称,年龄,职业...等等,我们从哪里获取数据? CSV文件?而什么是CSV文件?
是的
表。
第二幕
如果您从表中读取数据,那么最好也把这些数据存储在表中!(嗯,大多数时候……)Python有一个很棒的表格库——Pandas。您那几百行代码可以减少到二十来行!现在,仔细看我的手,魔法开始了……
零。我们导入pandas:
import pandas as pd
首先。我们为年龄聚类创建单独的功能。如果我们的大老板说我们要处理11岁的神经科学家,我们将完全准备就绪:
def get_age_cluster(age):
    """Map an age to the label of the age band it falls into.

    Args:
        age: Any value accepted by ``int()``, e.g. an int or a numeric string.

    Returns:
        One of ``'<13'``, ``'13-17'``, ``'18-29'``, ``'30-49'``, ``'50-64'``
        or ``'>64'``; ``None`` when the converted age is negative.

    Raises:
        ValueError: If ``int(age)`` cannot parse the value.
        TypeError: If ``age`` is of a type ``int()`` does not accept.
    """
    a = int(age)
    if a < 0:
        # No band is defined for negative ages; say so explicitly instead of
        # falling off the end of the function.
        return None
    # The bands are contiguous, so each test only needs the upper bound.
    if a <= 12:
        return '<13'
    if a <= 17:
        return '13-17'
    if a <= 29:
        return '18-29'
    if a <= 49:
        return '30-49'
    if a <= 64:
        return '50-64'
    return '>64'
第二。我们阅读了CSV。您正在手动操作,逐行,处理每种可能的组合……为什么?!这是一种常见的操作!人们早就写了它!懒惰!
(这是我多年的老老师的建议,多年来我一直存储在我的心中!笑话。我没有心。)
df=pd.read_csv('TF.csv')
是的,仅此而已。是。真。一条线。二十四个符号(记住这个数字!)。现在,让我们与我们的十个小美人成为朋友:
我们刚刚加载了CSV,但没有转换age
列。它包含年龄,但应包含群集。没问题!
df['age'] = df['age'].apply(get_age_cluster)
完成!您可以将任何转换函数应用于表中的行或列。因此,我们不需要分类年龄,分类年龄和分类时间,分类aegs和…。我们可以编写一个漂亮的单线。结果如下:
您可以注意到我们有一些垃圾专栏。没问题!
# Remove both unwanted columns with a single drop() call.
df = df.drop(['waka', 'we_dont_need_this_column'], axis=1)
我们有一张漂亮的小桌子:
现在是主要任务。根据每个职业和年龄获取所有名称。熊猫具有许多分组功能。让我们使用最简单的方法:
# Group the rows by (profession, age) and print the names in every group.
grouped = df.groupby(['profession', 'age'])
for group in grouped.groups:
    # `group` is the key of one group; get_group() returns that group's rows.
    print(group, list(grouped.get_group(group)['name']))
我们得到具有专业年龄组的分组结构:grouped = df.groupby(['profession', 'age'])
,并且对于该结构中的每个组:for group in grouped.groups:
,我们打印:print()
中的“名称”列的列表每组:grouped.get_group(group)['name'])
。结果如下:
('eng', '30-49') ['Cthulhu']
('driver', '18-29') ['John Doe 3']
('actor', '13-17') ['John Doe 4']
('actor', '18-29') ['Yog-Sothoth']
('teacher', '18-29') ['John Doe 2', 'Shub-Niggurath']
('eng', '>64') ['Fblthp the Lost']
('driver', '<13') ['Azathoth']
('doctor', '18-29') ['Nyarlathotep']
('doctor', '30-49') ['John Doe 1']
这是完整的代码:
import pandas as pd
def get_age_cluster(age):
    """Map an age to the label of the age band it falls into.

    Args:
        age: Any value accepted by ``int()``, e.g. an int or a numeric string.

    Returns:
        One of ``'<13'``, ``'13-17'``, ``'18-29'``, ``'30-49'``, ``'50-64'``
        or ``'>64'``; ``None`` when the converted age is negative.

    Raises:
        ValueError: If ``int(age)`` cannot parse the value.
        TypeError: If ``age`` is of a type ``int()`` does not accept.
    """
    a = int(age)
    if a < 0:
        # No band is defined for negative ages; say so explicitly instead of
        # falling off the end of the function.
        return None
    # The bands are contiguous, so each test only needs the upper bound.
    if a <= 12:
        return '<13'
    if a <= 17:
        return '13-17'
    if a <= 29:
        return '18-29'
    if a <= 49:
        return '30-49'
    if a <= 64:
        return '50-64'
    return '>64'
# Load the table, replace each age by its age-band label, drop the unused
# columns, then print the names found in every (profession, age band) group.
df = pd.read_csv('TF.csv')
df['age'] = df['age'].apply(get_age_cluster)
df = df.drop(['waka', 'we_dont_need_this_column'], axis=1)
grouped = df.groupby(['profession', 'age'])
for group in grouped.groups:
    # `group` is the key of one group; get_group() returns that group's rows.
    print(group, list(grouped.get_group(group)['name']))
二十四行。我认为我们现在可以称自己为“神奇的二十四岁”。它就像神奇四侠,但神奇的二十四。但是我们的Graph Doom还活着...
第三幕
我们创建了表格,进行了一些转换,排序和过滤。但是您还有另一个问题-图形。而且这个问题比第一个困难。
您正在从同一个文件中读取节点(人)和边(我不清楚它们具体表示什么关系)。这迫使您的图受到一个严格的限制——节点数等于边数。这是非常罕见的情况。我认为您在开始编写此脚本之前就做错了什么。我建议您为节点和边使用不同的文件(或者至少使用同一文件中的不同部分)。但是!假设您做的正是自己想要的,并且每个人(当然还有克苏鲁!)都只有一条边。在这种情况下,我们只用两行代码就可以构建图:
# Each row of the 'first'/'second' columns becomes one edge of a new graph.
edge_rows = df[['first', 'second']].values
G = nx.Graph()
G.add_edges_from(edge_rows)
宾果!我们完了。现在让我们得到这个奇怪的复杂事情:
设置每个节点的社区(请注意,算法需要它):
# Put every node of G into community 0 by setting its 'community' attribute.
for n in G.nodes:
    G.nodes[n]['community'] = 0
并计算此:
csh = nx.cn_soundarajan_hopcroft(G)
我们得到一个迭代器。将其转换为列表并获取结果:
[(1, 8, 2),
(1, 9, 0),
(1, 2, 4),
(1, 4, 0),
(1, 6, 2),
(2, 8, 2),
(2, 9, 2),
(2, 5, 0),
(2, 6, 2),
(3, 9, 0),
(3, 4, 2),
(3, 5, 2),
(3, 6, 0),
(3, 7, 4),
(4, 8, 0),
(4, 5, 2),
(4, 7, 2),
(5, 8, 0),
(5, 9, 0),
(5, 7, 2),
(6, 8, 0),
(6, 9, 2),
(6, 7, 0),
(7, 8, 0),
(7, 9, 0),
(8, 9, 0)]
总决赛
希望您喜欢我为您写的这部音乐小作品 :) 我建议您读一些不错的Python编程书和算法书。祝您好运!