User | Computer
1 A
2 B
3 A
import pandas as pd
# Sample data: one row per (user, computer) pair; user 'd' appears on two computers.
df = pd.DataFrame(
    [('a', 1), ('b', 1), ('c', 2), ('d', 3), ('d', 1), ('e', 1)],
    columns=['user', 'computer'],
)
id user computer
0 a 1
1 b 1
2 c 2
3 d 3
4 d 1
5 e 1
# NOTE(review): DataFrame.join matches the caller's `on` column against the *index* of the
# other frame (here the default 0..5 row index), not against the other frame's 'computer'
# column. Each row is therefore paired with the row whose index equals its computer number,
# which is why the result below does not list the users that share a computer.
joined = df.join(df, on='computer', rsuffix='y')
id user computer usery computery
0 a 1 b 1
1 b 1 b 1
2 c 2 c 2
3 d 3 d 3
4 d 1 b 1
5 e 1 b 1
答案 0 :(得分:1)
import numpy as np, pandas as pd, matplotlib.pyplot as plt, networkx as nx
df = pd.DataFrame(
    {'user': ['a', 'b', 'c', 'd', 'd', 'e'], 'computer': [1, 1, 2, 3, 1, 1]})
# Build a graph whose nodes are both users and computers, with one edge per
# (user, computer) row of the frame.
G = nx.from_pandas_edgelist(df, source='user', target='computer')
pos = nx.spring_layout(G)
# Digit-labelled nodes (the computers) are drawn red, everything else (the users) blue.
colours = ['red' if str(node).isdigit() else 'blue' for node in G.nodes]
# `with_labels` is the keyword that toggles node labels; `label` only sets the legend entry.
nx.draw_networkx(G, pos, with_labels=True, node_color=colours)
import itertools
subgraphs = []
# nx.connected_component_subgraphs was removed in networkx 2.4; iterating over
# nx.connected_components yields the node set of each connected component instead.
for cc in nx.connected_components(G):
    # collect all nodes in the connected component that aren't labeled with digits
    nodes = [a for a in cc if not str(a).isdigit()]
    Subgraph = nx.Graph()
    # add the nodes explicitly so a component with a single user is not dropped
    Subgraph.add_nodes_from(nodes)
    # generate all pairwise combinations for these nodes and add as edges:
    Subgraph.add_edges_from(itertools.combinations(nodes, 2))
    subgraphs.append(Subgraph)
# optional:
# combine all subgraphs into a new graph
# (nx.compose only accepts exactly two graphs; compose_all takes any number)
G_new = nx.compose_all(subgraphs)
pos = nx.spring_layout(G_new)
nx.draw_networkx(G_new, pos)
G = nx.from_pandas_edgelist(df, source='user', target='computer')
# add some more connections (extra digit-labelled nodes 4, 5 and 6 linked to users a, b, c)
G.add_edges_from([(4, 'b'), (4, 'a'), (5, 'b'), (5, 'c'), (6, 'b'), (6, 'a')])
pos = nx.spring_layout(G)
# Digit-labelled nodes are drawn red, all other nodes blue.
colours = ['red' if str(node).isdigit() else 'blue' for node in G.nodes]
# `with_labels` is the keyword that toggles node labels; `label` only sets the legend entry.
nx.draw_networkx(G, pos, with_labels=True, node_color=colours)
import itertools
# collect all connection nodes (the digit-labelled nodes)
connecting_nodes = [n for n in G.nodes if str(n).isdigit()]
edgelist = []
for cn in connecting_nodes:
    # create all combinations of adjacent nodes and store in list of tuples
    edgelist += itertools.combinations(G.neighbors(cn), 2)
# remove positional information, so ('b', 'a') and ('a', 'b') count as the same pair
edgelist = [tuple(sorted(set(a))) for a in edgelist]
from collections import Counter
# now count occurrences of each tuple (= number of "independent connections"
# between two non-digit nodes).
# Counter(edgelist) returns a dict, i.e. {('a', 'b'): 2, ...},
# which can be unpacked into (u, v, weight) triples like so:
weighted_edges = [(*u, v) for u, v in Counter(edgelist).items()]
# now make new graph with non-digit nodes and add weighted edges:
H = nx.Graph()
H.add_nodes_from([n for n in G.nodes if not str(n).isdigit()])
# without this call H has no edges at all and `weights` below would be empty
H.add_weighted_edges_from(weighted_edges)
# and draw, with width proportional to weight
pos = nx.spring_layout(H)
weights = [e[2]['weight'] for e in H.edges(data=True)]
nx.draw_networkx(H, pos, width=weights)
答案 1 :(得分:1)
import pandas as pd
def get_edges(df, var, on):
    """Get all combinations of variable var that share a value for variable on (using an inner join).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns named by ``var`` and ``on``.
    var : str
        Column whose values become the endpoints of the edges.
    on : str
        Column to self-join on; two ``var`` values are linked when they
        appear in rows with the same ``on`` value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_edges, 2)``. Every pair is returned in both
        directions (``[x, y]`` and ``[y, x]``); pairs of a value with itself
        are excluded.
    """
    # Pin the suffixes so the column names used below do not rely on merge defaults.
    inner_self_join = df.merge(df, how='inner', on=on, suffixes=('_x', '_y'))
    left, right = var + '_x', var + '_y'
    # The self-join also pairs every row with itself; drop those trivial pairs.
    excluding_self_pairs = inner_self_join[inner_self_join[left] != inner_self_join[right]]
    edges = excluding_self_pairs[[left, right]].values
    return edges
# Example: one row per (user, computer) pair; the resulting user pairs are listed below.
df = pd.DataFrame(
    [('a', 1), ('b', 1), ('c', 2), ('d', 3), ('d', 1), ('e', 1)],
    columns=['user', 'computer'],
)
edges = get_edges(df, var='user', on='computer')
# array([['a', 'b'],
# ['a', 'd'],
# ['a', 'e'],
# ['b', 'a'],
# ['b', 'd'],
# ['b', 'e'],
# ['d', 'a'],
# ['d', 'b'],
# ['d', 'e'],
# ['e', 'a'],
# ['e', 'b'],
# ['e', 'd']], dtype=object)
然后可以用这个边列表(edge list)创建 networkx 的 Graph,例如 `nx.from_edgelist(edges)`。
答案 2 :(得分:1)
import pandas as pd
import networkx as nx
df = pd.DataFrame({'user': ['a', 'b', 'c', 'd', 'd', 'e'], 'computer': [1, 1, 2, 3, 1, 1]})
G = nx.from_pandas_edgelist(df, source='user', target='computer')
# Derive the two node sets from the data instead of typing them out by hand.
cnodes = df['computer'].unique().tolist()  # the computers
unodes = df['user'].unique().tolist()  # the users
# create the network based only on users. An edge means they share a computer
Uprojection = nx.algorithms.bipartite.overlap_weighted_projected_graph(G, unodes)
# the edges are weighted based on how much they share (see documentation for details);
# by default the weight is the Jaccard index of the two users' sets of computers
Uprojection.edges(data=True)
# > EdgeDataView([('a', 'd', {'weight': 0.5}), ('a', 'b', {'weight': 1.0}), ('a', 'e', {'weight': 1.0}), ('b', 'd', {'weight': 0.5}), ('b', 'e', {'weight': 1.0}), ('d', 'e', {'weight': 0.5})])
nx.draw(Uprojection, with_labels=True)