假设我创建了下面这个 GraphFrame 图。我的问题是:如何将它可视化?
# Build a small example GraphFrame from a vertex table and an edge table.
# NOTE(review): `sqlContext` is not defined in this snippet; it is presumably
# supplied by the PySpark shell / notebook environment -- confirm before
# running this as a standalone script.
from graphframes import GraphFrame

# Vertex DataFrame with a unique ID column named "id".
v = sqlContext.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
    ],
    ["id", "name", "age"],
)

# Edge DataFrame with "src" and "dst" columns; both refer to vertex ids.
e = sqlContext.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow"),
    ],
    ["src", "dst", "relationship"],
)

# Combine both tables into the graph.
g = GraphFrame(v, e)
答案 0 :(得分:0)
我找不到任何可视化数据的原生GraphFrame库。
尽管如此,您可以尝试使用 Databricks 提供的 display() 函数来实现可视化。示例请参见此处(here)。
此外,您也可以先将 GraphFrame 转换为 Python 列表,再使用 matplotlib 或 PyGraphviz 库进行绘图。
答案 1 :(得分:0)
在 Python / PySpark / Jupyter 环境下,我使用 networkx 库的绘图功能。诀窍是先根据 GraphFrame 图创建一个 networkx 图:
import matplotlib.pyplot as plt
import networkx as nx
from graphframes import GraphFrame
from pyspark.sql import SparkSession, SQLContext
def PlotGraph(edge_list, max_edges=1000, directed=False):
    """Draw the edges of a GraphFrame using networkx and matplotlib.

    Args:
        edge_list: Edge DataFrame exposing "src" and "dst" columns
            (for example ``g.edges`` of a GraphFrame).
        max_edges: Upper bound on the number of edge rows fetched with
            ``take``. Defaults to 1000, the limit the function has always used.
        directed: When True, build an ``nx.DiGraph`` so that a->b and b->a
            remain distinct edges. Defaults to False (undirected ``nx.Graph``),
            which matches the previous behavior.

    Returns:
        None. The graph is drawn onto the current matplotlib figure.
    """
    graph_cls = nx.DiGraph if directed else nx.Graph
    plot_graph = graph_cls()

    # Only the endpoints are needed for drawing; cap the number of rows so a
    # large edge table is not fetched in full.
    rows = edge_list.select('src', 'dst').take(max_edges)
    plot_graph.add_edges_from((row['src'], row['dst']) for row in rows)

    # Draw into the first cell of a 1x2 subplot grid, as before.
    plt.subplot(121)
    nx.draw(plot_graph, with_labels=True, font_weight='bold')
# Driver script: build a sample GraphFrame and plot its edges.
spark = (
    SparkSession.builder
    .appName("PlotAPp")
    .getOrCreate()
)

# SQLContext takes a SparkContext as its first argument, not a SparkSession.
# Pass the session's context and hand over the session explicitly so the
# context wraps this session.
# NOTE(review): SQLContext is a legacy entry point; `spark.createDataFrame`
# offers the same call directly on the session.
sqlContext = SQLContext(spark.sparkContext, sparkSession=spark)

# Vertex table: one row per vertex, keyed by "id".
vertices = sqlContext.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
        ("d", "David", 29),
        ("e", "Esther", 32),
        ("e1", "Esther2", 32),
        ("f", "Fanny", 36),
        ("g", "Gabby", 60),
        ("h", "Mark", 61),
        ("i", "Gunter", 62),
        ("j", "Marit", 63),
    ],
    ["id", "name", "age"],
)

# Edge table: "src" and "dst" hold vertex ids.
edges = sqlContext.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "a", "follow"),
        ("c", "a", "follow"),
        ("c", "f", "follow"),
        ("g", "h", "follow"),
        ("h", "i", "friend"),
        ("h", "j", "friend"),
        ("j", "h", "friend"),
        ("e", "e1", "friend"),
    ],
    ["src", "dst", "relationship"],
)

g = GraphFrame(vertices, edges)

# Only the edge table is needed for plotting; vertex "d" has no edges and
# therefore does not appear in the drawing.
PlotGraph(g.edges)