如何将 Spark(Jobs)UI 中的阶段 ID/作业 ID 映射到 SQL 可视化图中对应的阶段?
即
// Reproduction script (spark-shell style): builds a small DataFrame, joins it with a lookup
// table, computes two per-group rankings, and full-outer-joins the two "top ranked" views.
import org.apache.spark.sql.expressions.Window

// Sample rows: (group, id, value1, value2).
val df = Seq(
  ("a", 1, 1, 10),
  ("a", 1, 2, 20),
  ("a", 2, 3, 30),
  ("b", 1, 4, 40),
  ("b", 1, 5, 50)
).toDF("group", "id", "value1", "value2")

// Both rankings below partition by the same column.
val commonPartition = Window.partitionBy("group")

// Lookup rows keyed by group: (group, e1, e2).
val enrich = Seq(
  ("a", "x", "y"),
  ("b", "foo", "y")
).toDF("group", "e1", "e2")

val enriched = df.join(enrich, Seq("group"), "INNER")

// Window specs: same partitioning, ordered by value1 / value2 descending respectively.
val byValue1Desc = commonPartition.orderBy(desc("value1"))
val byValue2Desc = commonPartition.orderBy(desc("value2"))

val withRank1 = enriched.withColumn("rank1", rank().over(byValue1Desc))
val ranked = withRank1.withColumn("rank2", rank().over(byValue2Desc))
// Caching is disabled here; uncomment to cache `ranked` before it is reused below.
//ranked.cache

// Rows ranked first by value1, reduced to the join keys plus a constant marker column.
val df1Top = ranked.filter(col("rank1") === 1).select(
  col("group"),
  col("id"),
  lit("foo").alias("rank1Top")
)

// Rows ranked first by value2, reduced to the join keys plus a constant marker column.
val df2Top = ranked.filter(col("rank2") === 1).select(
  col("group"),
  col("id"),
  lit("bar").alias("rank2Top")
)

// Join each "top" view back onto `ranked`, dropping the ranking column of the other view.
val topJoinKeys = Seq("group", "id")
val df1 = ranked.join(df1Top, topJoinKeys).drop("rank2")
val df2 = ranked.join(df2Top, topJoinKeys).drop("rank1")

// Full outer join of the two views on `id` only, then print the result.
val fullJoined = df1.join(df2, Seq("id"), "FULL")
fullJoined.show()
在 SQL 可视化图表中,哪里可以找到 stage 25?