我的数据集如下所示,每条记录的格式为:
(hrs,name,moneyearned)
(0,"cat26",30.9), (0,"cat13",22.1), (0,"cat95",19.6), (0,"cat105",1.3),
(1,"cat67",28.5), (1,"cat4",26.8), (1,"cat13",12.6), (1,"cat23",5.3),
(2,"cat56",39.6), (2,"cat40",29.7), (2,"cat187",27.9), (2,"cat68",9.8),
(3,"cat8",35.6)
我想找出每个小时(hrs)内按收入(moneyearned)从高到低排序后排在第 n 位的记录,下面的代码可以实现这一点。
// Zero-based rank to select within every hour: 0 is the highest moneyearned,
// 1 the second highest, and so on up to n.
val position1 = sc.broadcast(0)
import sqlContext.implicits._

// Sample (hrs, name, moneyearned) records, grouped by their hour so that each
// hour can be ranked independently.
val dataRDD = sc.parallelize(Seq(
  (0, "cat26", 30.9), (0, "cat13", 22.1), (0, "cat95", 19.6), (0, "cat105", 1.3),
  (1, "cat67", 28.5), (1, "cat4", 26.8), (1, "cat13", 12.6), (1, "cat23", 5.3),
  (2, "cat56", 39.6), (2, "cat40", 29.7), (2, "cat187", 27.9), (2, "cat68", 9.8),
  (3, "cat8", 35.6)
)).groupBy { case (hrs, _, _) => hrs }

// Within each hour, order the records by moneyearned (largest first) and pair
// every record with its zero-based position in that ordering.
val orderedData = dataRDD.flatMap { case (_, records) =>
  val byEarningsDesc = records.toSeq.sortWith((left, right) => left._3 > right._3)
  byEarningsDesc.zipWithIndex
}

// Expose the (record, position) pairs as a DataFrame and print only the rows
// sitting at the requested position.
val rankedDF = orderedData.toDF("Data", "position")
rankedDF.filter($"position" === position1.value).show()