Suppose I have the following DataFrame:
import pyspark.sql.functions as f
from pyspark.sql.window import Window
l = [(9, 1, 'A'),
     (9, 2, 'B'),
     (9, 3, 'C'),
     (9, 4, 'D'),
     (10, 1, 'A'),
     (10, 2, 'B')]
df = spark.createDataFrame(l, ['prod','rank', 'value'])
df.show()
+----+----+-----+
|prod|rank|value|
+----+----+-----+
| 9| 1| A|
| 9| 2| B|
| 9| 3| C|
| 9| 4| D|
| 10| 1| A|
| 10| 2| B|
+----+----+-----+
How can I create a new DataFrame that, for each prod, collects the value column into an array ordered by rank?
Desired output:
l = [(9, ['A', 'B', 'C', 'D']),
     (10, ['A', 'B'])]
l = spark.createDataFrame(l, ['prod', 'conc'])
+----+------------+
|prod| conc|
+----+------------+
| 9|[A, B, C, D]|
| 10| [A, B]|
+----+------------+
Answer 1 (score: -1)
Here is a quick solution based on what you described. Hope it helps.
w = Window.partitionBy('prod').orderBy('rank')
# collect_list over the ordered window builds growing prefix lists per prod; max() keeps the full list
desiredDF = (df.withColumn('values_list', f.collect_list('value').over(w))
             .groupBy('prod').agg(f.max('values_list').alias('conc')))
desiredDF.show()
+----+------------+
|prod| conc|
+----+------------+
| 9|[A, B, C, D]|
| 10| [A, B]|
+----+------------+