// Question's version. BUG: the trailing dot below chains the first two `when`s…
def takeArray(c: Column): Column = {
when(size(c) === lit(4), c.getItem(2)).
when(size(c) === lit(2), c.getItem(1))
// …but there is NO dot at the end of the line above, so the chain breaks here.
// The two `when`s above become a discarded expression statement, and the block's
// value is only the expression below — which is why sizes 4 and 2 hit otherwise(-100).
when(size(c) === lit(0), lit(0)).otherwise(lit(-100))
}
// Apply takeArray to the array column `id` and display the picked value next to the array size.
df.withColumn("new_col", takeArray(col("id")))
.select( col("id"),size(col("id")), col("new_col")).show()
函数 takeArray:根据数组的长度,取出对应索引处的元素并返回。
+------------+--------+-------+
| id|size(id)|new_col|
+------------+--------+-------+
|[1, 2, 3, 4]| 4| -100|
| [3, 4]| 2| -100|
| []| 0| 0|
+------------+--------+-------+
已更新:
补充 DataFrame 的 schema 如下:
root
|-- id: array (nullable = false)
| |-- element: integer (containsNull = false)
我检查了输出,发现结果是错误的。第一行 id 列的大小为 4,应当匹配第一个 when 子句并返回索引 2 处的元素(即 3),但返回的却是 -100。有什么思路吗?为什么会得到这样奇怪的结果?
示例2:
import org.apache.spark.sql.Column
// Maps an age column to a life-stage label; ages outside every branch
// (and null ages) yield null because there is no `otherwise` clause.
def lifeStage(col: Column): Column =
  when(col > 18, "adult")
    .when(col >= 13, "teenager")
    .when(col < 13, "child")
// Build a tiny age DataFrame and print each age with its derived life stage.
val df = Seq(10, 15, 25).toDF("age")
df.withColumn("life_stage", lifeStage(col("age"))).show()
+---+----------+
|age|life_stage|
+---+----------+
| 10| child|
| 15| teenager|
| 25| adult|
+---+----------+
解决方案:问题出在点号(.)上——第二个 when 子句末尾漏掉了链式调用所需的点,导致 when 链被截断。
答案 0 :(得分:0)
第二个条件后您缺少一个点。请检查以下代码。
scala> :paste
// Entering paste mode (ctrl-D to finish)
// Broken version reproduced as-is: the missing dot splits the `when` chain,
// so only the last `when(...).otherwise(...)` expression is returned.
def takeArray(c: Column): Column = {
when(size(c) === lit(4), c.getItem(2)).
when(size(c) === lit(2), c.getItem(1)) // here you are missing one dot, if you add dot here your code will work.
when(size(c) === lit(0), lit(0)).otherwise(lit(-100))
}
// Running the broken version: rows of size 4 and 2 fall through to otherwise(-100).
df.withColumn("new_col", takeArray(col("id"))).select( col("id"),size(col("id")), col("new_col")).show()
// Exiting paste mode, now interpreting.
+------------+--------+-------+
| id|size(id)|new_col|
+------------+--------+-------+
|[1, 2, 3, 4]| 4| -100|
| [3, 4]| 2| -100|
| []| 0| 0|
+------------+--------+-------+
takeArray: (c: org.apache.spark.sql.Column)org.apache.spark.sql.Column
scala> :paste
// Entering paste mode (ctrl-D to finish)
// Fixed version: every `when` is joined to the previous one with a dot,
// so the whole chain is a single Column expression ending in `otherwise`.
def takeArray(c: Column): Column = {
  val len = size(c)
  when(len === lit(4), c.getItem(2))
    .when(len === lit(2), c.getItem(1))
    .when(len === lit(0), lit(0))
    .otherwise(lit(-100))
}
// Running the fixed version: sizes 4 and 2 now return items at index 2 and 1 (values 3 and 4).
df.withColumn("new_col", takeArray(col("id"))).select( col("id"),size(col("id")), col("new_col")).show()
// Exiting paste mode, now interpreting.
+------------+--------+-------+
| id|size(id)|new_col|
+------------+--------+-------+
|[1, 2, 3, 4]| 4| 3|
| [3, 4]| 2| 4|
| []| 0| 0|
+------------+--------+-------+
takeArray: (c: org.apache.spark.sql.Column)org.apache.spark.sql.Column
scala>