答案 0 :(得分:2)
按主键连接(join)两个 DataFrame,然后使用 withColumn 和 UDF:把两列的值(旧值和新值)传入 UDF,在 UDF 中比较这两个值,如果两者不相同则返回该值。
// UDF comparing an old and a new column value: yields the new value when the
// two differ and an empty string when they are equal.
// (The comparison was previously inverted and returned the value only when
// nothing had changed.)
val check = udf((old_val: String, new_val: String) => if (old_val != new_val) new_val else "")

// NOTE(review): assumes `df` is the old and new data already joined on the
// primary key, with the new columns carrying a `new_` prefix — confirm.
val df_check = df
  .withColumn("Check_Name", check(df.col("name"), df.col("new_name")))
  .withColumn("Check_Namelast", check(df.col("lastname"), df.col("new_lastname")))
或者定义一个函数(def):
import org.apache.spark.sql.DataFrame

/**
 * Compares two DataFrames row by row and reports which columns changed.
 *
 * Both inputs are collected to the driver, so this is only suitable for data
 * that fits in driver memory. Rows are paired by position and columns by
 * index; if the inputs differ in row count, the surplus rows are ignored.
 *
 * NOTE(review): assumes both DataFrames share the same column layout, are
 * sorted identically, and keep the primary key in the first column — confirm
 * against the caller.
 *
 * @param old_df the DataFrame holding the previous values
 * @param new_df the DataFrame holding the current values
 * @return a DataFrame with one row per compared pair: `primary_key` (first
 *         column of the old row, as a string) and `remarks` (one
 *         "<column> has value changed" entry per differing column, comma
 *         separated; empty when nothing changed)
 */
def fn(old_df: DataFrame, new_df: DataFrame): DataFrame = {
  val columnNames = old_df.columns
  val oldRows = old_df.collect() // materialise on the driver so rows can be paired
  val newRows = new_df.collect()

  val changes: Seq[(String, String)] = oldRows.zip(newRows).map { case (oldRow, newRow) =>
    // Only compare indices present in both rows to avoid out-of-range access.
    val comparable = math.min(columnNames.length, math.min(oldRow.length, newRow.length))
    // Remarks are built fresh for every row so changes never leak between rows.
    val remarks = (0 until comparable)
      .filter(j => oldRow.get(j) != newRow.get(j)) // `!=` is null-safe in Scala
      .map(j => columnNames(j) + " has value changed")
      .mkString(", ")
    val primaryKey = if (oldRow.length > 0) String.valueOf(oldRow.get(0)) else ""
    (primaryKey, remarks)
  }.toSeq

  // Turn the collected (key, remarks) pairs back into a DataFrame.
  old_df.sparkSession.createDataFrame(changes).toDF("primary_key", "remarks")
}