我需要转换dataframe中列的数据类型并捕获所有数据类型转换失败。我尝试过以下选项,但它会抛出"任务不可序列化"。
var errorListBuffer = new ListBuffer[Map[String, String]]()
df.map(r => {
val value = r.getAs(columnName).toString
val index = r.fieldIndex(columnName)
Try {
val cleanValue = value match {
case n if r.isNullAt(index) => null
case x => x.trim
}
new_type match {
case "date" => new SimpleDateFormat("yyyy-MM-dd").format(new SimpleDateFormat(dateFormat).parse(cleanValue))
case "datetime" => new SimpleDateFormat("yyyy-MM-dd hh:mm:ss").format(new SimpleDateFormat(dateFormat).parse(cleanValue))
case "string" => toLower match {
case "1" => cleanValue.toLowerCase
case _ => cleanValue
}
case _ => cleanValue
}
} match {
case Success(v) => org.apache.spark.sql.Row.fromSeq(r.toSeq ++ v)
case Failure(e) => errorListBuffer += Map(
LOADER_COLUMN_NAME -> columnName,
LOADER_LEVEL -> "ERROR",
LOADER_ERROR_MESSAGE -> e.getMessage,
LOADER_RECORD_UUID -> r.getAs(LOADER_UUID).toString)
org.apache.spark.sql.Row.fromSeq(r.toSeq ++ null)
}
})
var dfnew = sqlContext.createDataFrame(df, schema)
请告诉我如何解决这个问题。