我正在尝试在spark UPDATE
上实现DataFrames
命令。但得到这个错误。请提出应该做什么的建议。
17/01/19 11:49:39 INFO Replace$: query --> UPDATE temp SET c2 = REPLACE(c2,"i","a");
17/01/19 11:49:39 ERROR Main$: [1.1] failure: ``with'' expected but identifier UPDATE found
UPDATE temp SET c2 = REPLACE(c2,"i","a");
^
java.lang.RuntimeException: [1.1] failure: ``with'' expected but identifier UPDATE found
UPDATE temp SET c2 = REPLACE(c2,"i","a");
这是程序
object Replace extends SparkPipelineJob{
val logger = LoggerFactory.getLogger(getClass)
protected implicit val jsonFormats: Formats = DefaultFormats
def createSetCondition(colTypeMap:List[(String,DataType)], pattern:String, replacement:String):String = {
val res = colTypeMap map {
case (c,t) =>
if(t == StringType)
c+" = REPLACE(" + c + ",\"" + pattern + "\",\"" + replacement + "\")"
else
c+" = REPLACE(" + c + "," + pattern + "," + replacement + ")"
}
return res.mkString(" , ")
}
override def execute(dataFrames: List[DataFrame], sc: SparkContext, sqlContext: SQLContext, params: String, productId: Int) : List[DataFrame] = {
import sqlContext.implicits._
val replaceData = ((parse(params)).extractOpt[ReplaceDataSchema]).get
logger.info(s"Replace-replaceData --> ${replaceData}")
val (inputDf, (columnsMap, colTypeMap)) = (dataFrames(0), LoadInput.colMaps(dataFrames(0)))
val tableName = Constants.TEMP_TABLE
inputDf.registerTempTable(tableName)
val colMap = replaceData.colName map {
x => (x,colTypeMap.get(x).get)
}
logger.info(s"colMap --> ${colMap}")
val setCondition = createSetCondition(colMap,replaceData.input,replaceData.output)
val query = "UPDATE "+tableName+" SET "+setCondition+";"
logger.info(s"query --> ${query}")
val outputDf = sqlContext.sql(query)
List(outputDf)
}
}
这是一些额外的信息。
17/01/19 11:49:39 INFO Replace$: Replace-replaceData --> ReplaceDataSchema(List(SchemaDetectData(s3n://fakepath/data37.csv,None,None)),List(c2),i,a)
17/01/19 11:49:39 INFO Replace$: colMap --> List((c2,StringType))
data37.csv
c1 c2
90 nine
如果需要,请询问额外信息。
答案 0 :(得分:1)
Spark SQL不支持UPDATE
个查询。如果你想"修改"您应该使用SELECT
创建新表的数据:
SELECT * REPLACE(c2, 'i', 'a') AS c2 FROM table