How to replace null inside a nested structure in a JSON file

Date: 2019-04-17 10:37:48

Tags: scala

I am extracting some fields from a CSV file and trying to put them into a JSON file. When AdditionalIdentifier contains no value, I want to use an empty array (i.e. []), but when the data is saved to the JSON file, null is inserted between the square brackets. I tried the otherwise option with when, but it throws a data type mismatch error. Can someone help me?

While selecting the fields, I tried checking whether a particular field is null; if it is not, I build the struct and put it into the array, otherwise I use an empty array, but this does not work. The error I get is pasted below:

[null]
Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve 'CASE WHEN (`ISIN` IS NOT NULL) THEN named_struct('IdentifierType', 'ISIN', 'IdentifierValue', `ISIN`) ELSE [] END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type;;
'Project [ObservationID#30, PublicationDate#11, substring(ObservationDate#37, 1, 10) AS ObservationDate#72, ObservationType#32, RIC#10, array(CASE WHEN isnotnull(ISIN#28) THEN named_struct(IdentifierType, ISIN, IdentifierValue, ISIN#28) ELSE [] END) AS AdditionalIdentifier#75]
+- Relation[RIC#10,PublicationDate#11,Type#12,Price#13,Volume#14,Qualifiers#15,SeqNo#16,ExchangeTime#17,ExchangeDate#18,AccVolume#19,OriginalExchangeDate#20,OriginalPrice#21,OriginalVolume#22,OriginalSeqNo#23,OriginalExchangeTime#24,TradePriceCurrency#25,TradeID#26,OriginalTradeID#27,ISIN#28,GMTOffset#29,ObservationID#30,OriginalObservationDate#31,ObservationType#32,TradeEventType#33,... 7 more fields] csv

    at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:115)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:107)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:278)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:278)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:277)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$mapChild$2(TreeNode.scala:295)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4$$anonfun$apply$13.apply(TreeNode.scala:354)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:354)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:275)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:104)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:116)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$2.apply(QueryPlan.scala:121)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:121)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:126)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:126)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:93)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:107)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:85)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:85)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:95)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:108)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
    at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3407)
    at org.apache.spark.sql.Dataset.select(Dataset.scala:1335)
    at com.tr.edp.bulk.frtb.processor.SvdsFileProcessor.process(SvdsFileProcessor.scala:58)
    at com.tr.edp.bulk.frtb.executer.SVDSFileRunner$.main(SVDSFileRunner.scala:28)
    at com.tr.edp.bulk.frtb.executer.SVDSFileRunner.main(SVDSFileRunner.scala)

If I remove the otherwise part, the code runs, but the output contains null (shown after the code). My code is:

val result = svdsFile.select(
  col("ObservationID"),
  col("PublicationDate"),
  col("ObservationDate").substr(1, 10).alias("ObservationDate"), // extracting the required fields
  col("ObservationType"),
  col("RIC"),
  array(when(col("ISIN").isNotNull,
    struct(lit("ISIN").alias("IdentifierType"),
           col("ISIN").alias("IdentifierValue")))
    .otherwise(Array.empty[Long])).alias("AdditionalIdentifier"))

The output I get is:

{ "ObservationID": "6B86B27355FF34375747F56ASD4567GH", "PublicationDate": "8/8/2018", "ObservationDate": "08/08/2018", "ObservationType": "Trade", "RIC": "AAME.gF", "AdditionalIdentifier": [{ "IdentifierType": "ISIN", "IdentifierValue": "hdsjdjs" }] }
{ "ObservationID": "4G5FD6HSUR48HFEDKH5HFSFHFG8888", "PublicationDate": "12/18/2018", "ObservationDate": "8/9/201812", "ObservationType": "Mkt. Condition", "RIC": "AAME.gF", "AdditionalIdentifier": [{ "IdentifierType": "ISIN", "IdentifierValue": "hkcsdhsdhc" }] }
{ "ObservationID": "8FGH45JKGH78HG90BHUIDF4GH56FYT", "PublicationDate": "12/18/2018", "ObservationDate": "8/10/20181", "ObservationType": "Trade", "RIC": "gAOI.DF", "AdditionalIdentifier": [null] }
{ "ObservationID": "8GH56FGHR798HFGRTY465HFJ89HG74", "PublicationDate": "12/18/2018", "ObservationDate": "8/11/20181", "ObservationType": "Market Price", "RIC": "gIIQ.DF", "AdditionalIdentifier": [{ "IdentifierType": "ISIN", "IdentifierValue": "sddvgsdag" }] }
{ "ObservationID": "8DFG73HRR7HDHD7HEJF7HDE7HEH66", "PublicationDate": "12/18/2018", "ObservationDate": "8/12/20181", "ObservationType": "Market Price", "RIC": "ggNE.DF", "AdditionalIdentifier": [null] }

When the AdditionalIdentifier field is empty, there should be no null between the brackets; it should appear as [].
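For reference, a minimal sketch (not from the original post) of one way to make the two branches type-compatible: keep when/otherwise outside of array() so that both branches yield an array of structs, and cast the empty array to the matching element type. The type string idArrayType below is an assumption derived from the struct built in the code above:

import org.apache.spark.sql.functions._

// Assumed element type, matching the struct built in the code above.
val idArrayType = "array<struct<IdentifierType:string,IdentifierValue:string>>"

val result = svdsFile.select(
  col("ObservationID"),
  col("PublicationDate"),
  col("ObservationDate").substr(1, 10).alias("ObservationDate"),
  col("ObservationType"),
  col("RIC"),
  // Both branches of the CASE WHEN now return array<struct<...>>:
  // a one-element array when ISIN is present, an empty typed array otherwise.
  when(col("ISIN").isNotNull,
    array(struct(lit("ISIN").alias("IdentifierType"),
                 col("ISIN").alias("IdentifierValue"))))
    .otherwise(array().cast(idArrayType))
    .alias("AdditionalIdentifier"))

With this, rows without an ISIN should serialize as "AdditionalIdentifier": [] rather than [null] when the result is written out as JSON.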

0 Answers:

There are no answers yet.