我正在学习 Spark SQL,并尝试用下面的代码进行练习。我想实现的效果类似于下面这条 Spark SQL 查询:
select open , case when open < 160 then "Bad" else "Good" end as status from table_name;
改用 DataFrame API 时,我尝试了以下代码:
val sparkSession = SparkSession.builder().appName("citiGroupData").master("local[*]").getOrCreate()
val baseData = sparkSession.read.option("header", "true").option("inferSchema", "true").csv(filePath)
// baseData.foreach(s => println(s.getAs("Low")))
import sparkSession.implicits._
baseData.printSchema()
baseData.select("Open").show()
我的 schema 详细信息如下:
root
|-- MyDate: timestamp (nullable = true)
|-- Open: double (nullable = true)
|-- High: double (nullable = true)
|-- Low: double (nullable = true)
|-- Close: double (nullable = true)
|-- Volume: integer (nullable = true)
下面这一行会导致编译错误:
baseData.select("Open",when($"Open" <= 160, "Bad").otherwise("Good").alias("Status")).show()
有什么想法吗?
答案 0 :(得分:1)
如果要使用 SQL,您需要先创建一个临时视图。如果使用 DataFrame API,select 的参数必须全部是字符串列名,或者全部是 Column(例如 $"Open"),不能把字符串和 Column 混用,否则无法通过编译。见下文。
scala> val cols = Seq("Date","Open","High","Low","Close","Volume","Adj_Close")
cols: Seq[String] = List(Date, Open, High, Low, Close, Volume, Adj_Close)
scala> val df = Seq(
| ("2012-03-30",608.77,610.56,597.94,599.55,26050900,599.55),
| ("2012-03-29",612.78,616.56,607.23,609.86,21668300,609.86),
| ("2012-03-28",618.38,621.45,610.31,617.62,23385200,617.62),
| ("2012-03-27",606.18,616.28,606.06,614.48,21628200,614.48),
| ("2012-03-26",599.79,607.15,595.26,606.98,21259900,606.98),
| ("2012-03-23",600.49,601.80,594.40,596.05,15359900,596.05),
| ("2012-03-22",597.78,604.50,595.53,599.34,22281100,599.34),
| ("2012-03-21",602.74,609.65,601.41,602.50,22958200,602.50)).toDF(cols:_*)
df: org.apache.spark.sql.DataFrame = [Date: string, Open: double ... 5 more fields]
scala> df.createOrReplaceTempView("stocks")
scala> spark.sql(" select Open, case when Open <= 600 then 'Bad' else 'good' end status from stocks").show()
+------+------+
| Open|status|
+------+------+
|608.77| good|
|612.78| good|
|618.38| good|
|606.18| good|
|599.79| Bad|
|600.49| good|
|597.78| Bad|
|602.74| good|
+------+------+
scala> df.select($"Open",when('open <= 600, "Bad").otherwise("good").alias("status")).show()
+------+------+
| Open|status|
+------+------+
|608.77| good|
|612.78| good|
|618.38| good|
|606.18| good|
|599.79| Bad|
|600.49| good|
|597.78| Bad|
|602.74| good|
+------+------+
scala>
答案 1 :(得分:0)
baseData.select($"Open",when($"Open" <= 160, "Bad").otherwise("Good").alias("Status")).show()