我是Zeppelin(和spark& sql)的新手,我正试图在Zeppelin笔记本中运行一个例子。我无法弄清楚这个错误有什么问题:
list: (table: String, col: String)Array[(String, String)]
tables: ()Array[(String, String)]
columns: (table: String)Array[(String, String)]
org.apache.spark.sql.AnalysisException: Invalid usage of '*' in expression 'alias';
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:39)
...
我试图运行的例子来自这篇文章: https://randerzander.github.io/?post=dynamic-forms.md
代码如下:
// Returns the distinct values of `col` in `table`, duplicated into
// (value, value) pairs — the option format Zeppelin's z.select expects.
// NOTE(review): SQL is built by string concatenation; acceptable in a
// notebook, but injectable if `table`/`col` ever come from untrusted input.
def list(table: String, col: String) : Array[(String, String)] = {
sqlContext.sql("select distinct " + col + " from " + table + " order by " + col).collect.map(x => (x(0).asInstanceOf[String], x(0).asInstanceOf[String]))
}
// Lists tables as (name, name) pairs for a Zeppelin dropdown.
// NOTE(review): this version reads x(0); the working answer below reads
// x(1) instead — presumably x(0) is not the table-name column in this
// Spark version's `show tables` output. Confirm against your Spark release.
def tables(): Array[(String, String)] = {
sqlContext.sql("show tables").collect.map(x => (x(0).asInstanceOf[String], x(0).asInstanceOf[String]))
}
// Column names of `table` as (name, name) pairs.
// `limit 0` returns no rows but still resolves the schema, so `.columns`
// yields the header cheaply.
def columns(table: String) : Array[(String, String)] = {
sqlContext.sql("select * from " + table + " limit 0").columns.map(x => (x, x))
}
// NOTE(review): `table` is never defined in this snippet (the answer below
// adds `val table = z.select(...)`), so this cannot run as quoted.
val col1 = z.select("col1", columns(table))
val col2 = z.select("col2", columns(table))
// NOTE(review): the AnalysisException quoted above ("Invalid usage of '*'
// in expression 'alias'") points at `count(*) as count`; the answer below
// replaces it with `count(1) as count`.
val query = "select " + col1 + "," + col2 + ", count(*) as count from " + table + " group by " + col1 + "," + col2 + " order by count desc"
val df = sqlContext.sql(query)
// "%table " + tab-separated header tells Zeppelin to render tabular output.
println("%table " + df.columns.mkString("\t"))
println(df.map(x => x.mkString("\t")).collect().mkString("\n"))
谢谢
答案 0 :(得分:0)
代码中存在几个问题。
以下代码适用于我。
def list(table: String, col: String) : Array[(String, String)] = {
  // Distinct values of `col` in `table`, each duplicated into a
  // (value, value) pair — the option shape Zeppelin's z.select expects.
  val rows = sqlContext.sql(s"select distinct $col from $table order by $col").collect
  rows.map { row =>
    val v = row(0).asInstanceOf[String]
    (v, v)
  }
}
def tables(): Array[(String, String)] = {
  // Each table name paired with itself, for a Zeppelin dropdown.
  // row(1) is read as the table name — NOTE(review): assumes this Spark
  // version's `show tables` puts the name in column 1; confirm per release.
  sqlContext.sql("show tables").collect.map { row =>
    val name = row(1).asInstanceOf[String]
    (name, name)
  }
}
def columns(table: String) : Array[(String, String)] = {
  // `limit 0` fetches no data yet still resolves the schema, so
  // `.columns` gives the header names cheaply; pair each with itself.
  val emptyDf = sqlContext.sql(s"select * from $table limit 0")
  emptyDf.columns.map(c => (c, c))
}
// Drive the dynamic form: pick a table, then two of its columns, and show
// the grouped counts as a Zeppelin %table.
val table = z.select("table", tables()).asInstanceOf[String]
println("table:" + table)
val col1 = z.select("col1", columns(table))
val col2 = z.select("col2", columns(table))
// count(1) rather than count(*): sidesteps the "Invalid usage of '*' in
// expression 'alias'" AnalysisException from the question.
val query = s"select $col1,$col2, count(1) as count from $table group by $col1,$col2 order by count desc"
val df = sqlContext.sql(query)
// "%table " + tab-separated header makes Zeppelin render tabular output.
val header = df.columns.mkString("\t")
println("%table " + header)
val body = df.map(x => x.mkString("\t")).collect().mkString("\n")
println(body)