我尝试在Cloudera集群上通过spark-shell创建RDD,但在从HDFS路径读取文件时遇到了问题:
scala> val file = sc.textFile("hdfs://user/cloudera/nvegesn/emp.txt")
<console>:13: error: not found: value sc
答案 0 :(得分:1)
你没有定义SparkContext(即错误信息中找不到的变量 sc)。可以这样创建:
// Build the Spark configuration and give the application a name.
val conf = new SparkConf().setAppName("Simple Application")
// Create the SparkContext from that configuration; `sc` is the value the
// failing `sc.textFile(...)` call could not find.
val sc = new SparkContext(conf)
例如:
/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
/** Minimal standalone Spark application: counts the lines of a text file
  * that contain the letter "a" and the lines that contain the letter "b".
  */
object SimpleApp {

  /** Entry point.
    *
    * Reads the input file as an RDD, caches it, runs two filter/count jobs
    * over it and prints both counts to standard output.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Three slashes on purpose: an empty URI authority lets Hadoop fall back
    // to the configured default file system. With "hdfs://user/..." the
    // segment "user" is parsed as the NameNode host name, not as a directory.
    val logFile = "hdfs:///user/cloudera/nvegesn/emp.txt"
    val conf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(conf)
    try {
      // Second argument is the minimum number of partitions; the RDD is
      // cached because it is scanned twice below.
      val logData = sc.textFile(logFile, 2).cache()
      val numAs = logData.filter(_.contains("a")).count()
      val numBs = logData.filter(_.contains("b")).count()
      println(s"Lines with a: $numAs, Lines with b: $numBs")
    } finally {
      // Release the context even when one of the jobs fails.
      sc.stop()
    }
  }
}