import org.apache.spark.rdd._
import org.apache.spark._
import org.apache.spark.SparkContext._
object ScalaTeraSort {
/**
 * Entry point: sorts the lines of a text input by their leading 10-character key.
 *
 * Reads the text file at `args(0)`, splits every line into a key (the first
 * 10 characters) and a value (the remainder), sorts the pairs with
 * `sortByKey()`, concatenates key and value back into a line, and saves the
 * result as text at `args(1)`.
 *
 * When fewer than two arguments are supplied, a usage message is printed to
 * stderr and the JVM exits with status 1.
 *
 * @param args `args(0)` is the input path, `args(1)` is the output path
 */
def main(args: Array[String]): Unit = {
  if (args.length < 2) {
    // Spell the program name literally: interpolating `$ScalaTeraSort` would
    // print the singleton object's default toString, not its name.
    System.err.println("Usage: ScalaTeraSort <INPUT_HDFS> <OUTPUT_HDFS>")
    System.exit(1)
  }

  val sparkConf = new SparkConf().setAppName("ScalaTeraSort")
  val sc = new SparkContext(sparkConf)
  try {
    val file = sc.textFile(args(0))
    // splitAt(10) yields (first 10 chars, rest) exactly like the two substring
    // calls, but does not throw on lines shorter than 10 characters.
    val data = file
      .map(line => line.splitAt(10))
      .sortByKey()
      .map { case (k, v) => k + v }
    data.saveAsTextFile(args(1))
  } finally {
    // Stop the context even when the job fails so it is not left running.
    sc.stop()
  }
}
我尝试运行这段来自 Stack Overflow 帖子的代码。