I want to save checkpoints to a location on Amazon S3 as a test. This is part of my Scala code on a DStream; I'm using the format below but I get the following error:
Exception in thread "main" java.lang.IllegalArgumentException: AWS Access Key ID and Secret Access Key must be specified as the username or password (respectively) of a s3n URL, or by setting the fs.s3n.awsAccessKeyId or fs.s3n.awsSecretAccessKey properties (respectively).
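If I read the message correctly, the credentials have to reach the s3n filesystem either inline in the URL or as Hadoop properties on the configuration that actually resolves the path. A rough sketch of the two forms it describes (ACCESS_KEY and SECRET_KEY are placeholders, and an inline secret containing "/" would need to be URL-encoded):

    // Form 1: credentials embedded in the s3n URL
    ssc.checkpoint("s3n://ACCESS_KEY:SECRET_KEY@probecheckpoints/checkpoints")

    // Form 2: credentials as properties on the Hadoop configuration that resolves the path
    sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", "ACCESS_KEY")
    sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", "SECRET_KEY")

I do set these properties in main (see below), so I'm not sure why they aren't picked up.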
Code:
val creatingFunc = { () =>
  // Create a StreamingContext
  val ssc = new StreamingContext(sc, Seconds(batchIntervalSeconds))

  val ggsnLines  = ssc.fileStream[LongWritable, Text, TextInputFormat]("C:\\Users\\Mbazarganigilani\\Documents\\RA\\GGSN\\Files1", filterF, false)
  val ccnLines   = ssc.fileStream[LongWritable, Text, TextInputFormat]("C:\\Users\\Mbazarganigilani\\Documents\\RA\\CCN\\Files1", filterF, false)
  val probeLines = ssc.fileStream[LongWritable, Text, TextInputFormat]("C:\\Users\\Mbazarganigilani\\Documents\\RA\\Probe\\Files1", filterF, false)

  val ggssnArrays = ggsnLines
    .map(x => (x._1, x._2.toString))
    .filter(!_._2.contains("ggsnIPAddress"))
    .map(x => (x._1, x._2.split(",")))

  ggssnArrays.foreachRDD { s =>
    s.collect().take(10).foreach(u => println(u._2.mkString(",")))
  }

  ssc.remember(Minutes(1)) // to make sure data is not deleted by the time we query it interactively
  ssc.checkpoint("s3n://probecheckpoints/checkpoints")

  println("Creating function called to create new StreamingContext")
  newContextCreated = true
  ssc
}
def main(args: Array[String]): Unit = {
  // minRememberDuration is set so that pre-existing files in the directory are read
  // Kryo serialization needs to be enabled for the fileStream classes
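  // (These two settings live in the SparkConf used to build `sc`, which is created
  // outside this snippet. Roughly what I mean, with assumed values:
  //   val conf = new SparkConf()
  //     .set("spark.streaming.fileStream.minRememberDuration", "600s")
  //     .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  //     .registerKryoClasses(Array(classOf[LongWritable], classOf[Text]))
  // )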
  if (stopActiveContext) {
    StreamingContext.getActive.foreach { _.stop(stopSparkContext = false) }
  }

  // Get or create a streaming context
  val hadoopConfiguration: Configuration = new Configuration()
  hadoopConfiguration.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
  hadoopConfiguration.set("fs.s3n.awsAccessKeyId", "AKIAIOPSJVBDTEUHUJCQ")
  hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", "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")
  //val ssc = StreamingContext.getActiveOrCreate(creatingFunc)
  val ssc = StreamingContext.getActiveOrCreate("s3n://probecheckpoints/SparkCheckPoints", creatingFunc, hadoopConfiguration, false)
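  // (As far as I can tell, the hadoopConfiguration passed here is only used to
  // read existing checkpoint data; it is not attached to the SparkContext itself.)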
  if (newContextCreated) {
    println("New context created from currently defined creating function")
  } else {
    println("Existing context running or recovered from checkpoint, may not be running currently defined creating function")
  }
  // Start the streaming context in the background.
  ssc.start()
}
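My guess is that ssc.checkpoint("s3n://probecheckpoints/checkpoints") inside creatingFunc is resolved through the SparkContext's own Hadoop configuration rather than through the standalone Configuration I build in main, which would explain why the keys appear "not specified" even though I set them. A sketch of what I mean (untested; it sets the same properties on sc before the context is created or recovered):

    sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", "AKIAIOPSJVBDTEUHUJCQ")
    sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", "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")
    val ssc = StreamingContext.getActiveOrCreate("s3n://probecheckpoints/SparkCheckPoints", creatingFunc, sc.hadoopConfiguration, false)

Is this the right way to point the Spark Streaming checkpoint at S3, or am I missing a configuration step?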