为什么使用spark-submit将4000张图像加载到Redis中比将相同图像加载到HBase中(2.5分钟)要花费更长的时间(9分钟)?

时间:2018-07-05 12:32:48

标签: redis hbase spark-submit

将图像加载到Redis中应该比使用Hbase进行相同的操作要快得多,因为Redis处理RAM,而HBase使用HDFS存储数据。当我将4000张图像加载到Redis中时,我感到很惊讶,它花了9分钟完成!虽然我使用HBase完成了相同的过程,但只花了2.5分钟。是否对此有解释?有什么建议可以改善我的代码吗?这是我的代码:

// The code for loading the images into Hbase (adopted from NIST)
val conf = new SparkConf().setAppName("Fingerprint.LoadData") 
val sc = new SparkContext(conf) 
Image.dropHBaseTable() Image.createHBaseTable() 
val checksum_path = args(0) 
println("Reading paths from: %s".format(checksum_path.toString)) 
val imagepaths = loadImageList(checksum_path) println("Got %s images".format(imagepaths.length))
imagepaths.foreach(println) 
println("Reading files into RDD") 
val images = sc.parallelize(imagepaths).map(paths => Image.fromFiles(paths._1, paths._2)) 
println(s"Saving ${images.count} images to HBase")
Image.toHBase(images) 
println("Done")

} val conf = new SparkConf().setAppName("Fingerprint.LoadData") val sc = new SparkContext(conf) Image.dropHBaseTable() Image.createHBaseTable() val checksum_path = args(0) println("Reading paths from: %s".format(checksum_path.toString)) val imagepaths = loadImageList(checksum_path) println("Got %s images".format(imagepaths.length)) imagepaths.foreach(println) println("Reading files into RDD") val images = sc.parallelize(imagepaths) .map(paths => Image.fromFiles(paths._1, paths._2)) println(s"Saving ${images.count} images to HBase") Image.toHBase(images) println("Done")

} def toHBase(rdd: RDD[T]): Unit = {

     val cfg = HBaseConfiguration.create()
     cfg.set(TableOutputFormat.OUTPUT_TABLE, tableName)
     val job = Job.getInstance(cfg)
     job.setOutputFormatClass(classOf[TableOutputFormat[String]])
     rdd.map(Put).saveAsNewAPIHadoopDataset(job.getConfiguration)

} 

///将图像加载到Redis中的代码

  val images = sc.parallelize(imagepaths).map(paths => Image.fromFiles(paths._1, paths._2)).collect
        for(i <- images){
val stringRdd = sc.parallelize(Seq((i.uuid, new String(i.Png, StandardCharsets.UTF_8))))
        sc.toRedisKV(stringRdd)(redisConfig)
        stringRdd.collect}                    
        println("Done")

0 个答案:

没有答案