I am trying to load mongo data in Spark using the Stratio connector (version: spark-mongodb_2.11-0.12.0). I have added all the necessary dependencies, and I am trying to create an RDD by loading mongo data from my local mongo instance.
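For reference, a minimal build.sbt for this setup might look like the sketch below. The connector coordinate is the one I am using; the Spark version is an assumption for illustration and should match your installation:

libraryDependencies ++= Seq(
  // Spark core and SQL -- the version here is an assumption, adjust to your setup
  "org.apache.spark" %% "spark-core" % "2.0.2",
  "org.apache.spark" %% "spark-sql" % "2.0.2",
  // Stratio MongoDB datasource (pulls in casbah and the mongo-java-driver transitively)
  "com.stratio.datasource" %% "spark-mongodb" % "0.12.0"
)

Here is my code: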
import org.apache.spark.sql.SparkSession
import com.mongodb.casbah.{WriteConcern => MongodbWriteConcern}
import com.stratio.datasource.mongodb._
import com.stratio.datasource.mongodb.config._
import com.stratio.datasource.mongodb.config.MongodbConfig._

object newtest {
  def main(args: Array[String]): Unit = {
    // Point Hadoop at winutils.exe (required to run Spark on Windows)
    System.setProperty("hadoop.home.dir", "C:\\winutil\\")

    val sparkSession = SparkSession.builder()
      .master("local")
      .appName("newtest")
      .getOrCreate()
    //sparkSession.conf.set("spark.executor.memory", "2g")

    // Read configuration for the local MongoDB collection
    val builder = MongodbConfigBuilder(Map(
      Host -> List("localhost:27017"),
      Database -> "test",
      Collection -> "SurvyAnswer",
      SamplingRatio -> 1.0,
      WriteConcern -> "normal"))
    val readConfig = builder.build()

    // Columns I eventually want to project
    val columns = Array("GroupId", "_Id", "hgId")

    // fromMongoDB is provided by the implicits in com.stratio.datasource.mongodb._
    val mongoRDD = sparkSession.sqlContext.fromMongoDB(readConfig)
    mongoRDD.take(2).foreach(println)
  }
}
It fails while connecting, and I don't understand why I get this error:
17/02/21 14:45:45 WARN SparkContext: Use an existing SparkContext, some configuration may not take effect.
17/02/21 14:45:45 INFO SharedState: Warehouse path is 'file:/C:/Users/gbhog/Desktop/BDG/example/mongospark/spark-warehouse'.
17/02/21 14:45:48 INFO cluster: Cluster created with settings {hosts=[localhost:27017], mode=MULTIPLE, requiredClusterType=UNKNOWN, serverSelectionTimeout='30000 ms', maxWaitQueueSize=500}
17/02/21 14:45:48 INFO cluster: Adding discovered server localhost:27017 to client view of cluster
Exception in thread "main" java.lang.NoSuchFieldError: NONE
at com.mongodb.casbah.WriteConcern$.<init>(WriteConcern.scala:40)
at com.mongodb.casbah.WriteConcern$.<clinit>(WriteConcern.scala)
at com.mongodb.casbah.BaseImports$class.$init$(Implicits.scala:162)
at com.mongodb.casbah.Imports$.<init>(Implicits.scala:142)
at com.mongodb.casbah.Imports$.<clinit>(Implicits.scala)
at com.mongodb.casbah.MongoClient.apply(MongoClient.scala:217)
at com.stratio.datasource.mongodb.partitioner.MongodbPartitioner.isShardedCollection(MongodbPartitioner.scala:78)
at com.stratio.datasource.mongodb.partitioner.MongodbPartitioner$$anonfun$computePartitions$1.apply(MongodbPartitioner.scala:67)
at com.stratio.datasource.mongodb.partitioner.MongodbPartitioner$$anonfun$computePartitions$1.apply(MongodbPartitioner.scala:66)
at com.stratio.datasource.mongodb.util.usingMongoClient$.apply(usingMongoClient.scala:27)
at com.stratio.datasource.mongodb.partitioner.MongodbPartitioner.computePartitions(MongodbPartitioner.scala:66)
17/02/21 14:45:48 INFO SparkContext: Invoking stop() from shutdown hook
17/02/21 14:45:48 INFO SparkUI: Stopped Spark web UI at http://192.168.242.1:4040
17/02/21 14:45:48 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/02/21 14:45:49 INFO MemoryStore: MemoryStore cleared
17/02/21 14:45:49 INFO BlockManager: BlockManager stopped
17/02/21 14:45:49 INFO BlockManagerMaster: BlockManagerMaster stopped
17/02/21 14:45:49 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/02/21 14:45:49 INFO SparkContext: Successfully stopped SparkContext
17/02/21 14:45:49 INFO ShutdownHookManager: Shutdown hook called