我正在尝试使用datastax连接器连接Spark和Cassandra。 Spark在本地机器上运行,Cassandra在AWS上运行。 我从IDEA运行它。
这是我的设置: build.sbt:
name := "bubblical"
version := "1.0"
scalaVersion := "2.11.8"
val sparkVersion: String = "2.1.0"
val akkaVersion: String = "10.0.3"
val sparkTestingVersion: String = sparkVersion + "_0.3.3"
resolvers += Resolver.sonatypeRepo("releases")
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion,
"org.apache.spark" %% "spark-streaming" % sparkVersion,
"org.apache.spark" %% "spark-sql" % sparkVersion,
"com.typesafe.akka" %% "akka-http" % akkaVersion ,
"com.typesafe.akka" %% "akka-http-spray-json" % akkaVersion,
"junit" % "junit" % "4.10" % "test",
// "org.scalactic" %% "scalactic" % "3.0.0",
// "org.scalatest" %% "scalatest" % "3.0.0" % "test",
// "com.holdenkarau" %% "spark-testing-base" % sparkTestingVersion ,
// "org.scalaj" % "scalaj-time_2.10.2" % "0.7",
"com.datastax.spark" % "spark-cassandra-connector_2.11" % "2.0.1",
"com.datastax.cassandra" % "cassandra-driver-core" % "3.0.0",
// "com.datastax.cassandra" % "cassandra-driver-core" % "2.1.5",
"mysql" % "mysql-connector-java" % "5.1.33"
)
这是我尝试使用它的方式:
package bubblical.db
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.FunSuite
/**
* Created by kirill on 22/04/17.
*/
case class Aggregation(apn: String, imei: String, download_average: Double, download_total: Double)
class CassandraAccessTest extends FunSuite {
val url = "52.215.24.146"//config.getString("cassandra.url")
val port = "9160"
// val port = "9042"
val conf = new SparkConf(true)
.set("spark.cassandra.connection.host", url)
.set("spark.cassandra.connection.port", port)
.set("spark.cassandra.auth.username", "cassandra")
.set("spark.cassandra.auth.password", "cassandra")
val sc = new SparkContext("local[*]", "test", conf)
test("Cassandra access") {
val tableName = "aggregations"
val keySpace = "bubbling_dev_test_1"
implicit val sContext = sc
implicit val connector = CassandraConnector(conf)
val rdd = sc.cassandraTable(keySpace,tableName)
rdd.collect().foreach(println(_))
}
}
我得到NullPOinterException:
java.lang.NullPointerException was thrown.
java.lang.NullPointerException
at com.datastax.driver.core.Cluster$Manager.close(Cluster.java:1627)
at com.datastax.driver.core.Cluster$Manager.access$200(Cluster.java:1318)
at com.datastax.driver.core.Cluster.closeAsync(Cluster.java:566)
at com.datastax.driver.core.Cluster.close(Cluster.java:578)
at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:167)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$8.apply(CassandraConnector.scala:154)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$8.apply(CassandraConnector.scala:154)
at com.datastax.spark.connector.cql.RefCountedCache.createNewValueAndKeys(RefCountedCache.scala:32)
at com.datastax.spark.connector.cql.RefCountedCache.syncAcquire(RefCountedCache.scala:69)
at com.datastax.spark.connector.cql.RefCountedCache.acquire(RefCountedCache.scala:57)
at com.datastax.spark.connector.cql.CassandraConnector.openSession(CassandraConnector.scala:79)
at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:111)
at com.datastax.spark.connector.cql.CassandraConnector.withClusterDo(CassandraConnector.scala:122)
at com.datastax.spark.connector.cql.Schema$.fromCassandra(Schema.scala:330)
at com.datastax.spark.connector.cql.Schema$.tableFromCassandra(Schema.scala:350)
at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.tableDef(CassandraTableRowReaderProvider.scala:50)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef$lzycompute(CassandraTableScanRDD.scala:62)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef(CassandraTableScanRDD.scala:62)
at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.verify(CassandraTableRowReaderProvider.scala:137)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.verify(CassandraTableScanRDD.scala:62)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.getPartitions(CassandraTableScanRDD.scala:262)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:935)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:934)
at bubblical.db.CassandraAccessTest$$anonfun$1.apply$mcV$sp(CassandraAccessTest.scala:37)
at bubblical.db.CassandraAccessTest$$anonfun$1.apply(CassandraAccessTest.scala:26)
at bubblical.db.CassandraAccessTest$$anonfun$1.apply(CassandraAccessTest.scala:26)
at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
at org.scalatest.Suite$class.run(Suite.scala:1424)
at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
at org.scalatest.FunSuite.run(FunSuite.scala:1555)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
at org.scalatest.tools.Runner$.run(Runner.scala:883)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
我发现有人说这可能与Spark集群有关,但我在我的本地计算机上运行。
谢谢。
基里尔。