How to work with multiple Cassandra servers with different SSL policies from Spark

Asked: 2017-10-27 00:53:40

Tags: ssl cassandra spark-cassandra-connector

One Cassandra cluster does not have SSL enabled, while the other does. How can I talk to both clusters from a single Spark job? I need to copy a table from one cluster (without SSL) into the other (with SSL).

Spark job:

import org.apache.spark.{SparkConf, SparkContext}
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector

object TwoClusterExample extends App {

  val conf = new SparkConf(true).setAppName("SparkCassandraTwoClusterExample")
  println("Starting the SparkCassandraLocalJob....")

  val sc = new SparkContext(conf)

  // sc.getConf returns a copy of the context conf, so each connector
  // gets its own host setting without affecting the other.
  val connectorToClusterOne = CassandraConnector(sc.getConf.set("spark.cassandra.connection.host", "localhost"))
  val connectorToClusterTwo = CassandraConnector(sc.getConf.set("spark.cassandra.connection.host", "remote"))

  // Read from cluster one by putting its connector in implicit scope.
  val rddFromClusterOne = {
    implicit val c = connectorToClusterOne
    sc.cassandraTable("test", "one")
  }

  // Write to cluster two by switching the implicit connector.
  {
    implicit val c = connectorToClusterTwo
    rddFromClusterOne.saveToCassandra("test", "one")
  }

}

Cassandra conf (the cassandra-count.conf properties file):

spark.master spark://ip:6066
spark.executor.memory 1g
spark.cassandra.connection.host remote
spark.cassandra.auth.username iccassandra
spark.cassandra.auth.password pwd1
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.eventLog.enabled true
spark.eventLog.dir /Users/test/logs/spark
spark.cassandra.connection.ssl.enabled true
spark.cassandra.connection.ssl.trustStore.password pwd2
spark.cassandra.connection.ssl.trustStore.path truststore.jks
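
Note that every spark.cassandra.* entry in this properties file, including spark.cassandra.connection.ssl.enabled true, becomes the default for any CassandraConnector built from sc.getConf, so the localhost connector above also inherits the SSL settings. A minimal sketch of overriding those defaults per connector (host names are placeholders; reuses sc and the imports from the job above):

val base = sc.getConf // a copy of the context conf, safe to mutate
val noSslConf = base.clone()
  .set("spark.cassandra.connection.host", "localhost")
  .set("spark.cassandra.connection.ssl.enabled", "false") // override the global 'true'
val sslConf = base.clone()
  .set("spark.cassandra.connection.host", "remote")
  .set("spark.cassandra.connection.ssl.enabled", "true")
val plainConnector = CassandraConnector(noSslConf)
val sslConnector   = CassandraConnector(sslConf)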

Submitting the job:

spark-submit --deploy-mode cluster --master spark://ip:6066 --properties-file cassandra-count.conf --class TwoClusterExample target/scala-2.10/cassandra-table-assembly-1.0.jar
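
Note: with --deploy-mode cluster the driver runs on a worker node, so the truststore referenced by spark.cassandra.connection.ssl.trustStore.path must be readable from there; spark-submit's --files option is one way to ship the truststore alongside the job.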

I get the following error:

17/10/26 16:27:20 DEBUG STATES: [/remote:9042] preventing new connections for the next 1000 ms
17/10/26 16:27:20 DEBUG STATES: [/remote:9042] Connection[/remote:9042-1, inFlight=0, closed=true] failed, remaining = 0
17/10/26 16:27:20 DEBUG ControlConnection: [Control connection] error on /remote:9042 connection, no more host to try
com.datastax.driver.core.exceptions.TransportException: [/remote] Cannot connect
    at com.datastax.driver.core.Connection$1.operationComplete(Connection.java:157)
    at com.datastax.driver.core.Connection$1.operationComplete(Connection.java:140)
    at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:680)
    at io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:603)
    at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:563)
    at io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:424)
    at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe$1.run(AbstractNioChannel.java:222)
    at io.netty.util.concurrent.PromiseTask$RunnableAdapter.call(PromiseTask.java:38)
    at io.netty.util.concurrent.ScheduledFutureTask.run(ScheduledFutureTask.java:120)
    at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:357)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:357)
    at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
    at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.ConnectTimeoutException: connection timed out: /remote:9042
    at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe$1.run(AbstractNioChannel.java:220)
    ... 6 more
17/10/26 16:27:20 DEBUG Cluster: Shutting down
Exception in thread "main" java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.worker.DriverWrapper$.main(DriverWrapper.scala:58)
    at org.apache.spark.deploy.worker.DriverWrapper.main(DriverWrapper.scala)
Caused by: java.io.IOException: Failed to open native connection to Cassandra at {remote}:9042
    at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:162)
    at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(CassandraConnector.scala:148)
    at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(CassandraConnector.scala:148)
    at com.datastax.spark.connector.cql.RefCountedCache.createNewValueAndKeys(RefCountedCache.scala:31)
    at com.datastax.spark.connector.cql.RefCountedCache.acquire(RefCountedCache.scala:56)
    at com.datastax.spark.connector.cql.CassandraConnector.openSession(CassandraConnector.scala:81)
    at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:109)
    at com.datastax.spark.connector.cql.CassandraConnector.withClusterDo(CassandraConnector.scala:120)
    at com.datastax.spark.connector.cql.Schema$.fromCassandra(Schema.scala:304)
    at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.tableDef(CassandraTableRowReaderProvider.scala:51)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef$lzycompute(CassandraTableScanRDD.scala:59)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef(CassandraTableScanRDD.scala:59)
    at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.verify(CassandraTableRowReaderProvider.scala:146)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.verify(CassandraTableScanRDD.scala:59)
    at com.datastax.spark.connector.rdd.CassandraTableScanRDD.getPartitions(CassandraTableScanRDD.scala:143)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1143)
    at cassandraCount$.runJob(cassandraCount.scala:27)
    at cassandraCount$delayedInit$body.apply(cassandraCount.scala:22)
    at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
    at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
    at scala.App$$anonfun$main$1.apply(App.scala:71)
    at scala.App$$anonfun$main$1.apply(App.scala:71)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
    at scala.App$class.main(App.scala:71)
    at cassandraCount$.main(cassandraCount.scala:10)
    at cassandraCount.main(cassandraCount.scala)
    ... 6 more
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /remote:9042 (com.datastax.driver.core.exceptions.TransportException: [/remote] Cannot connect))
    at com.datastax.driver.core.ControlConnection.reconnectInternal(ControlConnection.java:231)
    at com.datastax.driver.core.ControlConnection.connect(ControlConnection.java:77)
    at com.datastax.driver.core.Cluster$Manager.init(Cluster.java:1414)
    at com.datastax.driver.core.Cluster.getMetadata(Cluster.java:393)
    at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:155)
    ... 37 more
17/10/26 16:27:23 INFO SparkContext: Invoking stop() from shutdown hook
17/10/26 16:27:23 INFO SparkUI: Stopped Spark web UI at http://10.7.10.138:4040
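
The root cause in the trace is io.netty.channel.ConnectTimeoutException, i.e. the TCP connection to remote:9042 never completes, which suggests a reachability (network/firewall) problem rather than an SSL handshake failure. A quick probe from the driver machine can separate the two (hostname and timeout are placeholders):

import java.net.{InetSocketAddress, Socket}

// Plain TCP probe of the Cassandra native port; a timeout here means the
// host is unreachable regardless of any SSL configuration.
val socket = new Socket()
try {
  socket.connect(new InetSocketAddress("remote", 9042), 5000)
  println("port 9042 reachable")
} finally {
  socket.close()
}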

Working code:

import org.apache.spark.{SparkConf, SparkContext}
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector

object ClusterSSLTest extends App {

  val conf = new SparkConf(true).setAppName("sparkCassandraLocalJob")
  println("Starting the ClusterSSLTest....")

  val sc = new SparkContext(conf)

  // Source cluster: no SSL, just a host override.
  val sourceCluster = CassandraConnector(
    sc.getConf.set("spark.cassandra.connection.host", "localhost"))

  // Destination cluster: auth plus the full set of SSL settings.
  val destinationCluster = CassandraConnector(
    sc.getConf.set("spark.cassandra.connection.host", "remoteip1,remoteip2")
      .set("spark.cassandra.auth.username", "uname")
      .set("spark.cassandra.auth.password", "pwd")
      .set("spark.cassandra.connection.ssl.enabled", "true")
      .set("spark.cassandra.connection.timeout_ms", "10000")
      .set("spark.cassandra.connection.ssl.trustStore.path", "../truststore.jks")
      .set("spark.cassandra.connection.ssl.trustStore.password", "pwd")
      .set("spark.cassandra.connection.ssl.trustStore.type", "JKS")
      .set("spark.cassandra.connection.ssl.protocol", "TLS")
      .set("spark.cassandra.connection.ssl.enabledAlgorithms", "TLS_RSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_CBC_SHA")
  )

  val rddFromSourceCluster = {
    implicit val c = sourceCluster // read from the source cluster in this block

    val tbRdd = sc.cassandraTable("analytics", "products")
    println(s"no of rows ${tbRdd.count()}")

    tbRdd
  }

  val rddToDestinationCluster = {
    implicit val c = destinationCluster // write to the destination cluster in this block

    rddFromSourceCluster.saveToCassandra("analytics", "products")
  }

  sc.stop()
}
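
This works because CassandraConnector snapshots the spark.cassandra.* settings from the SparkConf it is handed, so each connector carries its own hosts, credentials, and SSL options. As an optional sanity check (a sketch; same keyspace and table as above), the copy can be verified by counting rows on the destination:

// Place before sc.stop() in the job above.
val copiedRows = {
  implicit val c = destinationCluster
  sc.cassandraTable("analytics", "products").count()
}
println(s"rows on destination: $copiedRows")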

0 Answers:

No answers yet.