我有两个Cassandra集群:一个未启用SSL,另一个启用了SSL。如何在同一个Spark作业中同时与这两个Cassandra集群交互?我需要从其中一台服务器(未启用SSL)读取一张表,并将其写入另一台服务器(启用了SSL)。
Spark作业:
/**
 * Reads table `test.one` through one Cassandra connector and writes the rows
 * to `test.one` through a second connector, all inside a single Spark application.
 *
 * Both connectors start from the SparkContext's configuration and differ only in
 * `spark.cassandra.connection.host`; every other setting (authentication, SSL, ...)
 * is whatever the submitted configuration provides and is therefore shared by both.
 */
object TwoClusterExample extends App {
  val conf = new SparkConf(true).setAppName("SparkCassandraTwoClusterExample")
  println("Starting the SparkCassandraLocalJob....")
  val sc = new SparkContext(conf)

  // Builds a connector from the context's configuration with only the contact host replaced.
  private def connectorFor(host: String): CassandraConnector =
    CassandraConnector(sc.getConf.set("spark.cassandra.connection.host", host))

  val connectorToClusterOne = connectorFor("localhost")
  val connectorToClusterTwo = connectorFor("remote")

  val rddFromClusterOne = {
    // The implicit connector in this scope is the one cassandraTable picks up.
    implicit val readConnector: CassandraConnector = connectorToClusterOne
    sc.cassandraTable("test","one")
  }

  locally {
    // A different implicit connector in this scope directs the write to the second cluster.
    implicit val writeConnector: CassandraConnector = connectorToClusterTwo
    rddFromClusterOne.saveToCassandra("test","one")
  }
}
Spark配置文件(cassandra-count.conf):
spark.master spark://ip:6066
spark.executor.memory 1g
spark.cassandra.connection.host remote
spark.cassandra.auth.username iccassandra
spark.cassandra.auth.password pwd1
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.eventLog.enabled true
spark.eventLog.dir /Users/test/logs/spark
spark.cassandra.connection.ssl.enabled true
spark.cassandra.connection.ssl.trustStore.password pwd2
spark.cassandra.connection.ssl.trustStore.path truststore.jks
提交作业:
spark-submit --deploy-mode cluster --master spark://ip:6066 --properties-file cassandra-count.conf --class TwoClusterExample target/scala-2.10/cassandra-table-assembly-1.0.jar
我得到以下错误:
17/10/26 16:27:20 DEBUG STATES: [/remote:9042] preventing new connections for the next 1000 ms
17/10/26 16:27:20 DEBUG STATES: [/remote:9042] Connection[/remote:9042-1, inFlight=0, closed=true] failed, remaining = 0
17/10/26 16:27:20 DEBUG ControlConnection: [Control connection] error on /remote:9042 connection, no more host to try
com.datastax.driver.core.exceptions.TransportException: [/remote] Cannot connect
at com.datastax.driver.core.Connection$1.operationComplete(Connection.java:157)
at com.datastax.driver.core.Connection$1.operationComplete(Connection.java:140)
at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:680)
at io.netty.util.concurrent.DefaultPromise.notifyListeners0(DefaultPromise.java:603)
at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:563)
at io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:424)
at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe$1.run(AbstractNioChannel.java:222)
at io.netty.util.concurrent.PromiseTask$RunnableAdapter.call(PromiseTask.java:38)
at io.netty.util.concurrent.ScheduledFutureTask.run(ScheduledFutureTask.java:120)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:357)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:357)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.ConnectTimeoutException: connection timed out: /remote:9042
at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe$1.run(AbstractNioChannel.java:220)
... 6 more
17/10/26 16:27:20 DEBUG Cluster: Shutting down
Exception in thread "main" java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.worker.DriverWrapper$.main(DriverWrapper.scala:58)
at org.apache.spark.deploy.worker.DriverWrapper.main(DriverWrapper.scala)
Caused by: java.io.IOException: Failed to open native connection to Cassandra at {remote}:9042
at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:162)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(CassandraConnector.scala:148)
at com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(CassandraConnector.scala:148)
at com.datastax.spark.connector.cql.RefCountedCache.createNewValueAndKeys(RefCountedCache.scala:31)
at com.datastax.spark.connector.cql.RefCountedCache.acquire(RefCountedCache.scala:56)
at com.datastax.spark.connector.cql.CassandraConnector.openSession(CassandraConnector.scala:81)
at com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraConnector.scala:109)
at com.datastax.spark.connector.cql.CassandraConnector.withClusterDo(CassandraConnector.scala:120)
at com.datastax.spark.connector.cql.Schema$.fromCassandra(Schema.scala:304)
at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.tableDef(CassandraTableRowReaderProvider.scala:51)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef$lzycompute(CassandraTableScanRDD.scala:59)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.tableDef(CassandraTableScanRDD.scala:59)
at com.datastax.spark.connector.rdd.CassandraTableRowReaderProvider$class.verify(CassandraTableRowReaderProvider.scala:146)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.verify(CassandraTableScanRDD.scala:59)
at com.datastax.spark.connector.rdd.CassandraTableScanRDD.getPartitions(CassandraTableScanRDD.scala:143)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
at org.apache.spark.rdd.RDD.count(RDD.scala:1143)
at cassandraCount$.runJob(cassandraCount.scala:27)
at cassandraCount$delayedInit$body.apply(cassandraCount.scala:22)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.App$$anonfun$main$1.apply(App.scala:71)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
at scala.App$class.main(App.scala:71)
at cassandraCount$.main(cassandraCount.scala:10)
at cassandraCount.main(cassandraCount.scala)
... 6 more
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All host(s) tried for query failed (tried: /remote:9042 (com.datastax.driver.core.exceptions.TransportException: [/remote] Cannot connect))
at com.datastax.driver.core.ControlConnection.reconnectInternal(ControlConnection.java:231)
at com.datastax.driver.core.ControlConnection.connect(ControlConnection.java:77)
at com.datastax.driver.core.Cluster$Manager.init(Cluster.java:1414)
at com.datastax.driver.core.Cluster.getMetadata(Cluster.java:393)
at com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$connector$cql$CassandraConnector$$createSession(CassandraConnector.scala:155)
... 37 more
17/10/26 16:27:23 INFO SparkContext: Invoking stop() from shutdown hook
17/10/26 16:27:23 INFO SparkUI: Stopped Spark web UI at http://10.7.10.138:4040
可正常运行的代码:
/**
 * Copies table `analytics.products` from one Cassandra cluster to another within a
 * single Spark application, using a separate connector per cluster.
 *
 * The source connector overrides only the contact host. The destination connector
 * additionally sets credentials, SSL options and a connection timeout, so those
 * settings apply to the destination connection only.
 *
 * The SparkContext is stopped in a `finally` block so it is released even when the
 * read or the write fails.
 */
object ClusterSSLTest extends App{
  val conf = new SparkConf(true).setAppName("sparkCassandraLocalJob")
  println("Starting the ClusterSSLTest....")
  val sc = new SparkContext(conf)

  try {
    // Each call to sc.getConf yields its own SparkConf instance, so the settings
    // applied for one connector do not affect the other.
    val sourceCluster = CassandraConnector(
      sc.getConf.set("spark.cassandra.connection.host", "localhost"))

    // NOTE(review): credentials and trust-store password are hard-coded here;
    // consider supplying them through the submit-time configuration instead.
    val destinationCluster = CassandraConnector(
      sc.getConf.set("spark.cassandra.connection.host", "remoteip1,remoteip2")
        .set("spark.cassandra.auth.username","uname")
        .set("spark.cassandra.auth.password","pwd")
        .set("spark.cassandra.connection.ssl.enabled","true")
        .set("spark.cassandra.connection.timeout_ms","10000")
        .set("spark.cassandra.connection.ssl.trustStore.path", "../truststore.jks")
        .set("spark.cassandra.connection.ssl.trustStore.password", "pwd")
        .set("spark.cassandra.connection.ssl.trustStore.type", "JKS")
        .set("spark.cassandra.connection.ssl.protocol", "TLS")
        .set("spark.cassandra.connection.ssl.enabledAlgorithms", "TLS_RSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_CBC_SHA")
    )

    val rddFromSourceCluster = {
      // Use the source cluster's connector for everything in this block.
      implicit val c: CassandraConnector = sourceCluster
      val tbRdd = sc.cassandraTable("analytics","products")
      // count() runs a job against the source cluster before the copy starts.
      println(s"no of rows ${tbRdd.count()}")
      tbRdd
    }

    locally {
      // Use the destination cluster's connector for everything in this block.
      implicit val c: CassandraConnector = destinationCluster
      rddFromSourceCluster.saveToCassandra("analytics","products")
    }
  } finally {
    // Always release the context, including when the read or the write throws.
    sc.stop()
  }
}