我想将旧的cassandra集群数据迁移到新集群,并考虑编写一些火花作业来实现这一目标。有没有办法从同一个SparkContext与多个cassandra集群进行交互。这样我就可以在同一个sparkJob中使用saveToCassandra函数从一个集群读取数据并写入另一个集群。
val products = sc.cassandraTable("first_cluster","products").cache()
products.saveToCassandra("diff_cluster","products2")
我们可以将数据保存到不同的群集中吗?
答案 0 :(得分:4)
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql._
import org.apache.spark.SparkContext
def twoClusterExample ( sc: SparkContext) = {
val connectorToClusterOne = CassandraConnector(sc.getConf.set("spark.cassandra.connection.host", "127.0.0.1"))
val connectorToClusterTwo = CassandraConnector(sc.getConf.set("spark.cassandra.connection.host", "127.0.0.2"))
val rddFromClusterOne = {
// Sets connectorToClusterOne as default connection for everything in this code block
implicit val c = connectorToClusterOne
sc.cassandraTable("ks","tab")
}
{
//Sets connectorToClusterTwo as the default connection for everything in this code block
implicit val c = connectorToClusterTwo
rddFromClusterOne.saveToCassandra("ks","tab")
}
}