// Fetch 20k records from a Cassandra table using an RDD
import com.datastax.spark.connector._    // cassandraTable, saveToCassandra
import org.apache.spark.sql.cassandra._  // cassandraFormat on the DataFrame reader/writer
import org.apache.spark.sql.SaveMode
import spark.implicits._                 // encoder for the case class used in .as[Class]

val startTimeMillis = System.currentTimeMillis()
val Rdd = sc.cassandraTable[Class]("Keyspace", "Table").keyBy(f => f.partition_key)
val endTimeMillis = System.currentTimeMillis()
val durationMillisFetch = endTimeMillis - startTimeMillis
println("fetch " + durationMillisFetch)
println(Rdd.count())
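// NOTE (sketch, not part of the original snippet): cassandraTable and keyBy are lazy
// transformations, so the window above mostly measures driver-side setup. To time the
// read itself, an action has to run inside the window. `timed` below is an illustrative
// helper, not connector API; it assumes the same sc and Class as above.
def timed[T](label: String)(block: => T): T = {
  val start = System.currentTimeMillis()
  val result = block // runs the block, e.g. an action that forces the read
  println(label + " " + (System.currentTimeMillis() - start) + " ms")
  result
}

// count() is an action, so the Cassandra read happens inside the timed block.
val fetchedCount = timed("rdd fetch") {
  sc.cassandraTable[Class]("Keyspace", "Table").keyBy(f => f.partition_key).count()
}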
// Store the data from the RDD back into Cassandra; this writes a copy of the same 20k records.
val startTimeMillisInsert = System.currentTimeMillis()
Rdd.saveToCassandra("Keyspace", "Table")
val endTimeMillisInsert = System.currentTimeMillis()
val durationMillisInsert = endTimeMillisInsert - startTimeMillisInsert
println("insert " + durationMillisInsert)
// Fetch the same 20k records from the Cassandra table using a Dataset
val startTimeMillis1 = System.currentTimeMillis()
val r1 = spark.read.format("org.apache.spark.sql.cassandra")
  .options(Map("table" -> "TableName", "keyspace" -> "keySpaceName"))
  .load()
  .where("partition_key = 'fa1eb916-67a3-46ed-8ea4-045d43a49cdb'")
  .as[Class]
val endTimeMillis1 = System.currentTimeMillis()
val durationMillisFetch1 = endTimeMillis1 - startTimeMillis1
println("fetch " + durationMillisFetch1)
println(r1.count)
// Store the data from the Dataset back into Cassandra; this writes a copy of the same 20k records.
val startTimeMillisInsert1 = System.currentTimeMillis()
r1.write
  .cassandraFormat("Table", "Keyspace")
  .mode(SaveMode.Append)
  .save()
val endTimeMillisInsert1 = System.currentTimeMillis()
val durationMillisInsert1 = endTimeMillisInsert1 - startTimeMillisInsert1
println("insert " + durationMillisInsert1)
Output (durations are in milliseconds):
For DataSet
21299
fetch 9207
insert 62410
For RDD
21299
fetch 473
insert 23326