创建uuid并插入Spark Dataframe

时间:2017-10-09 17:55:15

标签: scala apache-spark cassandra spark-dataframe uuid

请看下面的代码,并告诉我如何创建 Cassandra UUID 并将其填入 sno 列。CSV 文件中没有 sno 列,但我在 Cassandra 表中添加了 sno 列。我必须用 Cassandra 的 uuid 填充 sno,并将其与 CSV 数据一起插入 Cassandra 表。以下是我的代码。

Scala版本:2.11 Spark:2.2

import org.apache.spark.sql.SparkSession
import org.apache.log4j.{Level, Logger}
import com.datastax


import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import com.datastax.spark.connector._
import org.apache.spark.sql._

object dataframeset {

  /**
   * Reads a CSV file into a DataFrame, attaches a freshly generated UUID
   * column named "sno" (to match the extra column in the Cassandra table),
   * and shows the result. Connects to a local Cassandra instance.
   */
  def main(args: Array[String]): Unit = {
    // Local imports so this block is self-contained.
    import java.util.UUID
    import org.apache.spark.sql.functions.udf

    // Silence noisy framework logging before anything else spins up.
    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    // Build one SparkSession; it owns the underlying SparkContext, so a
    // separately constructed SparkContext is unnecessary.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("Spark SQL basic example")
      .config("spark.cassandra.connection.host", "127.0.0.1")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    val df = spark.read
      .format("csv")
      .option("header", "true")
      .option("inferschema", "true")
      .load("/Users/Desktop/del2.csv")

    println("\nTop Records are:")
    df.show(1)

    // UDF that produces a random (type 4) UUID string per row. Cassandra's
    // `uuid` column type accepts this directly. (If the table column is
    // `timeuuid`, a time-based UUID from the DataStax driver's UUIDs helper
    // would be needed instead.)
    val makeUuid = udf(() => UUID.randomUUID().toString)

    // Select the CSV columns, then prepend the generated sno column.
    val dfprev1 = df.select("year", "StateAbbr")
      .withColumn("sno", makeUuid())
      .select("sno", "year", "StateAbbr")

    dfprev1.show(1)

    // dfprev1 can now be written to Cassandra, e.g.:
    // dfprev1.write.format("org.apache.spark.sql.cassandra")
    //   .options(Map("keyspace" -> "tdata", "table" -> "map3")).save()
  }
}

需要的输出:

+---+----+---------+
|sno|year|StateAbbr|
+---+----+---------+
|  54d6e-29bb-11e5-b345-feff819cdc9f |2014|       US|
+---+----+---------+
only showing top 1 row

0 个答案:

没有答案