import java.sql.Timestamp
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * A single device reading, as one typed record.
 *
 * In this file, instances are built from a comma-separated text line whose
 * four fields map, in order, onto the constructor parameters below.
 *
 * @param deviceId  identifier of the reporting device (field 0); used as a grouping key
 *                  alongside the time window
 * @param value     numeric reading (field 1, parsed with `toDouble`); units are not
 *                  established anywhere in this file
 * @param userId    identifier of the associated user (field 2)
 * @param timestamp event time (field 3, parsed with `toLong` and passed to
 *                  `new Timestamp(...)`, i.e. milliseconds since the epoch); used as the
 *                  watermark and window column
 */
case class DeviceData(
  deviceId: String,
  value: Double,
  userId: String,
  timestamp: Timestamp
)
object StructuredNetworkWordCountWindowed {
def main(args: Array[String]) {
if (args.length < 3) {
System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
" <window duration in seconds> [<slide duration in seconds>]")
val host = args(0)
val port = args(1).toInt
val windowSize = args(2).toInt
val slideSize = if (args.length == 3) windowSize else args(3).toInt
if (slideSize > windowSize) {
System.err.println("<slide duration> must be less than or equal to <window duration>")
val windowDuration = s"$windowSize seconds"
val slideDuration = s"$slideSize seconds"
val spark = SparkSession
import spark.implicits._
// Create DataFrame representing the stream of input lines from connection to host:port
val lines = spark.readStream
.option("host", host)
.option("port", port)
val deviceDF:DataFrame = lines.as[String].map(_.split(",")).
map(value=>DeviceData(value(0), value(1).toDouble, value(2), new Timestamp(value(3).toLong))).toDF()
// Group the data by window and deviceId and compute the count of each group
val windowedCounts = deviceDF
.withWatermark("timestamp", "2 minutes")
.groupBy(window($"timestamp", windowDuration, slideDuration), $"deviceId")
val query = windowedCounts.writeStream
.option("truncate", "false")