我正在Windows上运行以下代码,它引发错误并且无法正常工作。 zk,kafka,elasticsearch虽然所有服务器都在运行。数据已经发布到kafka主题
object kses {
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder.
master("local")
.appName("sparkToES")
.config("es.nodes", "localhost")
.config("es.index.auto.create","true")
.getOrCreate()
import spark.implicits._
spark.sparkContext.setLogLevel("ERROR")
val df = spark
.readStream
.format("kafka")
.option("kafka.bootstrap.servers", "localhost:9092")
.option("subscribe", "logi1")
.option("startingOffsets", "earliest")
.load()
val data = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
.as[(String, String)]
val results = data
.map(_._2)
.flatMap(value => value.split("\\s+"))
.groupByKey(_.toLowerCase)
.count()
val query = results.writeStream
.format("org.elasticsearch.spark.sql")
.outputMode("append")
.option("es.nodes", "localhost")
.option("es.port", "9200")
.option("es.nodes.discovery", "true")
.option("es.http.timeout", "20s")
.option("es.http.retries", "0")
.option("es.resource","logi123")
.option("checkpointLocation", "~/checkpoint_es")
.start()
query.awaitTermination()
}
}
ERROR - Exception in thread "main" org.apache.spark.sql.AnalysisException: Append output mode not supported when there are streaming aggregations on streaming DataFrames/DataSets without watermark;
When i change it to 'complete' mode then also code doesnot run.
zk,kafka,elasticsearch虽然所有服务器都在运行。
答案 0 :(得分:0)
Save()中的ElasticSearch资源路径应该是ES节点上索引资源的路径,而不是您的本地路径。
检查ES节点的连通性,并在Save()方法中提供有效的Index(例如:“ index / persons”)路径。
df.write.format("org.elasticsearch.spark.sql").option("es.nodes.wan.only","true")
.option("es.port","9200")
.option("es.net.ssl","true")
.option("es.nodes","192.168.0.1")
.mode("append")
.option("es.nodes.client.only", "false")
.save(<Index Resource PATH>)