I am receiving JSON messages from a Kafka stream. I want to create a time-based window over this stream and take the maximum value. I am using the following code:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.functions import from_json, col, window, current_timestamp
import pyspark.sql.functions as func

# Schema of the JSON payload carried in the Kafka message value
jsonSchema = StructType([
    StructField("State", StringType(), True),
    StructField("Value", StringType(), True),
    StructField("SourceTimestamp", StringType(), True),
    StructField("Tag", StringType(), True)
])
spark = SparkSession \
    .builder \
    .appName("StructuredStreaming") \
    .config("spark.default.parallelism", "100") \
    .getOrCreate()

# Read from Kafka and parse the JSON value into columns
df = spark \
    .readStream \
    .format("kafka") \
    .option("kafka.bootstrap.servers", "10.129.140.23:9092") \
    .option("subscribe", "SIMULATOR.SUPERMAN.TOTO") \
    .load() \
    .select(from_json(col("value").cast("string"), jsonSchema).alias("data")) \
    .select("data.*")

# 4-second processing-time window, keeping the maximum Value per window
df = df \
    .withColumn("window", window(current_timestamp(), "4 seconds")) \
    .groupBy("window") \
    .agg(func.max("Value")) \
    .select("window")

query = df \
    .writeStream \
    .outputMode("update") \
    .format("console") \
    .start()

query.awaitTermination()
I get the following output in the logs:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
/usr/spark/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
/usr/spark/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o165.awaitTermination.
: org.apache.spark.sql.streaming.StreamingQueryException: Writing job aborted.
=== Streaming Query ===
Identifier: [id = b6a5b7f5-59c7-43ac-a6a0-8d27fb3fff6d, runId = ca394e5a-14b1-4ccb-9872-36cf7319ae75]
Current Committed Offsets: {}
Current Available Offsets: {KafkaV2[Subscribe[SIMULATOR.SUPERMAN.TOTO]]: {"SIMULATOR.SUPERMAN.TOTO":{"0":956487}}}
Current State: ACTIVE
Thread State: RUNNABLE
Logical Plan:
Project [window#118]
+- Aggregate [window#118], [window#118, max(Value#111) AS max(Value)#133]
+- Project [State#85, Value#111, SourceTimestamp#87, Tag#88, Source#105, window#119 AS window#118]
+- Filter isnotnull(current_timestamp())
+- Project [named_struct(start, precisetimestampconversion(((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) as double) = (cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) THEN (CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 4000000) + 0), LongType, TimestampType), end, precisetimestampconversion((((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) as double) = (cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) THEN (CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 4000000) + 0) + 4000000), LongType, TimestampType)) AS window#119, State#85, Value#111, SourceTimestamp#87, Tag#88, Source#105]
+- Project [State#85, cast(Value#86 as double) AS Value#111, SourceTimestamp#87, Tag#88, Source#105]
+- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, cast(Source#99L as timestamp) AS Source#105]
+- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, cast(Source#93 as bigint) AS Source#99L]
+- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, substring(SourceTimestamp#87, 1, 10) AS Source#93]
+- Project [data#83.State AS State#85, data#83.Value AS Value#86, data#83.SourceTimestamp AS SourceTimestamp#87, data#83.Tag AS Tag#88]
+- Project [jsontostructs(StructField(State,StringType,true), StructField(Value,StringType,true), StructField(SourceTimestamp,StringType,true), StructField(Tag,StringType,true), cast(value#70 as string), Some(Etc/UTC)) AS data#83]
+- StreamingExecutionRelation KafkaV2[Subscribe[SIMULATOR.SUPERMAN.TOTO]], [key#69, value#70, topic#71, partition#72, offset#73L, timestamp#74, timestampType#75]
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:297)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:193)
Caused by: org.apache.spark.SparkException: Writing job aborted.
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2Exec.scala:92)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3383)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2782)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2782)
at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
at org.apache.spark.sql.Dataset.collect(Dataset.scala:2782)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5$$anonfun$apply$17.apply(MicroBatchExecution.scala:540)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5.apply(MicroBatchExecution.scala:535)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:351)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch(MicroBatchExecution.scala:534)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply$mcV$sp(MicroBatchExecution.scala:198)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:166)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:166)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:351)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:58)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1.apply$mcZ$sp(MicroBatchExecution.scala:166)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:56)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:160)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:281)
... 1 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 52 in stage 1.0 failed 1 times, most recent failure: Lost task 52.0 in stage 1.0 (TID 200, localhost, executor driver): java.lang.NoSuchMethodError: net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V
at org.apache.spark.io.LZ4CompressionCodec.compressedInputStream(CompressionCodec.scala:122)
at org.apache.spark.serializer.SerializerManager.wrapForCompression(SerializerManager.scala:163)
at org.apache.spark.serializer.SerializerManager.wrapStream(SerializerManager.scala:124)
at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$3.apply(BlockStoreShuffleReader.scala:50)
at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$3.apply(BlockStoreShuffleReader.scala:50)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:453)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:64)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.sql.execution.streaming.StateStoreRestoreExec$$anonfun$doExecute$1.apply(statefulOperators.scala:243)
at org.apache.spark.sql.execution.streaming.StateStoreRestoreExec$$anonfun$doExecute$1.apply(statefulOperators.scala:242)
at org.apache.spark.sql.execution.streaming.state.package$StateStoreOps$$anonfun$1.apply(package.scala:67)
at org.apache.spark.sql.execution.streaming.state.package$StateStoreOps$$anonfun$1.apply(package.scala:62)
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:92)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:91)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2Exec.scala:64)
... 35 more
Caused by: java.lang.NoSuchMethodError: net.jpountz.lz4.LZ4BlockInputStream.<init>(Ljava/io/InputStream;Z)V
at org.apache.spark.io.LZ4CompressionCodec.compressedInputStream(CompressionCodec.scala:122)
at org.apache.spark.serializer.SerializerManager.wrapForCompression(SerializerManager.scala:163)
at org.apache.spark.serializer.SerializerManager.wrapStream(SerializerManager.scala:124)
at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$3.apply(BlockStoreShuffleReader.scala:50)
at org.apache.spark.shuffle.BlockStoreShuffleReader$$anonfun$3.apply(BlockStoreShuffleReader.scala:50)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:453)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:64)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:31)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.sql.execution.streaming.StateStoreRestoreExec$$anonfun$doExecute$1.apply(statefulOperators.scala:243)
at org.apache.spark.sql.execution.streaming.StateStoreRestoreExec$$anonfun$doExecute$1.apply(statefulOperators.scala:242)
at org.apache.spark.sql.execution.streaming.state.package$StateStoreOps$$anonfun$1.apply(package.scala:67)
at org.apache.spark.sql.execution.streaming.state.package$StateStoreOps$$anonfun$1.apply(package.scala:62)
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:92)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:91)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
StreamingQueryException Traceback (most recent call last)
<ipython-input-15-382e5f46aca0> in <module>()
2 query = df .writeStream .outputMode("update") .format("console") .start()
3
----> 4 query.awaitTermination()
/usr/spark/spark/python/pyspark/sql/streaming.py in awaitTermination(self, timeout)
101 return self._jsq.awaitTermination(int(timeout * 1000))
102 else:
--> 103 return self._jsq.awaitTermination()
104
105 @property
/usr/spark/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/usr/spark/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
73 raise ParseException(s.split(': ', 1)[1], stackTrace)
74 if s.startswith('org.apache.spark.sql.streaming.StreamingQueryException: '):
---> 75 raise StreamingQueryException(s.split(': ', 1)[1], stackTrace)
76 if s.startswith('org.apache.spark.sql.execution.QueryExecutionException: '):
77 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
StreamingQueryException: 'Writing job aborted.\n=== Streaming Query ===\nIdentifier: [id = b6a5b7f5-59c7-43ac-a6a0-8d27fb3fff6d, runId = ca394e5a-14b1-4ccb-9872-36cf7319ae75]\nCurrent Committed Offsets: {}\nCurrent Available Offsets: {KafkaV2[Subscribe[SIMULATOR.SUPERMAN.TOTO]]: {"SIMULATOR.SUPERMAN.TOTO":{"0":956487}}}\n\nCurrent State: ACTIVE\nThread State: RUNNABLE\n\nLogical Plan:\nProject [window#118]\n+- Aggregate [window#118], [window#118, max(Value#111) AS max(Value)#133]\n +- Project [State#85, Value#111, SourceTimestamp#87, Tag#88, Source#105, window#119 AS window#118]\n +- Filter isnotnull(current_timestamp())\n +- Project [named_struct(start, precisetimestampconversion(((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) as double) = (cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) THEN (CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 4000000) + 0), LongType, TimestampType), end, precisetimestampconversion((((((CASE WHEN (cast(CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) as double) = (cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) THEN (CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) + cast(1 as bigint)) ELSE CEIL((cast((precisetimestampconversion(current_timestamp(), TimestampType, LongType) - 0) as double) / cast(4000000 as double))) END + cast(0 as bigint)) - cast(1 as bigint)) * 4000000) + 0) + 4000000), LongType, TimestampType)) AS window#119, State#85, Value#111, SourceTimestamp#87, Tag#88, Source#105]\n +- Project [State#85, cast(Value#86 as double) AS Value#111, SourceTimestamp#87, Tag#88, Source#105]\n +- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, cast(Source#99L as timestamp) AS Source#105]\n +- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, cast(Source#93 as bigint) AS Source#99L]\n +- Project [State#85, Value#86, SourceTimestamp#87, Tag#88, substring(SourceTimestamp#87, 1, 10) AS Source#93]\n +- Project [data#83.State AS State#85, data#83.Value AS Value#86, data#83.SourceTimestamp AS SourceTimestamp#87, data#83.Tag AS Tag#88]\n +- Project [jsontostructs(StructField(State,StringType,true), StructField(Value,StringType,true), StructField(SourceTimestamp,StringType,true), StructField(Tag,StringType,true), cast(value#70 as string), Some(Etc/UTC)) AS data#83]\n +- StreamingExecutionRelation KafkaV2[Subscribe[SIMULATOR.SUPERMAN.TOTO]], [key#69, value#70, topic#71, partition#72, offset#73L, timestamp#74, timestampType#75]\n'
Streaming the values works fine unless I try to group by the window. Am I missing something related to memory or partitioning? It is as if the windows cannot be fully processed in memory.
Answer 0 (score: 0):
The version of the LZ4 Java library you are using does not have the constructor LZ4BlockInputStream(InputStream, boolean) that the stack trace shows Spark calling. Which version of that library do you have?
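One way to check, since the job runs in local mode (driver and executor share a classpath), is to ask the JVM through the py4j gateway which jar actually provides the class. This is an illustrative sketch using the internal _jvm handle (not a supported public API), assuming the SparkSession is still available as spark:

# Sketch: locate the jar that supplies net.jpountz.lz4.LZ4BlockInputStream
# and list its constructors, to compare against the (InputStream, boolean)
# signature that Spark's LZ4CompressionCodec expects.
jvm = spark.sparkContext._jvm
clazz = jvm.java.lang.Class.forName("net.jpountz.lz4.LZ4BlockInputStream")
print(clazz.getProtectionDomain().getCodeSource().getLocation())
for ctor in clazz.getConstructors():
    print(ctor.toString())

A frequent cause of this particular NoSuchMethodError is an older net.jpountz.lz4:lz4 jar (pulled in, for example, by an old Kafka client) sitting on the classpath ahead of the newer org.lz4:lz4-java artifact that ships with recent Spark releases; removing or replacing the older jar usually resolves it.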