我正在尝试将消息从Spark(版本2.4.4)发布到PubSub。我在Spark中创建一个DataFrame,然后尝试将其中的所有记录发送到PubSub。记录不会很多,所以我决定采用最简单的做法:将记录逐条发送。
DataFrame上的操作很简单,我只需将每一行转换为一个JSON字符串,然后将其发送。
// Map every row to a record (the surrounding text says: a JSON string), then hand
// each record to producer.send one at a time.
dataFrame.map(row => {
// Placeholder: the row-to-JSON-string conversion is elided in this snippet.
}).foreach(record => producer.send(record))
生产者是一个自定义类,其中有一个发送方法:
/**
 * Publishes a single message using a freshly built publisher, then shuts that
 * publisher down.
 *
 * The call blocks until the publish future has completed, so a failed publish
 * surfaces as an exception from this method instead of being silently dropped,
 * and shutdown never starts while the send is still in flight.
 *
 * @param message the payload to publish, sent as UTF-8 bytes by `publish`
 */
def send(message: String): Unit = {
  val publisher = buildProducer(project, topic, credentialsPath)
  try {
    // Wait for the outcome before tearing the publisher down; get() rethrows a
    // publish failure (wrapped in an ExecutionException).
    // NOTE(review): assumes the Future returned by publish is
    // java.util.concurrent.Future (ApiFuture extends it) - confirm the import.
    publish(message, publisher).get()
    ()
  } finally {
    // Always release the publisher, even when publishing failed.
    flushAndExit(publisher)
  }
}
buildProducer 负责创建一个 Publisher;接下来的两个重要方法如下所示:
/**
 * Wraps the given text in a PubsubMessage and hands it to the publisher.
 *
 * @param message   text to send; encoded as UTF-8 bytes for the message data
 * @param publisher publisher the message is submitted to
 * @return the future returned by `publisher.publish` for this message
 */
private def publish(message: String, publisher: Publisher): Future[String] =
  publisher.publish(
    PubsubMessage.newBuilder
      .setData(ByteString.copyFromUtf8(message))
      .build
  )
/**
 * Flushes and shuts down the given publisher.
 *
 * Calls, in this order: `publishAllOutstanding()`, `shutdown()`, then
 * `awaitTermination` with a 10-second limit. The result of `awaitTermination`
 * is not inspected.
 *
 * @param publisher the publisher to flush and shut down
 */
private def flushAndExit(publisher: Publisher): Unit = {
  // Upper bound, in seconds, passed to awaitTermination.
  val terminationTimeoutSeconds = 10
  publisher.publishAllOutstanding()
  publisher.shutdown()
  publisher.awaitTermination(terminationTimeoutSeconds, TimeUnit.SECONDS)
  ()
}
每个执行器(executor)都会创建自己的生产者,该生产者只用于发送一条记录;这是先让功能跑通、之后再做优化的最简单做法。序列化错误已经解决,并且在我的本地计算机上,以Spark本地模式通过spark-submit(使用相同的参数和相同的Spark版本)运行时,代码可以正常工作。
但是,一旦我在另一个环境中(一台配置更好的机器,同样是Spark本地模式)通过spark-submit运行同一个jar,得到的却是下面的错误:
2019-12-06T13:33:35.255 ERROR [ManagedChannelImpl.java(uncaughtException:148)] [grpc-default-worker-ELG-3-2] [Channel<5>: (pubsub.googleapis.com:443)] Uncaught exception in the SynchronizationContext. Panic!
java.util.concurrent.RejectedExecutionException: Task java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask@5cb54590 rejected from java.util.concurrent.ScheduledThreadPoolExecutor@4890ecdf[Terminated, pool size = 0, active threads = 0, queued tasks = 0, completed tasks = 10]
at java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2063)
at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:830)
at java.util.concurrent.ScheduledThreadPoolExecutor.delayedExecute(ScheduledThreadPoolExecutor.java:326)
at java.util.concurrent.ScheduledThreadPoolExecutor.schedule(ScheduledThreadPoolExecutor.java:533)
at java.util.concurrent.ScheduledThreadPoolExecutor.execute(ScheduledThreadPoolExecutor.java:622)
at io.grpc.internal.DelayedClientTransport.reprocess(DelayedClientTransport.java:297)
at io.grpc.internal.ManagedChannelImpl.updateSubchannelPicker(ManagedChannelImpl.java:790)
at io.grpc.internal.ManagedChannelImpl.access$5200(ManagedChannelImpl.java:102)
at io.grpc.internal.ManagedChannelImpl$LbHelperImpl$1UpdateBalancingState.run(ManagedChannelImpl.java:1149)
at io.grpc.SynchronizationContext.drain(SynchronizationContext.java:88)
at io.grpc.internal.InternalSubchannel$TransportListener.transportReady(InternalSubchannel.java:579)
at io.grpc.netty.shaded.io.grpc.netty.ClientTransportLifecycleManager.notifyReady(ClientTransportLifecycleManager.java:43)
at io.grpc.netty.shaded.io.grpc.netty.NettyClientHandler$FrameListener.onSettingsRead(NettyClientHandler.java:783)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2ConnectionDecoder$FrameReadListener.onSettingsRead(DefaultHttp2ConnectionDecoder.java:419)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2ConnectionDecoder$PrefaceFrameListener.onSettingsRead(DefaultHttp2ConnectionDecoder.java:637)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2InboundFrameLogger$1.onSettingsRead(Http2InboundFrameLogger.java:93)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2FrameReader.readSettingsFrame(DefaultHttp2FrameReader.java:542)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2FrameReader.processPayloadState(DefaultHttp2FrameReader.java:263)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2FrameReader.readFrame(DefaultHttp2FrameReader.java:160)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2InboundFrameLogger.readFrame(Http2InboundFrameLogger.java:41)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.DefaultHttp2ConnectionDecoder.decodeFrame(DefaultHttp2ConnectionDecoder.java:118)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2ConnectionHandler$FrameDecoder.decode(Http2ConnectionHandler.java:390)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2ConnectionHandler$PrefaceDecoder.decode(Http2ConnectionHandler.java:254)
at io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2ConnectionHandler.decode(Http2ConnectionHandler.java:450)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:502)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:441)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:278)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:359)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:345)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:337)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.unwrap(SslHandler.java:1476)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.decodeJdkCompatible(SslHandler.java:1225)
at io.grpc.netty.shaded.io.netty.handler.ssl.SslHandler.decode(SslHandler.java:1272)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:502)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:441)
at io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:278)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:359)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:345)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:337)
at io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1408)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:359)
at io.grpc.netty.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:345)
at io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:930)
at io.grpc.netty.shaded.io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:796)
at io.grpc.netty.shaded.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:427)
at io.grpc.netty.shaded.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:328)
at io.grpc.netty.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:905)
at io.grpc.netty.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
根本没有发送任何消息。我不知道问题出在哪里。