在我的Spark Streaming应用程序中看到此错误堆栈跟踪
18/08/01 20:33:45 INFO RetryInvocationHandler: Exception while invoking ClientNamenodeProtocolTranslatorPB.renewLease over master2.hadoop.example.com/192.168.1.11:8020 after 9 failover attempts. Trying to failover immediately.
java.io.IOException: Failed on local exception: java.io.IOException: Couldn't setup connection for argus@HADOOP.example.COM to master2.hadoop.example.com/192.168.1.11:8020; Host Details : local host is: "slave2.hadoop.example.com/192.168.1.13"; destination host is: "master2.hadoop.example.com":8020;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:782)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1556)
at org.apache.hadoop.ipc.Client.call(Client.java:1496)
at org.apache.hadoop.ipc.Client.call(Client.java:1396)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at com.sun.proxy.$Proxy12.renewLease(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.renewLease(ClientNamenodeProtocolTranslatorPB.java:635)
at sun.reflect.GeneratedMethodAccessor26.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:278)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:194)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:176)
at com.sun.proxy.$Proxy13.renewLease(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.renewLease(DFSClient.java:902)
at org.apache.hadoop.hdfs.LeaseRenewer.renew(LeaseRenewer.java:423)
at org.apache.hadoop.hdfs.LeaseRenewer.run(LeaseRenewer.java:448)
at org.apache.hadoop.hdfs.LeaseRenewer.access$700(LeaseRenewer.java:71)
at org.apache.hadoop.hdfs.LeaseRenewer$1.run(LeaseRenewer.java:304)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: Couldn't setup connection for argus@HADOOP.example.COM to master2.hadoop.example.com/192.168.1.11:8020
at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:712)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
at org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:683)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:770)
at org.apache.hadoop.ipc.Client$Connection.access$3200(Client.java:397)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1618)
at org.apache.hadoop.ipc.Client.call(Client.java:1449)
... 17 more
Caused by: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]
at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:211)
at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:413)
at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:595)
at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:397)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:762)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:758)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:757)
... 20 more
Caused by: GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)
at sun.security.jgss.krb5.Krb5InitCredential.getInstance(Krb5InitCredential.java:147)
at sun.security.jgss.krb5.Krb5MechFactory.getCredentialElement(Krb5MechFactory.java:122)
at sun.security.jgss.krb5.Krb5MechFactory.getMechanismContext(Krb5MechFactory.java:187)
at sun.security.jgss.GSSManagerImpl.getMechanismContext(GSSManagerImpl.java:224)
at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:212)
at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:179)
at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:192)
... 29 more
从堆栈跟踪看来,代码中的某些内容正在尝试续订hdfs kerberos令牌租约?尽管在我的应用程序中我没有使用hdfs。
我的spark应用程序在hortonworks平台上的纱线上运行。上面的异常似乎没有使我的应用程序崩溃,它仍在运行,但是我经常看到此异常。
我也经常看到这句话
WARN UserGroupInformation: Not attempting to re-login since the last re-login was attempted less than 600 seconds before.
同样从堆栈跟踪中,似乎没有任何一个来自spark框架。
请帮助!