I have a Spark application launched via spark-submit. A proxy user is set and it is running in YARN mode. When the application starts, the Hue UI correctly shows the job running as the proxy user. On HDFS, the .sparkStaging folder (and all of its contents) is created under /user/proxy-user/.sparkStaging, owned by the proxy user - so far so good.
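The submission itself is roughly the following - a sketch, with the jar, main class, and user name as placeholders (deploy mode is incidental here):

spark-submit \
  --master yarn \
  --proxy-user proxy-user \
  --class com.example.ResultWriter \
  result-writer.jar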
However, once the application code executes and tries to create a resource under /user/proxy-user/result (i.e. a folder named result), it is created with the superuser as owner instead of the proxy user. This causes subsequent failures, because the proxy user now lacks the proper access rights to the result folder and its children.
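The write itself is unremarkable - roughly the sketch below (object name, data, and exact output path are simplified; the TextOutputFormat / saveAsHadoopDataset frames in the trace further down correspond to a saveAsTextFile-style write like this):

import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch of the failing write; names and data are placeholders.
object ResultWriter {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ResultWriter"))
    val rdd = sc.parallelize(Seq("a", "b", "c"))
    // TextOutputFormat first creates /user/proxy-user/result/_temporary/...;
    // those directories come out owned by the superuser, not the proxy user.
    rdd.saveAsTextFile("/user/proxy-user/result")
    sc.stop()
  }
}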
This is running on Cloudera 5.7 with Kerberos. I can provide more details if needed - I just don't want to spin a long yarn of irrelevant details (no pun intended).
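Impersonation itself is evidently allowed - the staging upload works - so the usual proxy-user entries should already be present in core-site.xml; for reference they look like this ("superuser" stands for the short name of the submitting principal, values simplified):

<property>
  <name>hadoop.proxyuser.superuser.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.superuser.groups</name>
  <value>*</value>
</property>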
The proxy user then appears unable to write into a folder created by a different owner. From the logs:
WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 2, clustername.com, executor 2): org.apache.hadoop.security.AccessControlException: Permission denied: user=anonymous, access=WRITE, inode="/user/anonymous/result345/_temporary/0":superuser:supergroup:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.DefaultAuthorizationProvider.checkFsPermission(DefaultAuthorizationProvider.java:281)
at org.apache.hadoop.hdfs.server.namenode.DefaultAuthorizationProvider.check(DefaultAuthorizationProvider.java:262)
at org.apache.hadoop.hdfs.server.namenode.DefaultAuthorizationProvider.check(DefaultAuthorizationProvider.java:242)
at org.apache.hadoop.hdfs.server.namenode.DefaultAuthorizationProvider.checkPermission(DefaultAuthorizationProvider.java:169)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:152)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6590)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6572)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkAncestorAccess(FSNamesystem.java:6524)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2758)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2676)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2561)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:593)
at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:111)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:393)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2086)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2082)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2080)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1653)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1689)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1624)
at org.apache.hadoop.hdfs.DistributedFileSystem$7.doCall(DistributedFileSystem.java:448)
at org.apache.hadoop.hdfs.DistributedFileSystem$7.doCall(DistributedFileSystem.java:444)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:459)
at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:387)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:90)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
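Listing the parent directory shows the same mismatch; a sketch of the check (output columns abbreviated, owners and mode as in the exception above):

hdfs dfs -ls /user/anonymous
# ...         - anonymous  supergroup  ...  /user/anonymous/.sparkStaging
# drwxr-xr-x  - superuser  supergroup  ...  /user/anonymous/result345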