我有一些 Parquet 文件,每个 Parquet 文件大约有 300 万行和 6000 列。我试图对每个 DataFrame 运行 summary(),如下所示:
data = spark.read.parquet('/HDFS/path/to/parquet/file/test.parquet')
stats = data.summary()
stats_pd = stats.toPandas()
但是,Spark 抛出了如下异常:
Caused by: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "apply(Ljava/lang/Object;)Ljava/lang/Object;" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection" grows beyond 64 KB
at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:361)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1417)
... 34 more
Caused by: org.codehaus.janino.InternalCompilerException: Code of method "apply(Ljava/lang/Object;)Ljava/lang/Object;" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection" grows beyond 64 KB
at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:990)
at org.codehaus.janino.CodeContext.write(CodeContext.java:867)
at org.codehaus.janino.UnitCompiler.writeOpcode(UnitCompiler.java:11901)
at org.codehaus.janino.UnitCompiler.load(UnitCompiler.java:11542)
at org.codehaus.janino.UnitCompiler.load(UnitCompiler.java:11536)
at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4139)
at org.codehaus.janino.UnitCompiler.access$7200(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$12$1.visitLocalVariableAccess(UnitCompiler.java:4082)
at org.codehaus.janino.UnitCompiler$12$1.visitLocalVariableAccess(UnitCompiler.java:4074)
at org.codehaus.janino.Java$LocalVariableAccess.accept(Java.java:4103)
at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4074)
at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4070)
at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4135)
at org.codehaus.janino.UnitCompiler.access$6700(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$12$1.visitAmbiguousName(UnitCompiler.java:4077)
at org.codehaus.janino.UnitCompiler$12$1.visitAmbiguousName(UnitCompiler.java:4074)
at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4053)
at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4074)
at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4070)
at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5253)
at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4842)
at org.codehaus.janino.UnitCompiler.access$8300(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:4097)
at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:4070)
at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:4902)
at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5253)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:3468)
at org.codehaus.janino.UnitCompiler.access$5100(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$9.visitMethodInvocation(UnitCompiler.java:3447)
at org.codehaus.janino.UnitCompiler$9.visitMethodInvocation(UnitCompiler.java:3419)
at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:4902)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3419)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2339)
at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1473)
at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1466)
at org.codehaus.janino.Java$ExpressionStatement.accept(Java.java:2851)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1466)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1546)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3075)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1285)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:825)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:411)
at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:212)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:390)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:385)
at org.codehaus.janino.Java$PackageMemberClassDeclaration.accept(Java.java:1405)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:357)
... 41 more
有人能解释一下出现这个问题的原因吗?