Java Spark cannot execute df.show()

Date: 2019-01-10 17:34:01

Tags: java scala apache-spark pyspark

I am trying to read a simple data source in Java/Scala Spark. It works fine in PySpark, but when I rewrite the code in Java or Scala I get this error:

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 10582
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:241)
at scala.collection.Iterator.foreach(Iterator.scala:944)
at scala.collection.Iterator.foreach$(Iterator.scala:944)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1432)
at scala.collection.IterableLike.foreach(IterableLike.scala:71)
at scala.collection.IterableLike.foreach$(IterableLike.scala:70)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:241)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:238)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:32)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:29)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:194)
at scala.collection.TraversableLike.map(TraversableLike.scala:234)
at scala.collection.TraversableLike.map$(TraversableLike.scala:227)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:194)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:241)
at scala.collection.immutable.List.foreach(List.scala:389)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:241)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:238)
at scala.collection.immutable.List.flatMap(List.scala:352)
at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:22)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:30)
at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:78)
at com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:467)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:351)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:283)
at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueMethod(POJOPropertiesCollector.java:169)
at com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueMethod(BasicBeanDescription.java:223)
at com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:348)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:210)
at com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:153)
at com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1203)
at com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1157)
at com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:481)
at com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:679)
at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:107)
at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3559)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2927)
at org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:142)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:339)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3384)
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2545)
at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3365)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3365)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2545)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2759)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:255)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:292)
at org.apache.spark.sql.Dataset.show(Dataset.scala:746)
at org.apache.spark.sql.Dataset.show(Dataset.scala:705)
at org.apache.spark.sql.Dataset.show(Dataset.scala:714)
at com.deere.pops.dataproduct.etl.ETLApplication$.main(ETLApplication.scala:23)
at com.deere.pops.dataproduct.etl.ETLApplication.main(ETLApplication.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)

My PySpark code:

from pyspark.sql import SparkSession


spark = SparkSession\
    .builder\
    .appName("Python Spark")\
    .getOrCreate()
csv = spark.read.format('csv')\
    .option("header", "true")\
    .option("inferSchema", "true")\
    .option("delimiter", "\t")\
    .load('./demo.csv')

csv.printSchema()
csv.show(1)

spark.stop()

And my Java code:

import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class Application {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Java Spark").setMaster("local[*]");

        SparkSession spark = SparkSession.builder().config(conf).getOrCreate();

        // Read the same tab-delimited file as the PySpark version
        Dataset<Row> csv = spark.read().format("csv")
            .option("header", "true")
            .option("inferSchema", "true")
            .option("delimiter", "\t")
            .load("./demo.csv");

        csv.printSchema();
        csv.show(); // throws the ArrayIndexOutOfBoundsException above

        spark.stop();
    }
}
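
My Scala version is essentially the same; here is a minimal sketch of the equivalent code (my real app is the ETLApplication in the stack trace):

import org.apache.spark.sql.SparkSession

object Application {

    def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
            .appName("Scala Spark")
            .master("local[*]")
            .getOrCreate()

        // Read the same tab-delimited file as the other versions
        val csv = spark.read
            .option("header", "true")
            .option("inferSchema", "true")
            .option("delimiter", "\t")
            .csv("./demo.csv")

        csv.printSchema()
        csv.show() // fails here with the ArrayIndexOutOfBoundsException above

        spark.stop()
    }
}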

UPDATE:

I was able to get this working through spark-submit, but I cannot run it from the IDE or with java -jar.

I am using IntelliJ as my IDE.
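
For reference, the spark-submit invocation that works looks roughly like this (the jar path below is a placeholder for my actual build output):

spark-submit \
    --class com.deere.pops.dataproduct.etl.ETLApplication \
    --master "local[*]" \
    target/etl-application.jar   # placeholder jar path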

1 Answer:

Answer 0 (score: -1)

I think the problem is with com.thoughtworks.paranamer: paranamer 2.7 and older cannot read bytecode produced by newer compilers, which is exactly what the ArrayIndexOutOfBoundsException inside BytecodeReadingParanamer points to, and an old version can end up on your classpath transitively. If your project uses a Maven build, add the following dependency to your pom.xml and test again:

<dependency>
    <groupId>com.thoughtworks.paranamer</groupId>
    <artifactId>paranamer</artifactId>
    <version>2.8</version>
</dependency>
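
This works because Maven's nearest-wins resolution lets a direct paranamer 2.8 dependency take precedence over the older 2.7 that arrives transitively through Spark's dependency tree.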

Or, if you are using a different build tool, see here.
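
For instance, with sbt the equivalent pin would look something like this (a sketch; adjust the settings to your build):

// Add paranamer 2.8 directly so it wins over the transitive 2.7
libraryDependencies += "com.thoughtworks.paranamer" % "paranamer" % "2.8"

// Or force the transitive version without adding a direct dependency
dependencyOverrides += "com.thoughtworks.paranamer" % "paranamer" % "2.8"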