Writing a Dataset to Kafka from Spark with KryoSerializer enabled

Date: 2019-06-20 13:26:40

Tags: java json apache-spark serialization apache-kafka

I want to write JSON records from a Dataset to a Kafka topic.

I have a Dataset of model objects that I convert into a Dataset of Strings, where each String holds one record serialized as a JSON object, and I write those strings to the topic. Everything used to be written correctly, but after I added one more field the exception below started to appear. I wanted to try switching to KryoSerializer, but I could not get it working. The model:

public class ObjectCH implements Serializable {
    private static final long serialVersionUID = 8065906683154831478L;

    private Integer adtp;
    private String advid;
    private String app;
    private String date;
    private String deviceType;
    private Integer dnt;
    private Long duration;
    private Boolean geoIsRu;
    private Long inViewTime;
    private Boolean isError;
    private String ip;
    private Integer onScreenRate;
    private String os;
    private String osver;
    private Long sets;
    private String tmsec;
    private Integer tz;
    private String uid;
    private String ver;
    private Integer version;
    private Integer errorCode;
    private String hostname;
    private String referer;
    private Boolean viewableAnalyzed;
    private Boolean viewabilityMRC;
    private Boolean viewabilityExtMRC;
    private Boolean viewabilityDur;
    private Integer clientId;

    /* osr */
    private int osRangeLT25;
    private int osRangeLT50;
    private int osRangeLT75;
    private int osRangeLT100;
    private int osPrc0;
    private int osPrc1;
    private int osPrc2;
    private int osPrc3;
    private int osPrc4;
    private int osPrc5;
    private int osPrc6;
    private int osPrc7;
    private int osPrc8;
    private int osPrc9;
    private int osPrc10;
    private int osPrc11;
    private int osPrc12;
    private int osPrc13;
    private int osPrc14;
    private int osPrc15;
    private int osPrc16;
    private int osPrc17;
    private int osPrc18;
    private int osPrc19;
    private int osPrc20;
    private int osPrc21;
    private int osPrc22;
    private int osPrc23;
    private int osPrc24;
    private int osPrc25;
    private int osPrc26;
    private int osPrc27;
    private int osPrc28;
    private int osPrc29;
    private int osPrc30;
    private int osPrc31;
    private int osPrc32;
    private int osPrc33;
    private int osPrc34;
    private int osPrc35;
    private int osPrc36;
    private int osPrc37;
    private int osPrc38;
    private int osPrc39;
    private int osPrc40;
    private int osPrc41;
    private int osPrc42;
    private int osPrc43;
    private int osPrc44;
    private int osPrc45;
    private int osPrc46;
    private int osPrc47;
    private int osPrc48;
    private int osPrc49;
    private int osPrc50;
    private int osPrc51;
    private int osPrc52;
    private int osPrc53;
    private int osPrc54;
    private int osPrc55;
    private int osPrc56;
    private int osPrc57;
    private int osPrc58;
    private int osPrc59;
    private int osPrc60;
    private int osPrc61;
    private int osPrc62;
    private int osPrc63;
    private int osPrc64;
    private int osPrc65;
    private int osPrc66;
    private int osPrc67;
    private int osPrc68;
    private int osPrc69;
    private int osPrc70;
    private int osPrc71;
    private int osPrc72;
    private int osPrc73;
    private int osPrc74;
    private int osPrc75;
    private int osPrc76;
    private int osPrc77;
    private int osPrc78;
    private int osPrc79;
    private int osPrc80;
    private int osPrc81;
    private int osPrc82;
    private int osPrc83;
    private int osPrc84;
    private int osPrc85;
    private int osPrc86;
    private int osPrc87;
    private int osPrc88;
    private int osPrc89;
    private int osPrc90;
    private int osPrc91;
    private int osPrc92;
    private int osPrc93;
    private int osPrc94;
    private int osPrc95;
    private int osPrc96;
    private int osPrc97;
    private int osPrc98;
    private int osPrc99;
    private int osPrc100;

    /* ivt */
    private int ivt0;
    private int ivt1;
    private int ivt2;
    private int ivt3;
    private int ivt4;
    private int ivt5;
    private int ivt6;
    private int ivt7;
    private int ivt8;
    private int ivt9;
    private int ivt10;
    private int ivt11;
    private int ivt12;
    private int ivt13;
    private int ivt14;
    private int ivt15;
    private int ivt16;
    private int ivt17;
    private int ivt18;
    private int ivt19;
    private int ivt20;
    private int ivt21;
    private int ivt22;
    private int ivt23;
    private int ivt24;
    private int ivt25;
    private int ivt26;
    private int ivt27;
    private int ivt28;
    private int ivt29;
    private int ivt30;
    private int ivt31;
    private int ivt32;
    private int ivt33;
    private int ivt34;
    private int ivt35;
    private int ivt36;
    private int ivt37;
    private int ivt38;
    private int ivt39;
    private int ivt40;
    private int ivt41;
    private int ivt42;
    private int ivt43;
    private int ivt44;
    private int ivt45;
    private int ivt46;
    private int ivt47;
    private int ivt48;
    private int ivt49;
    private int ivt50;
    private int ivt51;
    private int ivt52;
    private int ivt53;
    private int ivt54;
    private int ivt55;
    private int ivt56;
    private int ivt57;
    private int ivt58;
    private int ivt59;
    private int ivt60;
    private int ivt65;
    private int ivt70;
    private int ivt75;
    private int ivt80;
    private int ivt85;
    private int ivt90;
    private int ivt95;
    private int ivt100;
    private int ivt105;
    private int ivt110;
    private int ivt115;
    private int ivt120;
    private Long ts;

    /* va */
    private Boolean va1;
    private Boolean va10;
    private Boolean va11;
    private Boolean va20;
    private Boolean va21;
    private Boolean va30;

    /* ve */
    private Long ve0;
    private Long ve1;
    private Long ve2;
    private Long ve3;
    private Long ve4;
    private Long ve5;
    private Long ve6;
    private List<Long> ve100;
    private List<Long> ve101;
    private List<Long> ve200;
    private List<Long> ve201;
    private List<Long> ve202;
    private List<Long> ve300;
    private Long ve301;
}
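
For context, a minimal sketch of how a typed Dataset of this bean is usually obtained (my assumption, the post does not show this step; spark is the SparkSession and inputPath is a hypothetical source):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

Dataset<ObjectCH> dataset = spark
        .read()
        .json(inputPath)                      // hypothetical input; could be any source
        .as(Encoders.bean(ObjectCH.class));   // the bean encoder derives the schema that toJSON() later uses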

writerKafka:

dataset
          .toJSON()
          .as("value")
          .write()
          .format("kafka")
          .option("kafka.bootstrap.servers", bootstrapServers)
          .option("topic", topic)
          .save();
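
As a point of comparison (my sketch, not the asker's code): the Kafka sink only needs a string or binary column named value, and toJSON() already names its single column value, so .as("value") here only aliases the dataset. An equivalent way to build the column explicitly is:

dataset
          .selectExpr("to_json(struct(*)) AS value")   // roughly the same payload as toJSON(), as an explicit column
          .write()
          .format("kafka")
          .option("kafka.bootstrap.servers", bootstrapServers)
          .option("topic", topic)
          .save();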

SparkConf:

new SparkConf()
                .setMaster("local[*]")
                .set("spark.executor.memory", "2G")
                .set("spark.driver.memory", "2G")
                .set("spark.sql.shuffle.partitions", "20")
                .set("spark.files.maxPartitionBytes", "64000000")
                .set("spark.kryo.registrationRequired", "true")
                .set("spark.serializer", KryoSerializer.class.getCanonicalName())
                .set("es.batch.size.entries", "1500")
                .set("spark.kryo.registrator", "net.***.core.configuration.CustomKryoRegistrator")

CustomKryoRegistrator:

public void registerClasses(Kryo kryo) {

        kryo.register(StructType[].class);
        kryo.register(StructType.class);
        kryo.register(StructField[].class);
        kryo.register(StructField.class);
        kryo.register(IntegerType$.class);
        kryo.register(Metadata.class);
        kryo.register(StringType$.class);
        kryo.register(LongType$.class);
        kryo.register(BooleanType$.class);
        kryo.register(ArrayType.class);
        kryo.register(BooleanWritable.class);
        kryo.register(ByteWritable.class);
        kryo.register(DoubleWritable.class);
        kryo.register(FloatWritable.class);
        kryo.register(IntWritable.class);
        kryo.register(LongWritable.class);
        kryo.register(NullWritable.class);
        kryo.register(ArrayWritable.class);
        kryo.register(Text.class);
        kryo.register(CounterObject.class);
        kryo.register(ViewabilityObject.class);
        kryo.register(ViewabilityObjectCH.class);
        kryo.register(ViewabilityAggregatedObjectCH.class);
    }
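
With spark.kryo.registrationRequired set to "true", every class that Kryo actually serializes has to appear in this list, so after adding a field I would also make sure the model bean itself is registered. A minimal sketch, assuming the class names used above:

kryo.register(ObjectCH.class);            // the model bean shown above
kryo.register(java.util.ArrayList.class); // concrete type behind the List<Long> fields, if Kryo ever sees it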

Exception:

ERROR Executor: Exception in task 1.0 in stage 5.0 (TID 11)
java.lang.NegativeArraySizeException
    at org.apache.spark.unsafe.types.UTF8String.getBytes(UTF8String.java:297)
    at org.apache.spark.unsafe.types.UTF8String.toString(UTF8String.java:1214)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$org$apache$spark$sql$catalyst$json$JacksonGenerator$$makeWriter$9.apply(JacksonGenerator.scala:112)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$org$apache$spark$sql$catalyst$json$JacksonGenerator$$makeWriter$9.apply(JacksonGenerator.scala:111)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:176)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:228)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:165)
    at org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:228)
    at org.apache.spark.sql.Dataset$$anonfun$toJSON$1$$anon$1.next(Dataset.scala:3203)
    at org.apache.spark.sql.Dataset$$anonfun$toJSON$1$$anon$1.next(Dataset.scala:3200)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
    at org.apache.spark.sql.kafka010.KafkaWriteTask.execute(KafkaWriteTask.scala:45)
    at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply$mcV$sp(KafkaWriter.scala:89)
    at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply(KafkaWriter.scala:89)
    at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply(KafkaWriter.scala:89)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1.apply(KafkaWriter.scala:89)
    at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1.apply(KafkaWriter.scala:87)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Update: the negative numBytes shows up in this method, and it is not clear where it comes from.

public byte[] getBytes() {
    // avoid copy if `base` is `byte[]`
    if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
      && ((byte[]) base).length == numBytes) {
      return (byte[]) base;
    } else {
      byte[] bytes = new byte[numBytes];
      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
      return bytes;
    }
  }

Debugger output:

this = Method threw 'java.lang.NegativeArraySizeException' exception. Cannot evaluate org.apache.spark.unsafe.types.UTF8String.toString()
numBytes = -84627042
offset = 378
((byte[]) base).length = 2424
base
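
For what it's worth, the exception itself follows directly from that value: numBytes is the byte length recorded for the string field, and allocating an array with the negative length seen in the debugger reproduces the error in isolation (a two-line illustration, not from the post):

int numBytes = -84627042;          // the value observed in the debugger above
byte[] bytes = new byte[numBytes]; // throws java.lang.NegativeArraySizeException, as in the stack trace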
