I want to write a Dataset as JSON to a Kafka topic
I have a Dataset of objects that I convert to a Dataset&lt;String&gt;, where each string is a JSON object, and I write that to the topic. Everything was written fine until I added a field, after which an exception started appearing. I tried to hook up KryoSerializer, but couldn't get it to work.
The model (a sketch of how the Dataset is presumably constructed follows the class):
import java.io.Serializable;
import java.util.List;

public class ObjectCH implements Serializable {
private static final long serialVersionUID = 8065906683154831478L;
private Integer adtp;
private String advid;
private String app;
private String date;
private String deviceType;
private Integer dnt;
private Long duration;
private Boolean geoIsRu;
private Long inViewTime;
private Boolean isError;
private String ip;
private Integer onScreenRate;
private String os;
private String osver;
private Long sets;
private String tmsec;
private Integer tz;
private String uid;
private String ver;
private Integer version;
private Integer errorCode;
private String hostname;
private String referer;
private Boolean viewableAnalyzed;
private Boolean viewabilityMRC;
private Boolean viewabilityExtMRC;
private Boolean viewabilityDur;
private Integer clientId;
/* osr */
private int osRangeLT25;
private int osRangeLT50;
private int osRangeLT75;
private int osRangeLT100;
private int osPrc0;
private int osPrc1;
private int osPrc2;
private int osPrc3;
private int osPrc4;
private int osPrc5;
private int osPrc6;
private int osPrc7;
private int osPrc8;
private int osPrc9;
private int osPrc10;
private int osPrc11;
private int osPrc12;
private int osPrc13;
private int osPrc14;
private int osPrc15;
private int osPrc16;
private int osPrc17;
private int osPrc18;
private int osPrc19;
private int osPrc20;
private int osPrc21;
private int osPrc22;
private int osPrc23;
private int osPrc24;
private int osPrc25;
private int osPrc26;
private int osPrc27;
private int osPrc28;
private int osPrc29;
private int osPrc30;
private int osPrc31;
private int osPrc32;
private int osPrc33;
private int osPrc34;
private int osPrc35;
private int osPrc36;
private int osPrc37;
private int osPrc38;
private int osPrc39;
private int osPrc40;
private int osPrc41;
private int osPrc42;
private int osPrc43;
private int osPrc44;
private int osPrc45;
private int osPrc46;
private int osPrc47;
private int osPrc48;
private int osPrc49;
private int osPrc50;
private int osPrc51;
private int osPrc52;
private int osPrc53;
private int osPrc54;
private int osPrc55;
private int osPrc56;
private int osPrc57;
private int osPrc58;
private int osPrc59;
private int osPrc60;
private int osPrc61;
private int osPrc62;
private int osPrc63;
private int osPrc64;
private int osPrc65;
private int osPrc66;
private int osPrc67;
private int osPrc68;
private int osPrc69;
private int osPrc70;
private int osPrc71;
private int osPrc72;
private int osPrc73;
private int osPrc74;
private int osPrc75;
private int osPrc76;
private int osPrc77;
private int osPrc78;
private int osPrc79;
private int osPrc80;
private int osPrc81;
private int osPrc82;
private int osPrc83;
private int osPrc84;
private int osPrc85;
private int osPrc86;
private int osPrc87;
private int osPrc88;
private int osPrc89;
private int osPrc90;
private int osPrc91;
private int osPrc92;
private int osPrc93;
private int osPrc94;
private int osPrc95;
private int osPrc96;
private int osPrc97;
private int osPrc98;
private int osPrc99;
private int osPrc100;
/* ivt */
private int ivt0;
private int ivt1;
private int ivt2;
private int ivt3;
private int ivt4;
private int ivt5;
private int ivt6;
private int ivt7;
private int ivt8;
private int ivt9;
private int ivt10;
private int ivt11;
private int ivt12;
private int ivt13;
private int ivt14;
private int ivt15;
private int ivt16;
private int ivt17;
private int ivt18;
private int ivt19;
private int ivt20;
private int ivt21;
private int ivt22;
private int ivt23;
private int ivt24;
private int ivt25;
private int ivt26;
private int ivt27;
private int ivt28;
private int ivt29;
private int ivt30;
private int ivt31;
private int ivt32;
private int ivt33;
private int ivt34;
private int ivt35;
private int ivt36;
private int ivt37;
private int ivt38;
private int ivt39;
private int ivt40;
private int ivt41;
private int ivt42;
private int ivt43;
private int ivt44;
private int ivt45;
private int ivt46;
private int ivt47;
private int ivt48;
private int ivt49;
private int ivt50;
private int ivt51;
private int ivt52;
private int ivt53;
private int ivt54;
private int ivt55;
private int ivt56;
private int ivt57;
private int ivt58;
private int ivt59;
private int ivt60;
private int ivt65;
private int ivt70;
private int ivt75;
private int ivt80;
private int ivt85;
private int ivt90;
private int ivt95;
private int ivt100;
private int ivt105;
private int ivt110;
private int ivt115;
private int ivt120;
private Long ts;
/* va */
private Boolean va1;
private Boolean va10;
private Boolean va11;
private Boolean va20;
private Boolean va21;
private Boolean va30;
/* ve */
private Long ve0;
private Long ve1;
private Long ve2;
private Long ve3;
private Long ve4;
private Long ve5;
private Long ve6;
private List<Long> ve100;
private List<Long> ve101;
private List<Long> ve200;
private List<Long> ve201;
private List<Long> ve202;
private List<Long> ve300;
private Long ve301;
}
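For context, the Dataset being written is presumably built with a bean encoder roughly like this (a minimal sketch, not shown in the question; sparkSession, records and loadRecords() are assumed names, and Encoders.bean additionally assumes the model exposes getters/setters, e.g. via Lombok):
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

// Assumption: the Dataset<ObjectCH> comes from a bean encoder over some
// previously computed collection of ObjectCH records.
List<ObjectCH> records = loadRecords(); // hypothetical source
Dataset<ObjectCH> dataset =
        sparkSession.createDataset(records, Encoders.bean(ObjectCH.class));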
Kafka writer:
dataset
.toJSON()
.as("value")
.write()
.format("kafka")
.option("kafka.bootstrap.servers", bootstrapServers)
.option("topic", topic)
.save();
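For comparison, the same write can also be expressed without Dataset.toJSON(), building the Kafka "value" column with to_json over a struct of all columns (a sketch only, assuming the same dataset, bootstrapServers and topic variables as above):
import static org.apache.spark.sql.functions.*;

// Sketch: serialize each row to JSON via to_json instead of Dataset.toJSON().
dataset
    .select(to_json(struct(col("*"))).alias("value"))
    .write()
    .format("kafka")
    .option("kafka.bootstrap.servers", bootstrapServers)
    .option("topic", topic)
    .save();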
SparkConf:
new SparkConf()
.setMaster("local[*]")
.set("spark.executor.memory", "2G")
.set("spark.driver.memory", "2G")
.set("spark.sql.shuffle.partitions", "20")
.set("spark.files.maxPartitionBytes", "64000000")
.set("spark.kryo.registrationRequired", "true")
.set("spark.serializer", KryoSerializer.class.getCanonicalName())
.set("es.batch.size.entries", "1500")
.set("spark.kryo.registrator", "net.***.core.configuration.CustomKryoRegistrator")
CustomKryoRegistrator:
public void registerClasses(Kryo kryo) {
kryo.register(StructType[].class);
kryo.register(StructType.class);
kryo.register(StructField[].class);
kryo.register(StructField.class);
kryo.register(IntegerType$.class);
kryo.register(Metadata.class);
kryo.register(StringType$.class);
kryo.register(LongType$.class);
kryo.register(BooleanType$.class);
kryo.register(ArrayType.class);
kryo.register(BooleanWritable.class);
kryo.register(ByteWritable.class);
kryo.register(DoubleWritable.class);
kryo.register(FloatWritable.class);
kryo.register(IntWritable.class);
kryo.register(LongWritable.class);
kryo.register(NullWritable.class);
kryo.register(ArrayWritable.class);
kryo.register(Text.class);
kryo.register(CounterObject.class);
kryo.register(ViewabilityObject.class);
kryo.register(ViewabilityObjectCH.class);
kryo.register(ViewabilityAggregatedObjectCH.class);
}
Exception:
ERROR Executor: Exception in task 1.0 in stage 5.0 (TID 11)
java.lang.NegativeArraySizeException
at org.apache.spark.unsafe.types.UTF8String.getBytes(UTF8String.java:297)
at org.apache.spark.unsafe.types.UTF8String.toString(UTF8String.java:1214)
at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$org$apache$spark$sql$catalyst$json$JacksonGenerator$$makeWriter$9.apply(JacksonGenerator.scala:112)
at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$org$apache$spark$sql$catalyst$json$JacksonGenerator$$makeWriter$9.apply(JacksonGenerator.scala:111)
at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeFields(JacksonGenerator.scala:176)
at org.apache.spark.sql.catalyst.json.JacksonGenerator$$anonfun$write$1.apply$mcV$sp(JacksonGenerator.scala:228)
at org.apache.spark.sql.catalyst.json.JacksonGenerator.org$apache$spark$sql$catalyst$json$JacksonGenerator$$writeObject(JacksonGenerator.scala:165)
at org.apache.spark.sql.catalyst.json.JacksonGenerator.write(JacksonGenerator.scala:228)
at org.apache.spark.sql.Dataset$$anonfun$toJSON$1$$anon$1.next(Dataset.scala:3203)
at org.apache.spark.sql.Dataset$$anonfun$toJSON$1$$anon$1.next(Dataset.scala:3200)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
at org.apache.spark.sql.kafka010.KafkaWriteTask.execute(KafkaWriteTask.scala:45)
at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply$mcV$sp(KafkaWriter.scala:89)
at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply(KafkaWriter.scala:89)
at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1$$anonfun$apply$1.apply(KafkaWriter.scala:89)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1.apply(KafkaWriter.scala:89)
at org.apache.spark.sql.kafka010.KafkaWriter$$anonfun$write$1.apply(KafkaWriter.scala:87)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Update: a negative numBytes shows up in this method, and it's not clear where it comes from.
public byte[] getBytes() {
// avoid copy if `base` is `byte[]`
if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
&& ((byte[]) base).length == numBytes) {
return (byte[]) base;
} else {
byte[] bytes = new byte[numBytes];
copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
return bytes;
}
}
Debugger values:
this = Method threw 'java.lang.NegativeArraySizeException' exception. Cannot evaluate org.apache.spark.unsafe.types.UTF8String.toString()
numBytes = -84627042
offset = 378
((byte[]) base).length = 2424
base
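Since it is unclear where the negative numBytes comes from, one way to narrow it down is to materialize the JSON without the Kafka sink and check whether the corrupted UTF8String already appears in toJSON() itself (a diagnostic sketch; dataset is assumed to be the same Dataset as above, and the output path is hypothetical):
// Diagnostic sketch: if these also throw NegativeArraySizeException, the
// corruption is already present in the rows feeding toJSON(), not in the
// Kafka write path.
dataset.toJSON().show(20, false);
dataset.toJSON().write().text("/tmp/objectch-json-debug");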