在 Java Avro 中，如何将下面的 data1、data2 和 data3 解析为 GenericRecord？
//Schema
{
"type": "record", "name": "user",
"fields": [
{"name": "name", "type": "string"},
{"name": "colour", "type": "string", "default": "green"},
{"name": "mass", "type": "int", "default": 100}
]
}
//data 1
{"name":"Sean"}
//data 2
{"name":"Sean", "colour":"red"}
//data 3
{"name":"Sean", "colour":"blue", "mass":200}
我已经看过一些关于模式演化（schema evolution）等的讨论，也知道可以将写入方（writer）的模式和读取方（reader）的模式一起传递给 GenericDatumReader 和 ResolvingDecoder，但我手上只有一个模式。一般来说，我并不知道写入方实际使用的确切模式（如果它确实使用了模式的话）。
我可以通过解析模式并删除所有带默认值的字段来“推断”出一个“基础”模式。但是，如果有多个字段带默认值，那么其中某些字段可能出现在数据中、某些可能不出现，因此我无法推断出与数据完全相符的模式。
例如（完整的示例代码及其输出见下文）：
有人有什么建议吗?
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonDecoder;
import org.apache.avro.io.ResolvingDecoder;
/**
 * Reproduction case: tries three ways of decoding JSON payloads into
 * {@link GenericRecord}s when the payloads may omit fields for which the
 * schema declares a default value.
 *
 * <p>The results noted in the comments below are the ones observed by the
 * author when running this program.
 */
public class DefaultAvroTest2 {

    /** The real schema: {@code colour} and {@code mass} both declare defaults. */
    private static final String FULL_SCHEMA_JSON =
            "{"
            + " \"type\": \"record\","
            + " \"name\": \"user\","
            + " \"fields\": ["
            + " {\"name\": \"name\", \"type\": \"string\"},"
            + " {\"name\": \"colour\", \"type\": \"string\", \"default\": \"green\"},"
            + " {\"name\": \"mass\", \"type\": \"int\", \"default\": 100}"
            + " ]"
            + " }";

    /** The real schema with every defaulted field stripped out. */
    private static final String BASE_SCHEMA_JSON =
            "{"
            + " \"type\": \"record\","
            + " \"name\": \"user\","
            + " \"fields\": ["
            + " {\"name\": \"name\", \"type\": \"string\"}"
            + " ]"
            + " }";

    /** Payload carrying only the mandatory field. */
    private static final String PAYLOAD_1 = "{\"name\":\"Sean\"}";

    /** Payload carrying the mandatory field plus one defaulted field. */
    private static final String PAYLOAD_2 = "{\"name\":\"Sean\", \"colour\":\"red\"}";

    /** Payload carrying every field. */
    private static final String PAYLOAD_3 = "{\"name\":\"Sean\", \"colour\":\"blue\", \"mass\":200}";

    /**
     * Prints the payloads and both schemas, decodes the payloads with each
     * strategy, then prints every decoded record.
     *
     * @param args unused
     * @throws IOException if a decoder fails while reading a payload
     */
    public static void main(String[] args) throws IOException {
        final String[] payloads = {PAYLOAD_1, PAYLOAD_2, PAYLOAD_3};
        final int count = payloads.length;

        for (int i = 0; i < count; i++) {
            System.out.println("\nObject " + (i + 1) + " :\n" + payloads[i]);
        }

        final Schema baseSchema = new Schema.Parser().parse(BASE_SCHEMA_JSON);
        final Schema fullSchema = new Schema.Parser().parse(FULL_SCHEMA_JSON);
        System.out.println("\nProper schema :\n" + fullSchema.toString(true));
        System.out.println("\nA base schema that could be inferred from the proper schema :\n" + baseSchema.toString(true));

        final DecoderFactory factory = DecoderFactory.get();

        // Strategy 1: plain JSON decoding against the stripped-down schema.
        // Observed: payload 1 is correct; payload 2 loses colour; payload 3
        // loses both colour and mass.
        final JsonDecoder[] baseDecoders = new JsonDecoder[count];
        for (int i = 0; i < count; i++) {
            baseDecoders[i] = factory.jsonDecoder(baseSchema, payloads[i]);
        }
        final GenericRecord[] readWithBase = new GenericRecord[count];
        for (int i = 0; i < count; i++) {
            readWithBase[i] = new GenericDatumReader<GenericRecord>(baseSchema).read(null, baseDecoders[i]);
        }

        // Strategy 2: resolving decoder (stripped-down schema as writer, real
        // schema as reader) wrapped around a JSON decoder.
        // Observed: payload 1 is correct; payload 2 gets the default colour
        // (green) instead of red; payload 3 gets the default colour (green)
        // instead of blue and the default mass (100) instead of 200.
        final ResolvingDecoder[] resolvers = new ResolvingDecoder[count];
        for (int i = 0; i < count; i++) {
            resolvers[i] = factory.resolvingDecoder(
                    baseSchema, fullSchema, factory.jsonDecoder(fullSchema, payloads[i]));
        }
        final GenericRecord[] readWithResolver = new GenericRecord[count];
        for (int i = 0; i < count; i++) {
            readWithResolver[i] = new GenericDatumReader<GenericRecord>(fullSchema).read(null, resolvers[i]);
        }

        // Strategy 3: plain JSON decoding against the real schema.
        // Only payload 3 is actually read. Observed failures for the others:
        //   payload 1 -> org.apache.avro.AvroTypeException: Expected string. Got END_OBJECT
        //   payload 2 -> org.apache.avro.AvroTypeException: Expected int. Got END_OBJECT
        final JsonDecoder[] fullDecoders = new JsonDecoder[count];
        for (int i = 0; i < count; i++) {
            fullDecoders[i] = factory.jsonDecoder(fullSchema, payloads[i]);
        }
        final GenericRecord thirdReadWithFull =
                new GenericDatumReader<GenericRecord>(fullSchema).read(null, fullDecoders[2]);

        // Report phase: all decoding has finished before anything is printed.
        for (int i = 0; i < count; i++) {
            System.out.println("\nObject " + (i + 1) + " read with inferred schema:\n" + readWithBase[i]);
        }
        for (int i = 0; i < count; i++) {
            System.out.println("\nObject " + (i + 1) + " read with resolving decoder:\n" + readWithResolver[i]);
        }
        // Payloads 1 and 2 have no "defined schema" result to print (see strategy 3).
        System.out.println("\nObject 3 read with defined schema:\n" + thirdReadWithFull);
    }
}
输出:
Object 1 :
{"name":"Sean"}
Object 2 :
{"name":"Sean", "colour":"red"}
Object 3 :
{"name":"Sean", "colour":"blue", "mass":200}
Proper schema :
{
"type" : "record",
"name" : "user",
"fields" : [ {
"name" : "name",
"type" : "string"
}, {
"name" : "colour",
"type" : "string",
"default" : "green"
}, {
"name" : "mass",
"type" : "int",
"default" : 100
} ]
}
A base schema that could be inferred from the proper schema :
{
"type" : "record",
"name" : "user",
"fields" : [ {
"name" : "name",
"type" : "string"
} ]
}
Object 1 read with inferred schema:
{"name": "Sean"}
Object 2 read with inferred schema:
{"name": "Sean"}
Object 3 read with inferred schema:
{"name": "Sean"}
Object 1 read with resolving decoder:
{"name": "Sean", "colour": "green", "mass": 100}
Object 2 read with resolving decoder:
{"name": "Sean", "colour": "green", "mass": 100}
Object 3 read with resolving decoder:
{"name": "Sean", "colour": "green", "mass": 100}
Object 3 read with defined schema:
{"name": "Sean", "colour": "blue", "mass": 200}