Question

我有一个像这样的Avro架构 -

{
   "type":"record",
   "name":"new_user",
   "namespace":"com.hello",
   "fields":[
      {
         "name":"user_id",
         "type":[
            "long",
            "null"
         ]
      },
      {
         "name":"segment",
         "type":[
            "string",
            "null"
         ]
      }
   ]
}

我正在使用上面的Avro架构来序列化数据，得到了一个字节数组，这部分工作正常 -

/**
 * Parses the {@code new_user} schema and serializes one sample record with it.
 * Producing a second byte array with a different {@code user_id} is still open.
 *
 * @param args unused
 * @throws IOException if serializing the sample record fails
 */
public static void main(String[] args) throws IOException {
    String schemaJson =
            "{ \"type\":\"record\", \"name\":\"new_user\", \"namespace\":\"com.hello\", \"fields\":[ { \"name\":\"user_id\", \"type\":[ \"long\", \"null\" ] }, { \"name\":\"segment\", \"type\":[ \"string\", \"null\" ] } ] }";
    Schema schema = new Parser().parse(schemaJson);

    byte[] originalAvrodata = getAvroBinaryData(schema);

    // TODO: how to get a newAvroData byte array in which user_id
    // is changed to some other random long number?
}

/**
 * Builds a sample record (user_id = 123456, segment = "hello") for the given
 * schema and returns its Avro binary encoding.
 *
 * @param schema schema used both to create the record and to write it
 * @return the bytes written by the binary encoder
 * @throws IOException if writing or flushing the encoder fails
 */
private static byte[] getAvroBinaryData(Schema schema) throws IOException {
    GenericRecord sample = new GenericData.Record(schema);
    sample.put("user_id", 123456L);
    sample.put("segment", "hello");

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    Encoder encoder = EncoderFactory.get().binaryEncoder(sink, null);

    new GenericDatumWriter<GenericRecord>(schema).write(sample, encoder);
    // Flush before snapshotting so pending encoder output reaches the stream.
    encoder.flush();

    return sink.toByteArray();
}

问题陈述：

我需要解码originalAvrodata字节数组，将user_id字段的值更改为另一个long类型的数字，然后使用相同的模式构造newAvroData字节数组，其中user_id字段的值应为某个随机的long数字。使用Avro有可能做到这一点吗？

Answer 1

当然，这里有一些带注释的代码，可以帮助您入门：

/**
 * Round-trips a sample record: serialize it, decode the bytes back into a
 * record, bump its {@code user_id} by one, and serialize the result again.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, JSONException {
        String schemaJson =
                "{ \"type\":\"record\", \"name\":\"new_user\", \"namespace\":\"com.hello\", \"fields\":[ { \"name\":\"user_id\", \"type\":[ \"long\", \"null\" ] }, { \"name\":\"segment\", \"type\":[ \"string\", \"null\" ] } ] }";
        Schema schema = new Schema.Parser().parse(schemaJson);

        // Build the example record.
        GenericRecord original = new GenericData.Record(schema);
        original.put("user_id", 123456L);
        original.put("segment", "hello");

        // Serialize it, then decode the bytes back into a record.
        byte[] originalBytes = getAvroBinaryData(schema, original);
        GenericRecord decoded = readRecord(schema, originalBytes);

        // Increment the user_id field on the decoded record.
        Long previousId = (Long) decoded.get("user_id");
        decoded.put("user_id", previousId + 1);

        // Prints 123457 for the user_id.
        System.out.println(decoded);

        // Serialize the updated record.
        byte[] updatedRecordData = getAvroBinaryData(schema, decoded);

        // do something with updatedRecordData
    }

    /**
     * Decodes one Avro binary-encoded record from a byte array.
     *
     * @param schema           schema handed to the datum reader
     * @param originalAvrodata Avro binary encoding of a single record
     * @return the decoded record
     * @throws IOException if decoding fails, including an {@code EOFException}
     *                     when the data ends before the record is complete
     */
    private static GenericRecord readRecord(Schema schema, byte[] originalAvrodata) throws IOException {
        Decoder decoder = DecoderFactory.get().binaryDecoder(originalAvrodata, null);
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);

        // Let EOFException propagate (it is an IOException) instead of printing
        // it and returning null, which only deferred the failure to a
        // NullPointerException in the caller.
        return reader.read(null, decoder);
    }

    /**
     * Serializes the supplied record to Avro binary form. Unlike a variant
     * that builds its own sample, the record to write is passed in.
     *
     * @param schema schema handed to the datum writer
     * @param record record to serialize
     * @return the bytes written by the binary encoder
     * @throws IOException if writing or flushing the encoder fails
     */
    private static byte[] getAvroBinaryData(Schema schema, GenericRecord record) throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(sink, null);

        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        // Flush before snapshotting so pending encoder output reaches the stream.
        encoder.flush();

        return sink.toByteArray();
    }

解码原始字节数组后如何更改特定字段值？

1 个答案: