在没有第三方插件或库的情况下将Avro消息从Kafka写入HDFS

时间:2018-01-28 15:07:48

标签: java hadoop apache-kafka hdfs

我已经生成了一个Apache Avro类,用于通过客户端应用程序以及服务器端代码生成和使用Avro消息。

我一直在网上搜索如何实现以下几个用例:

  • 将Avro消息作为二进制数据写入HDFS。
  • 使用生成的Avro类将二进制字节数组转换回消息,以字符串形式获取该消息并将其写入HDFS。

在没有任何第三方库(如Confluent或Twitter Bijection API)的情况下,是否还有其他方法可以实现上述用例?

顺便说一下,我想将所有Avro消息写入单个HDFS文件。

Avro Generated类:

/**
 * Autogenerated by Avro
 *
 * DO NOT EDIT DIRECTLY
 */
package com.avrotohdfs.classes.avro;

import org.apache.avro.message.BinaryMessageDecoder;
import org.apache.avro.message.BinaryMessageEncoder;
import org.apache.avro.message.SchemaStore;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.specific.SpecificRecordBase;

@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public class AvroSyslogMessage extends SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
  private static final long serialVersionUID = -793689732516755717L;
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"AvroSyslogMessage\",\"namespace\":\"com.cisco.sso.ssodata.avro\",\"fields\":[{\"name\":\"partyID\",\"type\":\"string\"},{\"name\":\"partyName\",\"type\":[\"string\",\"null\"]},{\"name\":\"applianceID\",\"type\":\"string\"},{\"name\":\"message\",\"type\":\"string\"},{\"name\":\"inventoryName\",\"type\":[\"string\",\"null\"]},{\"name\":\"senttime\",\"type\":[\"string\",\"null\"]}]}");
  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }

  private static SpecificData MODEL$ = new SpecificData();

  private static final BinaryMessageEncoder<AvroSyslogMessage> ENCODER =
      new BinaryMessageEncoder<AvroSyslogMessage>(MODEL$, SCHEMA$);

  private static final BinaryMessageDecoder<AvroSyslogMessage> DECODER =
      new BinaryMessageDecoder<AvroSyslogMessage>(MODEL$, SCHEMA$);

  /**
   * Return the BinaryMessageDecoder instance used by this class.
   */
  public static BinaryMessageDecoder<AvroSyslogMessage> getDecoder() {
    return DECODER;
  }

  /**
   * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
   * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
   */
  public static BinaryMessageDecoder<AvroSyslogMessage> createDecoder(SchemaStore resolver) {
    return new BinaryMessageDecoder<AvroSyslogMessage>(MODEL$, SCHEMA$, resolver);
  }

  /** Serializes this AvroSyslogMessage to a ByteBuffer. */
  public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
    return ENCODER.encode(this);
  }

  /** Deserializes a AvroSyslogMessage from a ByteBuffer. */
  public static AvroSyslogMessage fromByteBuffer(
      java.nio.ByteBuffer b) throws java.io.IOException {
    return DECODER.decode(b);
  }

  @Deprecated public java.lang.CharSequence partyID;
  @Deprecated public java.lang.CharSequence partyName;
  @Deprecated public java.lang.CharSequence applianceID;
  @Deprecated public java.lang.CharSequence message;
  @Deprecated public java.lang.CharSequence inventoryName;
  @Deprecated public java.lang.CharSequence senttime;

  /**
   * Default constructor.  Note that this does not initialize fields
   * to their default values from the schema.  If that is desired then
   * one should use <code>newBuilder()</code>.
   */
  public AvroSyslogMessage() {}

  /**
   * All-args constructor.
   * @param partyID The new value for partyID
   * @param partyName The new value for partyName
   * @param applianceID The new value for applianceID
   * @param message The new value for message
   * @param inventoryName The new value for inventoryName
   * @param senttime The new value for senttime
   */
  public AvroSyslogMessage(java.lang.CharSequence partyID, java.lang.CharSequence partyName, java.lang.CharSequence applianceID, java.lang.CharSequence message, java.lang.CharSequence inventoryName, java.lang.CharSequence senttime) {
    this.partyID = partyID;
    this.partyName = partyName;
    this.applianceID = applianceID;
    this.message = message;
    this.inventoryName = inventoryName;
    this.senttime = senttime;
  }

  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
  // Used by DatumWriter.  Applications should not call.
  public java.lang.Object get(int field$) {
    switch (field$) {
    case 0: return partyID;
    case 1: return partyName;
    case 2: return applianceID;
    case 3: return message;
    case 4: return inventoryName;
    case 5: return senttime;
    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }

  // Used by DatumReader.  Applications should not call.
  @SuppressWarnings(value="unchecked")
  public void put(int field$, java.lang.Object value$) {
    switch (field$) {
    case 0: partyID = (java.lang.CharSequence)value$; break;
    case 1: partyName = (java.lang.CharSequence)value$; break;
    case 2: applianceID = (java.lang.CharSequence)value$; break;
    case 3: message = (java.lang.CharSequence)value$; break;
    case 4: inventoryName = (java.lang.CharSequence)value$; break;
    case 5: senttime = (java.lang.CharSequence)value$; break;
    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }

  /**
   * Gets the value of the 'partyID' field.
   * @return The value of the 'partyID' field.
   */
  public java.lang.CharSequence getPartyID() {
    return partyID;
  }

  /**
   * Sets the value of the 'partyID' field.
   * @param value the value to set.
   */
  public void setPartyID(java.lang.CharSequence value) {
    this.partyID = value;
  }

  /**
   * Gets the value of the 'partyName' field.
   * @return The value of the 'partyName' field.
   */
  public java.lang.CharSequence getPartyName() {
    return partyName;
  }

  /**
   * Sets the value of the 'partyName' field.
   * @param value the value to set.
   */
  public void setPartyName(java.lang.CharSequence value) {
    this.partyName = value;
  }

  /**
   * Gets the value of the 'applianceID' field.
   * @return The value of the 'applianceID' field.
   */
  public java.lang.CharSequence getApplianceID() {
    return applianceID;
  }

  /**
   * Sets the value of the 'applianceID' field.
   * @param value the value to set.
   */
  public void setApplianceID(java.lang.CharSequence value) {
    this.applianceID = value;
  }

  /**
   * Gets the value of the 'message' field.
   * @return The value of the 'message' field.
   */
  public java.lang.CharSequence getMessage() {
    return message;
  }

  /**
   * Sets the value of the 'message' field.
   * @param value the value to set.
   */
  public void setMessage(java.lang.CharSequence value) {
    this.message = value;
  }

  /**
   * Gets the value of the 'inventoryName' field.
   * @return The value of the 'inventoryName' field.
   */
  public java.lang.CharSequence getInventoryName() {
    return inventoryName;
  }

  /**
   * Sets the value of the 'inventoryName' field.
   * @param value the value to set.
   */
  public void setInventoryName(java.lang.CharSequence value) {
    this.inventoryName = value;
  }

  /**
   * Gets the value of the 'senttime' field.
   * @return The value of the 'senttime' field.
   */
  public java.lang.CharSequence getSenttime() {
    return senttime;
  }

  /**
   * Sets the value of the 'senttime' field.
   * @param value the value to set.
   */
  public void setSenttime(java.lang.CharSequence value) {
    this.senttime = value;
  }

  /**
   * Creates a new AvroSyslogMessage RecordBuilder.
   * @return A new AvroSyslogMessage RecordBuilder
   */
  public static com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder newBuilder() {
    return new com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder();
  }

  /**
   * Creates a new AvroSyslogMessage RecordBuilder by copying an existing Builder.
   * @param other The existing builder to copy.
   * @return A new AvroSyslogMessage RecordBuilder
   */
  public static com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder newBuilder(com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder other) {
    return new com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder(other);
  }

  /**
   * Creates a new AvroSyslogMessage RecordBuilder by copying an existing AvroSyslogMessage instance.
   * @param other The existing instance to copy.
   * @return A new AvroSyslogMessage RecordBuilder
   */
  public static com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder newBuilder(com.cisco.sso.ssodata.avro.AvroSyslogMessage other) {
    return new com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder(other);
  }

  /**
   * RecordBuilder for AvroSyslogMessage instances.
   */
  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<AvroSyslogMessage>
    implements org.apache.avro.data.RecordBuilder<AvroSyslogMessage> {

    private java.lang.CharSequence partyID;
    private java.lang.CharSequence partyName;
    private java.lang.CharSequence applianceID;
    private java.lang.CharSequence message;
    private java.lang.CharSequence inventoryName;
    private java.lang.CharSequence senttime;

    /** Creates a new Builder */
    private Builder() {
      super(SCHEMA$);
    }

    /**
     * Creates a Builder by copying an existing Builder.
     * @param other The existing Builder to copy.
     */
    private Builder(com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder other) {
      super(other);
      if (isValidValue(fields()[0], other.partyID)) {
        this.partyID = data().deepCopy(fields()[0].schema(), other.partyID);
        fieldSetFlags()[0] = true;
      }
      if (isValidValue(fields()[1], other.partyName)) {
        this.partyName = data().deepCopy(fields()[1].schema(), other.partyName);
        fieldSetFlags()[1] = true;
      }
      if (isValidValue(fields()[2], other.applianceID)) {
        this.applianceID = data().deepCopy(fields()[2].schema(), other.applianceID);
        fieldSetFlags()[2] = true;
      }
      if (isValidValue(fields()[3], other.message)) {
        this.message = data().deepCopy(fields()[3].schema(), other.message);
        fieldSetFlags()[3] = true;
      }
      if (isValidValue(fields()[4], other.inventoryName)) {
        this.inventoryName = data().deepCopy(fields()[4].schema(), other.inventoryName);
        fieldSetFlags()[4] = true;
      }
      if (isValidValue(fields()[5], other.senttime)) {
        this.senttime = data().deepCopy(fields()[5].schema(), other.senttime);
        fieldSetFlags()[5] = true;
      }
    }

    /**
     * Creates a Builder by copying an existing AvroSyslogMessage instance
     * @param other The existing instance to copy.
     */
    private Builder(com.cisco.sso.ssodata.avro.AvroSyslogMessage other) {
            super(SCHEMA$);
      if (isValidValue(fields()[0], other.partyID)) {
        this.partyID = data().deepCopy(fields()[0].schema(), other.partyID);
        fieldSetFlags()[0] = true;
      }
      if (isValidValue(fields()[1], other.partyName)) {
        this.partyName = data().deepCopy(fields()[1].schema(), other.partyName);
        fieldSetFlags()[1] = true;
      }
      if (isValidValue(fields()[2], other.applianceID)) {
        this.applianceID = data().deepCopy(fields()[2].schema(), other.applianceID);
        fieldSetFlags()[2] = true;
      }
      if (isValidValue(fields()[3], other.message)) {
        this.message = data().deepCopy(fields()[3].schema(), other.message);
        fieldSetFlags()[3] = true;
      }
      if (isValidValue(fields()[4], other.inventoryName)) {
        this.inventoryName = data().deepCopy(fields()[4].schema(), other.inventoryName);
        fieldSetFlags()[4] = true;
      }
      if (isValidValue(fields()[5], other.senttime)) {
        this.senttime = data().deepCopy(fields()[5].schema(), other.senttime);
        fieldSetFlags()[5] = true;
      }
    }

    /**
      * Gets the value of the 'partyID' field.
      * @return The value.
      */
    public java.lang.CharSequence getPartyID() {
      return partyID;
    }

    /**
      * Sets the value of the 'partyID' field.
      * @param value The value of 'partyID'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setPartyID(java.lang.CharSequence value) {
      validate(fields()[0], value);
      this.partyID = value;
      fieldSetFlags()[0] = true;
      return this;
    }

    /**
      * Checks whether the 'partyID' field has been set.
      * @return True if the 'partyID' field has been set, false otherwise.
      */
    public boolean hasPartyID() {
      return fieldSetFlags()[0];
    }


    /**
      * Clears the value of the 'partyID' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearPartyID() {
      partyID = null;
      fieldSetFlags()[0] = false;
      return this;
    }

    /**
      * Gets the value of the 'partyName' field.
      * @return The value.
      */
    public java.lang.CharSequence getPartyName() {
      return partyName;
    }

    /**
      * Sets the value of the 'partyName' field.
      * @param value The value of 'partyName'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setPartyName(java.lang.CharSequence value) {
      validate(fields()[1], value);
      this.partyName = value;
      fieldSetFlags()[1] = true;
      return this;
    }

    /**
      * Checks whether the 'partyName' field has been set.
      * @return True if the 'partyName' field has been set, false otherwise.
      */
    public boolean hasPartyName() {
      return fieldSetFlags()[1];
    }


    /**
      * Clears the value of the 'partyName' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearPartyName() {
      partyName = null;
      fieldSetFlags()[1] = false;
      return this;
    }

    /**
      * Gets the value of the 'applianceID' field.
      * @return The value.
      */
    public java.lang.CharSequence getApplianceID() {
      return applianceID;
    }

    /**
      * Sets the value of the 'applianceID' field.
      * @param value The value of 'applianceID'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setApplianceID(java.lang.CharSequence value) {
      validate(fields()[2], value);
      this.applianceID = value;
      fieldSetFlags()[2] = true;
      return this;
    }

    /**
      * Checks whether the 'applianceID' field has been set.
      * @return True if the 'applianceID' field has been set, false otherwise.
      */
    public boolean hasApplianceID() {
      return fieldSetFlags()[2];
    }


    /**
      * Clears the value of the 'applianceID' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearApplianceID() {
      applianceID = null;
      fieldSetFlags()[2] = false;
      return this;
    }

    /**
      * Gets the value of the 'message' field.
      * @return The value.
      */
    public java.lang.CharSequence getMessage() {
      return message;
    }

    /**
      * Sets the value of the 'message' field.
      * @param value The value of 'message'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setMessage(java.lang.CharSequence value) {
      validate(fields()[3], value);
      this.message = value;
      fieldSetFlags()[3] = true;
      return this;
    }

    /**
      * Checks whether the 'message' field has been set.
      * @return True if the 'message' field has been set, false otherwise.
      */
    public boolean hasMessage() {
      return fieldSetFlags()[3];
    }


    /**
      * Clears the value of the 'message' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearMessage() {
      message = null;
      fieldSetFlags()[3] = false;
      return this;
    }

    /**
      * Gets the value of the 'inventoryName' field.
      * @return The value.
      */
    public java.lang.CharSequence getInventoryName() {
      return inventoryName;
    }

    /**
      * Sets the value of the 'inventoryName' field.
      * @param value The value of 'inventoryName'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setInventoryName(java.lang.CharSequence value) {
      validate(fields()[4], value);
      this.inventoryName = value;
      fieldSetFlags()[4] = true;
      return this;
    }

    /**
      * Checks whether the 'inventoryName' field has been set.
      * @return True if the 'inventoryName' field has been set, false otherwise.
      */
    public boolean hasInventoryName() {
      return fieldSetFlags()[4];
    }


    /**
      * Clears the value of the 'inventoryName' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearInventoryName() {
      inventoryName = null;
      fieldSetFlags()[4] = false;
      return this;
    }

    /**
      * Gets the value of the 'senttime' field.
      * @return The value.
      */
    public java.lang.CharSequence getSenttime() {
      return senttime;
    }

    /**
      * Sets the value of the 'senttime' field.
      * @param value The value of 'senttime'.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder setSenttime(java.lang.CharSequence value) {
      validate(fields()[5], value);
      this.senttime = value;
      fieldSetFlags()[5] = true;
      return this;
    }

    /**
      * Checks whether the 'senttime' field has been set.
      * @return True if the 'senttime' field has been set, false otherwise.
      */
    public boolean hasSenttime() {
      return fieldSetFlags()[5];
    }


    /**
      * Clears the value of the 'senttime' field.
      * @return This builder.
      */
    public com.cisco.sso.ssodata.avro.AvroSyslogMessage.Builder clearSenttime() {
      senttime = null;
      fieldSetFlags()[5] = false;
      return this;
    }

    @Override
    @SuppressWarnings("unchecked")
    public AvroSyslogMessage build() {
      try {
        AvroSyslogMessage record = new AvroSyslogMessage();
        record.partyID = fieldSetFlags()[0] ? this.partyID : (java.lang.CharSequence) defaultValue(fields()[0]);
        record.partyName = fieldSetFlags()[1] ? this.partyName : (java.lang.CharSequence) defaultValue(fields()[1]);
        record.applianceID = fieldSetFlags()[2] ? this.applianceID : (java.lang.CharSequence) defaultValue(fields()[2]);
        record.message = fieldSetFlags()[3] ? this.message : (java.lang.CharSequence) defaultValue(fields()[3]);
        record.inventoryName = fieldSetFlags()[4] ? this.inventoryName : (java.lang.CharSequence) defaultValue(fields()[4]);
        record.senttime = fieldSetFlags()[5] ? this.senttime : (java.lang.CharSequence) defaultValue(fields()[5]);
        return record;
      } catch (java.lang.Exception e) {
        throw new org.apache.avro.AvroRuntimeException(e);
      }
    }
  }

  @SuppressWarnings("unchecked")
  private static final org.apache.avro.io.DatumWriter<AvroSyslogMessage>
    WRITER$ = (org.apache.avro.io.DatumWriter<AvroSyslogMessage>)MODEL$.createDatumWriter(SCHEMA$);

  @Override public void writeExternal(java.io.ObjectOutput out)
    throws java.io.IOException {
    WRITER$.write(this, SpecificData.getEncoder(out));
  }

  @SuppressWarnings("unchecked")
  private static final org.apache.avro.io.DatumReader<AvroSyslogMessage>
    READER$ = (org.apache.avro.io.DatumReader<AvroSyslogMessage>)MODEL$.createDatumReader(SCHEMA$);

  @Override public void readExternal(java.io.ObjectInput in)
    throws java.io.IOException {
    READER$.read(this, SpecificData.getDecoder(in));
  }

}

将Avro消息作为字节数组写入HDFS的代码:

/** Injected via {@code @Autowired}; consume() reads the Kafka topic and consumer-group names from it. */
@Autowired
private PropertyConfig config;

/** HDFS stream that consume() writes serialized messages to; null until a stream has been opened. */
FSDataOutputStream out;

/**
 * Polls the dedup Kafka topic in an endless loop and writes every received
 * {@code AvroSyslogMessage}, Java-serialized, to the file {@code syslog} in the current user's
 * HDFS home directory.
 *
 * <p>The HDFS stream is opened lazily on the first record: the file is created if it does not
 * exist and appended to otherwise, so a restart neither fails nor truncates earlier data.
 * Any exception raised while polling or writing is logged and ends the loop; the consumer and
 * the HDFS stream are then closed.
 *
 * <p>NOTE(review): each record is written as an independent {@code ObjectOutputStream} stream
 * (with its own stream header) and there is no record delimiter, so the file is awkward to read
 * back. Avro's own container format ({@code org.apache.avro.file.DataFileWriter}) is presumably
 * the better fit if the on-disk format may change - confirm with the file's consumers.
 *
 * @throws IOException if closing the HDFS output stream fails
 */
public void consume() throws IOException {

    String topic = config.getDedupServiceConsumerTopic();
    String consGroup = config.getDedupServiceConsGroup();

    KafkaConsumer<String, AvroSyslogMessage> consumer = new GenericConsumer<String, AvroSyslogMessage>()
            .initialize(topic, consGroup, STREAMSERDE.STRINGDESER, STREAMSERDE.AVRODESER);

    logger.debug("Dedupe Kafka Consumer Initialized......");

    try {
        // Loop-invariant: resolve the file system and the target path once, not per record.
        // Named hadoopConf so it does not shadow the injected PropertyConfig field.
        Configuration hadoopConf = new Configuration();
        FileSystem fs = FileSystem.get(hadoopConf);
        Path path = new Path(fs.getHomeDirectory(), "syslog");

        while (true) {
            ConsumerRecords<String, AvroSyslogMessage> records = consumer.poll(100);

            for (ConsumerRecord<String, AvroSyslogMessage> record : records) {

                logger.debug("record.offset() = " + record.offset() + " : record.key() = " + record.key());

                AvroSyslogMessage avroMessage = record.value();

                logger.info("avro Message = " + avroMessage);

                if (out == null) {
                    out = openSyslogStream(fs, path);
                }
                logger.info("Writing avro message to hdfs");
                out.write(serialize(avroMessage));
            }

            // Push the batch to the datanodes; otherwise data only becomes visible on close.
            if (out != null && !records.isEmpty()) {
                out.hflush();
            }
        }

    } catch (Exception e) {
        logger.error("Error occured while processing message", e);
    } finally {
        logger.debug("debupe kafka consume is closing");
        try {
            consumer.close();
        } finally {
            // The stream may never have been opened (no records, or an early failure).
            if (out != null) {
                out.close();
                out = null;
            }
        }
    }

}

/** Opens the target file for writing: appends when it already exists, creates it otherwise. */
private static FSDataOutputStream openSyslogStream(FileSystem fs, Path path) throws IOException {
    return fs.exists(path) ? fs.append(path) : fs.create(path);
}

/** Java-serializes one message; the object stream is closed first so no buffered bytes are lost. */
private static byte[] serialize(AvroSyslogMessage avroMessage) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (ObjectOutput ooput = new ObjectOutputStream(bos)) {
        ooput.writeObject(avroMessage);
    }
    return bos.toByteArray();
}

我的问题是:这是否是将Avro消息作为字节数组写入HDFS的正确方法?

0 个答案:

没有答案