带有对象类映射的对象收集

时间:2020-09-25 12:06:05

标签: java java-8 apache-flink flink-streaming

我遇到一个问题:当我通过 Flink flatMap 的 Collector 收集对象后,在下游无法得到正确的值。我拿到的只是对象引用(例如 java.lang.Object@863dce2),而不是实际的值。

// Pipeline: keep only records that have a user_id, run them through the aggregate
// flat-map, render every emitted tuple as a JSON string and print it.
dataStream.filter(new FilterFunction<GenericRecord>() {
      @Override
      public boolean filter(GenericRecord record) throws Exception {
        // Records without a user_id are dropped.
        return record.get("user_id") != null;
      }
    }).flatMap(new ProfileEventAggregateFlatMapFunction(aggConfig))
        .map(new MapFunction<ProfileEventAggregateEmittedTuple, String>() {
          // Created on first use and then reused for every record, instead of building and
          // configuring a new ObjectMapper per element. Transient so the mapper is never
          // part of the serialized function object.
          private transient ObjectMapper mapper;

          /**
           * Serializes the tuple to JSON using field-level visibility.
           *
           * @return the JSON string, or {@code null} when serialization fails
           */
          @Override
          public String map(
              ProfileEventAggregateEmittedTuple profileEventAggregateEmittedTupleNew)
              throws Exception {
            String res = null;
            try {
              if (mapper == null) {
                ObjectMapper configured = new ObjectMapper();
                configured.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
                mapper = configured;
              }
              res = mapper.writeValueAsString(profileEventAggregateEmittedTupleNew);
            } catch (Exception e) {
              // Best effort: the failure is reported on stderr and null is emitted for
              // this record rather than failing the job.
              e.printStackTrace();
            }
            return res;
          }
        }).print();




public class ProfileEventAggregateFlatMapFunction extends
    RichFlatMapFunction<GenericRecord, ProfileEventAggregateEmittedTuple> {

  private final ProfileEventAggregateTupleEmitter aggregator;
  ObjectMapper mapper = ObjectMapperPool.getInstance().get();

  public ProfileEventAggregateFlatMapFunction(String config) throws IOException {
    this.aggregator = new ProfileEventAggregateTupleEmitter(config);
  }

  @Override
  public void flatMap(GenericRecord event,
      Collector<ProfileEventAggregateEmittedTuple> collector) throws Exception {
    try {

      List<ProfileEventAggregateEmittedTuple> aggregateTuples = aggregator.runAggregates(event);

      for (ProfileEventAggregateEmittedTuple tuple : aggregateTuples) {

        collector.collect(tuple);
      }
}}

调试结果:这是我在 flatMap 的 Collector 中收集的元组:

tuple = {ProfileEventAggregateEmittedTuple@7880} 
 profileType = "userprofile"
 key = "1152473"
 businessType = "keyless"
 name = "consumer"
 aggregates = {ArrayList@7886}  size = 1
  0 = {ProfileEventAggregate@7888} "geo_id {geo_id=1} {keyless_select_destination_cnt=1, total_estimated_distance=12.5}"
   entityType = "geo_id"
   dimension = {LinkedHashMap@7891}  size = 1
    "geo_id" -> {Integer@7897} 1
     key = "geo_id"
     value = {Integer@7897} 1
   metrics = {LinkedHashMap@7892}  size = 2
    "keyless_select_destination_cnt" -> {Long@7773} 1
     key = "keyless_select_destination_cnt"
     value = {Long@7773} 1
    "total_estimated_distance" -> {Double@7904} 12.5
     key = "total_estimated_distance"
     value = {Double@7904} 12.5

这是我在 map 函数(.map(new MapFunction<ProfileEventAggregateEmittedTuple, String>() {...}))中收到的同一个元组:

 profileEventAggregateEmittedTuple = {ProfileEventAggregateEmittedTuple@7935} 
 profileType = "userprofile"
 key = "1152473"
 businessType = "keyless"
 name = "consumer"
 aggregates = {GenericData$Array@7948}  size = 1
  0 = {ProfileEventAggregate@7950} "geo_id {geo_id=java.lang.Object@863dce2} {keyless_select_destination_cnt=java.lang.Object@7cdb4bfc, total_estimated_distance=java.lang.Object@52e81f57}"
   entityType = "geo_id"
   dimension = {HashMap@7952}  size = 1
    "geo_id" -> {Object@7957} 
     key = "geo_id"
     value = {Object@7957} 
      Class has no fields
   metrics = {HashMap@7953}  size = 2
    "keyless_select_destination_cnt" -> {Object@7962} 
     key = "keyless_select_destination_cnt"
     value = {Object@7962} 
      Class has no fields
    "total_estimated_distance" -> {Object@7963} 

请帮助我了解为什么我没有得到正确的数据。

public class ProfileEventAggregateEmittedTuple implements Cloneable, Serializable {
  private String profileType;
  private String key;
  private String businessType;
  private String name;
  private List<ProfileEventAggregate> aggregates = new ArrayList<ProfileEventAggregate>();
  private long startTime;
  private long endTime;

  public String getProfileType() {
    return profileType;
  }

  public void setProfileType(String profileType) {
    this.profileType = profileType;
  }

  public String getKey() {
    return key;
  }

  public void setKey(String key) {
    this.key = key;
  }

  public String getBusinessType() {
    return businessType;
  }

  public void setBusinessType(String businessType) {
    this.businessType = businessType;
  }

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public List<ProfileEventAggregate> getAggregates() {
    return aggregates;
  }

  public void addAggregate(ProfileEventAggregate aggregate) {
    this.aggregates.add(aggregate);
  }

  public void setAggregates(List<ProfileEventAggregate> aggregates) {
    this.aggregates = aggregates;
  }

  public long getStartTime() {
    return startTime;
  }

  public void setStartTime(long startTime) {
    this.startTime = startTime;
  }

  public long getEndTime() {
    return endTime;
  }

  public void setEndTime(long endTime) {
    this.endTime = endTime;
  }

 @Override
  public ProfileEventAggregateEmittedTuple clone() {
    ProfileEventAggregateEmittedTuple clone = new ProfileEventAggregateEmittedTuple();

    clone.setProfileType(this.profileType);
    clone.setKey(this.key);
    clone.setBusinessType(this.businessType);
    clone.setName(this.name);

    for (ProfileEventAggregate aggregate : this.aggregates) {
      clone.addAggregate(aggregate.clone());
    }
    return clone;
  }

public class ProfileEventAggregate  implements Cloneable, Serializable {

  private String entityType;
  private Map<String, Object> dimension =new LinkedHashMap<String, Object>();
  private Map<String, Object> metrics = new LinkedHashMap<String, Object>();

  public Map<String, Object> getDimension() {
    return dimension;
  }

  public void setDimension(Map<String, Object> dimension) {
    this.dimension.putAll(dimension);
  }

  public void addDimension(String dimensionKey, Object dimensionValue) {
    this.dimension.put(dimensionKey, dimensionValue);
  }

  public Map<String, Object> getMetrics() {
    return metrics;
  }
  public void addMetric(String metricKey, Object metricValue) {
    this.metrics.put(metricKey, metricValue);
  }
  public void setMetrics(Map<String, Object> metrics) {
    this.metrics.putAll(metrics);
  }
  public String getEntityType() {
    return entityType;
  }
  public void setEntityType(String entityType) {
    this.entityType = entityType;
  }

  @Override
  public ProfileEventAggregate clone()  {
    ProfileEventAggregate clone = new ProfileEventAggregate();

    clone.setEntityType(this.entityType);
    clone.getDimension().putAll(this.getDimension());
    clone.getMetrics().putAll(this.metrics);
    return clone;
  }

1 个答案:

答案 0 :(得分:1)

如果您没有启用 enableObjectReuse,Flink 会使用已配置的序列化器(看起来是 Avro?)来复制对象。

在您的情况下,您使用了 Map<String, Object>,而对于这种类型无法推断出合理的 schema(模式)。

最简单的解决方法是启用 enableObjectReuse。否则,请确保您的序列化器与您的数据匹配。为此,您可以添加一个调用 AvroSerializer#copy 的单元测试;如果想继续使用 Avro 反射(reflect),请确保您的 POJO 已正确注解(properly annotated);更好的做法是采用 schema 优先的方式,即根据 Avro schema 生成 Java POJO,并使用 Avro specific 记录。

让我们讨论一些替代方法:

  • 使用 GenericRecord。不要把它转换成 Java 类型,而是直接访问 GenericRecord。当整条记录的结构都是灵活的时,这通常是唯一的办法(例如,您的作业接受任意输入并将其写入 S3)。
  • 反规范化 schema。您可以用 class EventInformation { int id; String predicate; Object value; } 来代替 class Event { int id; Map<String, Object> data; }。这样在处理时需要先把属于同一事件的所有信息分组。不过,在 Avro 中您仍会遇到同类的问题(value 仍然是 Object)。
  • 使用宽 schema。在上一种方法的基础上,如果事先知道所有不同的谓词,就可以据此构造一个宽 schema:class Event { int id; Long predicate1; Integer predicate2; ... String predicateN; },其中所有字段都可为空,而且大多数确实为 null。对 null 进行编码的开销非常小。
  • 放弃 Avro。Avro 是完全类型化的,您可能需要更动态的格式。Protobuf 提供 Any 类型,可支持任意子消息。
  • 使用Kryo。 Kryo可以序列化任意对象树,但代价是速度较慢且开销更大。

如果要写出数据,还需要考虑一种在数据中附带类型信息的方案,以便之后能正确地反序列化。例如,可以参考原回答中链接的示例(链接文本在转载时丢失)。当然,实现方式还有很多。