Jolt Spec - How to insert element into Array

时间:2019-04-17 02:20:13

标签: json apache-nifi jolt

How can I insert a constant element (one that does not come from the input JSON) into an array?

My intention

I capture MySQL CDC events, which are formatted as JSON, and I have added a new column that holds the binlog time.

I then convert the JSON to Avro, so I need to generate the avsc schema automatically (the CDC event includes the column type information) in case the MySQL table changes.

However, I have not been able to insert the new element into $.fields[] of the avsc.

Input

{
  "database": "test",
  "es": 1555381078000,
  "table": "table_name",
  "mysqlType": {
    "bool_type": "tinyint(1)",
    "tinyint_type": "tinyint(4)",
    "SMALLINT_type": "smallint(6)",
    "MEDIUMINT_type": "mediumint(9)",
    "int_type": "int(11)",
    "integer_type": "int(11)",
    "bigint_type": "bigint(20)",
    "float_type": "float",
    "double_type": "double",
    "decimal_type": "decimal(10,0)",
    "decimal_type2": "decimal(20,20)",
    "varchar_type": "varchar(20)",
    "date_type": "date",
    "time_type": "time",
    "datetime_type": "datetime",
    "timestamp_type": "timestamp"
  }
}

Current spec

[
  {
    "operation": "shift",
    "spec": {
      "database": "schema.namespace",
      "table": "schema.name",
      "#record": "schema.type",
      "#auto generated by jolt": "schema.doc",
      "mysqlType": {
        "*": {
          "tinyint*|smallint*|mediumint*|int*|date": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#int": "schema.fields.[#3].type[]"
          },
          "bigint*|datetime|timestamp": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#long": "schema.fields.[#3].type[]"
          },
          "float|double|decimal*": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#long": "schema.fields.[#3].type[]"
          },
          "*": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#string": "schema.fields.[#3].type[]"
          }
        },
        "#__binlog_time": "schema.fields[#2].name",
        "#null": "schema.fields[#2].type[]",
        "#long": "schema.fields[#2].type[]"
      }
    }
    }
]

Current output

The current Jolt spec gets this wrong: it merges the new element into an existing element of $.fields[] instead of adding it as a separate element.

{
  "schema" : {
    "type" : "record",
    "doc" : "auto generated by jolt",
    "namespace" : "test",
    "name" : "table_name",
    "fields" : [ {
      "name" : "bool_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "tinyint_type",
      "type" : [ "null", "int" ]
    }, { // wrong here: the new element was merged into this existing one
      "name" : [ "__binlog_time", "SMALLINT_type" ],
      "type" : [ "null", "long", "null", "int" ]
    }, {
      "name" : "MEDIUMINT_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "int_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "integer_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "bigint_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "float_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "double_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "decimal_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "decimal_type2",
      "type" : [ "null", "long" ]
    }, {
      "name" : "varchar_type",
      "type" : [ "null", "string" ]
    }, {
      "name" : "date_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "time_type",
      "type" : [ "null", "string" ]
    }, {
      "name" : "datetime_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "timestamp_type",
      "type" : [ "null", "long" ]
    } ]
  }
}

Wanted output

Insert the element {"name":"__binlog_time","type":["null","long"]} into the array $.fields[]

{
  "schema" : {
    "type" : "record",
    "doc" : "auto generated by jolt",
    "namespace" : "test",
    "name" : "table_name",
    "fields" : [ {
      "name" : "bool_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "tinyint_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "SMALLINT_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "MEDIUMINT_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "int_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "integer_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "bigint_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "float_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "double_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "decimal_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "decimal_type2",
      "type" : [ "null", "long" ]
    }, {
      "name" : "varchar_type",
      "type" : [ "null", "string" ]
    }, {
      "name" : "date_type",
      "type" : [ "null", "int" ]
    }, {
      "name" : "time_type",
      "type" : [ "null", "string" ]
    }, {
      "name" : "datetime_type",
      "type" : [ "null", "long" ]
    }, {
      "name" : "timestamp_type",
      "type" : [ "null", "long" ]
    }, { // new element (it does not need to be the last element)
      "name" : "__binlog_time",
      "type" : [ "null", "long" ]
    } ]
  }
}

1 个答案:

答案 0 :(得分:1)

您可以先使用 default 操作将新字段添加到输入 JSON 的 mysqlType 中,然后在 shift 规范里把 "long" 加入映射为可空 long(["null","long"])的匹配项。

[
  {
    "operation": "default",
    "spec": {
      "mysqlType": {
        "__binlog_time": "long"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "database": "schema.namespace",
      "table": "schema.name",
      "#record": "schema.type",
      "#auto generated by jolt": "schema.doc",
      "mysqlType": {
        "*": {
          "tinyint*|smallint*|mediumint*|int*|date": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#int": "schema.fields.[#3].type[]"
          },
          "bigint*|datetime|timestamp|long": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#long": "schema.fields.[#3].type[]"
          },
          "float|double|decimal*": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#long": "schema.fields.[#3].type[]"
          },
          "*": {
            "$1": "schema.fields.[#3].name",
            "#null": "schema.fields.[#3].type[]",
            "#string": "schema.fields.[#3].type[]"
          }
        }
      }
    }
  }
]