如何从命令行简化aws DynamoDB查询JSON输出?

时间:2015-02-18 20:39:59

标签: json amazon-web-services amazon-dynamodb aws-cli jq

我正在使用The AWS Command Line Interface for DynamoDB

当我们查询一个项目时,会得到非常冗长的JSON输出。您会得到类似下面的结果(它是根据AWS命令行帮助(aws command line help)中get-item的示例构造的,以求尽可能详尽;NULL类型已被省略):

{
    "Count": 1, 
    "Items": [
        {
            "Id": {
                "S": "app1"
            }, 
            "Parameters": {
                "M": {
                    "nfs": {
                        "M": {
                            "IP" : {
                                "S" : "172.16.0.178"
                            }, 
                            "defaultPath": {
                                "S": "/mnt/ebs/"
                            },
                            "key": {
                                "B": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                            },
                            "activated": {
                                "BOOL": true 
                            }
                        }
                    },
                    "ws" : {
                        "M" : {
                            "number" : {
                                "N" : "5"
                            },
                            "values" : {
                                "L" : [
                                    { "S" : "12253456346346"},
                                    { "S" : "23452353463464"},
                                    { "S" : "23523453461232"},
                                    { "S" : "34645745675675"},
                                    { "S" : "46456745757575"}
                                ]
                            }
                        } 
                    }
                }
            },
            "Oldtypes": {
                "typeSS" : {"SS" : ["foo", "bar", "baz"]},
                "typeNS" : {"NS" : ["0", "1", "2", "3", "4", "5"]},
                "typeBS" : {"BS" : ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K","VGVybWluYXRvciA1OiBHZW5lc2lzCg=="]}
            }
        }
    ], 
    "ScannedCount": 1, 
    "ConsumedCapacity": null
}

有没有办法让Items部分获得更简单的输出?像这样:

{
    "ConsumedCapacity": null,
    "Count": 1,
    "Items": [
        {
            "Id": "app1",
            "Parameters": {
                "nfs": {
                    "IP": "172.16.0.178",
                    "activated": true,
                    "defaultPath": "/mnt/ebs/",
                    "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                },
                "ws": {
                    "number": 5,
                    "values": ["12253456346346","23452353463464","23523453461232","34645745675675","46456745757575"]
                }
            },
            "Oldtypes": {
                "typeBS": ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K", "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="],
                "typeNS": [0, 1, 2, 3, 4, 5],
                "typeSS": ["foo","bar","baz"]
            }
        }
    ],
    "ScannedCount": 1
}

文档《dynamodb - AWS CLI 1.7.10 documentation》对此没有任何帮助。

我们必须从命令行获取结果。如果有必要,我愿意使用其他命令行工具,例如jq,但这样的jq映射似乎对我来说很复杂。


更新1:基于jq的解决方案(在DanielH的帮助下回答)

使用jq很容易,但不是很漂亮,你可以做类似的事情:

$> aws dynamodb query --table-name ConfigCatalog --key-conditions '{ "Id" : {"AttributeValueList": [{"S":"app1"}], "ComparisonOperator": "EQ"}}' | jq -r '.Items[0].Parameters.M."nfs#IP".S'

结果将是:172.16.0.178

jq -r选项为您提供原始输出。


更新2:基于jq的解决方案(在@ jeff-mercado的帮助下)

以下是Jeff Mercado jq函数的更新和注释版本,用于解组DynamoDB输出。它会给你预期的输出:

$> cat unmarshal_dynamodb.jq
# Recursively replaces DynamoDB typed descriptors ({"S": ...}, {"N": ...},
# {"M": ...}, ...) with plain JSON values. Works on a whole query/scan
# response as well as on a single attribute value.
def unmarshal_dynamodb:
  # DynamoDB boolean type.
  # Tested explicitly, ahead of the `//` chain: `//` discards false and null
  # results, so {"BOOL": false} could never come out of the chain as false.
  if ((objects | keys == ["BOOL"] and (.BOOL | type) == "boolean") // false) then
    .BOOL

  # DynamoDB null type, i.e. {"NULL": true}; `//` cannot return null either
  elif ((objects | keys == ["NULL"]) // false) then
    null

  else
    # DynamoDB string type
    (objects | .S)

    # DynamoDB blob type (left as the base64 string the CLI prints)
    // (objects | .B)

    # DynamoDB number type
    // (objects | .N | strings | tonumber)

    # DynamoDB map type, recursion on each value
    // (objects | .M | objects | map_values(unmarshal_dynamodb))

    # DynamoDB list type, recursion on each item
    // (objects | .L | arrays | map(unmarshal_dynamodb))

    # DynamoDB typed list type SS, string set
    // (objects | .SS | arrays | map(unmarshal_dynamodb))

    # DynamoDB typed list type NS, number set
    // (objects | .NS | arrays | map(tonumber))

    # DynamoDB typed list type BS, blob set
    // (objects | .BS | arrays | map(unmarshal_dynamodb))

    # managing others DynamoDB output entries: "Count", "Items", "ScannedCount" and "ConsumedCapacity"
    // (objects | map_values(unmarshal_dynamodb))
    // (arrays | map(unmarshal_dynamodb))

    # leaves values
    // .
  end
  ;
unmarshal_dynamodb

如果将DynamoDB查询输出保存到文件中(比如说ddb-query-result.json),则可以执行以下命令获得所需的结果:

$> jq -f unmarshal_dynamodb.jq ddb-query-result.json

5 个答案:

答案 0 :(得分:9)

您可以使用精心设计的函数递归地解码值。看起来键名对应于类型:

S -> string
N -> number
M -> map

尽可能处理您要解码的每种情况,否则将其过滤掉。您可以使用各种类型过滤器(type filters)和替代运算符(alternative operator,即`//`)来执行此操作。

$ cat input.json
{
  "Count": 1,
  "Items": [
    {
      "Id": { "S": "app1" },
      "Parameters": {
        "M": {
          "nfs#IP": { "S": "192.17.0.13" },
          "maxCount": { "N": "1" },
          "nfs#defaultPath": { "S": "/mnt/ebs/" }
        }
      }
    }
  ],
  "ScannedCount": 1,
  "ConsumedCapacity": null
}
$ cat ~/.jq
# Recursively decodes DynamoDB typed descriptors into plain JSON values.
#
# Every alternative wraps its result as { value: ... } and the wrapper is
# removed by the trailing `.value`. The `//` operator skips false and null
# results, so wrapping keeps decoded values such as `false` from being
# mistaken for "this alternative did not match".
#
# There is no dedicated alternative for the NULL descriptor: {"NULL": true}
# is handled by the "non-conforming objects" alternative and is left as is.
def decode_ddb:
    # _sprop($key): yields .[$key] only when the input object's keys are exactly
    # [$key]; otherwise it yields nothing, so the enclosing { value: ... } yields
    # nothing either and `//` moves on to the next alternative.
    def _sprop($key): select(keys == [$key])[$key];                 # single property objects only
       ((objects | { value: _sprop("S") })                          # string (from string)
    // (objects | { value: _sprop("B") })                           # blob (from string)
    // (objects | { value: _sprop("N") | tonumber })                # number (from string)
    // (objects | { value: _sprop("BOOL") })                        # boolean (from boolean)
    // (objects | { value: _sprop("M") | map_values(decode_ddb) })  # map (from object)
    // (objects | { value: _sprop("L") | map(decode_ddb) })         # list (from encoded array)
    // (objects | { value: _sprop("SS") })                          # string set (from string array)
    // (objects | { value: _sprop("NS") | map(tonumber) })          # number set (from string array)
    // (objects | { value: _sprop("BS") })                          # blob set (from string array)
    // (objects | { value: map_values(decode_ddb) })                # all other non-conforming objects
    // (arrays | { value: map(decode_ddb) })                        # all other non-conforming arrays
    // { value: . }).value                                          # everything else
    ;
$ jq 'decode_ddb' input.json
{
  "Count": 1,
  "Items": [
    {
      "Id": "app1",
      "Parameters": {
        "nfs#IP": "192.17.0.13",
        "maxCount": 1,
        "nfs#defaultPath": "/mnt/ebs/"
      }
    }
  ],
  "ScannedCount": 1,
  "ConsumedCapacity": null
}

答案 1 :(得分:0)

据我所知,除了您发布的这种"详细"输出之外,没有其他输出格式。因此,我认为您无法避免使用中间工具,例如jq或sed。

下面这篇文章中有几个用于转换原始DynamoDB数据的方案:

Export data from DynamoDB

也许您可以将其中一个脚本与jq或sed结合使用。

答案 2 :(得分:0)

实现帖子目标的另一种方法是使用node-dynamodb或dynamodb-marshaler这类node.js扩展包,并构建一个node命令行工具。

使用commander package构建node.js命令行应用程序的有趣教程:Creating Your First Node.js Command-line Application

答案 3 :(得分:0)

这是另一种方法。这可能有点残酷,但它显示了基本的想法。

# Stream-based decoder: walks every leaf of the input as a [path, value] pair,
# removes the DynamoDB type keys from the path, and rebuilds a new document
# from the cleaned paths.

# Type keys that are simply dropped from a path. "N" and "NS" are not listed
# here because fixnum needs to see them to convert the value to a number.
def unwanted:    ["B","BOOL","M","S","L","BS","SS"];

# Keeps only the path components that do not occur in `unwanted`
# (`unwanted[[.]]` yields the indices where the component occurs; [] = absent).
# NOTE(review): the match is by name only, so an attribute that is itself
# called e.g. "S" or "M" would be removed from the path as well.
def fixpath(p):  [ p[] | select( unwanted[[.]]==[] ) ];

# Returns [path, value] with the number type key removed and the value converted:
#   [..., "NS", i] -> [..., i]   (element i of a number set)
#   [..., "N"]     -> [...]      (single number)
# Any other path/value pair is returned unchanged.
def fixnum(p;v):
    if   p[-2]=="NS" then [p[:-2]+p[-1:],(v|tonumber)]
    elif p[-1]=="N" then [p[:-1], (v|tonumber)]
    else [p,v] end;

# tostream emits [path, leaf] events (length 2) and closing [path] events
# (length 1); only the former carry values, so the latter are filtered out.
reduce (tostream|select(length==2)) as [$p,$v] (
    {}
  ; fixnum(fixpath($p);$v) as [$fp,$fv]      
  | setpath($fp;$fv)
)

Try it online!

示例运行(假设filter.jq中的过滤器和data.json中的数据)

$ jq -M -f filter.jq data.json
{
  "ConsumedCapacity": null,
  "Count": 1,
  "Items": [
    {
      "Id": "app1",
      "Oldtypes": {
        "typeBS": [
          "VGVybWluYXRvcgo=",
          "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK",
          "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=",
          "VGVybWluYXRvciA0OiBTYWx2YXRpb24K",
          "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="
        ],
        "typeNS": [
          0,
          1,
          2,
          3,
          4,
          5
        ],
        "typeSS": [
          "foo",
          "bar",
          "baz"
        ]
      },
      "Parameters": {
        "nfs": {
          "IP": "172.16.0.178",
          "activated": true,
          "defaultPath": "/mnt/ebs/",
          "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
        },
        "ws": {
          "number": 5,
          "values": [
            "12253456346346",
            "23452353463464",
            "23523453461232",
            "34645745675675",
            "46456745757575"
          ]
        }
      }
    }
  ],
  "ScannedCount": 1
}

答案 4 :(得分:0)

这是一个用Node.js编写的脚本,可以完成这件事。

我将文件命名为reformat.js,但是您可以随意命名

'use strict';

/**
 * This script will parse the AWS dynamo CLI JSON response into JS.
 * This parses out the type keys in the objects.
 */

const fs = require('fs');

// Path of the saved DynamoDB CLI response. An optional first command-line
// argument overrides it (`node reformat.js other.json`); with no argument the
// script reads 'response.json' as before.
const inputPath = process.argv[2] || 'response.json';

// Read the raw CLI output as UTF-8 text, then parse it into a JS value so it is easier to work with.
const rawData = fs.readFileSync(inputPath, 'utf8');
const response = JSON.parse(rawData);

// DynamoDB set types. Their payload is an array of plain strings, not an array
// of typed wrappers, so it must be copied as-is instead of being recursed into.
const DYNAMO_SET_TYPES = new Set(['SS', 'NS', 'BS']);

/**
 * Strips the DynamoDB type keys from a map of attributes (or a list of typed
 * values), replacing each `{ "<TYPE>": value }` wrapper with its value.
 *
 * - `M` (map) and `L` (list) payloads are converted recursively.
 * - `SS` / `NS` / `BS` payloads are kept as arrays of strings.
 * - `{ "NULL": true }` becomes `null`.
 * - All other payloads (`S`, `N`, `B`, `BOOL`) are copied unchanged, so numbers
 *   stay in the string form DynamoDB returns them in.
 *
 * The input is modified in place.
 *
 * @param {Object|Array} data - Attribute map (`{ name: { TYPE: value } }`) or
 *   list of typed values (`[{ TYPE: value }, ...]`).
 * @returns {Object|Array} The same `data` object, with the wrappers removed.
 */
function shallowFormatData(data) {
  for (const key of Object.keys(data)) {
    const typedValue = data[key];

    // Only `{ TYPE: value }` wrappers can be unwrapped; leave anything else alone
    // (Object.keys would throw on null and iterate the characters of a string).
    if (typedValue === null || typeof typedValue !== 'object') {
      continue;
    }

    for (const type of Object.keys(typedValue)) {
      const value = typedValue[type];

      if (type === 'NULL') {
        // Null items come back with a type of "NULL" and a value of true.
        data[key] = null;
      } else if (DYNAMO_SET_TYPES.has(type) || value === null || typeof value !== 'object') {
        data[key] = value;
      } else {
        // Map or list payload: its members are typed wrappers themselves.
        data[key] = shallowFormatData(value);
      }
    }
  }
  return data;
}

// Only the Items are converted; the response metadata is not part of the result.
const result = response.Items.map((item) => shallowFormatData(item));

// console.dir formats its argument with util.inspect, whose defaults truncate the output:
//  - maxArrayLength: null removes the cap on how many array elements are printed
//  - depth: null keeps nested maps/lists from being collapsed to "[Object]" / "[Array]"
console.dir(result, { maxArrayLength: null, depth: null });

步骤1)通过CLI运行dynamoDB查询,并将其保存到JSON文件。要从CLI保存响应,只需添加> somefile.json。为了方便起见,我将其保存在与重新格式化文件相同的目录中

// Example: Run in CLI

$ aws dynamodb query --table-name stage_requests-service_FoxEvents \
 --key-condition-expression "PK = :v1" \
 --expression-attribute-values file://expression-attributes.json > response.json

expression-attributes.json

{
  ":v1": {"S": "SOMEVAL"}
}

如果您需要有关如何查询DynamoDB的更多信息,请查看文档中的这些示例:https://docs.aws.amazon.com/cli/latest/reference/dynamodb/query.html#examples

现在您有了一个包含待重新格式化数据的JSON文件,可以在终端中运行reformat.js脚本

第2步)

// Run this in your terminal
$ node reformat.js > formatted.js 

如果要输出JSON对象,只需在脚本结尾的console.dir调用中改为传入JSON.stringify(result)(改用console.log输出可以避免整个字符串被加上引号);否则脚本默认输出的是一个干净的JS对象