Mongo $group聚合:生成以_id为键的字典,而不是带有_id的数组

时间:2016-11-15 11:01:47

标签: mongodb dictionary aggregation-framework

假设我们有以下books集合(取自MongoDB $group文档):

{ "_id" : 8751, "title" : "The Banquet", "author" : "Dante", "copies" : 2 }
{ "_id" : 8752, "title" : "Divine Comedy", "author" : "Dante", "copies" : 1 }
{ "_id" : 8645, "title" : "Eclogues", "author" : "Dante", "copies" : 2 }
{ "_id" : 7000, "title" : "The Odyssey", "author" : "Homer", "copies" : 10 }
{ "_id" : 7020, "title" : "Iliad", "author" : "Homer", "copies" : 10 }

如果我们按照他们的例子,按照作者分组:

// Group the books by author and collect each author's titles into a `books` array.
db.books.aggregate([
  {
    $group: {
      _id: "$author",
      books: { $push: "$title" }
    }
  }
])
然后我们得到一个数组:

[
  { "_id" : "Homer", "books" : [ "The Odyssey", "Iliad" ] },
  { "_id" : "Dante", "books" : [ "The Banquet", "Divine Comedy", "Eclogues" ] }
]

但我希望有一个字典而不是数组

{
  "Homer": { "books" : [ "The Odyssey", "Iliad" ] },
  "Dante": { "books" : [ "The Banquet", "Divine Comedy", "Eclogues" ] }
}

换句话说,我想在字典中使用_id作为键。这对接收者来说更容易访问,因为当他们想要查找特定的作者时,他们不需要搜索数组。

显然接收者可以在获得数据时重新安排数据。但有没有办法通过Mongo的聚合管道来实现呢?

(加分项:当_id具有多个属性时输出嵌套字典,例如先以每个出版商为键,然后在出版商之下再以每位作者为键。)

2 个答案:

答案 0 :(得分:1)

如果您需要比聚合框架允许的更多灵活性,可以尝试使用map-reduce

// Map step of the map-reduce job. Reads `_id`, `title` and `author` from `this`
// (the document currently being mapped) and emits the author as the key with a
// one-entry dictionary { <_id>: <title> } as the value.
// Declared with `const` so that no implicit global is created.
const map = function () {
  const books = {};
  books[this._id] = this.title;
  emit(this.author, books);
};

// Reduce step of the map-reduce job. Merges the dictionaries emitted for one key
// into a single dictionary. When the same id appears more than once, the value
// from the later dictionary wins. The result has the same shape as each input
// value, so it can safely be fed back into another reduce call.
// Declared with `const` so that no implicit global is created.
const reduce = function (key, values) {
  const merged = {};
  values.forEach(function (partial) {
    // Copy own keys only, so inherited enumerable properties are never picked up.
    Object.keys(partial).forEach(function (id) {
      merged[id] = partial[id];
    });
  });
  return merged;
};

答案 1 :(得分:0)

我可能会尝试使用map-reduce方法。

现在我在收到数据时处理数据,用Javascript:

/**
 * Flattens an array of items with _ids into an object, using the _ids as keys.
 *
 * For example, given an array of the form:
 *
 *     [
 *       { _id: 'X', foo: 'bar' },
 *       { _id: 'Y', foo: 'baz' }
 *     ]
 *
 * Will produce an object ("dictionary") of the form:
 *
 *     {
 *       X: { foo: 'bar' },
 *       Y: { foo: 'baz' }
 *     }
 *
 * Note that the `_id` properties will be removed from the input array!
 * The values of the returned object are the very same item objects.
 *
 * If two items share the same `_id`, the later item replaces the earlier one.
 *
 * @param {Array<Object>} array - Items, each with a string, number or boolean `_id`.
 * @returns {Object} A plain object mapping each `_id` to its item.
 * @throws {Error} If an item's `_id` is not a string, number or boolean.
 */
function flattenBy_id (array) {
    const obj = {};
    array.forEach(item => {
        const id = item._id;

        if (typeof id !== 'string' && typeof id !== 'number' && typeof id !== 'boolean') {
            throw new Error(`Cannot flatten: _id is non-primitive (${typeof id}) in item: ${JSON.stringify(item)}`);
        }

        delete item._id;

        // Define the key explicitly instead of `obj[id] = item`: a plain assignment
        // with an _id of "__proto__" would replace the prototype of `obj` rather
        // than add an entry, silently losing the item.
        Object.defineProperty(obj, id, {
            value: item,
            writable: true,
            enumerable: true,
            configurable: true
        });
    });
    return obj;
}

使用LoDash,只需一行代码即可得到类似的结果:
_.keyBy(array, '_id')

但这样不会删除_id属性;而对我来说,删除_id后的结果更整洁。

这是一个在_id具有多个属性时创建嵌套对象的版本:

/**
 * Flattens an array of items with _ids into an object, using the _ids as keys.
 *
 * For example, given an array of the form:
 *
 *     [
 *       { _id: {publisher: 'P', author: 'Q', book: 'Alice in Wonderland'},   date: 1940, content: '...' },
 *       { _id: {publisher: 'X', author: 'Y', book: 'The Hobbit'},            date: 1950, content: '...' },
 *       { _id: {publisher: 'X', author: 'Y', book: 'The Lord of the Rings'}, date: 1960, content: '...' },
 *       { _id: {publisher: 'X', author: 'Z', book: 'Harry Potter'},          date: 1990, content: '...' },
 *     ]
 *
 * Will produce an object ("dictionary") of the form:
 *
 *     {
 *       P: {
 *         Q: {
 *           'Alice in Wonderland':   {date: 1940, content: '...'}
 *         }
 *       },
 *       X: {
 *         Y: {
 *           'The Hobbit':            {date: 1950, content: '...'},
 *           'The Lord of the Rings': {date: 1960, content: '...'}
 *         },
 *         Z: {
 *           'Harry Potter':          {date: 1990, content: '...'}
 *         }
 *       }
 *     }
 *
 * Note that the `_id` properties will be removed from the input array!
 * The leaves of the returned object are the very same item objects.
 *
 * The nesting order follows the property order of each `_id` object. A primitive
 * `_id` is treated as a path of length one. `_id`s of differing depth are not
 * supported.
 *
 * @param {Array<Object>} array - Items whose `_id` is a primitive or an object of primitives.
 * @returns {Object} Nested plain objects keyed by the successive `_id` values.
 * @throws {Error} If any part of an `_id` is not a string, number or boolean
 *                 (this includes `null` and `undefined`), or if two items
 *                 resolve to the same final key.
 */
function flattenBy_id (array) {
    // Own-property helpers. Plain `target[key]` reads and writes would see inherited
    // members such as "__proto__" or "constructor": a path through "__proto__"
    // would walk into (and pollute) Object.prototype, and a key like "constructor"
    // would be reported as already present.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const setOwn = (obj, key, value) => {
        Object.defineProperty(obj, key, { value, writable: true, enumerable: true, configurable: true });
    };

    const dictionary = {};
    array.forEach(item => {
        const path = item._id;

        // typeof null is 'object', so exclude it here; a null _id then reaches the
        // primitive check below and produces the descriptive error.
        const pathArray = path !== null && typeof path === 'object' ? Object.values(path) : [path];

        let target = dictionary;

        pathArray.forEach((key, i) => {
            // Check that key is a primitive.
            // Anything that is not a string, number or boolean is rejected,
            // including undefined and null.
            if (typeof key !== 'string' && typeof key !== 'number' && typeof key !== 'boolean') {
                throw new Error(`Cannot flatten: _id is non-primitive (${typeof key}) in item: ${JSON.stringify(item)}`);
            }

            // Are we on the final step of the path, or before it?
            if (i < pathArray.length - 1) {
                // We are not at the end of the path.  Travel one step, creating the level if needed.
                if (!hasOwn(target, key)) {
                    setOwn(target, key, {});
                }
                target = target[key];
            } else {
                // We are on the final step of the path

                // We don't want to overwrite data that already exists.  We should never be given input of that form.
                if (hasOwn(target, key)) {
                    throw new Error(`Cannot flatten: The key "${key}" already appears in ${JSON.stringify(target)} while trying to add: ${JSON.stringify(item._id)}`);
                }

                delete item._id;
                setOwn(target, key, item);
            }
        });

    });
    return dictionary;
}

它假定_id属性始终处于相同的顺序。希望这是Mongo的$group运算符的一致行为。

如果_id并不总是包含相同数量的属性,它将无法正常工作,并可能抛出错误。 (例如_id: {foo: 1, bar: 2}后跟_id: {foo: 1}会导致麻烦。如果某些文档的bar未定义,则会发生这种情况。)如果您拥有该类型的数据,则需要采用不同的方法。