如何在加入的'上执行$ text搜索通过$ lookup收集?

时间:2016-03-31 23:07:46

标签: node.js mongodb mongoose mongodb-query aggregation-framework

我是Mongo的新手,使用的是v3.2。我有2个收藏品Parent&儿童。我想使用Parent.aggregate并使用$ lookup来“加入”Child然后对Child中的字段执行$ text $ search,并在父级上执行日期范围搜索。这可能......?

1 个答案:

答案 0 :(得分:1)

根据已经给出的评论,您确实无法对$text的结果执行$lookup搜索,因为除了非{n}之外的任何阶段都没有可用的索引第一个管道阶段。确实,特别是考虑到你真的想要"加入"基于"孩子"的结果发生收集,那么搜索孩子"确实会更好。代替。

这带来了明显的结论,为了做到这一点,你在"孩子"上进行聚合。收集初始$text查询,然后$lookup"父母"而不是相反。

作为一个工作示例,仅使用核心驱动程序进行演示:

MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  async.series(
    [
      // Cleanup
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },
      // Create Index
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },
      // Create Documents
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              )
            }
          ],
          callback
        );
      },
      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            { "$sort": { "score": { "$meta": "textScore" } } },
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            { "$unwind": "$parent" },
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            console.log(JSON.stringify(result,undefined,2));
            callback(err);
          }
        )
      }
    ],
    function(err) {
      if (err) throw err;
      db.close();
    }
  );

});

这导致每个Child中填充的Parent上的查询$text次匹配,以及"score"订购:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

这最终有意义,并且比从父母那样查询更有效率。找到所有"孩子"在$lookup然后"过滤"与$match删除任何"孩子"那些不符合标准,然后放弃了父母的#34;没有任何匹配。

同样的情况适用于猫鼬风格"引用"你在哪里包含一个"数组"儿童"在父母的内部"而不是录制在孩子身上。因此,只要子项上的"localField"(在这种情况下为_id)与父项中的数组中定义的类型"foriegnField"相同(如果它正在工作,它将是无论如何,.populate()然后你仍然得到匹配的父母#34;为每个孩子"在$lookup结果中。

这一切都归结为扭转你的想法并意识到$text结果是最重要的事情,因此"那"是需要启动操作的集合。

这是可能的,但只是反过来做。

使用mongoose样式和父

中引用的子项列表

只显示父项的引用和日期过滤的相反情况:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/rlookup');

var parentSchema = new Schema({
  "_id": Number,
  "name": String,
  "date": Date,
  "children": [{ "type": Number, "ref": "Child" }]
});

var childSchema = new Schema({
  "_id": Number,
  "text": { "type": String, "index": "text" }
},{ "autoIndex": false });

var Parent = mongoose.model("Parent",parentSchema),
    Child = mongoose.model("Child",childSchema);

async.series(
  [
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },
    function(callback) {
      async.parallel(
        [
          function(callback) {
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback)
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },
    function(callback) {
      Child.aggregate(
        [
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          { "$sort": { "score": { "$meta": "textScore" } } },
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          { "$unwind": "$parent" },
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          console.log(JSON.stringify(result,undefined,2));
          callback(err);
        }
      )
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

输出:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "date": "2016-02-01T00:00:00.000Z",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "date": "2016-02-03T00:00:00.000Z",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

注意到排除最大排名的"Parent 4"已删除,因为该日期不属于$filter应用的查询范围。