在 MongoDB 中按年份查找文档

时间:2017-07-17 08:54:35

标签: javascript node.js mongodb mongoose mongodb-query

有一个名为 Movie 的模式(Schema),其中包含有关电影的信息。

电影模式

var mongoose = require('mongoose');
var movieSchema = new mongoose.Schema({
    m_tmdb_id: {
        type: Number,
        unique: true,
        index: true
    },
    m_backdrop_path: {
        type: String,
    },
    m_budget: {
        type: Number,
    },
    m_homepage: {
        type: String
    },
    m_imdb_id: {
        type: String,
    },
    m_original_language: {
        type: String
    },
    m_original_title: {
        type: String
    },
    m_poster_path: {
        type: String
    },
    m_poster_key: {
        type: String
    },
    m_release_date: {
        type: Date
    },
    m_revenue: {
        type: Number
    },
    m_runtime: {
        type: Number
    },
    m_title: {
        type: String
    },
    m_genres: {
        type: Array
    },
    created_at: {
        type: Date
    },
    updated_at: {
        type: Date,
        default: Date.now
    }
});
var MovieModel = mongoose.model('Movie', movieSchema);
module.exports = {
    movie: MovieModel
}

我需要按不同的条件从 movies 集合中分页查询,每次查询返回 10 条记录。我已经在 API 中加入了 3 个条件[类型名称(genre name)、上映日期、语言]。

Js代码

/**
 * POST /movies — paginated list of English-language movies released within
 * the last two months, filtered by genre name and sorted newest first.
 *
 * Query string: `limit` (page size) and `page` (zero-based page index, since
 * `limit * page` documents are skipped).
 * Body: `generNames` — array of genre names matched against `m_genres.name`.
 *
 * Responds 200 with the matching movies (possibly an empty array), 400 when
 * `generNames` is not an array, and 500 with the error when the query fails.
 */
router.post('/movies', function(req, res, next) {
    // Parse both paging values explicitly (with a radix) and fall back to
    // sane defaults so NaN or negative numbers never reach limit()/skip().
    var perPage = parseInt(req.query.limit, 10);
    var page = parseInt(req.query.page, 10);
    if (isNaN(perPage) || perPage < 1) {
        perPage = 10;
    }
    if (isNaN(page) || page < 0) {
        page = 0;
    }

    // `$in` requires an array; reject anything else instead of letting the
    // driver fail (or letting an arbitrary object reach the query operator).
    var generNames = req.body.generNames;
    if (!Array.isArray(generNames)) {
        return res.status(400).json({ error: 'generNames must be an array' });
    }

    // Release window: from two months ago up to now.
    var newestRelease = new Date();
    var oldestRelease = new Date();
    oldestRelease.setMonth(oldestRelease.getMonth() - 2);

    // Top-level keys of a query document are implicitly AND-ed.
    var queryOptions = {
        'm_release_date': {
            $gte: oldestRelease,
            $lte: newestRelease
        },
        'm_genres.name': {
            $in: generNames
        },
        'm_original_language': 'en'
    };

    // Use the promise form only: mixing exec(callback) with .catch() left
    // failed queries without a response, and exec(callback) does not return
    // a promise on every Mongoose version.
    Movie
        .find(queryOptions)
        .select('_id m_tmdb_id m_poster_path m_original_title')
        .sort('-m_release_date')
        .skip(perPage * page)
        .limit(perPage)
        .exec()
        .then(function(movies) {
            return res.status(200).json(movies);
        })
        .catch(function(error) {
            return res.status(500).json(error);
        });
});

我需要再添加一个条件:从 movies 集合中选出上映日期 [m_release_date] 落在给定年份数组(例如 2003、2004、2010 等)中的文档。我该怎么做?

示例:

电影收藏

[   
    {
        "_id": "59420dff3d729440f200bccc",
        "m_tmdb_id": 453651,
        "m_original_title": "PIETRO",
        "m_poster_path": "/3sTFUZorLGOU06A7P3XxjLVKKGD.jpg",
        "m_release_date": "2017-07-14T00:00:00.000Z",
        "m_runtime": 8,
        "m_genres": [{
            "id": 18,
            "name": "Drama"
        }]
    },
    {
        "_id": "594602610772b119e788edab",
        "m_tmdb_id": 425136,
        "m_original_title": "Bad Dads",
        "m_poster_path": null,
        "m_release_date": "2017-07-14T00:00:00.000Z",
        "m_runtime": 0,
        "m_credits_cast": [],
        "m_genres": [{
            "id": 35,
            "name": "Comedy"
        }]
    },
    {
        "_id": "59587747d282843883df755e",
        "m_tmdb_id": 364733,
        "m_original_title": "Blind",
        "m_poster_path": "/cXyObe5aB63ueOndEXxXabgAvIi.jpg",
        "m_release_date": "2017-07-14T00:00:00.000Z",
        "m_runtime": 105,
        "m_genres": [{
            "id": 18,
            "name": "Drama"
        }]
    },
    {
        "_id": "595d93f9c69ab66c4f48254f",
        "m_tmdb_id": 308149,
        "m_original_title": "The Beautiful Ones",
        "m_poster_path": "/kjy1obH5Oy1IsjTViYVJDQufeZP.jpg",
        "m_release_date": "2017-07-14T00:00:00.000Z",
        "m_runtime": 94,

        "m_genres": [{
            "id": 18,
            "name": "Drama"
        }]
    },
    {
        "_id": "59420de63d729440f200bcc7",
        "m_tmdb_id": 460006,
        "m_original_title": "Черная вода",
        "m_poster_path": "/kpiLwx8MGGWgZMMHUnvydZkya0H.jpg",
        "m_release_date": "2017-07-13T00:00:00.000Z",
        "m_runtime": 0,

        "m_genres": []
    },
    {
        "_id": "594602390772b119e788eda3",
        "m_tmdb_id": 281338,
        "m_original_title": "War for the Planet of the Apes",
        "m_poster_path": "/y52mjaCLoJJzxfcDDlksKDngiDx.jpg",
        "m_release_date": "2017-07-13T00:00:00.000Z",
        "m_runtime": 142,
        "m_genres": [{
                "id": 28,
                "name": "Action"
            }

        ]
    }
]

API请求

enter image description here

2 个答案:

答案 0 :(得分:1)

以最高效率修复数据

说实话,这里最直接的做法是在数据中新增一个 m_release_year 字段。这样一来,只需向查询提供一个 $in 条件来代替日期范围即可,而且这种查询当然还可以利用索引。

因此,使用这样的字段,然后启动查询的代码变为:

// Just to simulate the request
const req = {
  body: {
    "generNames": ["Action"],
    "selectedYear": ["2003,2004,2005,2017"]
  }
};

/**
 * Normalise the `selectedYear` request value into a sorted array of integers.
 *
 * The request sends the years as an array of comma-separated strings
 * (e.g. ["2003,2004,2005,2017"]), so every entry is split on commas first.
 *
 * @param {Array<string|number>} input - entries holding one or more years.
 * @returns {number[]} the parsed years in ascending numeric order; tokens
 *   that do not parse as an integer are dropped.
 */
function fixYearSelection(input) {
  return input
    // String() lets plain numeric entries through as well.
    .reduce((tokens, entry) => tokens.concat(String(entry).split(",")), [])
    .map(token => parseInt(token, 10))
    .filter(year => !Number.isNaN(year))
    // Without a comparator sort() compares as strings ("1000" < "999").
    .sort((a, b) => a - b);
}

// Outputs like this - [ 2003, 2004, 2005, 2017 ]
const yearSelection = fixYearSelection(req.body.selectedYear);

// Select movies whose m_release_year is one of the requested years, whose
// genre name is one of the requested genres and whose original language is
// "en". Only the listed fields are returned, sorted by release date
// descending, and paginated with limit/skip.
// NOTE(review): perPage and page are not defined in this snippet —
// presumably the parsed paging values from the route handler; confirm.
Movie.find({
   "m_release_year": { "$in": yearSelection },
   "m_genres.name": { "$in": req.body.generNames },
   "m_original_language": "en"
})
.select('_id m_tmdb_id m_poster_path m_original_title')
.sort('-m_release_date')
.limit(perPage)
.skip(perPage * page)
.exec(function(err, movies) {
// (the callback body is not shown in this snippet)

将新字段放在数据中是一件很简单的事情,可以在mongo shell中运行:

// Back-fill m_release_year (run in the mongo shell).
// Walks every document that has no m_release_year yet, derives the UTC year
// from m_release_date and writes it back in batches of 1000 updates.
let ops = [];
db.movies.find({
  "m_release_year": { "$exists": false },
  // Only documents that really hold a date: a missing or null
  // m_release_date would make getUTCFullYear() throw and abort the run.
  "m_release_date": { "$type": "date" }
}).forEach( doc => {
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "m_release_year": doc.m_release_date.getUTCFullYear() } }
    }
  });

  // Flush periodically so the pending batch stays small.
  if ( ops.length >= 1000 ) {
    db.movies.bulkWrite(ops);
    ops = [];
  }
});

// Write whatever is left over from the last partial batch.
if ( ops.length > 0 ) {
  db.movies.bulkWrite(ops);
  ops = [];
}

将迭代集合中的所有项目并“提取”年份信息,然后写入新字段。然后,最好创建一个与查询选择中使用的字段匹配的索引。

强制计算

如果没有这个字段,那么你基本上是在“强制计算”,而没有任何数据库能高效地做到这一点。MongoDB 中有两种方法:使用 $where 或 $redact。其中“后者”应始终优先于前者,因为 $redact 至少使用原生实现的运算符进行比较,而 $where 依赖 JavaScript 求值,运行速度要慢得多。

// Just to simulate the request
const req = {
  body: {
    "generNames": ["Action"],
    "selectedYear": ["2003,2004,2005,2017"]
  }
};

/**
 * Normalise the `selectedYear` request value into a sorted array of integers.
 *
 * The request sends the years as an array of comma-separated strings
 * (e.g. ["2003,2004,2005,2017"]), so every entry is split on commas first.
 *
 * @param {Array<string|number>} input - entries holding one or more years.
 * @returns {number[]} the parsed years in ascending numeric order; tokens
 *   that do not parse as an integer are dropped.
 */
function fixYearSelection(input) {
  return input
    .reduce((tokens, entry) => tokens.concat(String(entry).split(",")), [])
    .map(token => parseInt(token, 10))
    .filter(year => !Number.isNaN(year))
    // Without a comparator sort() compares as strings ("1000" < "999").
    .sort((a, b) => a - b);
}

// Outputs like this - [ 2003, 2004, 2005, 2017 ]
const yearSelection = fixYearSelection(req.body.selectedYear);

/**
 * Midnight UTC on 1 January of the given year.
 * @param {number} year - full year, e.g. 2003.
 * @returns {Date}
 */
function startOfUtcYear(year) {
  const date = new Date(0);
  date.setUTCFullYear(year);
  return date;
}

/*
 * Not stored, so we try to "guestimate" the reasonable "range" to at
 * least give some query condition on the date and not search everything:
 * from the start of the smallest selected year up to (but excluding) the
 * start of the year after the largest one.
 */
const startDate = startOfUtcYear(yearSelection[0]);
const endDate = startOfUtcYear(yearSelection[yearSelection.length - 1] + 1);

// Helper to switch our $redact "if" based on supported MongoDB
const version = "3.4";

/**
 * Whether the given "major.minor" server version is 3.4 or later, i.e. has
 * the aggregation `$in` operator. Unparseable versions count as unsupported
 * so that callers fall back to the `$or` form.
 * @param {string} serverVersion
 * @returns {boolean}
 */
function supportsAggregationIn(serverVersion) {
  const parts = String(serverVersion).split(".").map(part => parseInt(part, 10));
  const major = parts[0];
  const minor = parts[1] || 0;
  return major > 3 || (major === 3 && minor >= 4);
}

/**
 * Build the `if` expression for the $redact stage: true when the year of
 * m_release_date is one of `yearSelection`.
 *
 * @param {string} [serverVersion=version] - MongoDB "major.minor" version.
 * @returns {Object} an `$in` expression for 3.4 and later, otherwise an
 *   `$or` of one `$eq` comparison per selected year.
 */
function makeIfCondition(serverVersion = version) {
  return supportsAggregationIn(serverVersion)
    ? { "$in": [ { "$year": "$m_release_date" }, yearSelection ] }
    : { "$or": yearSelection.map(y =>
        ({ "$eq": [ { "$year": "$m_release_date" }, y ] })
      ) };
}

然后使用$redact

// Each pipeline stage is named separately and assembled in order below.

// Narrow down by the guessed date range, genre names and language first.
const matchStage = {
  "$match": {
    "m_release_date": { "$gte": startDate, "$lt": endDate },
    "m_genres.name": { "$in": req.body.generNames },
    "m_original_language": "en"
  }
};

// Keep a document only when the condition built by makeIfCondition() holds;
// everything else is pruned.
const redactStage = {
  "$redact": {
    "$cond": {
      "if": makeIfCondition(),
      "then": "$$KEEP",
      "else": "$$PRUNE"
    }
  }
};

// Newest release first.
const sortStage = { "$sort": { "m_release_date": -1 } };

// Fields included in the output.
const projectStage = {
  "$project": { "m_tmdb_id": 1, "m_poster_path": 1, "m_original_title": 1 }
};

Movie.aggregate(
  [
    matchStage,
    redactStage,
    sortStage,
    projectStage,
    // Pagination: skip the earlier pages, then cap at one page.
    { "$skip": perPage * page },
    { "$limit": perPage }
  ],
  (err, movies) => {

  }
)

或通过$where

Movie.find({
   "m_release_date": {
     "$gte": startDate, "$lt": endDate
   },
   "m_genres.name": { "$in": req.body.generNames },
   "m_original_language": "en",
   "$where": function() {
     return yearSelection.indexOf(this.m_release_date.getUTCFullYear()) !== -1         
   }
})
.select('_id m_tmdb_id m_poster_path m_original_title')
.sort('-m_release_date')
.limit(perPage)
.skip(perPage * page)
.exec(function(err, movies) {    

两种方式的基本逻辑都是通过 $year 或 .getUTCFullYear() 从 m_release_date 字段中提取年份,然后将其与 yearSelection 列表进行比较,从而只返回匹配的文档。

对于 $redact,实际的比较在最新版本(3.4 及更高版本)中使用 $in 完成;否则使用 $or,此时我们实际上是对年份数组执行 map() 来生成条件数组,而不是直接把数组作为参数传入。

结论

此处的一般建议是,如果您打算定期查询,请在集合中包含实际数据。使用实际值,您可以在字段上放置索引,常规查询运算符可以使用这些值以及利用索引。

如果不将“年”的值放在集合中,则后续的“计算”需要应用于所有可能的条目,以确定哪个匹配。所以效率不高。

即使在这个例子中,我们也试图“挽回”一些效率:至少根据给定的年份(假定从最小到最大)加入一个“可能的日期范围”。当然,这个范围内仍然包含“未被选中的年份”,但这总比不提供任何条件、完全依靠计算来筛选要好。

答案 1 :(得分:0)

我可以建议使用$where运算符。

这里的主要思路是根据参数的数量及其取值动态构造一个函数。下面的代码并不严谨,但体现了关键思路:

const year1 = 2005;
const year2 = 2007;

// $where also accepts JavaScript source text, so the predicate is built as a
// string instead of going through `new Function`. Only integer values are
// inlined, which keeps arbitrary text out of the code sent to the server.
const years = [year1, year2].map(Number).filter(Number.isInteger);

// getUTCFullYear() rather than getFullYear(): the sample release dates are
// stored at midnight UTC, so a local-time year could be off by one on a
// server running west of UTC.
const yearFinder = `[${years.join(",")}].indexOf(new Date(this.m_release_date).getUTCFullYear()) !== -1`;

// Promise form only: with exec(callback) a failed query never produced a
// response, and exec(callback) does not return a promise on every Mongoose
// version, which breaks the chained .catch().
Movie
    .find(queryOptions)
    .$where(yearFinder)
    .select('_id m_tmdb_id m_poster_path m_original_title')
    .sort('-m_release_date')
    .skip(perPage * page)
    .limit(perPage)
    .exec()
    .then(function(movies) {
        return res.status(200).json(movies);
    })
    .catch(function(error) {
        return res.status(500).json(error);
    });