根据条件计算现有字段的分数

时间:2016-06-13 14:03:25

标签: node.js mongodb mongoose mongodb-query

我正在使用MongoDB 2.6.9和NodeJs 0.10.37,我有一个集合vols,这意味着航班。

> db.vols.findOne()
{
    "_id" : ObjectId("5717a5d4578f3f2556f300f2"),
    "Orig" : "AGP",
    "Dest" : "OTP",
    "Flight" : 126,
    "Routing" : "AGP-OTP",
    "Stops" : 0,
    "Seats" : 169,
    "Ops_Week" : 3,
    "Eff_Date" : "2016-04-14",
    "Mkt_Al" : "0B",
    "Dep_Time" : 1110,
    "Thru_Point" : "",
    "Arr_Time" : 1600,
    "Block_Mins" : 230

}

每个文档表示航空公司执行的一次航班,并包含其详细信息。例如,上面的文档表示一次直飞航班(Stops: 0),而下面的文档表示一次有一个经停点的航班。

db.vols.findOne({Stops:1})
{
    "_id" : ObjectId("5717a5d4578f3f2556f301c5"),
    "Orig" : "CEK",
    "Dest" : "IKT",
    "Flight" : 7756,
    "Routing" : "KZN-CEK-OVB-IKT",
    "Stops" : 1,
    "Seats" : 70,
    "Ops_Week" : 2,
    "Eff_Date" : "2016-04-11",
    "Mkt_Al" : "2G",
    "Dep_Time" : 1655,
    "Thru_Point" : "OVB",
    "Arr_Time" : 140,
    "Block_Mins" : 345
}

重要:

每家航空公司(Airline)在每条航线(Origin - Destination)上都有一个得分(score)。

如何计算得分?

enter image description here

所以,我需要完成这些计算,并在我的集合 vols 中插入一个新字段 "QSI"。

重要提示:

c4中的平均已用时间表示:

例如,假设有一个经停航班:从 A 经 B 飞往 C 的航班,整个航班用时 60 分钟,但从 A 到 B 用时 20 分钟,从 B 到 C 用时 20 分钟,那么这个平均值应该返回 40 分钟。

我尝试了下面这个解决方案,但 c4 的计算似乎不能正常工作:

// Script from the question: loads every document of the `vols` collection,
// computes a QSI score per document and queues a bulk update that stores it
// in the `Qsi` field.
var mongoose = require('mongoose'),
    express  = require('express'),
    Schema   = mongoose.Schema;

mongoose.connect('mongodb://localhost/ramtest');

// Empty schema with strict mode off, bound to the existing `vols` collection.
var volsSchema = new Schema({}, { strict : false, collection : 'vols' });
var MyModel    = mongoose.model("MyModel", volsSchema);

mongoose.set('debug', true);

mongoose.connection.on("open", function(err) {
  if (err) throw err;

  // Unordered bulk operation plus a count of the updates queued so far.
  var bulkUpdateOps = MyModel.collection.initializeUnorderedBulkOp(),
      counter       = 0;

  MyModel.find({}).lean().exec(function(err, docs) {
    if (err) throw err;

    docs.forEach(function(doc) {
      // computations
      var c1, c2, c3, c4, qsi, first_leg, second_leg, total_flight;

      c1 = 0.3728 + (0.00454 * doc.Seats);
      c2 = (doc.Stops == 1) ? 0.03 : 1;
      c3 = doc.Ops_Week;

      if (doc.Stops == 1) {
        // One-stop flight: copy the fields used to look up its two legs.
        var Mkt_Air        = doc.Mkt_Al,
            Origin         = doc.Orig,
            Destination    = doc.Dest,
            Thru_Point     = doc.Thru_Point,
            Effective_Date = doc.Eff_Date,
            Block_Mins     = doc.Block_Mins;

        // NOTE(review): the criteria are passed as four separate arguments.
        // Mongoose documents find(conditions, projection, options, callback),
        // so presumably only { Mkt_Al } acts as the filter here - confirm
        // against the Mongoose version in use.
        MyModel.find({ Mkt_Al : Mkt_Air }, { Orig : Origin }, { Dest : Thru_Point }, { Eff_Date : Effective_Date }).lean().exec(function(err, docs) {
          docs.forEach(function(doc) {
            var first_leg = doc.Block_Mins;
            // Same four-argument call shape as the lookup above.
            MyModel.find({ Mkt_Al : Mkt_Air }, { Orig : Thru_Point }, { Dest : Destination }, { Eff_Date : Effective_Date }).lean().exec(function(err, docs) {
              docs.forEach(function(doc) {
                // These `var`s shadow the outer second_leg / total_flight.
                var second_leg = doc.Block_Mins, total_flight = second_leg + first_leg;
                // c4 and qsi are the outer variables; they are assigned only
                // when this asynchronous callback eventually runs.
                c4 = Math.pow((Block_Mins / total_flight), -0.675);
                qsi = c1 * c2 * c3 * c4;
              }); // the end of docs.forEach(function (doc){
            }); // the end of MyModel.find..
          }); // the end of docs.forEach(function (doc){
        }); // the end of MyModel.find..
      } // end if
      else {
        c4 = 1;
      }

      // Runs synchronously, before any exec() callback above has fired: for
      // Stops == 1 documents c4 is still undefined here, so qsi is NaN.
      qsi = c1 * c2 * c3 * c4;

      counter++;

      bulkUpdateOps.find({ "_id" : doc._id }).updateOne({
        "$set" : { "Qsi" : qsi }
      });

      // Send a batch after every 500 queued updates. The replacement bulk
      // object is only created inside the execute() callback.
      if (counter % 500 == 0) {
        bulkUpdateOps.execute(function(err, result) {
          if (err) throw err;
          bulkUpdateOps = MyModel.collection.initializeUnorderedBulkOp();
          console.log(result);
          console.log(doc);
        });
      }

    });

    // Send whatever is left when the total is not a multiple of 500.
    if (counter % 500 != 0) {
      bulkUpdateOps.execute(function(err, result) {
        if (err) throw err;
        console.log(result);
      });
    }
  });

  // Start an Express app on port 3000; no routes are registered here.
  var app = express();
  app.listen(3000, function() {
    console.log('Ready to calculate and insert the QSI');
  });
});

问题:

我认为问题出在 MyModel.find 上,好像我在这条指令中丢失了数据……当 Stops = 0 时,我的 score 能正确计算出来;但当 Stops = 1 时,我的分数取值为 NaN,并且在若干次迭代后出现错误 callback(null, docs)。请问谁能帮忙?

我如何实现上述目标?

2 个答案:

答案 0 :(得分:1)

问题是,在 Stops == 1 的情况下,您进行的是异步调用,因此在您用 c4 计算 qsi 的时候,c4 的值还没有被设置。相关的代码块在这里:

  // Excerpt of the per-document logic, annotated to show the order in which
  // the asynchronous callback (SITE A) and the synchronous code (SITE B) run.
  if (doc.Stops == 1) {
    // do some stuff
    MyModel.find({/* some query */}).lean().exec(function(err, docs) {
      // SITE A: this function will not be called for a few milliseconds
      // do some more stuff
      c4 = Math.pow((Block_Mins / total_flight), -0.675);
      qsi = c1 * c2 * c3 * c4;
    }); // the end of MyModel.find..
  } // end if
  else {
    c4 = 1;
  }

  qsi = c1 * c2 * c3 * c4; // SITE B: this will be called before SITE A, and qsi will be NaN since c4 is not initialized
  bulkUpdateOps.find({ "_id" : doc._id }).updateOne({
    "$set" : { "Qsi" : qsi } // qsi here will be from SITE B, and SITE A still hasn't been reached yet
  });

粗略的解决方法是做类似下面这样的事情:

  // Queues the bulk update that stores `qsi` on the current document.
  // Relies on `doc` and `bulkUpdateOps` from the enclosing scope.
  function setQsi(qsi) {
    bulkUpdateOps.find({ "_id" : doc._id }).updateOne({
      "$set" : { "Qsi" : qsi }
    });
  }

  if (doc.Stops == 1) {
    // do some stuff
    MyModel.find({/* some query */}).lean().exec(function(err, docs) {
      // do some more stuff
      c4 = Math.pow((Block_Mins / total_flight), -0.675);
      // Queue the update only here, once c4 has actually been computed.
      setQsi(c1 * c2 * c3 * c4);
    }); // the end of MyModel.find..
  } // end if
  else {
    // Direct flight: no lookup needed, so the update is queued immediately.
    c4 = 1;
    setQsi(c1 * c2 * c3 * c4);
  }

对于更复杂的异步模式,您应该考虑使用Promises

答案 1 :(得分:1)

您的实现存在一些问题。首先,您使用 find() 方法的方式不正确,因为您为查询传入了过多的参数:

// Problematic call: each criterion is passed as its own positional argument
// instead of all criteria being combined into one conditions object.
MyModel.find(
    { Mkt_Al : Mkt_Air }, 
    { Orig : Origin }, 
    { Dest : Thru_Point }, 
    { Eff_Date : Effective_Date }
).lean().exec(function(err, docs) { .. }

应该是

// Corrected call: every criterion is a key of the single conditions object.
MyModel.find({ 
    Mkt_Al: Mkt_Air, 
    Orig: Origin, 
    Dest: Thru_Point, 
    Eff_Date: Effective_Date 
}).lean().exec(function(err, docs) { ... }

此外,在这种情况下您不应该使用 find() 方法,因为您的计算只需要一个与查询匹配的文档。引用您之前那个已关闭问题中的复杂算法:

  

现在我想计算得分 c4 并将其插入我的集合中:

     

为此,我应该像这样计算一个值 c4

     

1)首先我检查每个文档是否满足 ( Field2 == 1 ):如果为真,我就继续后面的步骤;否则很简单,c4 取值 1。

     

2)然后我应该为“for”创建一个循环并查看哪些文档验证这些   条件: doc.Field1 == this.Field1 && doc.Field6 == this.Field6 && doc.Field7 == this.Field8

     

3)然后我将 doc.Field4 添加到另一个文档中   的 Field4

     

4)我继续,我再做一个循环,寻找另一个文件   验证这些条件:

     

它应该与之前的文档及其相同,具有相同的 Field1    Field6 等于上一个文档 Field7 及其 Field8 相同   作为第一个文件中的Field8

     

5)然后我将 doc.Field4 添加到之前的 doc.Field4

使用 MyModel.findOne() 应该足以完成上面的步骤 3、4 和 5。但是,由于调用是异步的,您需要嵌套查询;幸运的是嵌套调用的深度不超过 3 层,否则您就会拿到一张通往回调地狱的单程票。为了避免这些常见的陷阱,最好使用 Promises(因为原生的 Mongoose 查询默认可以返回 Promise),或者使用 node-async 包,它包含许多处理此类情况的函数。

如果使用 async 库,它可以让您运行多个相互依赖的异步任务(如 MyModel.findOne() 调用),并在它们全部完成后再执行其他操作。对于上述情况,您可以使用 async.series() 方法。

以下示例演示了上述概念,您可以从测试数据库中的以下示例文档中计算 Qsi

填充测试db的vol集合:

// Seed data for the `vols` collection: five flights of airline "2G".
// The second document is a one-stop flight CEK -> IKT via OVB; the fourth and
// fifth documents are the direct flights CEK -> OVB and OVB -> IKT.
db.vols.insert([
    {    
        "Mkt_Al" : "2G",
        "Stops" : 0,
        "Seats" : 169,
        "Block_Mins" : 230,                
        "Ops_Week" : 3,        
        "Orig" : "AGP",
        "Dest" : "OTP",
        "Thru_Point" : "",
    },
    // One-stop flight (Thru_Point is set).
    {    
        "Mkt_Al" : "2G",
        "Stops" : 1,
        "Seats" : 260,              
        "Block_Mins" : 260,
        "Ops_Week" : 2,  
        "Orig" : "CEK",
        "Dest" : "IKT",
        "Thru_Point" : "OVB",
    },
    {    
        "Mkt_Al" : "2G",
        "Stops" : 0,
        "Seats" : 140,
        "Block_Mins" : 60,
        "Ops_Week" : 2,        
        "Orig" : "BEK",
        "Dest" : "OTP",
        "Thru_Point" : "",
    },
    // Direct flight CEK -> OVB.
    {    
        "Mkt_Al" : "2G",
        "Stops" : 0,
        "Seats" : 160,
        "Block_Mins" : 90,
        "Ops_Week" : 3,        
        "Orig" : "CEK",
        "Dest" : "OVB",
        "Thru_Point" : "",
    },
    // Direct flight OVB -> IKT.
    {    
        "Mkt_Al" : "2G",        
        "Stops" : 0,
        "Seats" : 60,
        "Block_Mins" : 50,
        "Ops_Week" : 3,        
        "Orig" : "OVB",
        "Dest" : "IKT",
        "Thru_Point" : "",
    }
])

Node.js app:

// Recomputes the QSI score of every flight document in the `vols` collection
// and stores it in the document's `Qsi` field.
//
//   Qsi = c1 * c2 * c3 * c4
//   c1  = 0.3728 + 0.00454 * Seats
//   c2  = 0.03 for one-stop flights, 1 otherwise
//   c3  = Ops_Week
//   c4  = (Block_Mins / (first_leg + second_leg)) ^ -0.675 for one-stop
//         flights, 1 otherwise
//
// Documents are processed one after another, and the update for a document is
// queued only after its (asynchronous) leg look-ups have completed, so every
// document is written with its own c2/c4 values.
var mongoose = require('mongoose'),
    express = require('express'),
    async = require('async'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/test');
var volSchema = new Schema({},{ strict: false, collection: 'vols' }),
    Vol = mongoose.model("Vol", volSchema);

mongoose.set('debug', false);

// Number of queued updates sent to the server in one bulk request.
var BATCH_SIZE = 500;

mongoose.connection.on("open", function (err) {
    if (err) throw err;

    var bulkUpdateOps = Vol.collection.initializeUnorderedBulkOp(),
        pending = 0; // updates queued in bulkUpdateOps and not yet executed

    // Sends the queued updates, if any, then calls done(err).
    // The bulk builder is swapped for a fresh one *before* execute() is
    // called so that no later update can be appended to a batch in flight.
    function flush(done) {
        if (pending === 0) return done();

        var batch = bulkUpdateOps;
        bulkUpdateOps = Vol.collection.initializeUnorderedBulkOp();
        pending = 0;

        batch.execute(function (err, result) {
            if (err) return done(err);
            console.log(result.nModified);
            done();
        });
    }

    // Queues the update of doc.Qsi and flushes once a full batch is queued.
    function queueQsi(doc, qsi, done) {
        bulkUpdateOps.find({ "_id" : doc._id }).updateOne({
            "$set": { "Qsi": qsi }
        });
        pending++;

        if (pending >= BATCH_SIZE) return flush(done);
        // Defer so that long runs of direct flights do not grow the stack.
        setImmediate(done);
    }

    // Looks up one flight of `airline` going from `orig` to `dest`.
    // callback(err, flight) receives null as flight when nothing matches.
    function findLeg(airline, orig, dest, callback) {
        Vol.findOne({
            Mkt_Al: airline,
            Orig: orig,
            Dest: dest
        }).lean().exec(callback);
    }

    // Computes the QSI of a single document and queues its update.
    function processDoc(doc, done) {
        var c1 = 0.3728 + (0.00454 * doc.Seats),
            c3 = doc.Ops_Week;

        if (doc.Stops != 1) {
            // Direct flight: c2 = 1 and c4 = 1.
            return queueQsi(doc, c1 * 1 * c3 * 1, done);
        }

        async.series([
            // First leg: origin -> through point.
            function (callback) {
                findLeg(doc.Mkt_Al, doc.Orig, doc.Thru_Point, callback);
            },
            // Second leg: through point -> destination.
            function (callback) {
                findLeg(doc.Mkt_Al, doc.Thru_Point, doc.Dest, callback);
            }
        ], function (err, legs) {
            if (err) return done(err);

            var firstLeg = legs[0],
                secondLeg = legs[1];

            if (!firstLeg || !secondLeg) {
                // Without both legs c4 cannot be computed; leave the document
                // untouched rather than storing NaN.
                console.warn('Skipping ' + doc._id + ': leg flight not found');
                return done();
            }

            var totalFlight = firstLeg.Block_Mins + secondLeg.Block_Mins,
                c2 = 0.03,
                c4 = Math.pow((doc.Block_Mins / totalFlight), -0.675);

            queueQsi(doc, c1 * c2 * c3 * c4, done);
        });
    }

    Vol.find({}).lean().exec(function (err, docs) {
        if (err) throw err;

        async.eachSeries(docs, processDoc, function (err) {
            if (err) throw err;

            // Send the final, partially filled batch.
            flush(function (err) {
                if (err) throw err;
            });
        });
    });
});

示例输出:

db.vols.find()

/* 1 */
{
    "_id" : ObjectId("5767e7549ebce6d574702221"),
    "Mkt_Al" : "2G",
    "Stops" : 0,
    "Seats" : 169,
    "Block_Mins" : 230,
    "Ops_Week" : 3,
    "Orig" : "AGP",
    "Dest" : "OTP",
    "Thru_Point" : "",
    "Qsi" : 3.42018
}

/* 2 */
{
    "_id" : ObjectId("5767e7549ebce6d574702222"),
    "Mkt_Al" : "2G",
    "Stops" : 1,
    "Seats" : 260,
    "Block_Mins" : 260,
    "Ops_Week" : 2,
    "Orig" : "CEK",
    "Dest" : "IKT",
    "Thru_Point" : "OVB",
    "Qsi" : 3.1064
}

/* 3 */
{
    "_id" : ObjectId("5767e7549ebce6d574702223"),
    "Mkt_Al" : "2G",
    "Stops" : 0,
    "Seats" : 140,
    "Block_Mins" : 60,
    "Ops_Week" : 2,
    "Orig" : "BEK",
    "Dest" : "OTP",
    "Thru_Point" : "",
    "Qsi" : 2.0168
}

/* 4 */
{
    "_id" : ObjectId("5767e7549ebce6d574702224"),
    "Mkt_Al" : "2G",
    "Stops" : 0,
    "Seats" : 160,
    "Block_Mins" : 90,
    "Ops_Week" : 3,
    "Orig" : "CEK",
    "Dest" : "OVB",
    "Thru_Point" : "",
    "Qsi" : 3.2976
}

/* 5 */
{
    "_id" : ObjectId("5767e7549ebce6d574702225"),
    "Mkt_Al" : "2G",
    "Stops" : 0,
    "Seats" : 60,
    "Block_Mins" : 50,
    "Ops_Week" : 3,
    "Orig" : "OVB",
    "Dest" : "IKT",
    "Thru_Point" : "",
    "Qsi" : 1.9356
}