MongoDB map-reduce 降序排序

时间:2013-08-17 09:47:07

标签: mongodb mapreduce mongodb-query

我有一个包含以下数据的集合:

    { "_id" : "279771168740729_161573583988659_462046", "user_likes" : false, "message" : "good morning ICICI Bank have a great day...waiting for today surprise", "like_count" : 0, "message_tags" : [ { "id" : "279771168740729", "name" : "ICICI Bank", "length" : 10, "offset" : 13, "type" : "page" } ], "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002801855936", "name" : "Kowshik Krankz" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_161573583988659", "created_time" : "2012-11-03T04:10:31+0000" }
    { "_id" : "279771168740729_203743029752972", "icon" : "http://static.ak.fbcdn.net/rsrc.php/v2/yj/r/v2OnaTyTQZE.gif", "link" : "http://youtu.be/eKxIbLVRHRE", "page_username" : "icicibank", "caption" : "www.youtube.com", "from" : { "id" : "279771168740729", "category" : "Bank/financial institution", "name" : "ICICI Bank" }, "type" : "video", "updated_time" : "2012-07-18T04:32:24+0000", "shares" : { "count" : 40 }, "id" : "279771168740729_203743029752972", "message" : "Like Raghu, you too could be at the wrong place at the wrong time. But would you be able to clear your unpaid bills like Raghu did? Now you can! To know how, check out this video. For more details, visit http://bit.ly/NsoCY3", "picture" : "http://external.ak.fbcdn.net/safe_image.php?d=AQADR4-ELAVCbuSI&w=130&h=130&url=http%3A%2F%2Fi2.ytimg.com%2Fvi%2FeKxIbLVRHRE%2Fmqdefault.jpg", "source" : "http://www.youtube.com/v/eKxIbLVRHRE?version=3&autohide=1&autoplay=1", "status_type" : "shared_story", "likes" : { "count" : 643, "data" : [ { "id" : "100002247030669", "name" : "Angel Zoya" }, { "id" : "100002257585478", "name" : "Rakesh Kumar" }, { "id" : "100002062205767", "name" : "P.k. Choudhury" }, { "id" : "100000484071154", "name" : "Balaji Jadhvar" } ] }, "name" : "ICICI Bank", "page_id" : "279771168740729", "page_name" : "ICICI Bank", "created_time" : "2012-07-18T04:32:24+0000", "comments" : { "count" : 48 }, "actions" : [ { "link" : "http://www.facebook.com/279771168740729/posts/203743029752972", "name" : "Comment" }, { "link" : "http://www.facebook.com/279771168740729/posts/203743029752972", "name" : "Like" } ] }
    { "_id" : "279771168740729_203743029752972_572142", "user_likes" : false, "message" : ":-)", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "1060073189", "name" : "Raja Bhowmik" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:33:57+0000" }
    { "_id" : "279771168740729_203743029752972_572155", "user_likes" : false, "message" : "@?", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100001965306815", "name" : "Akhil Pandit" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:39:55+0000" }
    { "_id" : "279771168740729_203743029752972_572157", "user_likes" : false, "message" : "This ad is in very bad taste given the timing of it's release and the passing away of Satwik in the Bannerghata forests in Bangalore. Maybe there is no relation, but the similarity of the situation is uncanny.", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "588391958", "name" : "Vijay Alphonse" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:41:05+0000" }
    { "_id" : "279771168740729_203743029752972_572182", "user_likes" : false, "message" : "Lv 2 do job in a bank", "like_count" : 6, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002492179903", "name" : "Monica Chandwani" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T04:48:51+0000" }

{ "_id" : "279771168740729_203743029752972_572228", "user_likes" : false, "message" : "R u working in ici bnk", "like_count" : 4, "page_username" : "icicibank", "page_id" : "279771168740729", "can_remove" : false, "from" : { "id" : "100002412887446", "name" : "Brijesh Gaur" }, "page_name" : "ICICI Bank", "post_id" : "279771168740729_203743029752972", "created_time" : "2012-07-18T05:10:06+0000" }

在这里,我需要根据点赞数(like_count 键的值)显示前 2 个帖子。因此 id 为 279771168740729_203743029752972_572182 的帖子排第一(6 是最高的点赞数),id 为 279771168740729_203743029752972_572142 的帖子排第二(4 是次高),依此类推。

我提出了两个步骤:

  1. 发出likeCount和postId
  2. 将likeCount降序排序并显示前两个条目
相应的代码如下:

    // Map step: emit one (like_count, _id) pair per document, skipping any
    // document where either field is null or undefined.
    var mapFunction = function() {
        if (this._id == null || this.like_count == null) {
            return;
        }
        emit(this.like_count, this._id);
    };
    
    // Reduce-step placeholder from the question. It receives a likeCount key
    // and postIdCollection (presumably the post ids emitted for that key).
    // The body is intentionally empty: the comment inside is the open question.
    var reduceFuntion = function(likeCount, postIdCollection) {
    /*How to maintain a single sorted list of likeCount and show the corresponding post?*/
    
    };
    

我对 MongoDB 文档中的排序功能感到困惑——请参阅这个帖子。

1 个答案:

答案 0 :(得分:1)

除非您实际上计划使用MapReduce功能执行其他操作,否则您最好只使用普通的Mongo查询。您最好的选择是使用查找查询:

// Sort all documents by like_count, highest first, and keep only the first two.
db.collectionName.find().sort({ like_count: -1 }).limit(2);

如果您处理的是大量数据,我还建议您在 like_count 字段上创建索引:

// Descending index on like_count. createIndex replaces ensureIndex, which has
// been deprecated since MongoDB 3.0 and is removed in newer releases.
db.collectionName.createIndex({like_count: -1})

如果您确实希望使用 map-reduce,那么可以在 mapReduce 命令中使用 sort 和 limit 选项:

// `sort` and `limit` are separate top-level options (limit is not a sort key),
// and mapReduce requires an `out` target. Both options act on the documents
// that are fed into the map function.
db.collectionName.mapReduce(mapFunction, reduceFunction, { out: { inline: 1 }, sort: { like_count: -1 }, limit: 2 })

这基本上是对输入的数据集执行相同的查询,然后在输出时将其精简;不过,这也意味着 MapReduce 步骤本身并不会为您做多少事情。

如果您想尝试使用纯 MapReduce,那么您需要一套完全不同的 map 和 reduce 函数。MapReduce 过程会对键进行隐式排序,这意味着您可以运行类似下面的代码:

// Map step: emit (-like_count, _id) so that map-reduce's ascending key order
// lists the most-liked posts first. Documents missing either field are skipped.
var mapFunction = function() {
    var likeCount = this.like_count;
    var postId = this._id;

    // Validate before negating: -undefined is NaN and -null is -0, and neither
    // equals null, so checking the negated value would never filter anything.
    if (postId != null && likeCount != null) {
        emit(-likeCount, postId);
    }
};

// Reduce step: collapse all values emitted for one key into a single
// comma-separated string. The key (a) is not part of the returned value.
var reduceFunction = function(a, b) {
    return b.join(",");
};

// Runs the map-reduce and returns the results inline.
// NOTE(review): the MongoDB mapReduce docs describe `limit` as a cap on the
// number of input documents passed to map, not on the number of result keys.
// Confirm this returns the two highest like_count groups before relying on it.
db.test.mapReduce(mapFunction, reduceFunction, {out: { inline: 1 }, limit: 2});

然后处理结果集的最后一个成员,从末尾取出条目,再反向展开以获取对应的帖子;不过您需要对该结果集做一些展开处理才能使其有意义。请注意,由于隐式排序是升序的,我们实际发出的是负的 like_count 而不是正值,这意味着我们可以使用 limit。这得到的并不是严格意义上的前两个帖子,而是前两个 like_count 值以及与之关联的所有帖子,因此您仍然需要做一些后期处理。

当然,如果您想尝试其他方法,也可以使用聚合框架:

// Aggregation pipeline: sort by like_count descending, then keep the first two documents.
db.collectionName.aggregate([{$sort: { like_count: -1 }}, {$limit: 2}]);
相关问题