Question

我想根据使用GCP云功能的大量粉丝用户发布的内容，向近一百万用户填充Feed。

为了做到这一点，我正在设计将feed的firebase更新分成多个小批量。那是因为我认为如果我不拆分更新，我可能会遇到以下问题：

i）保持内存中的一百万用户将超过分配的最大2GB内存。

ii）一次更新一百万个条目将无效（更新一百万个条目需要多长时间？）

但是，批处理更新仅适用于批处理每次更新调用仅插入约100个条目的情况。当我尝试每批1000只时，只插入了第一批。我想知道这是否是由于：

i）超时？但是我没有在日志中看到这个错误。

ii）数组变量userFeeds {}，当函数超出范围时，是否会销毁批处理？

以下是我的代码：

var admin = require('firebase-admin');
var spark = require('./spark');
var user = require('./user');
var Promise = require('promise');
var sparkRecord;

exports.newSpark = function (sparkID) {

    var getSparkPromise = spark.getSpark(sparkID);


    Promise.all([getSparkPromise]).then(function(result) {

        var userSpark = result[0];
        sparkRecord = userSpark;
        sparkRecord.sparkID = sparkID;

        // the batch update only works if the entries per batch is aroud 100 instead of 1000
        populateFeedsToFollowers(sparkRecord.uidFrom, 100, null, myCallback);
    });

};

var populateFeedsToFollowers = function(uid, fetchSize, startKey, callBack){

    var fetchCount = 0;

    //retrieving only follower list by batch
    user.setFetchLimit(fetchSize);
    user.setStartKey(startKey);

    //I use this array variable to keep the entries by batch
    var userFeeds = {};

    user.getFollowersByBatch(uid).then(function(users){

        if(users == null){
            callBack(null, null, null);
            return; 
        }

        //looping thru the followers by batch size
        Object.keys(users).forEach(function(userKey) {

            fetchCount += 1;
            if(fetchCount > fetchSize){
                // updating users feed by batch
                admin.database().ref().update(userFeeds);  
                callBack(null, userKey);
                fetchCount = 0;
                return;

            }else{

                userFeeds['/userFeed/' + userKey + '/' + sparkRecord.sparkID] = {
                        phase:sparkRecord.phase,
                        postTimeIntervalSince1970:sparkRecord.postTimeIntervalSince1970
                }

            }
        });//Object.keys(users).forEach


        if(fetchCount > 0){
            admin.database().ref().update(userFeeds);
        }

    });//user.getFollowersByBatch
};

var myCallback = function(err, nextKey) {

      if (err) throw err; // Check for the error and throw if it exists.

      if(nextKey != null){ //if having remaining followers, keep populating
          populateFeedsToFollowers(sparkRecord.uidFrom, 100, nextKey, myCallback);
      }
};

批量生成的firebase更新不适用于大型数据集

0 个答案: