控制javascript异步流的速率(在循环中)

时间:2016-11-04 17:31:24

标签: javascript loops asynchronous exec

假设您要在短代码中为列表中的每个文件夹启动(随机)进程:

var exec = require('child_process').exec;
var folders = [...]; // a list from somewhere

_.each(folders, function(folder) {
    exec("tar cvf " + folder + ".tgz " + folder);
});

如果列表很长,我可能最终会同时运行大量进程,这是应该避免的。以受控速率运行执行的相当简单的方法是什么(这里最多5个并发进程)?

编辑:这个问题适用于各种异步流程(您要在其中控制速率),而不仅仅是针对exec-over-folders问题。

3 个答案:

答案 0 :(得分:6)

使用 async 包及其函数:eachLimit

它的作用与 lodash 的 each 相同,但支持异步流程,并保证同时进行的迭代数量不会超出限制:

var async = require('async');
var exec = require('child_process').exec;
var folders = [...]; // a list from somewhere

var maxProcesses = 5; // 5 items at a time
async.eachLimit(
  folders, // collection
  maxProcesses, // limit
  function(folder, next) { // iterator function. args: item, callback
    var cmd = "tar -cf " + folder + ".tgz " + folder;
    console.log('calling:', cmd);
    exec(cmd, function(err, stdOut, stdErr) { // executing cmd
      if(err) console.error(err); // if error putting to console
      next(); // passing the async flow to handle the next iteration
    });
  },
  function() { // after all iterations finished
    console.log('finished processing commands');
  });

parallelLimit

var async = require('async');
var _ = require('lodash');
var exec = require('child_process').exec;
var folders = [...]; // a list from somewhere

var callStack = [];
_.each(folders, function(folder) { // generating our stack of commands
  callStack.push(function(done) {
    var cmd = "tar -cf " + folder + ".tgz " + folder;
    exec(cmd, function(err, stdOut, stdErr) {
      if(err) console.error(err);
      done(null, folder);
    });
  });
});

var maxProcesses = 5; // 5 items at a time
async.parallelLimit(callStack, maxProcesses, function() {console.log('finished');});

"让它看起来更短" :)

const
  async = require('async'),
  exec = require('child_process').exec;

let folders = [...]; 
async.eachLimit(folders, 5, 
  (folder, next) => 
    exec("tar -cf " + folder + ".tgz " + folder, () => next()),
    () => console.log('finished'));

const
  async = require('async'),
  exec = require('child_process').exec;

let folders = [...]; 
let commands = folders.map(folder => done => exec("tar -cf " + folder + ".tgz " + folder, () => done());
async.parallelLimit(commands, 5, () => console.log('finished'));



如果这些示例中的任何一个对您不好,或者您的系统非常大,那么让我们尝试使用像rsmq这样的消息队列系统

答案 1 :(得分:2)

Promise

我就是喜欢 Promise,并且喜欢尽可能地使用它们。

我认为这是一个适合您案例的解决方案。

var exec = require('child_process').exec;
var folders = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"];
var maxConcurrentProcessCount = 5;

// Wraps one folder in a task object whose start() launches tar on the first
// call and hands back the same promise on every later call.
function createTask(folder) {
    var task = {
        start: function () {
            if (!task.promise) {
                task.promise = new Promise(function (resolve) {
                    exec("tar cvf " + folder + ".tgz " + folder,
                      undefined, function (err, stdout, stderr) {
                          // This is your logic, you can reject depending on err
                          // A finished task removes itself from the shared list.
                          var position = promiseArr.indexOf(task);
                          if (position >= 0) promiseArr.splice(position, 1);
                          resolve(stdout);
                      });
                });
            }
            return task.promise;
        }
    };
    return task;
}

// Tasks that have not finished yet, in folder order.
var promiseArr = folders.map(createTask);

// Starts (or re-uses) the first maxConcurrentProcessCount unfinished tasks and
// re-runs itself as soon as any one of them settles.
function racePromises() {
    if (promiseArr.length === 0) return;
    var batch = promiseArr.slice(0, maxConcurrentProcessCount);
    var running = batch.map(function (task) {
        return task.start();
    });
    Promise.race(running).then(racePromises);
    var startedCount = promiseArr.filter(function (task) {
        return task.promise;
    }).length;
    console.log("Current running process count: " + startedCount);
}
racePromises();

简短说明

创建一个数组,其中每个元素代表一个任务。首先取出其中的前5个并启动它们。每当其中一个完成时,就将其从数组中删除,然后再次取数组的前5个任务并启动其中尚未启动的任务。

运行示例

Test Example

使用 Promise 重新实现 eachLimit(只是为了好玩)

/**
 * Promise-based take on eachLimit: runs `callback` for every item of
 * `collection` while keeping at most `maxConcurrentCalls` invocations
 * unfinished at any moment.
 *
 * @param {Array} collection - Items to process; visited with forEach, in order.
 * @param {number} maxConcurrentCalls - Maximum number of callback invocations
 *     in flight at once. Must be at least 1 when the collection is non-empty.
 * @param {function(*, function(): void): void} callback - Invoked as
 *     callback.call(item, item, next); it must call next() when the item is done.
 * @returns {Promise<void>} Resolves once next() has been called for every
 *     item. Rejects if `callback` throws synchronously, or with a RangeError
 *     if the limit is below 1 while there is work to do.
 */
var myEachLimit = function (collection, maxConcurrentCalls, callback) {
    return new Promise(function (resolve, reject) {
        // Unfinished tasks in collection order; each removes itself when done.
        var promiseArr = [];

        collection.forEach(function (item) {
            var pr = {
                // Starts the task on first call; later calls return the same promise.
                start: function () {
                    if (pr.promise) return pr.promise;
                    return pr.promise = new Promise(function (resolveTask) {
                        callback.call(item, item, function () {
                            var ind = promiseArr.indexOf(pr);
                            if (ind >= 0) promiseArr.splice(ind, 1);
                            resolveTask();
                        });
                    });
                }
            };
            promiseArr.push(pr);
        });

        // With a limit below 1 no task could ever start and the returned
        // promise would stay pending forever, so fail loudly instead.
        if (promiseArr.length && !(maxConcurrentCalls >= 1)) {
            reject(new RangeError("maxConcurrentCalls must be at least 1"));
            return;
        }

        var racePromises = function () {
            if (!promiseArr.length) {
                resolve();
                return;
            }
            // Use the limit passed by the caller (the earlier version read an
            // unrelated global here). Each time one running task finishes the
            // window is re-evaluated, which starts the next waiting task.
            Promise.race(promiseArr.slice(0, maxConcurrentCalls).map(x => x.start()))
                .then(racePromises)
                // A synchronous throw inside callback rejects its task promise;
                // forward that instead of leaving the outer promise pending.
                .catch(reject);
            console.log("Current running process count: " + promiseArr.filter(x => x.promise).length);
        };
        racePromises();
    });
};


// Demo

var exec = require('child_process').exec;
var folders = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"];
var maxConcurrentProcessCount = 5;

// Archives one folder and calls next() once tar has ended; the err/stdout/stderr
// values that exec passes to its callback are ignored.
function archiveFolder(folder, next) {
    exec(`tar cvf ${folder}.tgz ${folder}`, function () {
        next();
    });
}

function reportAllFinished() {
    console.log("Finished all processes");
}

myEachLimit(folders, maxConcurrentProcessCount, archiveFolder).then(reportAllFinished);

答案 2 :(得分:1)

原生Javascript

您需要的只是某种负载均衡器。将你的循环放入一个单独的函数中:

  /**
   * Starts the tar command for the folders in `folderArray` in batches of at
   * most `maxProcesses`, beginning at index `lastIndex`. When folders remain
   * after a batch, the function schedules itself again after a fixed delay.
   *
   * This is timer based: it never checks whether the processes of the previous
   * batch have actually exited before the next batch is started.
   *
   * NOTE(review): folder names are concatenated into a shell command line; if
   * they can come from untrusted input they must be validated or escaped.
   *
   * @param {number} maxProcesses - Maximum number of processes started per batch (integer >= 1).
   * @param {number} lastIndex - Index of the first folder to process (inclusive);
   *     negative values are treated as 0.
   * @param {string[]} folderArray - Folder names to archive.
   * @param {Object} [options] - Optional overrides.
   * @param {function(string): *} [options.run] - Called with each command string;
   *     defaults to child_process.exec.
   * @param {number} [options.retryDelayMs=10000] - Delay before the next batch starts.
   * @throws {RangeError} If maxProcesses is not an integer >= 1 (otherwise no
   *     batch could make progress and the function would reschedule forever).
   */
  function loopFolders(maxProcesses, lastIndex, folderArray, options = {}) {
    if (!Number.isInteger(maxProcesses) || maxProcesses < 1) {
      throw new RangeError("maxProcesses must be an integer >= 1");
    }

    const retryDelayMs = options.retryDelayMs === undefined ? 10000 : options.retryDelayMs;
    // Load child_process only when no runner was supplied.
    const run = options.run === undefined ? require('child_process').exec : options.run;

    // The start index is inclusive, so a call with 0 really begins at the first
    // folder, and a rescheduled call begins at the first folder not yet started.
    const start = Math.max(0, lastIndex);
    const batchEnd = Math.min(start + maxProcesses, folderArray.length);

    // Start exactly one batch: at most maxProcesses commands.
    for (let index = start; index < batchEnd; index++) {
      const folder = folderArray[index];
      run("tar cvf " + folder + ".tgz " + folder);
    }

    // Folders left over: retry after a while, continuing where this batch ended.
    if (batchEnd < folderArray.length) {
      setTimeout(function () {
        loopFolders(maxProcesses, batchEnd, folderArray, options);
      }, retryDelayMs);
    }
  }

要从头开始调用此循环,您可以这样:

// your Array of folders from somewhere.    
let folders = [...];
// Call loopFolders with maximum of 5 and the beginning index of 0.
loopFolders(5, 0, folders);

此代码是负载均衡器的一个非常基本的示例。请记住,我的示例永远不会知道其他进程是否已经完成。您可以使用某种回调来判断这一点。不过作为一个起点,这应该能帮到您。

要使用NodeJS Childprocess事件,请查看https://nodejs.org/api/child_process.html

你可以在 'exit' 事件中为循环添加回调,以确保您的子进程不会失控。

希望这有帮助。

此致 Megajin