子进程同时结束与 Promise 的解析

时间:2017-08-01 15:05:58

标签: javascript node.js promise parent-child child-process

tl;dr:我编写的函数创建了几个子进程,每个子进程通过消息回传数据时会 resolve 一个 promise。虽然函数用 Promise.all 包装了所有这些 promise,但程序会突然返回命令行,Promise.all 既不 resolve 也不 reject,即使所有子进程都已完成且没有报错。有人知道为什么会这样吗?

为了加快数据收集过程,我让一个父进程获取一些输入数据(确切地说,是要在 SQL 数据库中查询的日期),把它们切分成大小相同的块发送给若干子进程,并把各子进程的结果包装在一个大的 promise 中,等待它们完成数据处理。

虽然这对较小的数据集有效,但对于较大的数据集,父进程会直接返回命令行——promise 既不 resolve 也不 reject,函数也不会继续执行。查看了几份日志后发现,似乎所有子进程都正确地处理并发送了数据,但父进程没有收到其中一些进程(例如 10 个中的 2 个)的结果。消息丢失发生在数据处理接近尾声时(此时有几个子进程几乎同时完成并发送消息)。

缩写代码:

// main function
function createArray(i,j) // returns an array of i empty arrays, each of length j
function chunkify(a, n, balanced) // divides array a into n chunks (balancing them in size if true) returning an array of chunks

/**
 * Splits `snaptimes` across ten forked `./child.js` workers and waits for
 * one reply message from each of them.
 *
 * Each worker is sent `{ times, c }`, where `times` is its chunk of
 * `snaptimes` and `c` is `course`. A worker's promise resolves with
 * `m.data` from its first message, or rejects with `m.err` / the emitted
 * 'error' value.
 *
 * @param {Array} snaptimes - items to divide between the workers
 *   (presumably dates to query; confirm against the caller).
 * @param {*} course - forwarded untouched to every worker as `c`.
 * @returns {Promise<undefined>} As written, the fulfilment handler is a
 *   placeholder and the rejection handler only logs, so the returned
 *   promise always fulfils with `undefined`.
 */
function kidcollector(snaptimes, course) {
  const numchild = 10;
  const chunked = chunkify(snaptimes, numchild, true);

  // 1-based progress counter; only used to build the "x out of y" log line.
  let done = 1;

  // Forks one worker for chunk `i` and wraps its reply in a promise.
  const spawnWorker = (i) => new Promise((resolve, reject) => {
    const child = child_process.fork('./child.js');

    // Hand the worker its share of the input before wiring up listeners.
    child.send({ times: chunked[i], c: course });

    child.on('error', (err) => {
      console.log('Child error.');
      reject(err);
    });

    child.on('message', (m) => {
      if (!m.err) {
        console.log('recieved data from ' + m.child + '! ' + done + ' out of ' + numchild);
        done++;
        resolve(m.data);
        return;
      }
      console.log('Got error from '+ m.child, m.err);
      reject(m.err);
    });
  });

  // One pending promise per worker, in chunk order.
  const collectedPromises = _.times(numchild).map(spawnWorker);

  const onAllCollected = (results) => {
    // compile all data into one array then return it
  };
  const onAnyFailure = (err) => {
    console.log("One of the kids messed up:", err);
  };

  return Promise.all(collectedPromises).then(onAllCollected).catch(onAnyFailure);
}

// child.js, a separate file

// Worker-side setup: create one client for this process and connect it.
// The connection string is redacted in the post, so the next line is a
// placeholder rather than runnable code.
const connString = // it's a secret!
const client = new Client(connString);
client.connect();

// Print the stack of any error emitted by the client.
client.on('error', (err) => {
 console.error('Client error:', err.stack)
})

// On 'exit': log the listener argument when it is truthy, then end the client.
// NOTE(review): Node documents that 'exit' listeners receive the exit code
// (not an Error) and that only synchronous work runs there — confirm whether
// client.end's callback can ever fire from this handler.
process.on('exit', (err) => {
  if (err) console.log(process.pid + ' has recieved error:', err);
  client.end(() => console.log(process.pid + ' has disconnected on process end', err));
})

// On 'disconnect' (IPC channel closed): end the client and log once it is done.
// NOTE(review): the 'disconnect' event is not documented as passing an
// argument, so the `err` branch is presumably never taken — confirm.
process.on('disconnect', (err) => {
  if (err) console.log(process.pid + ' has recieved error:', err);
  client.end(() => console.log(process.pid + ' has disconnected on process disconnect'));
})

// Handles the single work order from the parent: run `collector` on the
// received chunk, report either the data or the error back over IPC, then
// close the channel.
process.on('message', (m) => {
  const pid = process.pid;

  // NOTE(review): process.send() takes an optional callback and does not
  // return a promise, so the `await`s below do not hold the disconnect back
  // until the message is delivered. Kept as-is: this is the behaviour the
  // question is about.
  const reportSuccess = async (subdata) => {
    console.log("all done");
    await process.send({ child: pid, data: subdata });
    await process.disconnect();
  };

  // Also reached when reportSuccess itself throws, mirroring a trailing .catch().
  const reportFailure = async (err) => {
    console.log("FAILED IN CHILD", err);
    await process.send({ child: pid, err });
    await process.disconnect();
  };

  // async function which compiles data across SQL databases
  collector(m.times, m.c, pid).then(reportSuccess).catch(reportFailure);
});

因此,在预期运行一段时间后,接近数据处理结束时,日志如下所示:

all done // child says they're done
recieved data from 5486! 5 out of 10 // parent has received their data
5486 has disconnected on process disconnect // child disconnects
5481 processing snaptime #35 at 2017-07-31T20:26:40.322Z // child is now processing a new time from their given array
all done
recieved data from 5478! 6 out of 10
5478 has disconnected on process disconnect
5483 processing snaptime #34 at 2017-07-31T20:26:51.065Z
5485 processing snaptime #35 at 2017-07-31T20:27:01.876Z
all done // child says they're done
5477 has disconnected on process disconnect // child disconnects, but parent hasn't received data
all done
recieved data from 5481! 7 out of 10 // all good here
5481 has disconnected on process disconnect
5483 processing snaptime #35 at 2017-07-31T20:27:47.834Z
all done
5485 has disconnected on process disconnect // didn't receive message here
all done
recieved data from 5483! 8 out of 10
5483 has disconnected on process disconnect
hansy@Hansys-MacBook-Air ~/Documents/GitHub // and we're at the command line...?

在 Promise.all() resolve 时,代码应该记录运行时间;在 reject 时,它应该记录某个子进程出了问题以及对应的错误。

关于发生了什么和/或如何解决此问题的任何想法,特别是因为它只发生在较大的数据集中? (我使用带有10个子进程的node v8.0.0)

1 个答案:

答案 0 :(得分:0)

您的问题似乎在于 process.send 并不返回可以 await 的 promise,而是接受一个(可选的)回调。因此,您的 disconnect 调用不会等待消息发送完成。

您的父进程之所以结束,只是因为事件队列中已经没有更多事件需要处理,即使 promise 尚未 settle。您应该监听的是子进程的 exit 事件,而不仅仅是 error。在 exit 事件上 reject,就能确保无论子进程做了什么,Promise.all 最终都会 settle。

我推荐

// parent
…
// Promise for the result of child #i. It settles exactly once: the first
// 'message' fulfils it, while an 'error' or an 'exit' that arrives first
// rejects it, so an enclosing Promise.all can always settle.
// Fulfils with `m.data`; rejects with `m.err`, the emitted error, or an Error
// describing a premature exit.
new Promise((resolve, reject) => {
  const child = child_process.fork('./child.js');
  child.on('error', reject);
  // 'exit' passes a bare exit code (possibly 0, which is falsy) and a signal;
  // wrap them in an Error so the rejection reason is always meaningful.
  // This is a no-op when a message has already resolved the promise.
  child.on('exit', (code, signal) => {
    reject(new Error(`child #${i} exited (code ${code}, signal ${signal}) before sending a result`));
  });
  child.on('message', resolve); // should happen before exit

  child.send({
    times: chunked[i],
    c: course
  });
}).then(function(m) {
  if (m.err) {
    console.log(`received error from #${i} (${m.child})`, m.err);
    throw m.err;
  } else {
    console.log(`received data from #${i} (${m.child})`);
    return m.data;
  }
}, function(err) {
  // No message was received on this path, so there is no `m` to read a pid
  // from; referencing it here would throw a ReferenceError and hide `err`.
  console.log(`Got abort from #${i}`);
  throw err;
});

// child
…
// Handles the work order from the parent: run `collector`, turn its outcome
// into a reply object, send that reply and wait for the send callback, and
// only then disconnect.
process.on('message', (m) => {
  const pid = process.pid;

  // Outcome of collector -> reply payload (a failure becomes a payload too).
  const toSuccessReply = (subdata) => {
    console.log(`${pid} done`);
    return { child: pid, data: subdata };
  };
  const toFailureReply = (err) => {
    console.log(`${pid} FAILED:`, err);
    return { child: pid, err };
  };

  // Adapts the callback form of process.send to a promise, so the chain
  // continues only after the send has completed or failed.
  const deliver = (reply) => new Promise((resolve, reject) => {
    process.send(reply, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });

  const logSendFailure = (err) => {
    console.log(`${pid} FAILED to send result`, err);
  };

  // async function which compiles data across SQL databases
  collector(m.times, m.c, pid)
    .then(toSuccessReply, toFailureReply)
    .then(deliver)
    .catch(logSendFailure)
    .then(() => {
      process.disconnect();
    });
});