NodeJS createWriteStream JavaScript堆内存不足

时间:2020-09-03 19:20:41

标签: javascript node.js asynchronous memory-leaks

我正在尝试编写一个函数，用来生成 1000 万条伪造记录，最终用于填充（seed）Postgres 数据库。我有三个独立的 writeStream，分别异步写入不同的文件。我一直在尝试为流的 `drain` 事件实现监听器，但最多只能写到 500 万条左右，就会达到堆内存上限。任何建议或帮助都将不胜感激。

const faker = require('faker');
const fs = require('fs');
const moment = require('moment');
require('moment-precise-range-plugin')

// Start time; progress and summary messages report elapsed time relative to it.
let begin = moment();

// All three output files are opened with the 'a' (append) flag.
const appendOptions = {flags: 'a'};
let Reviews = fs.createWriteStream('./data/Reviews.json', appendOptions);
let Images = fs.createWriteStream('./data/Images.json', appendOptions);
let Chars = fs.createWriteStream('./data/Characteristics.json', appendOptions);

// Each file begins with the opening bracket of a JSON array.
for (const stream of [Reviews, Images, Chars]) {
  stream.write('[\n');
}


/**
 * Build one fake review record and write it to the Reviews stream as a single
 * JSON object followed by ",\n".
 *
 * @param {number} num - Value stored as the record's review_id.
 * @returns {Promise<boolean>} Resolves to the value returned by Reviews.write().
 *   Because this function is async, callers must await the result before
 *   testing it; a false value means the caller should wait for 'drain'.
 */
let makeReview = async (num) => {
  // Declared with const: the bare assignment used previously leaked a global
  // (and throws a ReferenceError in strict mode / ES modules).
  const review = {
    review_id : num,
    product_id: Math.round(random(1, 1000000)),
    summary: faker.random.words(Math.round(random(5,10))),
    body: faker.random.words(Math.round(random(10,50))),
    rating: random(0, 40),
    name: faker.name.findName(),
    email: faker.internet.email(),
    date: moment(faker.date.past()).format("YYYY[-]MM[-]DD[T]HH:mm:ss.SSS[Z]"),
    recommend: Boolean(Math.round(random(0, 1))),
    helpfulness: random(0, 100),
    // Note: the "no response" case is stored as the string 'null', not a JSON null.
    response: Math.round(random(1, 10)) === 5 ? faker.random.words(random(5, 25)) : 'null',
    reported: Math.round(random(1, 1000)) === 500
  };
  return Reviews.write((JSON.stringify(review) + ',\n'));
}

/**
 * Build four characteristic records (character_id 1 through 4) for one review
 * and write them to the Chars stream in a single write() call, each record
 * followed by ",\n".
 *
 * @param {number} num - Value stored as each record's review_id.
 * @returns {Promise<boolean>} Resolves to the value returned by Chars.write().
 *   Because this function is async, callers must await the result before
 *   testing it; a false value means the caller should wait for 'drain'.
 */
let makeChars = async(num) => {
  let str = '';
  // `let c` keeps the counter local; it was previously an implicit global.
  for (let c = 1 ; c <= 4 ; c++){
    let characteristic = {
      character_id: c,
      review_id: num,
      value: random(0,5)
    }
    str += JSON.stringify(characteristic) + ',\n';
  }
  return Chars.write(str);
}

/**
 * Build photo records for one review and write them to the Images stream in a
 * single write() call, each record followed by ",\n".
 *
 * The loop bound is inclusive (p <= photos), so at least one record is always
 * written.
 *
 * @param {number} num - Value stored as each record's review_id.
 * @returns {Promise<boolean>} Resolves to the value returned by Images.write().
 *   Because this function is async, callers must await the result before
 *   testing it; a false value means the caller should wait for 'drain'.
 */
let makeImages = async(num) => {
  let photos = random(0, 5);
  let str = '';
  // `let p` keeps the counter local; it was previously an implicit global.
  for (let p = 0; p <= photos; p ++){
    let photo = {
      review_id: num,
      url: faker.image.image()
    }
    str += JSON.stringify(photo) + ',\n';
  }

  return Images.write(str);
}

/**
 * Start the review, characteristics and images writers for one id, in that
 * order, and wait for all three.
 *
 * @param {number} num1 - Id passed to each writer.
 * @returns {Promise<Array>} Resolves to the three writers' results, in the
 *   order [makeReview, makeChars, makeImages].
 */
const makeData = async (num1) => {
  const pending = [makeReview, makeChars, makeImages].map((make) => make(num1));
  return Promise.all(pending);
}

let totalReviews = 10000000;

/**
 * Generate all records, honouring stream backpressure, then close the JSON
 * arrays and end the three streams.
 *
 * Fixes relative to the earlier version:
 *  - makeReview/makeChars/makeImages are async, so their result is a Promise.
 *    Testing `!promise` is always false, which meant 'drain' was never awaited
 *    and all data piled up in the stream buffers until the heap was exhausted.
 *    The result is now awaited before being tested.
 *  - The extra `console.log(makeData(i))` call wrote every record a second time
 *    and logged a Promise on each iteration; it has been removed.
 *  - The closing writes and end() calls now run after the loop. Once the loop
 *    really awaits, code placed after the IIFE would run before generation
 *    finished and end the streams too early.
 *
 * NOTE(review): every record is written with a trailing ",\n", so each file ends
 * with ",\n\n]"; strict JSON parsers reject the trailing comma.
 */
(async () => {
  for (let i = 1; i <= totalReviews; i++) {
    // write() returned false => the stream's buffer is full; wait for it to
    // flush before producing more data.
    if (!(await makeReview(i))) {
      await new Promise(resolve => Reviews.once('drain', resolve));
    }
    if (!(await makeChars(i))) {
      await new Promise(resolve => Chars.once('drain', resolve));
    }
    if (!(await makeImages(i))) {
      await new Promise(resolve => Images.once('drain', resolve));
    }

    if (i % 100000 === 0) {
      let batchDone = moment();
      console.log( i + ' Records Written in ' + batchDone.preciseDiff(begin))
    }
  }

  Reviews.write('\n]');
  Chars.write('\n]');
  Images.write('\n]');

  Reviews.end();
  Chars.end();
  Images.end();

  let finished = moment();
  console.log('Generated ' + totalReviews + ' Records in ' + finished.preciseDiff(begin));
})().catch((err) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

// helper functions
/**
 * Return a pseudo-random number with one decimal digit.
 *
 * The value is floor(Math.random() * (end - start) + 10) / 10, so for
 * end >= start it lies in [1, 1 + (end - start) / 10). Not truly random and
 * somewhat biased.
 *
 * NOTE(review): the result is not confined to [start, end]; for example
 * random(0, 1) always yields 1. Confirm whether that is intended.
 *
 * @param {number} start - Lower argument; only its difference from `end` is used.
 * @param {number} end - Upper argument.
 * @returns {number} The generated value.
 */
function random(start, end) {
  const span = end - start;
  const tenths = Math.floor(Math.random() * span + 10);
  return tenths / 10;
};

1 个答案:

答案 0 :(得分:0)

尝试使用以下命令设置内存空间:

`set NODE_OPTIONS=--max_old_space_size=8192`（Windows；在 Linux/macOS 上请使用 `export NODE_OPTIONS=--max_old_space_size=8192`）