我正在尝试编写一个函数,该函数生成1000万条伪记录,以最终植入Postgres数据库中。我有三个单独的writeStreams异步写入不同的文件。我一直在尝试为流失事件实现流侦听器,但在达到堆限制之前最多只能达到500万左右。任何建议或帮助将不胜感激。
const faker = require('faker');
const fs = require('fs');
const moment = require('moment');
require('moment-precise-range-plugin')
let begin = moment();
let Reviews = fs.createWriteStream('./data/Reviews.json', {flags: 'a'});
let Images = fs.createWriteStream('./data/Images.json', {flags: 'a'});
let Chars = fs.createWriteStream('./data/Characteristics.json', {flags: 'a'});
Reviews.write('[\n')
Images.write('[\n')
Chars.write('[\n')
let makeReview = async (num) => {
review = {
review_id : num,
product_id: Math.round(random(1, 1000000)),
summary: faker.random.words(Math.round(random(5,10))),
body: faker.random.words(Math.round(random(10,50))),
rating: random(0, 40),
name: faker.name.findName(),
email: faker.internet.email(),
date: moment(faker.date.past()).format("YYYY[-]MM[-]DD[T]HH:mm:ss.SSS[Z]"),
recommend: Boolean(Math.round(random(0, 1))),
helpfulness: random(0, 100),
response: Math.round(random(1, 10)) === 5 ? faker.random.words(random(5, 25)) : 'null',
reported: Math.round(random(1, 1000)) === 500
};
// Reviews.write((JSON.stringify(review) + ',\n'));
return Reviews.write((JSON.stringify(review) + ',\n'));
}
let makeChars = async(num) => {
let str = '';
for(c = 1 ; c <= 4 ; c++){
let characteristic = {
character_id: c,
review_id: num,
value: random(0,5)
}
str += JSON.stringify(characteristic) + ',\n';
}
return Chars.write(str);
}
let makeImages = async(num) => {
let photos = random(0, 5);
let str = '';
for (p = 0; p <= photos; p ++){
let photo = {
review_id: num,
url: faker.image.image()
}
str += JSON.stringify(photo) + ',\n';
}
return Images.write(str);
}
const makeData = async(num1) => {
const r = makeReview(num1)
const c = makeChars(num1)
const i = makeImages(num1)
const arr = [r,c,i]
return await Promise.all(arr)
}
let totalReviews = 10000000;
(async() => {
for (let i = 1; i <= totalReviews; i++) {
console.log(makeData(i))
if(!makeReview(i)) {
await new Promise(resolve => Reviews.once('drain', resolve));
}
if(!makeChars(i)) {
await new Promise(resolve => Chars.once('drain', resolve));
}
if(!makeImages(i)) {
await new Promise(resolve => Images.once('drain', resolve));
}
// makeData(i)
// .then(async values => {
// if(!values[0]) {
// await new Promise(resolve => Reviews.once('drain', resolve));
// }
// if(!values[1]) {
// await new Promise(resolve => Chars.once('drain', resolve));
// }
// if(!values[2]) {
// await new Promise(resolve => Images.once('drain', resolve));
// }
// })
if(i % 100000 === 0){
let batchDone = moment();
console.log( i + ' Records Written in ' + batchDone.preciseDiff(begin))
}
}
})();
Reviews.write('\n]');
Chars.write('\n]');
Images.write('\n]');
Reviews.end();
Chars.end();
Images.end();
let finished = moment();
console.log('Generated ' + totalReviews + ' Records in ' + finished.preciseDiff(begin));
// helper functions
function random(start, end) {
// not truly random, somewhat biased
return Math.floor(Math.random() * (end - start) + 10) / 10;
};
答案 0 :(得分:0)
尝试使用以下命令设置内存空间:
设置node_options =-max_old_space_size = 8192