Question

update Ok, this seems to be linked to through2's "highWaterMark" property. Basically, it means "don't buffer more than x files, wait for someone to consume it and then only then accept another batch of files". Since it works this way by design, the snippet in this question is being reviewed. There must be a better way to handle many files.

Quick fix, allowing 8000 files:

  through.obj({ highWaterMark: 8000 }, (file, enc, next) => { ... })

Original question

I'm using a gulp task to create translation files. It scans an src folder for *.i18n.json files and saves one .json per language it finds within the source files.

It works fine - until it finds more than 16 files. It's using through2 for the processing of each file. See source code below. The method processAll18nFiles() is a custom pipe that receives the matching input files, reads the content of each files, constructs the resulting dictionaries on the fly, then finally hands it over to the on('finish) handler to write the dictionaries.

Tested on windows and mac. There seems to be a limitation that my approach hits, because it's working just fine with 16 files or less.

Still looking, clues welcome :-)

source file example: signs.i18n.json

{
  "path": "profile.signs",
  "data": {
    "title": {
      "fr": "mes signes précurseurs",
      "en": "my warning signs"
    },
    "add": {
      "fr": "ajouter un nouveau signe",
      "en": "add a new warning sign"
    }
  }
}

output file example: en.json

{"profile":{"signs":{"title":"my warning signs","add":"add a new warning sign"}}}

gulpfile.js

const fs = require('fs');
const path = require('path');
const gulp = require('gulp');
const watch = require('gulp-watch');
const through = require('through2');

const searchPatternFolder = 'src/app/**/*.i18n.json';
const outputFolder = path.join('src', 'assets', 'i18n');

gulp.task('default', () => {
  console.log('Ionosphere Gulp tasks');
  console.log(' > gulp i18n         builds the i18n file.');
  console.log(' > gulp i18n:watch   watches i18n file and trigger build.');
});

gulp.task('i18n:watch', () => watch(searchPatternFolder, { ignoreInitial: false }, () => gulp.start('i18n')));
gulp.task('i18n', done => processAll18nFiles(done));

function processAll18nFiles(done) {
  const dictionary = {};
  console.log('[i18n] Rebuilding...');
  gulp
    .src(searchPatternFolder)
    .pipe(
      through.obj((file, enc, next) => {
        console.log('doing ', file.path);
        const i18n = JSON.parse(file.contents.toString('utf8'));
        composeDictionary(dictionary, i18n.data, i18n.path.split('.'));
        next(null, file);
      })
    )
    .on('finish', () => {
      const writes = [];
      Object.keys(dictionary).forEach(langKey => {
        console.log('lang key ', langKey);
        writes.push(writeDictionary(langKey, dictionary[langKey]));
      });
      Promise.all(writes)
        .then(data => done())
        .catch(err => console.log('ERROR ', err));
    });
}

function composeDictionary(dictionary, data, path) {
  Object.keys(data)
    .map(key => ({ key, data: data[key] }))
    .forEach(({ key, data }) => {
      if (isString(data)) {
        setDictionaryEntry(dictionary, key, path, data);
      } else {
        composeDictionary(dictionary, data, [...path, key]);
      }
    });
}

function isString(x) {
  return Object.prototype.toString.call(x) === '[object String]';
}

function initDictionaryEntry(key, dictionary) {
  if (!dictionary[key]) {
    dictionary[key] = {};
  }
  return dictionary[key];
}

function setDictionaryEntry(dictionary, langKey, path, data) {
  initDictionaryEntry(langKey, dictionary);
  let subDict = dictionary[langKey];
  path.forEach(subKey => {
    isLastToken = path[path.length - 1] === subKey;
    if (isLastToken) {
      subDict[subKey] = data;
    } else {
      subDict = initDictionaryEntry(subKey, subDict);
    }
  });
}

function writeDictionary(lang, data) {
  return new Promise((resolve, reject) => {
    fs.writeFile(
      path.join(outputFolder, lang + '.json'),
      JSON.stringify(data),
      'utf8',
      err => (err ? reject(err) : resolve())
    );
  });
}

Answer 1

好，如here所述，一个必须消耗管道。这是通过添加“数据”事件的处理程序来完成的，例如：

  gulp
    .src(searchPatternFolder)
    .pipe(
      through.obj({ highWaterMark: 4, objectMode: true }, (file, enc, next) => {
        const { data, path } = JSON.parse(file.contents.toString('utf8'));
        next(null, { data, path });
      })
    )
    // The next line handles the "consumption" of upstream pipings
    .on('data', ({ data, path }) => ++count && composeDictionary(dictionary, data, path.split('.')))
    .on('end', () =>
      Promise.all(Object.keys(dictionary).map(langKey => writeDictionary(langKey, dictionary[langKey])))
        .then(() => {
          console.log(`[i18n] Done, ${count} files processed, language count: ${Object.keys(dictionary).length}`);
          done();
        })
        .catch(err => console.log('ERROR ', err))
    );

Nodejs > Gulp > through2 > Limitation to 16 files?

1 个答案:

Nodejs &gt; Gulp &gt; through2 &gt; Limitation to 16 files?

1 个答案:

Nodejs > Gulp > through2 > Limitation to 16 files?