重构同步代码以释放node.js异步性的力量

时间:2017-08-15 21:40:08

标签: node.js asynchronous

我们是长期使用 Python 和 PHP 的程序员,手头有一段整洁的同步代码(示例见下)。其中大多数函数都有对应的异步版本。我们很想真正“领会” JavaScript 和 Node 的强大之处,并且相信这正是一个理想的场景:异步的 node.js 可以让程序跑得更快,让我们大开眼界。

用异步 Node 重构以下代码的标准做法是什么?是使用 async/await 和 Promise.all 吗?具体该怎么做?(使用 Node 8.4.0,不需要考虑向后兼容性。)

var fs = require('fs');
const crypto = require('crypto');

// This could list over 10,000 files of various size 
const fileList = ['file1', 'file2', 'file3']; 

// Look up a file's change time (ctime) with a synchronous stat call.
// fs.stat is the async counterpart.
const getCdate = (file) => {
  const { ctime } = fs.statSync(file);
  return ctime;
};

// Report a file's size as given by a synchronous stat call.
// fs.stat is the async counterpart.
const getFSize = (file) => {
  const stats = fs.statSync(file);
  return stats.size;
};

// Can be async through file streams (see resources below)
/**
 * Compute the MD5 digest of a file's contents.
 *
 * The whole file is read synchronously before it is hashed.
 *
 * @param {string} file - Path of the file to hash.
 * @returns {string} Hex-encoded MD5 digest.
 */
const getMd5 = (file) => {
  // readFileSync already returns a Buffer, so no placeholder allocation
  // (and no deprecated `new Buffer()` call) is needed.
  const fileData = fs.readFileSync(file);
  return crypto.createHash('md5').update(fileData).digest('hex');
};

// Build one [name, details] pair per file, in the same order as fileList.
let filesObj = fileList.map((file) => {
  const details = {
    datetime: getCdate(file),
    filesize: getFSize(file),
    md5hash: getMd5(file),
  };
  return [file, details];
});

console.log(filesObj);

注意:

  • 我们需要保持函数的模块化和可复用性。
  • 用于生成filesObj内容的函数不止此处列出的这些。
  • 大多数函数都可以改写为异步的,但有些不能。
  • 理想情况下,我们需要保留fileList的原始顺序。
  • 理想情况下,我们希望使用最新的Node和JS特性,而不是依赖外部模块。

用于异步获取md5的各种文件流方法:

2 个答案:

答案 0 :(得分:1)

有多种不同的方式可以异步处理这段代码。您可以使用 Node 的 async 库来更优雅地处理所有回调。如果你不想深入研究 Promise,那么这就是“简单”的选项。我给“简单”加了引号,是因为只要你对 Promise 理解得足够好,Promise 实际上更简单。async 库很有帮助,但它在错误传播方面仍有许多不足,而且你需要用大量样板代码来包装所有调用。

更好的方法是使用 Promise。async/await 仍然很新,如果没有 Babel 或 TypeScript 这样的预处理器,Node 7(不确定 Node 8)甚至都不支持它。此外,async/await 在底层本来就是基于 Promise 的。

以下是使用promises的方法,甚至包括文件统计信息缓存以获得最佳性能:

const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];

// Use Bluebird's Promise.promisifyAll utility to turn all of fs'
// async functions into promise returning versions of them.
// The new promise-enabled methods will have the same name but with
// a suffix of "Async". Ex: fs.stat will be fs.statAsync.
Promise.promisifyAll(fs);

// Cache of stat lookups keyed by file name. Each entry is the promise
// returned by fs.statAsync, so the cache is filled synchronously on the
// first request and every later request for the same file - even one made
// before the stat call has finished - reuses that single lookup.
// Successful entries are kept until the process exits.
const cache = new Map();

/**
 * Get one property of a file's stats.
 *
 * @param {string} fileName - Path of the file to stat.
 * @param {string} prop - Name of the stats property to read, e.g. 'ctime'
 *   or 'size'.
 * @returns {Promise<*>} Promise that resolves to the requested property and
 *   rejects if the stat call fails.
 */
const getFileStats = (fileName, prop) => {
  let pending = cache.get(fileName);
  if (pending === undefined) {
    pending = fs.statAsync(fileName);
    cache.set(fileName, pending);
    // Forget failed lookups so a later call can retry instead of replaying
    // the same rejection.
    pending.catch(() => cache.delete(fileName));
  }
  // Always hand back a promise, whether or not the lookup was cached.
  return pending.then(fileStats => fileStats[prop]);
};

// Hash a file's contents with MD5.
// Returns a promise for the hex digest; the file is read in full first.
const getMd5Hash = file =>
  fs.readFileAsync(file).then(fileData =>
    crypto.createHash('md5').update(fileData).digest('hex'));

// Start from a promise for the fileList array and use Bluebird's .map, which
// works much like Array.prototype.map except that the mapper may return a
// promise; the resolved results keep the order of the input array.
let results = Promise.resolve(fileList).map(fileName => {
  return Promise.all([

    // Resolves to the file's change time.
    getFileStats(fileName, 'ctime'),

    // Resolves to the file's size. Promise.all accepts promises and plain
    // values alike, so it makes no difference whether getFileStats answers
    // from its cache or from a fresh stat call.
    getFileStats(fileName, 'size'),

    // Resolves to the hex MD5 digest of the file's contents.
    getMd5Hash(fileName)
  ])
  // .spread is a Bluebird shortcut for .then on an array result: the values,
  // which Promise.all yields in the order listed above, are passed to the
  // handler as individual arguments.
  .spread((dateTime, fileSize, md5Hash) => [fileName, { dateTime, fileSize, md5Hash }]);
}).catch(error => {
  // A rejection from any of the calls above ends up here. After logging,
  // `results` resolves to undefined rather than to the array.
  console.log(error);
});

以下是代码,但没有注释,以便更容易查看:

const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];

Promise.promisifyAll(fs);

// Stat promises keyed by file name, so concurrent requests for the same file
// share a single fs.statAsync call.
const cache = new Map();

// Resolves to property `prop` (e.g. 'ctime', 'size') of fileName's stats.
const getFileStats = (fileName, prop) => {
  let pending = cache.get(fileName);
  if (pending === undefined) {
    pending = fs.statAsync(fileName);
    cache.set(fileName, pending);
    // Drop failed lookups so a later call can retry.
    pending.catch(() => cache.delete(fileName));
  }
  return pending.then(fileStats => fileStats[prop]);
};

// Promise for the hex MD5 digest of the file's full contents.
const getMd5Hash = file => fs.readFileAsync(file).then(
  fileData => crypto.createHash('md5').update(fileData).digest('hex')
);

// Resolves to [fileName, { dateTime, fileSize, md5Hash }] pairs in fileList
// order; on failure the error is logged and results resolves to undefined.
let results = Promise.resolve(fileList).map(fileName => {
  return Promise.all([
    getFileStats(fileName, 'ctime'),
    getFileStats(fileName, 'size'),
    getMd5Hash(fileName)
  ]).spread((dateTime, fileSize, md5Hash) => [fileName, { dateTime, fileSize, md5Hash }]);
}).catch(console.log);

最终,results 会解析(resolve)为一个数组,其内容应当与原始代码的结果相对应,而且在基准测试中的表现应该会好得多:

[
  ['file1', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
  ['file2', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
  ['file3', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }]
]

如有任何拼写错误,先在此致歉。我没有时间、也没有条件实际运行这些代码,不过我已经相当仔细地检查过了。

发现 Node 从 7.6 起已支持 async/await 之后,我昨晚决定试用了一下。它似乎最适合那些不需要并行执行的递归异步任务,或者你希望能像同步代码那样书写的嵌套异步任务。就你的需求而言,我没有看到什么特别令人兴奋的 async/await 用法,不过有些地方的代码读起来会更清晰。下面是同样的代码,只是加入了一些 async/await 带来的小便利。

const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];

Promise.promisifyAll(fs);

// Stat promises keyed by file name. Storing the promise (not the resolved
// stats) lets concurrent requests for the same file share one lookup.
const cache = new Map();

/**
 * Get one property of a file's stats.
 *
 * @param {string} fileName - Path of the file to stat.
 * @param {string} prop - Stats property to read, e.g. 'ctime' or 'size'.
 * @returns {Promise<*>} Resolves to the requested property.
 */
async function getFileStats (fileName, prop) {
  // Everything before the first await runs synchronously, so the cache entry
  // exists before any other caller can ask for the same file.
  let pending = cache.get(fileName);
  if (pending === undefined) {
    // Use the promisified statAsync; plain fs.stat expects a callback.
    pending = fs.statAsync(fileName);
    cache.set(fileName, pending);
    // Drop failed lookups so a later call can retry.
    pending.catch(() => cache.delete(fileName));
  }
  const fileStats = await pending;
  return fileStats[prop];
}

// Resolve to the hex MD5 digest of the file's contents.
async function getMd5Hash (file) {
  const contents = await fs.readFileAsync(file);
  return crypto.createHash('md5').update(contents).digest('hex');
}

// Resolves to [fileName, { dateTime, fileSize, md5Hash }] pairs in fileList
// order; on failure the error is logged and results resolves to undefined.
let results = Promise.resolve(fileList).map(async fileName => {
  // The three lookups for one file run in parallel.
  const [dateTime, fileSize, md5Hash] = await Promise.all([
    getFileStats(fileName, 'ctime'),
    getFileStats(fileName, 'size'),
    getMd5Hash(fileName)
  ]);
  return [fileName, { dateTime, fileSize, md5Hash }];
}).catch(console.log);

答案 1 :(得分:0)

我会把 getCDate、getFSize 和 getMd5 全部改为异步并 Promise 化(promisify),然后将它们包装在另一个返回 Promise 的异步函数中,这里称为 statFile。

/**
 * Gather the change time, size and MD5 hash of one file.
 *
 * The three lookups are started together. A rejection from any of them
 * propagates to the caller, which is expected to attach its own .catch.
 *
 * @param {string} file - Path of the file to inspect.
 * @returns {Promise<{datetime: *, filesize: *, md5hash: *}>} Resolves once
 *   all three lookups have finished.
 */
function statFile(file) {
    return Promise.all([
        getCDate(file),
        getFSize(file),
        getMd5(file)
    ])
    // Promise.all resolves to a single array, so the handler has to
    // destructure it rather than declare three separate parameters.
    .then(([datetime, filesize, md5hash]) => ({datetime, filesize, md5hash}));
}

然后您可以将映射功能更改为

// Call statFile for every entry; the resulting array is in fileList order.
const promises = fileList.map(statFile);

然后使用Promise.all很简单:

// Promise.all keeps the order of its input, so filesObj lines up with the
// promises array.
Promise.all(promises)
    .then(filesObj => {
        // Do something with the results, e.g.:
        console.log(filesObj);
    })
    .catch(err => {
        // Handle the error, e.g.:
        console.error(err);
    });

这样可以保持代码模块化,不需要 async/await,允许你向 statFile 中加入额外的函数,并且能保留文件的顺序。