Node.js readStream用于大数据处理

时间:2016-09-16 00:15:11

标签: javascript node.js mongodb

我无法在没有高RAM使用率的情况下逐行创建处理大型nessus xml文件的node.js方法。在目前的形式中,它正在正确地保存MongoDB中的数据,但是RAM使用率不断增加,并且文件错误超过~1.5GB。

我尝试在readStream上使用.pause(),但是,我必须错误地实现它,因为它似乎从来没有实际暂停流。

以下是代码:

// LR.JS Imports
var fs = require('fs');
var readline = require('readline');
var stream = require('stream');
var instream = fs.createReadStream('test.nessus');
var outstream = new stream;
var rl = readline.createInterface(instream, outstream);
var buff = false;
var parseString = require('xml2js').parseString;
var buffStream = '';
//Mongoose Imports
var mongoose = require('mongoose');
var ReportHostDoc = require('./schemas/report-host.model.js');
var ReportItemDoc = require('./schemas/report-item.model.js');
var PluginDetailDoc = require('./schemas/plugin-detail.model.js');
mongoose.Promise = require('bluebird');
// Mongoose Connect
mongoose.connect('mongodb://localhost/test');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error:'));
db.once('open', () => {
  // Create counters for _taskCheck
  var reportHostCounter = 0;
  var reportHostSaveCounter = 0;
  rl.on('line', (line) => {
    // process line here
    if (/[<]ReportHost/.test(line)) {
      buff = true;
      reportHostCounter++;
    }
    if (buff == true) {
      buffStream += line + '\n';
    }
    if (/[<][/]ReportHost/i.test(line)) {
      buff = false; // changed to = not == 9/6
      // XML2JS Parse ReportHost Buffstream
      parseString(buffStream, (err, result) => {
        // Loop through ReportHost properties to reliably find IP
        var reportHostIP = '';
        var reportHostOS = '';
        result.ReportHost.HostProperties[0].tag.forEach((entry) => {
          if (entry.$.name === 'host-ip') {
            reportHostIP = entry._;
          }
          if (entry.$.name === 'operating-system') {
            reportHostOS = entry._;
          }
        });
        // Save Report Host Document
        var host = new ReportHostDoc({
          hostname: result.ReportHost.$.name,
          ip: reportHostIP,
          os: reportHostOS,
          high: 0,
          critical: 0
        });
        // Process Each Report Item
        result.ReportHost.ReportItem.forEach((entry) => {
          var cvssScore = '';
          if (entry.cvss_base_score) {
            cvssScore = JSON.stringify(entry.cvss_base_score).slice(2, 5)
          } else {
            cvssScore = 0;
          }
          var item = new ReportItemDoc({
            itemName: entry.$.pluginName,
            pluginID: entry.$.pluginID,
            ipAddress: reportHostIP,
            exploitAvailable: entry.exploit_available,
            cvssBaseScore: cvssScore,
            pluginPublishedDate: entry.plugin_publication_date,
            pluginModifiedDate: entry.plugin_modification_date,
            description: entry.description
          })
          if (item.cvssBaseScore >= 7 && item.cvssBaseScore < 10) {
            host.high++;
          }
          if (item.cvssBaseScore == 10) {
            host.critical++;
          }
          item.save((err, item) => {
            if (err) return console.log(err);
          })
        });
        host.save((err, host) => {
          if (err) return console.log(err);
          reportHostSaveCounter++;
        });
      })
      buffStream = ''; // Empty buffer for next report host
    }
  });
  rl.on('close', () => { // Read Stream Finished
    console.log('Log Parse finished!');
    var _taskCheck = setInterval(() => { // Async loop waits for all tasks to finish
      if (reportHostCounter == reportHostSaveCounter) {
        clearInterval(_taskCheck);
        var pluginCounter = 0;
        var pluginSaveCounter = 0;
        ReportItemDoc.distinct('pluginID', (err, ids) => {
          ids.forEach((id) => {
            pluginCounter++;
            ReportItemDoc.findOne({
              'pluginID': id
            }, (err, plugin) => {
              ReportItemDoc.count({
                'pluginID': id
              }, (err, count) => {
                var pluginSeverity = '';
                var cvss = plugin.cvssBaseScore;
                if (cvss >= 7 && cvss < 10) {
                  pluginSeverity = 'High';
                }
                if (cvss == 10) {
                  pluginSeverity = 'Critical';
                }
                item = new PluginDetailDoc({
                  pluginName: plugin.itemName,
                  pluginID: id,
                  severity: pluginSeverity,
                  quantity: count,
                  description: plugin.description
                })
                item.save((err, host) => {
                  if (err) return console.log(err);
                  pluginSaveCounter++;
                });
              })
            });
          })
        })
        var _pluginTaskCheck = setInterval(() => { // Async loop waits for all tasks to finish
          if (pluginCounter == pluginSaveCounter) {
            clearInterval(_pluginTaskCheck);
            mongoose.connection.close();
          }
        }, 100);
      }
    }, 100);
  });
});

0 个答案:

没有答案