Nodejs流可写流排放事件未触发

时间:2014-11-02 20:11:29

标签: node.js macos stream large-files osx-yosemite

我试图读取一个大文件,做一些计算,然后写入一个更大的文件。为了防止过多的内存消耗,我正在使用流。我面临的问题是写入流没有触发“漏极”事件,这表示写入已刷新到磁盘。为了防止“背压”,我在等待再次开始写入缓冲区之前触发排水事件。在调试时我发现在.write()调用返回false并且执行了行fvfileStream.once('drain',test)后,程序就会停止并且什么都不做。

以下是代码:

var fs = require('fs');

//a test function I created to see if the callback is called after drain.
var test = function(){
	console.log("Done Draining");
}

fs.readFile('/another/file/to/be/read', {
	encoding: "utf8"
}, function(err, data) {
	if (err) throw err;

	//Make an array containing tags.
	var tags = data.split('\n');

	//create a write stream.
	var fvfileStream = fs.createWriteStream('/path/TagFeatureVectors.csv');

	//read in the question posts
	var qfileStream = fs.createReadStream('/Big/file/QuestionsWithTags.csv', {
		encoding: "utf8"
	});

	var partialRow = null;
	var writable = true;
	var count = 0;

	var doRead = function() {
		var qData = qfileStream.read();
		var questions = qData.split('\n');

		if (partialRow != null) {
			questions[0] = partialRow + questions[0];
			partialRow = null;
		}

		var lastRow = questions[questions.length - 1];
		if (lastRow.charAt(lastRow.length - 1) != '\n') {
			partialRow = lastRow;
		}

		questions.forEach(function(row, index, array) {
			count++;

			var fields = row.split(',');

			console.log("Processing question number: " + count + " id: " + fields[0]);
			var tagString = fields[1];

			var regex = new RegExp(/<([^>]+)>/g);

			tags.forEach(function(tag, index, array) {
				var found = false;
				var questionTags;

				while ((questionTags = regex.exec(tagString)) != null) {
					var currentTag = questionTags[1]

					if (currentTag === tag) {
						found = true;
						break;
					}
				};

                //This is where the writestream is written to
				if (found) {
					writable = fvfileStream.write("1,", "utf8");
				}else {
					writable = fvfileStream.write("0,","utf8");
				}
			});
		});

		fvfileStream.write("\n");
	}

	qfileStream.on('readable', function() {
		if (writable) {
			doRead();
		} else {
            //Waiting for drain event.
			fvfileStream.once('drain', test);
		}
	});

	qfileStream.on('end', function() {
		fvfileStream.end();
	});
});

更新

根据@loganfsmyth提供的建议,我实现了转换流,但仍然遇到了同样的问题。这是我更新的代码:

var fs = require('fs');
var stream = require('stream');
var util = require('util');

var Transform = stream.Transform;

function FVCreator(options) {
  // allow use without new
  if (!(this instanceof FVCreator)) {
    return new FVCreator(options);
  }

  // init Transform
  Transform.call(this, options);
}

util.inherits(FVCreator, Transform);

var partialRow = null;
var count = 0;
var tags;

FVCreator.prototype._transform = function(chunk, enc, cb) {
  var that = this;
  var questions = chunk.toString().split('\n');

  if (partialRow != null) {
    questions[0] = partialRow + questions[0];
    partialRow = null;
  }

  var lastRow = questions[questions.length - 1];
  if (lastRow.charAt(lastRow.length - 1) != '\n') {
    partialRow = lastRow;
    questions.splice(questions.length - 1, 1);
  }

  questions.forEach(function(row, index, array) {
    count++;

    var fields = row.split(',');

    console.log("Processing question number: " + count + " id: " + fields[0]);
    var tagString = fields[1];

    var regex = new RegExp(/<([^>]+)>/g);

    tags.forEach(function(tag, index, array) {
      var found = false;
      var questionTags;

      while ((questionTags = regex.exec(tagString)) != null) {
        var currentTag = questionTags[1]

        if (currentTag === tag) {
          found = true;
          break;
        }
      };

      if (found) {
        that.push("1,", "utf8");
      } else {
        that.push("0,", "utf8");
      }
    });
  });

  this.push("\n", "utf8");
  cb();
};

fs.readFile('/another/file/to/be/read', {
  encoding: "utf8"
}, function(err, data) {
  if (err) throw err;

  //Make an array containing tags.
  tags = data.split('\n');

  //write to a file.
  var fvfileStream = fs.createWriteStream('/path/TagFeatureVectors.csv');

  //read in the question posts
  var qfileStream = fs.createReadStream('/large/file/to/be/read', {
    encoding: "utf8"
  });

  var fvc = new FVCreator();

  qfileStream.pipe(fvc).pipe(fvfileStream);
});

我在OSX Yosemite上运行它。

0 个答案:

没有答案