如何使用文件系统拆分获取完整的数据块?

时间:2017-03-23 15:26:23

标签: javascript node.js fs

我基于 fs 实现了搜索功能:当我从客户端输入字符串时,会按行分割文件数据。但如果你看一下 server.log 就会发现,按行提取数据会丢失跨多行的数据块中的内容。例如,第一个事件占了两行,因此我的搜索只会返回 [2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue: { queue: 'd-email',
而错过该事件的第二行。如何(例如基于时间变量)获取完整的数据块?

searchService.js

// Iterate over the log files one at a time; async.eachSeries waits for
// done() before moving on to the next file, so done() must only fire
// once the current file's stream has been fully consumed.
async.eachSeries(filesData.logFiles, function(logfile, done) {
    console.log('SearchEnv in eachSeries', filesData.searchEnv);

    // Previous line is kept so multi-line events can be stitched together.
    // (Was an implicit global before — now scoped per file.)
    let prevLine;

    fs.createReadStream('./logs/' + filesData.searchEnv + '/' + logfile.filename)
        .pipe(split())
        .on('data', function(line) {
            if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
            else prevLine = line;
        })
        // BUG FIX: done() was previously called synchronously, right after
        // the stream was created, so the final callback ran (and sent back
        // an empty result set) before any 'data' events were handled.
        .on('end', done)
        .on('error', done);

    function parseLog(line, prev) {
        // Very rudimentary continuation check: a line with no '|' delimiter
        // belongs to the previous event, so glue it onto the stored line.
        if (line.indexOf('|') === -1) line = prev + line;
        // First '|'-delimited field is the bracketed ISO timestamp.
        var messageDateInfo = line.split('|')[0].replace(/[\[\]']+/g, '');
        messageDateInfo = new Date(messageDateInfo).getTime();
        // Convert the search bounds locally instead of mutating the shared
        // searchStartDate/searchEndDate variables on every call.
        var startMs = new Date(searchStartDate).getTime();
        var endMs = new Date(searchEndDate).getTime();
        if (messageDateInfo - startMs > 0 && endMs - messageDateInfo > 0) {
            // Message timestamp falls within the requested range.
            results.push({
                filename: logfile.filename,
                value: line
            });
        }
    }

}, function(err) {
    if (err) {
        console.log('error', err);
    }
    // forEach, not map: we only log, there is nothing to transform.
    results.forEach(function(result) {
        console.log('results');
    });

    // send back results, then reset state for the next search
    callback(results);
    results = [];
    logFiles = null;
});
}

server.log

[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue:  { queue: 'd-email',
  msgId: '7eec01e9-6395-4fee-b44f-f09a40e56978' }
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.templateActor|Filter match for actor/rule (d-email/email_service) with msgId: 7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: 7eec01e9-6395-4fee-b44f-f09a40e56978|mailDispatcher|Received mail event. msgId=7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|mailDispatcher|Mail event with msgId 7eec01e9-6395-4fee-b44f-f09a40e56978 successful: 3 messages delivered
[2017-03-22T20:25:05Z]|zldv6658|verbose|bmid: n/a|routes.event|Received Event from IP (::ffff:130.9.137.139): 74609753-143b-4e06-845c-9a5721575c19
 {"event":{"header":{"eventSource":"AOTSvTM","timestamp":1481966987000,"eventType":"http://aotsvtm.eventing.att.com/SendEscalationsEvent/V1","entityId":"ENTITYID_1"}

2 个答案:

答案 0 :(得分:0)

您可以将 split 模块(用法与我在 my other answer to your very similar question 中演示的一样)和 fs 模块结合使用。

// Pipe the file through split() so the stream emits line-sized chunks.
fs.createReadStream(file)
  .pipe(split())
  .on('data', (line) => {
    // each chunk received here is now a separate line!
  });

请参阅文档:https://www.npmjs.com/package/split

答案 1 :(得分:0)

如果您的日志实际上有多行事件,您可以在解析时将前一行保留在内存中。另外,不要只是在内存中加载整个东西。使用流来减轻机器的压力。

// Keep the previous line in memory so multi-line events can be re-joined
// during parsing; streaming keeps memory usage low.
let prevLine;

fs.createReadStream(file)
  .pipe(split())
  .on('data', function (line) {
    // Lines matching the search go to parseLog together with the line
    // before them; everything else just becomes the new "previous line".
    if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
    else prevLine = line;
  });
// BUG FIX: removed a stray extra `});` that made the snippet a syntax error.

// Re-join a continuation line with the event line that preceded it,
// then parse as before. A line containing no '|' delimiter is treated
// as the tail of the previous (multi-line) event.
function parseLog(line, prev) {
  const isContinuation = line.indexOf('|') === -1;
  if (isContinuation) {
    line = prev + line;
  }
  // Parse as you were doing
}

作为今后的经验法则:日志若采用单行 JSON 的格式来构建,日志文件会更容易管理。