我试图读取一个日志文件,其中每个条目都是一行JSON(JSON结构化文本)。
我最终希望做的是遍历每一行,如果
"事件":" SparkListenerTaskEnd"
发现JSON行将被解析为键的值"结束时间"和"执行者CPU时间"。
我是node.js的新手,所以不是完全错误的,但到目前为止,我已经有了这段代码来遍历文件:
exports.getId(function(err, id){
console.log(id);
var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
var content = data.split('\n');
async.map(content, function (item, callback) {
callback(null, JSON.parse(item));
}, function (err, content) {
console.log(content);
});
});
//console.log(data);
});
这似乎并没有做任何事情。但是,我知道如果我取消注释//console.log(data);
,我可以看到日志文件。
下面是我正在谈论的一个示例JSON行:
{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1514983570810,"Executor ID":"0","Host":"192.168.111.123","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1514983574496,"Failed":false,"Killed":false,"Accumulables":[{"ID":22,"Name":"internal.metrics.input.recordsRead","Update":99171,"Value":99171,"Internal":true,"Count Failed Values":true},{"ID":20,"Name":"internal.metrics.shuffle.write.writeTime","Update":5893440,"Value":5893440,"Internal":true,"Count Failed Values":true},{"ID":19,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":3872,"Value":3872,"Internal":true,"Count Failed Values":true},{"ID":18,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":1468516,"Value":1468516,"Internal":true,"Count Failed Values":true},{"ID":10,"Name":"internal.metrics.peakExecutionMemory","Update":16842752,"Value":16842752,"Internal":true,"Count Failed Values":true},{"ID":9,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":8,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":7,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":6,"Name":"internal.metrics.jvmGCTime","Update":103,"Value":103,"Internal":true,"Count Failed Values":true},{"ID":5,"Name":"internal.metrics.resultSize","Update":2597,"Value":2597,"Internal":true,"Count Failed Values":true},{"ID":4,"Name":"internal.metrics.executorCpuTime","Update":1207164005,"Value":1207164005,"Internal":true,"Count Failed Values":true},{"ID":3,"Name":"internal.metrics.executorRunTime","Update":2738,"Value":2738,"Internal":true,"Count Failed Values":true},{"ID":2,"Name":"internal.metrics.executorDeserializeCpuTime","Update":542927064,"Value":542927064,"Internal":true,"Count Failed Values":true},{"ID":1,"Name":"internal.metrics.executorDeserializeTime","Update":835,"Value":835,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":835,"Executor Deserialize CPU Time":542927064,"Executor Run Time":2738,"Executor CPU Time":1207164005,"Result Size":2597,"JVM GC Time":103,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":1468516,"Shuffle Write Time":5893440,"Shuffle Records Written":3872},"Input Metrics":{"Bytes Read":0,"Records Read":99171},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[{"Block ID":"broadcast_1_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":5941,"Disk Size":0}},{"Block ID":"broadcast_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":9568,"Disk Size":0}},{"Block ID":"broadcast_0_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":25132,"Disk Size":0}},{"Block ID":"broadcast_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":390808,"Disk Size":0}}]}}
更新 这是我的整个代码。我确定它不漂亮但它有效。我现在看看改进它。
var http = require("http");
var fs = require('fs');
var async = require('async');
var readline = require('readline')
//get file name
var options = {
"method" : "GET",
"hostname" : "xxx.xxx.xxx.xxx",
"port" : "18080",
"path" : "/api/v1/applications/"
};
exports.getId = function(callback) {
var req = http.request(options, function (res) {
var chunks = [];
res.on("data", function (chunk) {
chunks.push(chunk);
});
res.on("end", function () {
var body = JSON.parse(Buffer.concat(chunks));
var arrFound = Object.keys(body).filter(function(key) {
if (body[key].name.indexOf("TestName") > -1) {
return body[key].name;
}
}).reduce(function(obj, key){
obj = body[key].id;
return obj;
}, {});;
//console.log("ID: ", arrFound);
callback(null, arrFound);
});
});
req.end();
}
// parse log file line at a time and for any use lines where the Event = SparkListenerTaskEnd
exports.getId(function(err, id){
console.log(id);
var lineReader = readline.createInterface({
input: fs.createReadStream('../PC Files/' + id, 'utf8')
});
lineReader.on('line', function (line) {
var obj = JSON.parse(line);
if(obj.Event == "SparkListenerTaskEnd") {
console.log('Line from file:', obj['Task Info']['Finish Time']);
}
});
});
Adam,我尝试了你建议的代码但得到了以下错误:
null
fs.js:646
return binding.open(pathModule._makeLong(path), stringToFlags(flags), mode);
^
Error: ENOENT: no such file or directory, open '../PC Files/null'
at Object.fs.openSync (fs.js:646:18)
at Object.fs.readFileSync (fs.js:551:33)
at /test-runner/modules/getEventLog.js:61:19
at IncomingMessage.<anonymous> (/test-runner/modules/getEventLog.js:35:13)
at emitNone (events.js:111:20)
at IncomingMessage.emit (events.js:208:7)
at endReadableNT (_stream_readable.js:1056:12)
at _combinedTickCallback (internal/process/next_tick.js:138:11)
at process._tickCallback (internal/process/next_tick.js:180:9)
答案 0 :(得分:1)
乍一看,您似乎错误地使用了回调。
我假设您正在使用getId
函数,如:
getId(function(error, data) {
// Do something with data
}
在这种情况下,应该返回回调函数,如:
// Remove the error, this will not be entered as a parameter
// Add callback as parameter
exports.getId(function(id, callback){
console.log(id);
var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
var content = data.split('\n');
// Removed callback from here
// We will not have access to the
// to it here
async.map(content, function (item) {
callback(null, JSON.parse(item));
// Add callback with error in place of null
}, function (err, content) {
callback(err)
console.log(content);
});
});
//console.log(data);
});