我只是在学习服务器端JavaScript,所以请承担我所犯的任何明显错误。
我正在尝试编写一个文件解析器,它对目录中的HTML文件进行操作,并在解析完所有文件后返回一个JSON字符串。我用一个文件启动它,它工作正常。它从运行在同一台机器上的Apache加载资源,注入jquery,进行解析并返回我的JSON。
var request = require('request'),
jsdom = require('jsdom'),
sys = require('sys'),
http = require('http');
http.createServer(function (req, res) {
request({uri:'http://localhost/tfrohe/Car3E.html'}, function (error, response, body) {
if (!error && response.statusCode == 200) {
var window = jsdom.jsdom(body).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
// jQuery is now loaded on the jsdom window created from 'body'
var emps = {};
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var name = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = name.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2];
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
emps = JSON.stringify(emps);
//console.log(emps);
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(emps);
});
} else {
res.writeHead(200, {"Content-Type": "text/plain"});
res.end("empty");
//console.log(response.statusCode);
}
});
}).listen(8124);
现在我尝试将其扩展为使用常规文件系统(fs)并获取目录中的所有HTML文件,并以相同的方式解析它们,并在解析完所有文件后返回单个组合的JSON对象。这是我到目前为止所做的,但它不起作用。
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
emps = {};
//path = '/home/inet/www/media/employees/';
readDirectory = function(path) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
在这种特殊情况下,目录中有2个html文件。如果我在console.log(emps)
期间htmlfiles.forEach()
向我显示第一个文件的结果,那么两个文件的结果将按照我的预期方式组合在一起。如何将emps返回到readDirectory,以便我可以根据需要输出它?
在下面的答案之后,这是带有httpServer的完整脚本,用于提供详细信息。
var sys = require('sys'),
fs = require("fs"),
http = require('http'),
jsdom = require('jsdom'),
emps = {};
var timed = setInterval(function() {
emps = {};
readDirectory('/home/inet/www/media/employees/', function(emps) {
});
}, 3600000);
readDirectory = function(path, callback) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
var imagecount = jquery("tr td img").length;
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step += 1;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
count -= 1;
if (count <= 0) {
callback(JSON.stringify(emps));
}
});
});
});
});
}
var init = readDirectory('/home/inet/www/media/employees/', function(emps) {
});
http.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(JSON.stringify(emps));
}).listen(8124);
答案 0 :(得分:4)
这肯定是很多代码都有一些错误。
readDirectory
emps
这应该有效:
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
//path = '/home/inet/www/media/employees/';
// This is a nicer way
function readDirectory(path, callback) {
fs.readdir(path, function(err, files) {
// make this local
var emps = {};
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
// Keep track of the number of files we have parsed
var count = htmlfiles.length;
var done = 0;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
// As soon as all have finished call the callback and supply emps
done++;
if (done === count) {
callback(emps);
}
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function(emps) {
console.log(emps);
});
答案 1 :(得分:1)
你好像这样做有点错误
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
但是你已经将你的功能定义为:
readDirectory = function(path) {
回调参数在哪里?试试这个:
readDirectory = function(path, callback) {
然后在emps[last + ",_" + first] = jquery(this).children('img').attr('src');
put
callback.call(null, emps);
您的回调函数将被调用,但循环继续进行多次。如果你想让它同时返回所有这些,你需要计算循环运行的次数,计算直到该数字然后当emps数组充满数据时调用你的回调你需要。