我正在使用this方法列出包含大量文件(超过10K以及更多...)的目录:
scanDirStream = (needle, params) => {
var options = {
// find -type
type: 'f',
// find -name
name: '*',
limit: 100
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt = [needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v) && k != 'limit') {
opt.push('-' + k);
opt.push(v);
}
};
var res = '';
var find = spawn('find', opt);
var head = spawn('head',[ '-n', options.limit]);
find.stdout.pipe(head.stdin);
head.stdout.on('data', _data => {
var buff = Buffer.from(_data, 'utf-8').toString();
if (buff != '') res += buff;
})
find.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
head.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
head.on('close', (_) => {
data = res.split('\n');
data = data.slice(0, data.length - 1);
return resolve(data);
});
});
}//scanDirStream
所以我称它为传递目录路径和将要传递到head -n
命令的要列出文件的限制,而find
命令将列出具有指定名称的文件,就像这样
// test.js
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 100
})
.then(res => {
console.debug("files %d", res.length);
})
.catch(err => console.error(err))
假设当前mediaRoot
中有10000个文件,那么会发生
limit
<10000 node.js的值挂起主循环:我得到控制台输出,但脚本运行node test.js
不会终止。limit
值> = 10000可以正确列出文件并输出并终止运行。要进行尝试,请指向包含大约~8000-100000
个文件的目录并运行test.js
脚本,将limit
的值更改为100
,然后将{{1} }:
10000
和
// this will hang node
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 100
})
您有时也会收到此错误:
// this will not hang node
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 10000
})
答案 0 :(得分:1)
我猜想您从head
进程中获得了EPIPE(管道中断),因为一旦读取了所需数量的文件,该文件便会终止。
我看不出使用head
的理由;最好在Node代码中跟踪文件计数。
这是我的事–请注意,它不会跟踪自己找到的文件名;这是在fileCallback
上进行的。如果该回调将文件视为要归入limit
的文件,则它必须返回真实值。
const { spawn } = require("child_process");
function findFiles(
rootPath,
findParams,
limit = 0,
fileCallback = () => true,
) {
return new Promise((resolve, reject) => {
// Files found so far.
let nFound = 0;
// Whether we killed `find` on our own (limit reached).
let killed = false;
// Buffer to hold our results so far.
let buffer = Buffer.alloc(0);
// Build args for `find`. Note `-print0` to delimit the files
// with NUL bytes for extra safety (i.e. this can handle filenames
// with newlines).
const args = [rootPath].concat(findParams).concat("-print0");
// Spawn the find process.
const findProc = spawn("/usr/bin/find", args, { stdio: "pipe" });
// Figure out what to do when the process exits; this may be
// because it's done, or because we've closed things down upon
// finding `limit` files.
findProc.on("close", (code, signal) => {
if (code && code !== 0) {
return reject("find died with error " + code);
}
if (!killed && signal) {
return reject("find died from signal " + signal);
}
resolve(nFound);
});
findProc.stdout.on("data", chunk => {
// Concatenate the new chunk of data into the buffer.
buffer = Buffer.concat([buffer, chunk]);
// Split the buffer; see below.
let searchOffset = 0;
for (;;) {
// Find the next zero byte (which delimits files).
const nextOffset = buffer.indexOf(0, searchOffset);
// If there is none, we're done -- the rest of the buffer
// is the start of a new filename, if any.
if (nextOffset === -1) {
break;
}
// Stringify the segment between `searchOffset` and `nextOffset`...
const filename = buffer.toString("utf8", searchOffset, nextOffset);
// ... pass it to the callback to see if it's valid...
if (fileCallback(filename)) {
nFound++;
// ... see if we need to stop iterating over files...
if (limit > 0 && nFound >= limit) {
killed = true;
findProc.kill();
break;
}
}
// and move on to the byte after the NUL.
searchOffset = nextOffset + 1;
}
// Cut off the already-walked start of the buffer, if any.
if (searchOffset > 0) {
buffer = buffer.slice(searchOffset);
}
});
});
}
findFiles("/Users/akx", ["-type", "f"], 1000, path => {
console.log(path);
return true;
}).then(
res => {
console.log(res);
},
err => {
throw new Error(err);
},
);