Search file contents for string in Node.js

时间:2017-11-08 22:01:51

标签: javascript node.js

I am trying to replicate the Dropbox JavaScript SDK's filesSearch on a local directory using Node.js. I basically need to walk through the directory and check every file name, and its contents, against a search term; whenever matches are found, they should be added to a results array that can be sorted by matches at the end.

After endlessly searching for a Node module that can do this, I decided to do it myself. I've got the directory walking working, and I can do a simple match on the folder name and file name, but I cannot see how to search the file contents. The files are predominantly PDFs, but may also be Word and Excel documents.

This is my 'walking' code so far:

// Shared accumulator that walk_through_local_repository() writes its
// {path, score} entries into. It is initialised BEFORE the walk is started so
// the code no longer depends on declaration hoisting plus the asynchronous
// readdir callback to find an array here.
let results = [];

// Kick off the search at the configured repository root.
walk_through_local_repository(store.get("local_repository_path"), search_term);

/**
 * Recursively walks a directory and records a name-match score for every
 * visible entry in the module-level `results` array.
 *
 * Entries whose name starts with "." and entries named "TEMP" are skipped
 * (and therefore not descended into). Every other entry is appended to
 * `results` as `{path, score}`, where `score` is 1 when the lower-cased entry
 * name contains the lower-cased search term and 0 otherwise. Directories are
 * then walked recursively.
 *
 * @param {string} path - Directory to scan; children are addressed as path + "/" + name.
 * @param {string} search_term - Text to look for in entry names (compared case-insensitively).
 * @param {function(?Error, Array=)} [done] - Optional callback invoked exactly once:
 *     with `(err)` if `path` itself cannot be read, otherwise with
 *     `(null, results)` after this directory and all of its sub-directories
 *     have been processed. Unreadable sub-directories are logged and skipped.
 */
function walk_through_local_repository(path, search_term, done) {
    // Entry names are lower-cased before comparison, so the term must be too.
    const needle = String(search_term).toLowerCase();

    fs.readdir(path, (err, dir) => {
        if (err) {
            console.error("unable to read directory " + path + ": " + err.message);
            if (typeof done === "function") {
                done(err);
            }
            return;
        }

        // Outstanding units of work: 1 for this directory's own loop plus one
        // per sub-directory walk that has been started but not yet finished.
        let pending = 1;
        const finish = () => {
            pending -= 1;
            if (pending === 0 && typeof done === "function") {
                done(null, results);
            }
        };

        for (const name of dir) {
            // ignore hidden files/folders
            if (name.charAt(0) === "." || name === "TEMP") {
                continue;
            }

            const full_path = path + "/" + name;

            let stats;
            try {
                stats = fs.lstatSync(full_path);
            } catch (stat_err) {
                // The entry may have vanished or be inaccessible; skip it
                // rather than aborting the whole walk.
                console.error("unable to stat " + full_path + ": " + stat_err.message);
                continue;
            }

            // Score is computed per entry so nothing leaks between iterations.
            const score = name.toLowerCase().includes(needle) ? 1 : 0;

            // TODO: check the file CONTENTS here and update the score for files.

            // Append instead of writing to results[i]: the loop index restarts
            // at 0 in every directory and would overwrite earlier entries.
            results.push({path: full_path, score: score});

            if (stats.isDirectory()) {
                // go again
                console.log("going to walk_through_local_repository again for "+full_path+".");
                pending += 1;
                walk_through_local_repository(full_path, needle, finish);
            }
        }

        finish();
    });
}

So I have three questions really...

  1. Is there a better way to do this? Have I missed a module somewhere?!
  2. If not, how do I search the contents of the files (PDFs, Word documents and Excel spreadsheets)?
  3. How do I know when it's been through the whole directory and I can display the results array?

0 个答案:

没有答案