如何选择特定的文本文件

时间:2018-04-02 09:18:09

标签: apache-poi

使用Apache POI解析文本文档并在其中搜索关键字

1 个答案:

答案 0 :(得分:0)

我写了一个简单的简历解析器,它会解析指定文件夹及其子文件夹中的每个文档(.doc、.docx),并查找给定的关键字。请参阅 HWPFDocument 的 Javadoc;注意 HWPFDocument 只能读取旧版二进制 .doc 格式,.docx 文件需要改用 XWPFDocument。

代码如下:

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.poi.hwpf.HWPFDocument;


/**
 * Walks a folder tree, opens every Word document it finds and records which
 * of the configured keywords occur in each document's text.
 *
 * <p>Documents are read with {@link HWPFDocument}. Files that it rejects are
 * reported on {@code System.err} and skipped, so one unreadable file does not
 * abort the whole scan.
 */
public class ResumeParser {
    /** Folder at which the recursive scan starts. */
    String rootFolderPath;
    /** Maps the absolute path of each parsed document to the keywords found in it. */
    Map<String, List<String>> displayContent = new HashMap<String, List<String>>();
    /** Keywords to look for; matching is case-insensitive. */
    List<String> keywordList;

    /**
     * Stores the arguments and immediately runs the scan via {@link #startParsing()}.
     *
     * @param rootFolderPath folder to scan, including its sub-folders
     * @param keywordList    keywords to search for in each document
     * @throws IOException declared for callers; propagated from {@link #startParsing()}
     */
    public ResumeParser(String rootFolderPath, List<String> keywordList) throws IOException {
        this.rootFolderPath = rootFolderPath;
        this.keywordList = keywordList;
        startParsing();
    }

    /**
     * Scans the root folder and prints the collected matches to standard output.
     *
     * @throws IOException kept in the signature for backward compatibility
     */
    public void startParsing() throws IOException {
        createDisplayContent(rootFolderPath);
        printContent(displayContent);
    }

    /**
     * Parses every {@code .doc}/{@code .docx} file directly under the given
     * folder and recurses into each sub-folder.
     *
     * @param rootFolderPath the folder to scan
     */
    public void createDisplayContent(String rootFolderPath) {
        File[] entries = new File(rootFolderPath).listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list folder: " + rootFolderPath);
            return;
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                createDisplayContent(entry.getAbsolutePath());
            } else if (entry.isFile() && isWordDocument(entry.getName())) {
                parseDocFile(entry.getAbsolutePath());
            }
        }
    }

    /**
     * Reads one document and stores the keywords it contains in
     * {@link #displayContent} under the document's path. Failures are reported
     * on {@code System.err} and leave {@link #displayContent} untouched.
     *
     * @param filePath path of the document to parse
     */
    public void parseDocFile(String filePath) {
        FileInputStream input = null;
        try {
            input = new FileInputStream(filePath);
            HWPFDocument document = new HWPFDocument(input);
            String text = document.getText().toString().toUpperCase(Locale.ROOT);
            displayContent.put(filePath, findKeywords(text));
        } catch (IOException ioe) {
            System.err.println("Could not read " + filePath + ": " + ioe);
        } catch (IllegalArgumentException iae) {
            // NOTE(review): POI signals input that is not a binary Word document
            // (e.g. an OOXML .docx) with an IllegalArgumentException subclass;
            // confirm against the POI version in use.
            System.err.println("Skipping unsupported document " + filePath + ": " + iae);
        } finally {
            closeQuietly(input);
        }
    }

    /**
     * Prints one line per document with the keywords that matched.
     *
     * @param displayContent map from document path to matched keywords
     */
    public void printContent(Map<String, List<String>> displayContent) {
        for (Map.Entry<String, List<String>> entry : displayContent.entrySet()) {
            System.out.println("File Name: " + entry.getKey() + "  Match Keywords: " + entry.getValue());
        }
    }

    /** Returns true when the file name ends in {@code .doc} or {@code .docx}, ignoring case. */
    private static boolean isWordDocument(String fileName) {
        // Use the LAST dot so names such as "resume.v2.doc" are recognised.
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) {
            return false;
        }
        String extension = fileName.substring(dot + 1);
        return extension.equalsIgnoreCase("doc") || extension.equalsIgnoreCase("docx");
    }

    /** Returns the lower-cased keywords that occur in the given upper-cased text. */
    private List<String> findKeywords(String upperCaseText) {
        List<String> matches = new ArrayList<String>();
        for (String keyword : keywordList) {
            if (keyword == null) {
                continue;
            }
            // Locale.ROOT keeps the comparison independent of the default locale.
            if (upperCaseText.contains(keyword.toUpperCase(Locale.ROOT))) {
                matches.add(keyword.toLowerCase(Locale.ROOT));
            }
        }
        return matches;
    }

    /** Closes the stream if it was opened; a failure to close is only reported. */
    private static void closeQuietly(FileInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ioe) {
            System.err.println("Could not close stream: " + ioe);
        }
    }

    /**
     * Demo entry point: scans {@code D:\Doc} for a fixed set of keywords.
     *
     * @param args unused
     * @throws IOException propagated from the constructor
     */
    public static void main(String[] args) throws IOException {
        List<String> keywords = new ArrayList<String>();
        keywords.add("Java");
        keywords.add("PHP");
        keywords.add("Android");
        keywords.add("John");
        new ResumeParser("D:\\Doc", keywords);
    }
}