Java:如何统计文件中某个单词出现的次数?

时间:2013-02-23 12:35:18

标签: java multithreading frequency

程序需要判断某个单词(word)是否出现在文件中;如果找到,则显示该单词在文件中出现的次数。但程序无法正常工作,匹配结果不正确。

import java.util.concurrent.*;
import java.util.*;
import java.io.*;

class FolderScan implements Runnable {

    private String path;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch,
            File endOfWorkFile) {
        this.path = path;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FolderScan() {
    }

    @Override
    public void run() {
        findFiles(path);
        queue.add(endOfWorkFile);
        latch.countDown();
    }

    private void findFiles(String path) {

        try {
            File root = new File(path);
            File[] list = root.listFiles();
            for (File currentFile : list) {
                if (currentFile.isDirectory()) {
                    findFiles(currentFile.getAbsolutePath());
                } else {
                    if (currentFile.getName().toLowerCase().endsWith((".txt"))) {
                        queue.put(currentFile);
                    }
                }
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }

}

public class FileScan implements Runnable {

    private String whatFind;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    public FileScan(String whatFind, BlockingQueue<File> queue,
            CountDownLatch latch, File endOfWorkFile) {
        this.whatFind = whatFind;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FileScan() {
    }

    Set<String> words = new HashSet<String>();

    @Override
    public void run() {

        while (true) {
            try {
                File file;
                file = queue.take();

                if (file == endOfWorkFile) {
                    break;
                }

                scan(file);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        latch.countDown();
    }

    private void scan(File file) {
        Scanner scanner = null;
        int matches = 0;

        try {
            scanner = new Scanner(file);
        } catch (FileNotFoundException e) {
            System.out.println("File Not Found.");
            e.printStackTrace();
        }

        while (scanner.hasNext()) {
            String word = scanner.next();
            words.add(word);
        }

        if (words.contains(this.whatFind)) {
            matches++;
        }

        String myStr = String.format("File: %s - and the number of matches "
                + "is: %d", file.getAbsolutePath(), matches);
        System.out.println(myStr);
    }

    // ask user about input
    public void askUserPathAndWord() {

        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(System.in));
        String path;
        String whatFind;
        BlockingQueue<File> queue = new LinkedBlockingQueue<File>();

        try {
            System.out.println("Please, enter a Path and Word"
                    + "(which you want to find):");
            System.out.println("Please enter a Path:");
            path = bufferedReader.readLine();
            System.out.println("Please enter a Word:");
            whatFind = bufferedReader.readLine();

            if (path != null && whatFind != null) {

                File endOfWorkFile = new File("GameOver.tmp");
                CountDownLatch latch = new CountDownLatch(2);

                FolderScan folderScan = new FolderScan(path, queue, latch,
                        endOfWorkFile);
                FileScan fileScan = new FileScan(whatFind, queue, latch,
                        endOfWorkFile);

                Executor executor = Executors.newCachedThreadPool();
                executor.execute(folderScan);
                executor.execute(fileScan);

                latch.await();
                System.out.println("Thank you!");
            } else {
                System.out.println("You did not enter anything");
            }

        } catch (IOException | RuntimeException e) {
            System.out.println("Wrong input!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            System.out.println("Interrupted.");
            e.printStackTrace();
        }
    }

    /**
     * @param args
     */

    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        new FileScan().askUserPathAndWord();

        long stopTime = System.currentTimeMillis();
        long elapsedTime = stopTime - startTime;
        System.out.println("\nRuntime time " + elapsedTime + " milliseconds.");
    }
}

问题:

  • 如何解决这个问题,并正确地组织这段代码?
  • 换一种实现逻辑是否会更好?

3 个答案:

答案 0 :(得分:3)

问题似乎出在这里:

// Collects every token into the set; a set keeps each distinct word only once.
while (scanner.hasNext()) {
    String word = scanner.next();
    words.add(word);
}

// A membership test can raise matches by at most one, however often the word occurs.
if (words.contains(this.whatFind)) {
    matches++;
}

这只会检查单词是否存在,而不会统计单词出现的次数。

将其更改为:

// Compare each token as it is read, so every occurrence raises the counter.
while (scanner.hasNext()) {
    String word = scanner.next();
    if (word.equals(whatFind))
        matches++;
}

或更简单:

// Same per-token counting, without the temporary variable.
while (scanner.hasNext())
    if (scanner.next().equals(whatFind))
        matches++;

答案 1 :(得分:1)

根据收到的一些建议(非常感谢 Dukeling!),修改后的代码如下:

package task;

import java.util.concurrent.*;
import java.util.*;
import java.io.*;

class FolderScan implements Runnable {

    private String path;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch,
            File endOfWorkFile) {
        this.path = path;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FolderScan() {
    }

    @Override
    public void run() {
        findFiles(path);
        queue.add(endOfWorkFile);
        latch.countDown();
    }

    private void findFiles(String path) {

        try {
            File root = new File(path);
            File[] list = root.listFiles();
            for (File currentFile : list) {
                if (currentFile.isDirectory()) {
                    findFiles(currentFile.getAbsolutePath());
                } else {
                    if (currentFile.getName().toLowerCase().endsWith((".txt"))) {
                        queue.put(currentFile);
                    }
                }
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }

}

public class FileScan implements Runnable {

    private String whatFind;
    private BlockingQueue<File> queue;
    private CountDownLatch latch;
    private File endOfWorkFile;

    public FileScan(String whatFind, BlockingQueue<File> queue,
            CountDownLatch latch, File endOfWorkFile) {
        this.whatFind = whatFind;
        this.queue = queue;
        this.latch = latch;
        this.endOfWorkFile = endOfWorkFile;
    }

    public FileScan() {
    }

    @Override
    public void run() {

        while (true) {
            try {
                File file;
                file = queue.take();

                if (file == endOfWorkFile) {
                    break;
                }

                scan(file);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        latch.countDown();
    }

    private void scan(File file) {
        Scanner scanner = null;
        int matches = 0;

        try {
            scanner = new Scanner(file);
        } catch (FileNotFoundException e) {
            System.out.println("File Not Found.");
            e.printStackTrace();
        }

        while (scanner.hasNext())
            if (scanner.next().equals(whatFind)) {
                matches++;
            }

        if (matches > 0) {
            String myStr = String.format(
                    "File: %s - and the number of matches " + "is: %d",
                    file.getAbsolutePath(), matches);
            System.out.println(myStr);
        }
    }

    // ask user about input
    public void askUserPathAndWord() {

        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(System.in));
        String path;
        String whatFind;
        BlockingQueue<File> queue = new LinkedBlockingQueue<File>();

        try {
            System.out.println("Please, enter a Path and Word"
                    + "(which you want to find):");
            System.out.println("Please enter a Path:");
            path = bufferedReader.readLine();
            System.out.println("Please enter a Word:");
            whatFind = bufferedReader.readLine();

            if (path != null && whatFind != null) {

                File endOfWorkFile = new File("GameOver.tmp");
                CountDownLatch latch = new CountDownLatch(2);

                FolderScan folderScan = new FolderScan(path, queue, latch,
                        endOfWorkFile);
                FileScan fileScan = new FileScan(whatFind, queue, latch,
                        endOfWorkFile);

                Executor executor = Executors.newCachedThreadPool();
                executor.execute(folderScan);
                executor.execute(fileScan);

                latch.await();
                System.out.println("Thank you!");
            } else {
                System.out.println("You did not enter anything");
            }

        } catch (IOException | RuntimeException e) {
            System.out.println("Wrong input!");
            e.printStackTrace();
        } catch (InterruptedException e) {
            System.out.println("Interrupted.");
            e.printStackTrace();
        }
    }

    /**
     * @param args
     */

    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();

        new FileScan().askUserPathAndWord();

        long stopTime = System.currentTimeMillis();
        long elapsedTime = stopTime - startTime;
        System.out.println("\nRuntime time " + elapsedTime + " milliseconds.");
    }
}

答案 2 :(得分:-1)

 // Set of words read from the file; a HashSet stores each distinct word only once.
 Set<String> words = new HashSet<String>();

请改用基于 List 实现的数据结构(Set 不保存重复元素,因此无法统计出现次数)。

并将以下代码放到 while 循环内部。

 // Membership test: raises matches when the collection contains the searched word.
 if (words.contains(this.whatFind)) {
        matches++;
    }