用线程计算不同的单词

时间:2013-03-08 10:56:17

标签: java multithreading semaphore bufferedreader filereader

目标是计算文件中不同的单词。

更新:上一代码已成功完成。现在我必须做同样的但是使用线程(哦,伙计,我讨厌它们......)此外我想用信号量来实现更好的流量。< / p>

代码包含了之前尝试遗漏的一些额外内容,我正在试图找出可以使用的内容..

我可以一次读一个单词,但大多数情况下我在容器中得到一个“null”。所以,直到我从容器中得到任何东西,我总是无法测试 Sorter 类等等......

该程序的新增功能是 WordContainer 类,用于存储一个单词以将其从读取器传递到分拣机:

 package main2;

import java.util.ArrayList;

public class WordContainer
{
   private ArrayList<String> words;

   public synchronized String take()
   {
      String nextWord = null;
      while (words.isEmpty())
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }

      nextWord = words.remove(0);

      notify();
      return nextWord;
   }

   public synchronized void put(String word)
   {
      while (words.size() > 999)
      {
         try
         {
            wait();
         }
         catch (InterruptedException e)
         {
         }
      }
      words.add(word);
      notify();
   }
}

DataSet类与Sorter方法相结合,产生 Sorter Class:

    package main2;

import java.util.concurrent.Semaphore;

public class Sorter extends Thread
{
   private WordContainer wordContainer;
   private int top;
   private String[] elements;
   private boolean stopped;
   private Semaphore s;
   private Semaphore s2;

   public Sorter(WordContainer wordContainer, Semaphore s, Semaphore s2)
   {
      this.wordContainer = wordContainer;
      elements = new String[1];
      top = 0;
      stopped = false;
      this.s = s;
      this.s2 = s2;
   }

   public void run()
   {
      String nextWord = wordContainer.take();
      while (nextWord != null)
      {
         try
         {
            s.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }

         nextWord = wordContainer.take();
         s2.release();
         add(nextWord);
      }
   }

   public void startSorting()
   {
      start();
   }

   public void stopSorting()
   {
      stopped = true;
   }

   public boolean member(String target)
   {
      if (top > 0)
      {
         return binarySearch(target, 0, top);
      }
      else
      {
         return false;
      }
   }

   private boolean binarySearch(String target, int from, int to)
   {

      if (from == to - 1)
      {
         return elements[from].equals(target);
      }

      int middle = (to - from) / 2 + from;

      if (elements[from].equals(target))
      {
         return true;
      }

      if (elements[middle].compareTo(target) > 0)
      {
         // search left
         return binarySearch(target, from, middle);
      }
      else
      {
         // search right
         return binarySearch(target, middle, to);
      }
   }

   public void add(String nextElement)
   {
      if (top < elements.length)
      {
         elements[top++] = nextElement;
         System.out.println("[" + top + "] " + nextElement);
         sort();
      }
      else
      {
         String[] newArray = new String[elements.length * 2];
         for (int i = 0; i < elements.length; i++)
         {
            newArray[i] = elements[i];
         }
         elements = newArray;
         add(nextElement);
      }
   }

   private void sort()
   {
      int index = 0;

      while (index < top - 1)
      {
         if (elements[index].compareTo(elements[index + 1]) < 0)
         {
            index++;
         }
         else
         {
            String temp = elements[index];
            elements[index] = elements[index + 1];
            elements[index + 1] = temp;
            if (index > 0)
            {
               index--;
            }
         }
      }
   }

   public int size()
   {
      return top;
   }

   public String getSortedWords()
   {
      String w = "";
      for (int i = 0; i < elements.length; i++)
      {
         w += elements[i] + ", ";
      }

      return w;
   }

   public int getNumberOfDistinctWords()
   {
      return top;
   }
}

Reader Class现在看起来像这样:

    package main2;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.Semaphore;

public class Reader extends Thread
{
   private static final int whitespace = 45;
   private static final int word = 48;
   private static final int finished = -1;
   private WordContainer wordContainer;
   private Semaphore s;
   private Semaphore s2;
   private String[] wordsR;
   private int state;
   private BufferedReader reader;
   private int nextFreeIndex;

   public Reader(File words, WordContainer wordContainer, Semaphore s,
         Semaphore s2)
   {
      state = whitespace;
      try
      {
         reader = new BufferedReader(new FileReader(words));
      }
      catch (FileNotFoundException e)
      {
         e.printStackTrace();
      }
      nextFreeIndex = 0;
      wordsR = new String[1];
      this.wordContainer = wordContainer;
      this.s = s;
      this.s2 = s;
   }

   public void startReading()
   {
      start();
   }

   public void run()
   {
      String nextWord = readNext();
      while (nextWord != null)
      {
         nextWord = readNext();
         wordContainer.put(nextWord);

         s.release();
         try
         {
            s2.acquire();
         }
         catch (InterruptedException e)
         {
            e.printStackTrace();
         }
      }
   }

   public String readNext()
   {
      int next;
      StringBuffer nextWord = new StringBuffer();

      while (true)
      {
         try
         {
            next = reader.read();
         }
         catch (IOException e)
         {
            next = -1;
         }

         char nextChar = (char) next;

         switch (state)
         {
            case whitespace:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
               }
               else if (next == -1)
               {
                  state = finished;
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case word:
               if (isWhiteSpace(nextChar))
               {
                  state = whitespace;
                  return nextWord.toString();
               }
               else if (next == -1)
               {
                  state = finished;
                  return nextWord.toString();
               }
               else
               {
                  nextWord.append(nextChar);
                  state = word;
               }
               break;
            case finished:
               return null;
         }
      }
   }

   private boolean isWhiteSpace(char nextChar)
   {

      switch (nextChar)
      {
         case '-':
         case '"':
         case ':':
         case '\'':
         case ')':
         case '(':
         case '!':
         case ']':
         case '?':
         case '.':
         case ',':
         case ';':
         case '[':
         case ' ':
         case '\t':
         case '\n':
         case '\r':
            return true;
      }
      return false;
   }

   public void close()
   {
      try
      {
         reader.close();
      }
      catch (IOException e)
      {
      }
   }

   public String getWords()
   {
      return wordContainer.take();
   }
}

测试类

package test;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.Semaphore;

import main2.Reader;
import main2.Sorter;
import main2.WordContainer;

import junit.framework.Assert;
import junit.framework.TestCase;

public class TestDistinctWordsWithThreads extends TestCase
{
   public void test() throws IOException, InterruptedException
   {
      File words = new File("resources" + File.separator + "AV1611Bible.txt");

      if (!words.exists())
      {
         System.out.println("File [" + words.getAbsolutePath()
               + "] does not exist");
         Assert.fail();
      }

      WordContainer container = new WordContainer();

      Semaphore s = new Semaphore(0);
      Semaphore s2 = new Semaphore(0);

      Reader reader = new Reader(words, container, s, s2);
      Sorter sorter = new Sorter(container, s, s2);

      reader.startReading();
      sorter.startSorting();

      reader.join();
      sorter.join();

      System.out.println(reader.getWords());
      Assert.assertTrue(sorter.getNumberOfDistinctWords() == 14720);

      /*
       * String bible = reader.getWords(); System.out.println(bible); String[]
       * bible2 = sorter.getSortedWords(); System.out.println(bible2);
       * assertTrue(bible2.length < bible.length());
       */
   }
}

1 个答案:

答案 0 :(得分:2)

你为什么不尝试这样的事情:

public int countWords(File file) {
    Scanner sc = new Scanner(file);
    Set<String> allWords = new HashSet<String>();
    while(sc.hasNext()) {
        allWords.add(sc.next());
    }
    return allWords.size();
}