目标是计算文件中不同的单词。
更新:上一代码已成功完成。现在我必须做同样的但是使用线程(哦,伙计,我讨厌它们......)此外我想用信号量来实现更好的流量。< / p>
代码包含了之前尝试遗漏的一些额外内容,我正在试图找出可以使用的内容..
我可以一次读一个单词,但大多数情况下我在容器中得到一个“null”。所以,直到我从容器中得到任何东西,我总是无法测试 Sorter 类等等......
该程序的新增功能是 WordContainer 类,用于存储一个单词以将其从读取器传递到分拣机:
package main2;
import java.util.ArrayList;
public class WordContainer
{
private ArrayList<String> words;
public synchronized String take()
{
String nextWord = null;
while (words.isEmpty())
{
try
{
wait();
}
catch (InterruptedException e)
{
}
}
nextWord = words.remove(0);
notify();
return nextWord;
}
public synchronized void put(String word)
{
while (words.size() > 999)
{
try
{
wait();
}
catch (InterruptedException e)
{
}
}
words.add(word);
notify();
}
}
DataSet类与Sorter方法相结合,产生 Sorter Class:
package main2;
import java.util.concurrent.Semaphore;
public class Sorter extends Thread
{
private WordContainer wordContainer;
private int top;
private String[] elements;
private boolean stopped;
private Semaphore s;
private Semaphore s2;
public Sorter(WordContainer wordContainer, Semaphore s, Semaphore s2)
{
this.wordContainer = wordContainer;
elements = new String[1];
top = 0;
stopped = false;
this.s = s;
this.s2 = s2;
}
public void run()
{
String nextWord = wordContainer.take();
while (nextWord != null)
{
try
{
s.acquire();
}
catch (InterruptedException e)
{
e.printStackTrace();
}
nextWord = wordContainer.take();
s2.release();
add(nextWord);
}
}
public void startSorting()
{
start();
}
public void stopSorting()
{
stopped = true;
}
public boolean member(String target)
{
if (top > 0)
{
return binarySearch(target, 0, top);
}
else
{
return false;
}
}
private boolean binarySearch(String target, int from, int to)
{
if (from == to - 1)
{
return elements[from].equals(target);
}
int middle = (to - from) / 2 + from;
if (elements[from].equals(target))
{
return true;
}
if (elements[middle].compareTo(target) > 0)
{
// search left
return binarySearch(target, from, middle);
}
else
{
// search right
return binarySearch(target, middle, to);
}
}
public void add(String nextElement)
{
if (top < elements.length)
{
elements[top++] = nextElement;
System.out.println("[" + top + "] " + nextElement);
sort();
}
else
{
String[] newArray = new String[elements.length * 2];
for (int i = 0; i < elements.length; i++)
{
newArray[i] = elements[i];
}
elements = newArray;
add(nextElement);
}
}
private void sort()
{
int index = 0;
while (index < top - 1)
{
if (elements[index].compareTo(elements[index + 1]) < 0)
{
index++;
}
else
{
String temp = elements[index];
elements[index] = elements[index + 1];
elements[index + 1] = temp;
if (index > 0)
{
index--;
}
}
}
}
public int size()
{
return top;
}
public String getSortedWords()
{
String w = "";
for (int i = 0; i < elements.length; i++)
{
w += elements[i] + ", ";
}
return w;
}
public int getNumberOfDistinctWords()
{
return top;
}
}
Reader Class现在看起来像这样:
package main2;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.Semaphore;
public class Reader extends Thread
{
private static final int whitespace = 45;
private static final int word = 48;
private static final int finished = -1;
private WordContainer wordContainer;
private Semaphore s;
private Semaphore s2;
private String[] wordsR;
private int state;
private BufferedReader reader;
private int nextFreeIndex;
public Reader(File words, WordContainer wordContainer, Semaphore s,
Semaphore s2)
{
state = whitespace;
try
{
reader = new BufferedReader(new FileReader(words));
}
catch (FileNotFoundException e)
{
e.printStackTrace();
}
nextFreeIndex = 0;
wordsR = new String[1];
this.wordContainer = wordContainer;
this.s = s;
this.s2 = s;
}
public void startReading()
{
start();
}
public void run()
{
String nextWord = readNext();
while (nextWord != null)
{
nextWord = readNext();
wordContainer.put(nextWord);
s.release();
try
{
s2.acquire();
}
catch (InterruptedException e)
{
e.printStackTrace();
}
}
}
public String readNext()
{
int next;
StringBuffer nextWord = new StringBuffer();
while (true)
{
try
{
next = reader.read();
}
catch (IOException e)
{
next = -1;
}
char nextChar = (char) next;
switch (state)
{
case whitespace:
if (isWhiteSpace(nextChar))
{
state = whitespace;
}
else if (next == -1)
{
state = finished;
}
else
{
nextWord.append(nextChar);
state = word;
}
break;
case word:
if (isWhiteSpace(nextChar))
{
state = whitespace;
return nextWord.toString();
}
else if (next == -1)
{
state = finished;
return nextWord.toString();
}
else
{
nextWord.append(nextChar);
state = word;
}
break;
case finished:
return null;
}
}
}
private boolean isWhiteSpace(char nextChar)
{
switch (nextChar)
{
case '-':
case '"':
case ':':
case '\'':
case ')':
case '(':
case '!':
case ']':
case '?':
case '.':
case ',':
case ';':
case '[':
case ' ':
case '\t':
case '\n':
case '\r':
return true;
}
return false;
}
public void close()
{
try
{
reader.close();
}
catch (IOException e)
{
}
}
public String getWords()
{
return wordContainer.take();
}
}
测试类
package test;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.Semaphore;
import main2.Reader;
import main2.Sorter;
import main2.WordContainer;
import junit.framework.Assert;
import junit.framework.TestCase;
public class TestDistinctWordsWithThreads extends TestCase
{
public void test() throws IOException, InterruptedException
{
File words = new File("resources" + File.separator + "AV1611Bible.txt");
if (!words.exists())
{
System.out.println("File [" + words.getAbsolutePath()
+ "] does not exist");
Assert.fail();
}
WordContainer container = new WordContainer();
Semaphore s = new Semaphore(0);
Semaphore s2 = new Semaphore(0);
Reader reader = new Reader(words, container, s, s2);
Sorter sorter = new Sorter(container, s, s2);
reader.startReading();
sorter.startSorting();
reader.join();
sorter.join();
System.out.println(reader.getWords());
Assert.assertTrue(sorter.getNumberOfDistinctWords() == 14720);
/*
* String bible = reader.getWords(); System.out.println(bible); String[]
* bible2 = sorter.getSortedWords(); System.out.println(bible2);
* assertTrue(bible2.length < bible.length());
*/
}
}
答案 0 :(得分:2)
你为什么不尝试这样的事情:
public int countWords(File file) {
Scanner sc = new Scanner(file);
Set<String> allWords = new HashSet<String>();
while(sc.hasNext()) {
allWords.add(sc.next());
}
return allWords.size();
}