我尝试以块的形式读取文件并将每个块传递给一个线程,该线程将计算块中每个字节包含多少次。麻烦的是,当我将整个文件传递给一个线程时,我得到了正确的结果,但是将它传递给多个线程,结果变得非常奇怪。这是我的代码:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
public class Main{
public static void main(String[] args) throws InterruptedException, ExecutionException, IOException
{
// get number of threads to be run
Scanner in = new Scanner(System.in);
int numberOfThreads = in.nextInt();
// read file
File file = new File("testfile.txt");
long fileSize = file.length();
long chunkSize = fileSize / numberOfThreads;
FileInputStream input = new FileInputStream(file);
byte[] buffer = new byte[(int)chunkSize];
ExecutorService pool = Executors.newFixedThreadPool(numberOfThreads);
Set<Future<int[]>> set = new HashSet<Future<int[]>>();
while(input.available() > 0)
{
if(input.available() < chunkSize)
{
chunkSize = input.available();
}
input.read(buffer, 0, (int) chunkSize);
Callable<int[]> callable = new FrequenciesCounter(buffer);
Future<int[]> future = pool.submit(callable);
set.add(future);
}
// let`s assume we will use extended ASCII characters only
int alphabet = 256;
// hold how many times each character is contained in the input file
int[] frequencies = new int[alphabet];
// sum the frequencies from each thread
for(Future<int[]> future: set)
{
for(int i = 0; i < alphabet; i++)
{
frequencies[i] += future.get()[i];
}
}
input.close();
for(int i = 0; i< frequencies.length; i++)
{
if(frequencies[i] > 0) System.out.println((char)i + " " + frequencies[i]);
}
}
}
//help class for multithreaded frequencies` counting
class FrequenciesCounter implements Callable<int[]>
{
private int[] frequencies = new int[256];
private byte[] input;
public FrequenciesCounter(byte[] buffer)
{
input = buffer;
}
public int[] call()
{
for(int i = 0; i < input.length; i++)
{
frequencies[(int)input[i]]++;
}
return frequencies;
}
}
我的testfile.txt是aaaaaaaaaaaaaabbbbcccccc
。
使用1个线程,输出为:
a 14
b 4
c 6`
使用2个线程,输出为:
a 4
b 8
c 12
使用3个线程,输出为:
b 6
c 18
所以其他奇怪的结果我无法弄清楚。有人可以帮忙吗?
答案 0 :(得分:4)
每个线程使用相同的缓冲区,并且一个线程将覆盖缓冲区,因为另一个线程正在尝试处理它。
您需要确保每个线程都有自己的缓冲区,其他任何人都无法修改。
答案 1 :(得分:1)
为每个线程创建byte []数组。
public static void main(String[] args) throws InterruptedException, ExecutionException, IOException {
// get number of threads to be run
Scanner in = new Scanner(System.in);
int numberOfThreads = in.nextInt();
// read file
File file = new File("testfile.txt");
long fileSize = file.length();
long chunkSize = fileSize / numberOfThreads;
FileInputStream input = new FileInputStream(file);
ExecutorService pool = Executors.newFixedThreadPool(numberOfThreads);
Set<Future<int[]>> set = new HashSet<Future<int[]>>();
while (input.available() > 0) {
//create buffer for every thread.
byte[] buffer = new byte[(int) chunkSize];
if (input.available() < chunkSize) {
chunkSize = input.available();
}
input.read(buffer, 0, (int) chunkSize);
Callable<int[]> callable = new FrequenciesCounter(buffer);
Future<int[]> future = pool.submit(callable);
set.add(future);
}
// let`s assume we will use extended ASCII characters only
int alphabet = 256;
// hold how many times each character is contained in the input file
int[] frequencies = new int[alphabet];
// sum the frequencies from each thread
for (Future<int[]> future : set) {
for (int i = 0; i < alphabet; i++) {
frequencies[i] += future.get()[i];
}
}
input.close();
for (int i = 0; i < frequencies.length; i++) {
if (frequencies[i] > 0)
System.out.println((char) i + " " + frequencies[i]);
}
}
}