我正在编写一个 MapReduce 程序,用于去除给定输入中的停用词。程序可以运行,但没有显示任何输出。输入数据需要从文件中读取。
日志中显示:“INFO mapreduce.Job: Running job: job_1549892265940_0006”。
输入:
Hello this is abc.
Hello this is abc from xyz
输出:
Hello abc
Hello abc xyz
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class StopWord {
public static class Valuemap extends Mapper<LongWritable,Text,Text,IntWritable>
{
ArrayList<String> list = new ArrayList<>(Arrays.asList("a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for",
"from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself",
"let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours",
"ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was",
"we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom",
"why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've",
"your", "yours", "yourself", "yourselves"));
private static final IntWritable one = new IntWritable(1);
private static final IntWritable zero = new IntWritable(0);
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException
{
//String[] words = value.toString().toLowerCase().split(" ");
StringTokenizer tokens = new StringTokenizer(value.toString().toLowerCase());
while(tokens.hasMoreTokens())
{
String string = tokens.nextToken();
if(list.contains(string))
{
context.write(new Text(string), zero);
}
else
{
context.write(new Text(string), one);
}
}
}
}
public class Valuereduce extends Reducer<Text, IntWritable, Text, NullWritable>
{
protected void reduce(Text key,Iterable<IntWritable> values,Reducer<Text, IntWritable, Text, NullWritable>.Context context) throws IOException,InterruptedException
{
int sum = 0;
for(IntWritable value : values)
{
sum += value.get();
if(sum != 0)
context.write(key, NullWritable.get());
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
@SuppressWarnings("deprecation")
Job job = new Job(conf,"StopWordElimination");
job.setJarByClass(StopWord.class);
job.setMapperClass(Valuemap.class);
job.setReducerClass(Valuereduce.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}