Program to find the maximum value among billions of numbers present in a CSV file.
package org.devender;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SortMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        String line = ivalue.toString();
        String TextInt[] = line.split(",");
        int MAX = 0;
        for (int i = 0; i > TextInt.length; i++) {
            int n = Integer.parseInt(TextInt[i].toString());
            if (n > MAX) {
                MAX = n;
            }
        }
        Text max = new Text("Maximum");
        LongWritable BIG = new LongWritable(MAX);
        context.write(BIG, max);
    }
}
Getting the error below:
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:715)
    at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
    at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
    at org.devender.SortMapper.map(SortMapper.java:31)
    at org.devender.SortMapper.map(SortMapper.java:1)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
Driver: this is my driver program.
package org.devender;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortMapReduce {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(org.devender.SortMapReduce.class);
        job.setMapperClass(org.devender.SortMapper.class);
        job.setReducerClass(org.devender.SortReducer.class);

        // TODO: specify output types
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // TODO: specify input and output DIRECTORIES (not files)
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        if (!job.waitForCompletion(true))
            return;
    }
}
// Reducer - the output is coming as 0,Maximum ... 0,Maximum, but I was expecting the maximum value from the file along with the "Highest Number" tag.
------------------------------------------------------------------------
public void reduce(Iterable<LongWritable> _key, Text values, Context context)
        throws IOException, InterruptedException {
    // process values
    LongWritable MAX = new LongWritable(0);
    for (LongWritable val : _key) {
        if (val.compareTo(MAX) > 0) {
            MAX = val;
        }
    }
    Text t = new Text("Highest Number ");
    context.write(MAX, t);
}
}
I used LongWritable as the key, and the same in the mapper parameters, but I don't know why the compiler says Text is expected. I am trying to read a line from the file, split it into separate numbers, convert each one to an int and compare it with every number in the line, then save the result to the output context. But the compiler says it expected Text, and I don't know why it expects Text when I have specifically declared LongWritable in the Mapper. Can someone help fix this compiler error? Right now the output is coming as 0 Maximum, 0 Maximum ...
Answer 0 (score 0):
What is your job configuration? Are you using job.setOutputKeyClass, job.setOutputValueClass, job.setMapOutputKeyClass, and job.setMapOutputValueClass as part of your code?
Can you share the whole code?
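For reference, a minimal sketch of what those calls look like, dropped into the driver's main() right after the Job is created. The concrete classes below are only illustrative for the Mapper<LongWritable, Text, LongWritable, Text> declaration in the question; they simply have to match whatever the mapper and reducer actually emit:

    // Map output types must match the mapper's 3rd and 4th type parameters.
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    // Job output types must match the reducer's 3rd and 4th type parameters.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);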
Edit 1: Here is the code. There were quite a few issues in your code. You can read more about MapReduce here.
import java.io.IOException;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class TestingMapReduce extends Configured implements Tool {

    public static class SortMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
            String line = ivalue.toString();
            String TextInt[] = line.split(",");
            int MAX = 0;
            for (int i = 0; i < TextInt.length; i++) {
                int n = Integer.parseInt(TextInt[i].toString());
                if (n > MAX) {
                    MAX = n;
                }
            }
            Text max = new Text("Maximum");
            LongWritable BIG = new LongWritable(MAX);
            context.write(max, BIG);
        }
    }

    public static class SortReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        Long MAX = 0L;

        public void reduce(Text _key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // process values
            for (LongWritable val : values) {
                if (val.get() > MAX) {
                    MAX = val.get();
                }
            }
            context.write(_key, new LongWritable(MAX));
        }
    }

    public int run(String[] arg0) throws Exception {
        Job job = Job.getInstance(getConf());
        // job.setJar("nyse-0.0.1-SNAPSHOT.jar");
        job.setJarByClass(getClass());
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(arg0[0]));
        FileOutputFormat.setOutputPath(job, new Path(arg0[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String args[]) throws Exception {
        System.exit(ToolRunner.run(new TestingMapReduce(), args));
    }
}
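Once this is packaged into a jar (the commented-out setJar line suggests a jar named nyse-0.0.1-SNAPSHOT.jar, but the actual name depends on your build), the job can be run in the usual way, for example:

    hadoop jar nyse-0.0.1-SNAPSHOT.jar TestingMapReduce <input dir> <output dir>

With input lines such as 10,3,25 and 7,99,2, the mapper emits (Maximum, 25) and (Maximum, 99), and the single reducer then writes Maximum 99, i.e. the overall maximum.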