我想逐行读取一个CSV文件,并为其中包含“India”子字符串的行输出键值对。为此,我编写了以下代码。
映射器代码
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper from the question. For each input line it looks for the token "india" and then
 * writes field 2 (as the key) and field 9 (parsed as an int, as the value) of the
 * accumulated text.
 *
 * NOTE: as written, this code fails with ArrayIndexOutOfBoundsException on lines that
 * produce no match; the inline comments below point out why.
 */
public class MapperCode extends Mapper<LongWritable,Text,Text,IntWritable> {
/**
 * Processes one input record.
 *
 * @param key     the input key (not used by this method)
 * @param value   the text of one input line
 * @param context the context the output pair is written to
 * @throws IOException          declared for context.write
 * @throws InterruptedException declared for context.write
 */
public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
{
String Line=value.toString();
String out="";
String search_line=Line;
// Splits on single spaces, not commas, so one token can span several CSV fields
// (e.g. "Dutt,29,India,2012,08-12-2012,Wrestling,0,0,1,1").
String splitter[]=Line.split(" ");
String searchStr="india";
for(String words:splitter)
{
// Exact, case-sensitive comparison of a whole space-delimited token against "india".
if(searchStr.equals(words))
{
// Appends the full original line (preceded by a newline) and prints the accumulated text.
out=out+"\n"+search_line;
System.out.println(out);
}
}
// Runs even when nothing matched: out is then "" and "".split(",") is a one-element array.
String keyvalpair[]=out.split(",");
// Indexes 2 and 9 are read unconditionally, so a one-element array throws
// ArrayIndexOutOfBoundsException: 2 here.
context.write(new Text(keyvalpair[2].trim()), new IntWritable(Integer.parseInt(keyvalpair[9].trim())));
}
}
数据集
Clarissa Chun,30,United States,2012,08-12-2012,Wrestling,0,0,1,1
Yogeshwar Dutt,29,India,2012,08-12-2012,Wrestling,0,0,1,1
Jaime Espinal,27,Puerto Rico,2012,08-12-2012,Wrestling,0,1,0,1
Johan Eurén,27,Sweden,2012,08-12-2012,Wrestling,0,0,1,1
Karam Gaber,32,Egypt,2012,08-12-2012,Wrestling,0,1,0,1
异常
17/03/17 21:11:08 INFO mapred.JobClient: Task Id : attempt_201703140915_0030_m_000000_1, Status : FAILED
java.lang.ArrayIndexOutOfBoundsException: 2
at MapperCode.map(MapperCode.java:26)
at MapperCode.map(MapperCode.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:647)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:323)
at org.apache.hadoop.mapred.Child$4.run(Child.java:270)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1127)
at org.apache.hadoop.mapred.Child.main(Child.java:264)
请帮帮我。提前谢谢!
答案 0 :(得分:2)
原因很简单:您访问的索引超出了数组的实际长度。让我逐步跟踪一下。
1- Clarissa Chun,30,United States,2012,08-12-2012,Wrestling,0,0,1,1
2- splitter = ["Clarissa", "Chun,30,United", "States,2012,08-12-2012,Wrestling,0,0,1,1"]
3- keyvalpair = ["Clarissa"]
4- keyvalpair[2] ==> ????
你明白了吗?希望这可以帮助。
对于这个具体目标,可以用更简单的做法,只需这样写:
/**
 * Mapper that emits one key/value pair for every CSV row that mentions India.
 *
 * <p>Each input value is treated as one comma-separated row. For a matching row the
 * trimmed third column (index 2, the country in the sample data set) becomes the output
 * key and the trimmed tenth column (index 9, the last column in the sample data set),
 * parsed as an integer, becomes the output value.
 */
public class MapperCode extends Mapper<LongWritable,Text,Text,IntWritable> {

    /** Zero-based index of the column used as the output key. */
    private static final int KEY_COLUMN = 2;

    /** Zero-based index of the integer column used as the output value. */
    private static final int VALUE_COLUMN = 9;

    /**
     * Writes (column 2, column 9) for a row containing "india" in any letter case;
     * rows that do not match, or that have fewer than ten columns, are skipped.
     *
     * @param key     the input key (unused)
     * @param value   one line of the CSV input
     * @param context the context the output pair is written to
     * @throws IOException           if writing the output pair fails
     * @throws InterruptedException  if writing the output pair is interrupted
     * @throws NumberFormatException if column 9 of a matching row is not an integer
     */
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();

        // Case-insensitive match: the data set spells the country "India", so a
        // case-sensitive search for "india" would never match any row.
        if (!line.toLowerCase(java.util.Locale.ROOT).contains("india")) {
            return;
        }

        String[] fields = line.split(",");
        // Skip short rows instead of failing with ArrayIndexOutOfBoundsException.
        if (fields.length <= VALUE_COLUMN) {
            return;
        }

        // Parse only the value column; parsing the whole line can never succeed.
        int count = Integer.parseInt(fields[VALUE_COLUMN].trim());
        context.write(new Text(fields[KEY_COLUMN].trim()), new IntWritable(count));
    }
}
答案 1 :(得分:2)
问题出在这里。
String splitter[]=Line.split(" ");
你是按空格(' ')来拆分的。对于这个例子,应该改用逗号(',')拆分:
String splitter[]=Line.split(",");
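这样可以解决拆分的问题。不过还需注意两点:数据中写的是“India”,而代码用区分大小写的 equals 与“india”比较,因此仍然不会匹配;另外,只有在找到匹配行时才应调用 context.write,否则 out 为空,keyvalpair[2] 仍会抛出 ArrayIndexOutOfBoundsException。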