不确定是否有人遇到此问题。我正在尝试使用oozie来运行一个简单的MapReduce作业,该作业在HDFS位置搜索字符串值,如果找到它则输出它。当我提交作业时,oozie成功执行但我没有得到预期的输出。似乎我的mapper从未被调用过,因为它没有过滤掉任何预期的结果。输出文件只包含所有输入记录。我已经将System.out.printlns放在代码中,我在任务日志文件中看不到它们。我已经检查了配置和作业结果,我没有看到我添加的计数器或任何System.out。作业配置虽然说明我的mapper类已被执行。这对我没有任何意义。下面是我的Mapper代码片段:
private static final String SEARCH_FOR_STRING = "poc.search.string";
enum SearchCounters {
NUMBER_OF_MATCHES;
}
private NullWritable nullValue = NullWritable.get();
private Text outputLine = new Text();
private String searchString = null;
@Override
protected void setup(Context context) {
searchString = context.getConfiguration().get(SEARCH_FOR_STRING);
System.out.println("Searching for: [" + searchString + "]");
context.getCounter(SearchCounters.NUMBER_OF_MATCHES).increment(0);
}
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String inputLine = value.toString();
System.out.println("key: " + key.toString() + "value: " + inputLine
+ " searchString: [" + searchString + "]");
if(inputLine.contains(searchString)) {
context.getCounter(SearchCounters.NUMBER_OF_MATCHES).increment(1);
outputLine.set(inputLine);
context.write(outputLine, nullValue);
}
}
这是我的workflow.xml
<workflow-app xmlns="uri:oozie:workflow:0.1" name="search-wf">
<start to="mr-node"/>
<action name="mr-node">
<map-reduce>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<prepare>
<delete path="${nameNode}/user/${wf:user()}/${outputDir}"/>
</prepare>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>mapreduce.map.class</name>
<value>poc.SearchForValueMapper</value>
</property>
<property>
<name>mapreduce.reducer.class</name>
<value>poc.SearchForValueReducer</value>
</property>
<property>
<name>mapred.map.tasks</name>
<value>100</value>
</property>
<property>
<name>mapred.input.dir</name>
<value>/user/${wf:user()}/${inputDir}</value>
</property>
<property>
<name>com.disney.search.string</name>
<value>${searchString}</value>
</property>
<property>
<name>mapred.output.dir</name>
<value>/user/${wf:user()}/${outputDir}</value>
</property>
</configuration>
</map-reduce>
<ok to="end"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="end"/>
</workflow-app>