I have created a custom XmlOutputFormat class that converts the reducer's output into XML format.
The problem here is that the code runs successfully, but the final output is in the normal plain-text format instead of XML.
Can anyone help me?
package dd;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MaxTemperature extends Configured implements Tool {

    public static class MapMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private static final int MISSING = 9999;

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String year = line.substring(15, 19);
            int airTemperature;
            if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
                airTemperature = Integer.parseInt(line.substring(88, 92));
            } else {
                airTemperature = Integer.parseInt(line.substring(87, 92));
            }
            String quality = line.substring(92, 93);
            if (airTemperature != MISSING && quality.matches("[01459]")) {
                context.write(new Text(year), new IntWritable(airTemperature));
            }
        }
    }

    public static class Mapreducers extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int maxValue = Integer.MIN_VALUE;
            for (IntWritable value : values) {
                maxValue = Math.max(maxValue, value.get());
            }
            context.write(key, new IntWritable(maxValue));
        }
    }

    public int run(String[] args) throws Exception {
        Job job = new Job();
        job.setJarByClass(MaxTemperature.class);
        job.setJobName("MaxTemperature");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(args[0]), conf);
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }

        FileInputFormat.addInputPath(job, new Path(args[0]));
        XmlOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(MapMapper.class);
        job.setCombinerClass(Mapreducers.class);
        job.setReducerClass(Mapreducers.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int xx = 1;
        xx = ToolRunner.run(new MaxTemperature(), args);
        System.exit(xx);
    }
}
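One thing worth checking: nothing in run() above tells the job to use the custom format, and without a setOutputFormatClass call Hadoop falls back to the default TextOutputFormat. Below is a minimal sketch of a driver that registers it, assuming the XmlOutputFormat class shown further down; it is only a sketch, not a verified fix:

// Sketch only: a trimmed version of the driver above with the custom format registered.
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "MaxTemperature");
    job.setJarByClass(MaxTemperature.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    XmlOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MapMapper.class);
    job.setCombinerClass(Mapreducers.class);
    job.setReducerClass(Mapreducers.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(XmlOutputFormat.class); // without this line the default TextOutputFormat is used
    return job.waitForCompletion(true) ? 0 : 1;
}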
The custom XmlOutputFormat code is shown below:
package dd;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/** An {@link OutputFormat} that writes plain text files. */
public class XmlOutputFormat<K, V> extends FileOutputFormat {

    protected static class XmlRecordWriter<K, V> extends RecordWriter<K, V> {
        private static final String utf8 = "UTF-8";
        protected DataOutputStream out;

        public XmlRecordWriter(DataOutputStream out) throws IOException {
            this.out = out;
            out.writeBytes("<results>\n");
        }

        /**
         * Write the object to the byte stream, handling Text as a special case.
         *
         * @param o the object to print
         * @throws IOException if the write throws, we pass it on
         */
        private void writeObject(Object o) throws IOException {
            if (o instanceof Text) {
                Text to = (Text) o;
                out.write(to.getBytes(), 0, to.getLength());
            } else {
                out.write(o.toString().getBytes(utf8));
            }
        }

        private void writeKey(Object o, boolean closing) throws IOException {
            out.writeBytes("<");
            if (closing) {
                out.writeBytes("/");
            }
            writeObject(o);
            out.writeBytes(">");
            if (closing) {
                out.writeBytes("\n");
            }
        }

        public synchronized void write(K key, V value) throws IOException {
            boolean nullKey = key == null || key instanceof NullWritable;
            boolean nullValue = value == null || value instanceof NullWritable;
            if (nullKey && nullValue) {
                return;
            }
            Object keyObj = key;
            if (!nullKey) {
                keyObj = "value";
            }
            writeKey(keyObj, false);
            if (!nullValue) {
                writeObject(value);
            }
            writeKey(keyObj, true);
        }

        public synchronized void close(TaskAttemptContext context)
                throws IOException {
            out.close();
        }
    }

    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        Path file = FileOutputFormat.getOutputPath(job);
        Configuration conf = new Configuration();
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileout = fs.create(file);
        return new XmlRecordWriter<K, V>(fileout);
    }
}
Thanks in advance.
Answer 0 (score: 2)
Hope this will help you.
You can go through this and modify your code accordingly.
Update:
public static class Mapreducers extends
        Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        context.write(new Text("<MapReduce>"), null);
    }

    public void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        Text out = new Text(constructPropertyXml(key, maxValue));
        context.write(out, null);
    }

    public static String constructPropertyXml(Text key, int maxValue) {
        StringBuilder sb = new StringBuilder();
        sb.append("<result><key>").append(key)
          .append("</key><value>").append(maxValue)
          .append("</value></result>");
        return sb.toString();
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        context.write(new Text("</MapReduce>"), null);
    }
}
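Note that this version writes the XML as ordinary text through the default TextOutputFormat, so the driver needs a few matching changes. A rough sketch of those adjustments follows; the exact settings are an assumption on top of the question's driver, not part of the original answer:

// Driver adjustments this reducer would likely need (assumption, not from the original answer):
job.setReducerClass(Mapreducers.class);
// Mapreducers now emits pre-formatted XML text rather than (Text, IntWritable) pairs,
// so it can no longer be reused as a combiner:
// job.setCombinerClass(Mapreducers.class);  // remove this line
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
// A single reducer keeps the <MapReduce> root element well-formed in one output file.
job.setNumReduceTasks(1);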
Answer 1 (score: 0)
You can simply override your RecordWriter. You can change the root tag, parent tag, and child tag names.
protected static class XMLRecordWriter extends RecordWriter<Text, IntWritable> {

    private DataOutputStream out;

    public XMLRecordWriter(DataOutputStream out) throws IOException {
        this.out = out;
        out.writeBytes("<Output>\n");
    }

    private void writeStyle(String xml_tag, String tag_value) throws IOException {
        out.writeBytes("<" + xml_tag + ">" + tag_value + "</" + xml_tag + ">\n");
    }

    public synchronized void write(Text key, IntWritable value) throws IOException {
        out.writeBytes("<record>\n");
        this.writeStyle("key", key.toString());
        this.writeStyle("value", value.toString());
        out.writeBytes("</record>\n");
    }

    public synchronized void close(TaskAttemptContext job) throws IOException {
        try {
            out.writeBytes("</Output>\n");
        } finally {
            out.close();
        }
    }
}
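For completeness, here is a rough sketch of how such a RecordWriter is typically wired into a FileOutputFormat subclass and registered on the job. The class name XmlTextOutputFormat and the use of getDefaultWorkFile are illustrative assumptions, not part of the original answer:

// Sketch only: an enclosing output format for the XMLRecordWriter above.
// Uses the same org.apache.hadoop.mapreduce.* imports as the XmlOutputFormat class in the question.
public static class XmlTextOutputFormat extends FileOutputFormat<Text, IntWritable> {
    @Override
    public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        // getDefaultWorkFile returns a per-task file (e.g. part-r-00000.xml) inside the
        // task's temporary output directory, so concurrent tasks do not collide.
        Path file = getDefaultWorkFile(job, ".xml");
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream out = fs.create(file, false);
        return new XMLRecordWriter(out);
    }
}

// In the driver:
// job.setOutputFormatClass(XmlTextOutputFormat.class);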
You can also refer to the following link for more details on implementing a custom output format in Hadoop: https://acadgild.com/blog/implementing-custom-output-format-hadoop/