以下问题和代码来自《MapReduce设计模式》(MapReduce Design Patterns)。
问题:给定一个帖子和评论列表,创建一个结构化的XML层次结构,以便将评论与相关帖子嵌套。
运行时抛出了NullPointerException,我找不出自己错在哪里。非常感谢任何帮助。
这是我的类和nestElement方法:
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
//import javax.swing.text.html.parser.Element;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.*;
//import org.apache.hadoop.mapred.TextInputFormat;
//import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
//import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
//import org.w3c.dom.UserDataHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class PostCommentHierarchy {
public static class PostMapper extends Mapper<Object,Text,Text,Text>{
private Text outkey = new Text();
private Text outvalue = new Text();
public void map(Object key,Text value,Context context)throws IOException ,InterruptedException{
Map<String,String> parsed = MRDPUtils.transformXmlToMap(value.toString());
String line = value.toString();
if(!(line.length()<=0)){
outkey.set(parsed.get("Id"));
outvalue.set("P"+value.toString());
context.write(outkey, outvalue);
}
}
}
public static class CommentMapper extends Mapper<Object,Text,Text,Text>{
private Text outkey = new Text();
private Text outvalue = new Text();
public void map(Object key, Text value,Context context)throws IOException,InterruptedException{
Map<String,String> parsed = MRDPUtils.transformXmlToMap(value.toString());
String line = value.toString();
if(!(line.length()<=0)){
outkey.set(parsed.get("PostId"));
outvalue.set("C"+value.toString());
context.write(outkey, outvalue);
}
}
}
public static class PostCommentHierarchyReducer extends Reducer <Text,Text,Text,NullWritable>{
private ArrayList<String> comments = new ArrayList<String>();
private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
private String post = null;
public void reduce(Text key,Iterable<Text>values,Context context)throws IOException,InterruptedException{
post =null;
comments.clear();
for(Text t:values){
if(t.charAt(0)== 0){
return;
}else if(t.charAt(0)=='P'){
post = t.toString().substring(1,t.toString().length()).trim();
}else{
comments.add(t.toString().substring(1,t.toString().length()).trim());
}
}
if(post != null){
String postWithCommentChildren = null;
try {
try {
postWithCommentChildren = nestElements(post,comments);
} catch (TransformerException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
context.write(new Text(postWithCommentChildren), NullWritable.get());
}
}
private String nestElements(String post,List<String> comments) throws ParserConfigurationException, SAXException, IOException, TransformerException{
DocumentBuilder bldr = dbf.newDocumentBuilder();
Document doc = bldr.newDocument();
org.w3c.dom.Element postEl = getXmlElementFromString(post);
org.w3c.dom.Element toAddPostEl = doc.createElement("post");
// Element toAddPostEl1 = doc.createElement("post");
copyAttributesToElement(postEl.getAttributes(),toAddPostEl);
for(String commentXml:comments){
org.w3c.dom.Element commentEl = getXmlElementFromString(commentXml);
org.w3c.dom.Element toAddCommentEl = doc.createElement("comments");
copyAttributesToElement(commentEl.getAttributes(),toAddCommentEl);
toAddPostEl.appendChild(toAddPostEl);
}
doc.appendChild(toAddPostEl);
// Transform the document into a String of XML and return
return transformDocumentToString(doc);
}
private org.w3c.dom.Element getXmlElementFromString(String xml) throws ParserConfigurationException, SAXException, IOException{
DocumentBuilder bldr = dbf.newDocumentBuilder();
return bldr.parse(new InputSource(new StringReader(xml)))
.getDocumentElement();
}
private void copyAttributesToElement(NamedNodeMap attributes,org.w3c.dom.Element toAddPostEl){
for(int i = 0; i<attributes.getLength();++i){
Attr toCopy = (Attr) attributes.item(i);
toAddPostEl.setAttribute(toCopy.getName(),toCopy.getValue());
}
}
private String transformDocumentToString(Document doc) throws TransformerException{
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
StringWriter writer = new StringWriter();
//Acts as a holder for a transformation Source tree in the form of a Document Object Model (DOM) tree.
transformer.transform(new DOMSource(doc), new StreamResult(writer));
return writer.getBuffer().toString().replaceAll("\n|\r", "");
}
}
/**
 * Configures and runs the post/comment hierarchy job.
 *
 * <p>Expects three arguments: the posts input path, the comments input path
 * and the output directory. Each input is read with its own mapper via
 * {@link MultipleInputs}; the process exits with 0 on success, 1 on job
 * failure and 2 when too few arguments are given.
 *
 * @param args {@code <posts> <comments> <outdir>}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length < 3) {
        System.err.println("Usage: PostCommentHierarchy <posts> <comments> <outdir>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "PostCommentHierarchy");
    job.setJarByClass(PostCommentHierarchy.class);

    // Both mappers emit (Text, Text); the reducer emits (Text, NullWritable).
    // Declare the intermediate and final types separately so they match.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PostMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentMapper.class);

    job.setReducerClass(PostCommentHierarchyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(args[2]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}