使用SAX解析器在java中解析XML文件

时间:2014-01-12 20:29:59

标签: java saxparser

我想用 Java 编写一个程序,使用 SAX 解析器解析类似这个链接(link)中的 XML 文件,并完成以下工作:从输入中读取一个 ID,解析并搜索 XML 文件,然后把该 ID 对应的 text、title 和 username(均为 XML 标签)写入文件(只匹配紧跟在 ns 标签之后的 id)。程序还应对另外四个 ID 执行同样的操作。

需要你的帮助......

/**
 * Console entry point: prompts for up to five integer IDs and, for each valid one,
 * runs a SAX parse of the XML file with a fresh {@code MyProjectHandler}.
 */
public class ReadXMLFile {

    /** ID most recently read from the user; the handler compares parsed ids against it. */
    public static int ID_number_1;

    /** Ordinal words for the confirmation message, indexed by (prompt number - 1). */
    private static final String[] ORDINALS = {"first", "second", "third", "fourth", "fifth"};

    /**
     * Reads the IDs from standard input and triggers one parse per valid ID.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // A single Scanner for the whole session: creating a new one per prompt can drop
        // input the previous instance had already buffered (e.g. when input is piped).
        // It is deliberately not closed, because closing it would close System.in.
        Scanner sc = new Scanner(System.in);

        for (int x = 1; x <= ORDINALS.length; x++) {
            if (x == 1) {
                System.out.println("enter an integer as ID:\n");
            } else {
                System.out.println("enter another ID:\n");
            }

            if (!sc.hasNext()) {
                // Input is exhausted; further prompts could never be answered.
                break;
            }
            if (!sc.hasNextInt()) {
                // Discard the offending token so the next prompt does not see it again.
                sc.next();
                System.out.println("You should enter a valid integer");
                continue;
            }
            ID_number_1 = sc.nextInt();

            parseWithHandler();

            System.out.println("writing in file " + ID_number_1);
            System.out.println("we got your " + ORDINALS[x - 1] + " id :" + ID_number_1);
        }
    }

    /** Runs one SAX parse of the XML file; failures are reported and do not end the session. */
    private static void parseWithHandler() {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser saxParser = factory.newSAXParser();
            MyProjectHandler handler = new MyProjectHandler();
            saxParser.parse("src\\SAX-XML-FAWiki.xml", handler);
        } catch (ParserConfigurationException | SAXException | IOException
                | NumberFormatException e) {
            // NumberFormatException: the handler may convert element text with
            // Integer.parseInt; report it as a parse problem rather than as bad user input.
            e.printStackTrace();
        }
    }
}

/**
 * SAX handler that looks for an {@code <id>} element whose value equals
 * {@code ReadXMLFile.ID_number_1} and records it in a {@link FAWiki}, together with the
 * most recently seen {@code <title>} and the next {@code <text>} element.
 *
 * <p>Element text is accumulated in a buffer and evaluated in {@link #endElement}, because
 * a parser may deliver the content of one element through several {@code characters} calls.
 */
public class MyProjectHandler extends DefaultHandler {

    /** Page built for the matching id; stays {@code null} until a match is found. */
    private FAWiki wiki = null;

    /** Character data of the element currently being captured. */
    private final StringBuilder buffer = new StringBuilder();

    /** Content of the most recently closed title element. */
    private String lastTitle;

    /** True from a matching id until the following text element has been stored. */
    private boolean awaitingText = false;

    // Each flag is true only while the parser is inside the corresponding element.
    boolean bid = false;
    boolean btitle = false;
    boolean btext = false;

    /**
     * Returns the matched page rendered by {@code FAWiki.toString()}.
     *
     * @return the rendered page, or an empty string when no id has matched
     */
    public String getFwkList() {
        return wiki == null ? "" : wiki.toString();
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        // Start every element with an empty buffer so earlier text cannot leak into it.
        buffer.setLength(0);

        if (qName.equalsIgnoreCase("id")) {
            bid = true;
        } else if (qName.equalsIgnoreCase("title")) {
            btitle = true;
        } else if (qName.equalsIgnoreCase("text")) {
            btext = true;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // Only collect text of the elements of interest; inter-element whitespace is ignored.
        if (bid || btitle || btext) {
            buffer.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equalsIgnoreCase("id")) {
            bid = false;
            handleId(buffer.toString().trim());
        } else if (qName.equalsIgnoreCase("title")) {
            btitle = false;
            lastTitle = buffer.toString();
        } else if (qName.equalsIgnoreCase("text")) {
            btext = false;
            if (awaitingText) {
                wiki.settext(buffer.toString());
                awaitingText = false;
                System.out.println(getFwkList());
            }
        }
    }

    /**
     * Compares one id value with the requested ID and starts a new page on a match.
     * NOTE(review): every id element is considered; restricting the match to the id that
     * directly follows an ns element would need an additional flag.
     */
    private void handleId(String rawId) {
        final int parsed;
        try {
            parsed = Integer.parseInt(rawId);
        } catch (NumberFormatException ignored) {
            // A value that is not an int cannot equal the requested int ID, so skip it.
            return;
        }

        if (ReadXMLFile.ID_number_1 == parsed) {
            wiki = new FAWiki();
            wiki.setid(parsed);
            // Assumes the title element precedes the id element of the same page - TODO confirm.
            wiki.settitle(lastTitle);
            awaitingText = true;
        }
    }
}

/**
 * Plain data holder for one wiki page: title, numeric id and text.
 * Accessor names keep their original lower-case spelling because the handler calls them.
 */
public class FAWiki {

    private String title;
    private int id;
    private String text;

    /** @return the page title, or {@code null} if none was set */
    public String gettitle() {
        return title;
    }

    /** @param title the page title */
    public void settitle(String title) {
        this.title = title;
    }

    /** @return the page id ({@code 0} if none was set) */
    public int getid() {
        return id;
    }

    /** @param id the page id */
    public void setid(int id) {
        this.id = id;
    }

    /** @return the page text, or {@code null} if none was set */
    public String gettext() {
        return text;
    }

    /** @param text the page text */
    public void settext(String text) {
        this.text = text;
    }

    /**
     * Renders the page as a small XML-like fragment.
     *
     * @return a {@code <page>} fragment with title, id and text on separate, tab-indented lines
     */
    @Override
    public String toString() {
        // No padding inside <id>...</id>: the desired output shows the bare number.
        return "<page>\n"
                + "\t<title>" + this.title + "</title>\n"
                + "\t<id>" + this.id + "</id>\n"
                + "\t<text>" + this.text + "</text>\n"
                + "</page>";
    }
}

我希望每个ID都有这样的结果:

<Page>
     <title>AccessibleComputing</title>  
     <id>654982</id>
     <text>#REDIRECT [[Computer accessibility]] {{R from CamelCase}}</text>
     <username>Xqbot</username>
</Page>

2 个答案:

答案 0 :(得分:0)

代码改用下面的写法会更好,而不是使用相对文件路径:

InputStream in = getClass().getResourceAsStream("/SAX-XML-FAWiki.xml");
saxParser.parse(in, handler);

对于长文本,characters可能会被多次调用。

解析之后(我认为)还缺少下面这一步:

// Fetch the rendered page from the handler once parsing has finished.
String wiki = handler.getFwkList();
// Paths.get expects a String, so the numeric ID is converted to use it as the file name.
Path path = Paths.get(String.valueOf(ID_number_1));
// TRUNCATE_EXISTING prevents stale trailing bytes when an older, longer file is overwritten.
Files.write(path, wiki.getBytes(StandardCharsets.UTF_8),
    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);

备注:按照惯例,方法名应使用驼峰命名并大写相应字母,即 setId/getId。

答案 1 :(得分:0)

感谢所有人(特别是Joop Eggen)

我成功更改了我的代码

……修改后的代码如下:

/**
 * Console entry point: prompts for up to five integer IDs and, for each valid one, parses
 * the XML file with a fresh {@code MyProjectHandler} and prints what the handler found.
 */
public class ReadXMLFile {

    /** ID most recently read from the user; the handler compares parsed ids against it. */
    public static int ID_number_1;

    /** Ordinal words for the confirmation message, indexed by (prompt number - 1). */
    private static final String[] ORDINALS = {"first", "second", "third", "fourth", "fifth"};

    /**
     * Reads the IDs from standard input and runs one parse per valid ID.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // One Scanner for all prompts: a new instance per prompt can lose input that the
        // previous one had already buffered. Not closed, as that would close System.in.
        Scanner sc = new Scanner(System.in);
        // The factory does not depend on the ID, so it is created once.
        SAXParserFactory factory = SAXParserFactory.newInstance();

        for (int x = 1; x <= ORDINALS.length; x++) {
            if (x == 1) {
                System.out.println("enter an integer as ID:\n");
            } else {
                System.out.println("enter your next ID:\n");
            }

            if (!sc.hasNext()) {
                // Input is exhausted; further prompts could never be answered.
                break;
            }
            if (!sc.hasNextInt()) {
                // Drop the bad token and skip the parse: searching with the previous
                // (stale) ID would report a result the user never asked for.
                sc.next();
                System.out.println("You should enter a valid integer");
                continue;
            }
            ID_number_1 = sc.nextInt();
            System.out.println("we got your " + ORDINALS[x - 1] + " id :" + ID_number_1);

            try {
                SAXParser saxParser = factory.newSAXParser();
                MyProjectHandler handler = new MyProjectHandler();
                saxParser.parse("src\\SAX-XML-FAWiki.xml", handler);

                System.out.println(handler.getFwkList());
            } catch (ParserConfigurationException | SAXException | IOException
                    | NumberFormatException e) {
                // NumberFormatException: the handler may convert element text with
                // Integer.parseInt; one bad document must not end the whole session.
                e.printStackTrace();
            }

            System.out.println("writing in file " + ID_number_1);
        }
    }
}

/**
 * SAX handler that finds the page whose id (the {@code <id>} element following an
 * {@code <ns>} element) equals {@code ReadXMLFile.ID_number_1} and stores its title, text
 * and username in a {@link FAWiki}.
 *
 * <p>Element text is accumulated in a buffer and evaluated in {@link #endElement}, because
 * a parser may deliver the content of one element through several {@code characters} calls.
 */
public class MyProjectHandler extends DefaultHandler {

    FAWiki wiki = new FAWiki();
    String titlestr = null;
    String textstr = null;
    String usernamestr = null;

    /** True while the matched page still needs its text element. */
    boolean flag1 = false;
    /** True while the matched page still needs its username element. */
    boolean flag2 = false;
    // Each of the following is true only while the parser is inside that element.
    boolean bid = false;
    boolean btitle = false;
    boolean btext = false;
    boolean busername = false;
    /** True once an id equal to the requested ID has been seen. */
    boolean match = false;
    /** True from an ns start tag until the next id element has been evaluated. */
    boolean bns = false;
    /** Not used by this class; kept so existing references to the field still compile. */
    boolean flag3 = false;

    /** Character data of the element currently being captured. */
    private final StringBuilder buffer = new StringBuilder();

    /**
     * Returns the result of the search as rendered by {@code FAWiki.toString(FAWiki, Boolean)}.
     *
     * @return the rendered page, or that method's "no match" message
     */
    public String getFwkList() {
        return wiki.toString(wiki, match);
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        // Start every element with an empty buffer so earlier text cannot leak into it.
        buffer.setLength(0);

        if (qName.equalsIgnoreCase("title")) {
            btitle = true;
        } else if (qName.equalsIgnoreCase("ns")) {
            bns = true;
        } else if (qName.equalsIgnoreCase("id")) {
            bid = true;
        } else if (qName.equalsIgnoreCase("text")) {
            btext = true;
        } else if (qName.equalsIgnoreCase("username")) {
            busername = true;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // Text and username are buffered only for the matched page, which keeps large
        // bodies of unrelated pages out of memory.
        boolean wanted = btitle
                || (bid && bns)
                || (btext && flag1)
                || (busername && flag2);
        if (wanted) {
            buffer.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equalsIgnoreCase("title")) {
            titlestr = buffer.toString();
            btitle = false;
        } else if (qName.equalsIgnoreCase("id")) {
            if (bid && bns) {
                checkPageId(buffer.toString().trim());
                // Only the first id after ns is the page id; later ids are skipped.
                bns = false;
            }
            bid = false;
        } else if (qName.equalsIgnoreCase("text")) {
            if (flag1) {
                textstr = buffer.toString();
                wiki.settext(textstr);
                flag1 = false;
            }
            btext = false;
        } else if (qName.equalsIgnoreCase("username")) {
            if (flag2) {
                usernamestr = buffer.toString();
                wiki.setUsername(usernamestr);
                flag2 = false;
            }
            busername = false;
        } else if (qName.equalsIgnoreCase("page")) {
            // Assumes each record is wrapped in a page element - TODO confirm against the XML.
            // If the matched page had no text/username, a later page must not supply them.
            flag1 = false;
            flag2 = false;
        }
    }

    /** Compares one page id with the requested ID and, on a match, records id and title. */
    private void checkPageId(String rawId) {
        final int parsed;
        try {
            parsed = Integer.parseInt(rawId);
        } catch (NumberFormatException ignored) {
            // A value that is not an int cannot equal the requested int ID, so skip it.
            return;
        }

        if (ReadXMLFile.ID_number_1 == parsed) {
            wiki.setid(parsed);
            wiki.settitle(titlestr);
            match = true;
            flag1 = true;
            flag2 = true;
        }
    }
}

/**
 * Mutable value object describing one wiki page: title, numeric id, text and the
 * username attached to it.
 */
public class FAWiki {

    private String title;
    private int id;
    private String text;
    private String username;

    /** @return the stored title, possibly {@code null} */
    public String gettitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void settitle(String title) {
        this.title = title;
    }

    /** @return the stored id */
    public int getid() {
        return this.id;
    }

    /** @param id the id to store */
    public void setid(int id) {
        this.id = id;
    }

    /** @return the stored text, possibly {@code null} */
    public String gettext() {
        return this.text;
    }

    /** @param text the text to store */
    public void settext(String text) {
        this.text = text;
    }

    /** @return the stored username, possibly {@code null} */
    public String getUsername() {
        return this.username;
    }

    /** @param username the username to store */
    public void setUsername(String username) {
        this.username = username;
    }

    /**
     * Renders {@code fwik} as a {@code <page>} fragment, or returns a fixed message when
     * there was no match. Note that the values come from {@code fwik}, not from {@code this}.
     *
     * @param fwik  the page whose fields are rendered
     * @param match whether the requested id was found
     * @return the rendered fragment when {@code match} is true, otherwise the no-match message
     */
    public String toString(FAWiki fwik, Boolean match) {
        if (!match) {
            return "Your entered id doesn't match";
        }

        StringBuilder page = new StringBuilder();
        page.append("<page>\n");
        page.append("\t<title>").append(fwik.gettitle()).append("</title>\n");
        page.append("\t<id>").append(fwik.getid()).append("</id>\n");
        page.append("\t<text>").append(fwik.gettext()).append("</text>\n");
        page.append("\t<username>").append(fwik.getUsername()).append("</username>\n");
        page.append("</page>");
        return page.toString();
    }
}

另外还有一个问题:把输出写入文件而不是打印到控制台,最好的方法是什么?