BlackBerry使用SAX Parser解析UTF-8 XML文件

时间:2011-12-01 18:18:55

标签: java blackberry utf-8 xml-parsing saxparser

我正在尝试使用SAX解析器解析UTF-8 xml文件,并且我使用了解析器,但它会导致异常,它的消息是“期待元素”

<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<config>
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath>
<filename>anonymous22242.3gp</filename>
<annotation>
    <file>anonymous22242.3gp</file>
    <timestamp>0:06</timestamp>
    <note>test1</note>
</annotation>
<annotation>
    <file>anonymous22242.3gp</file>
    <timestamp>0:09</timestamp>
    <note>لول</note>
</annotation>
<annotation>
    <file>anonymous22242.3gp</file>
    <timestamp>0:09</timestamp>
    <note>لولو</note>
</annotation>
</config> 


     private static String fileDirectory;
private final static ArrayList<String> allFileNames = new ArrayList<String>();
private final static ArrayList<String[]> allAnnotations = new ArrayList<String[]>();
private static String[] currentAnnotation = new String[3];

public static void main(String[] args) {
// TODO Auto-generated method stub
try {

    SAXParserFactory factory = SAXParserFactory.newInstance();
    SAXParser playbackParser = factory.newSAXParser();

    DefaultHandler handler = new DefaultHandler() {

        boolean audioFullPath = false;
        boolean audioName = false;
        boolean annotationFile = false;
        boolean annotationTimestamp = false;
        boolean annotationNote = false;

        public void startElement(String uri, String localName,
                String qName, Attributes attributes)
                throws SAXException {

            System.out.println("Start Element :" + qName);

            if (qName.equalsIgnoreCase("filepath")) {
                audioFullPath = true;
            }

            if (qName.equalsIgnoreCase("filename")) {
                audioName = true;
            }

            if (qName.equalsIgnoreCase("file")) {
                annotationFile = true;
            }

            if (qName.equalsIgnoreCase("timestamp")) {
                annotationTimestamp = true;
            }

            if (qName.equalsIgnoreCase("note")) {
                annotationNote = true;
            }

        }

        public void endElement(String uri, String localName,
                String qName) throws SAXException {

            System.out.println("End Element :" + qName);

        }

        public void characters(char ch[], int start, int length)
                throws SAXException {

            if (audioFullPath) {
                String filePath = new String(ch, start, length);
                System.out.println("Full Path : " + filePath);
                fileDirectory = filePath;
                audioFullPath = false;
            }

            if (audioName) {
                String fileName = new String(ch, start, length);
                System.out.println("File Name : " + fileName);
                allFileNames.add(fileName);
                audioName = false;
            }

            if (annotationFile) {
                String fileName = new String(ch, start, length);
                currentAnnotation[0] = fileName;
                annotationFile = false;
            }

            if (annotationTimestamp) {
                String timestamp = new String(ch, start, length);
                currentAnnotation[1] = timestamp;
                annotationTimestamp = false;
            }
            if (annotationNote) {
                String note = new String(ch, start, length);
                currentAnnotation[2] = note;
                annotationNote = false;
                allAnnotations.add(currentAnnotation);
            }

        }

    };

    InputStream inputStream = getStream("http://www.example.com/example.xml");
    Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");

    InputSource xmlSource = new InputSource(xmlReader);
    xmlSource.setEncoding("UTF-8");

    playbackParser.parse(xmlSource, handler);

    System.out.println(fileDirectory);
    System.out.println(allFileNames);
    System.out.println(allAnnotations);

} catch (Exception e) {
    e.printStackTrace();
}
}
}

public Static InputStream getStream(String url)
{
    try
    {
        connection = getConnection(url);
        connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles"));
        connection.setRequestProperty("Connection", "Keep-Alive");
        connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8");

        inputStream = connection.openInputStream();
        return inputStream;
    }
    catch(Exception e)
    {
        System.out.println("NNNNNNN "+e.getMessage());

        return null;
      }

    }

 public HttpConnection getConnection(String url)
{

    try
    {
      connection = (HttpConnection) Connector.open(url+getConnectionString());


    }
    catch(Exception e)

    {

    }


    return connection;
}

但是当我传递给parse方法时,inputStream而不是inputSource,它会解析文件,但仍然存在阿拉伯字符之间的问题

playbackParser.parse(inputStream, handler);

1 个答案:

答案 0 :(得分:1)

您展示的XML中包含未编码的阿拉伯字符。这违反了XML声明的编码,这意味着XML格式错误。 SAX解析器按顺序逐个处理数据,触发每个部分的事件。在它到达包含那些错误字符的部分之前,它不会检测到这样的编码错误。你无能为力。 XML需要由其原始作者修复。