我正在尝试使用SAX解析器解析UTF-8 xml文件,并且我使用了解析器,但它会导致异常,它的消息是“期待元素”
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<config>
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath>
<filename>anonymous22242.3gp</filename>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:06</timestamp>
<note>test1</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لول</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لولو</note>
</annotation>
</config>
private static String fileDirectory;
private final static ArrayList<String> allFileNames = new ArrayList<String>();
private final static ArrayList<String[]> allAnnotations = new ArrayList<String[]>();
private static String[] currentAnnotation = new String[3];
public static void main(String[] args) {
// TODO Auto-generated method stub
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser playbackParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
boolean audioFullPath = false;
boolean audioName = false;
boolean annotationFile = false;
boolean annotationTimestamp = false;
boolean annotationNote = false;
public void startElement(String uri, String localName,
String qName, Attributes attributes)
throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equalsIgnoreCase("filepath")) {
audioFullPath = true;
}
if (qName.equalsIgnoreCase("filename")) {
audioName = true;
}
if (qName.equalsIgnoreCase("file")) {
annotationFile = true;
}
if (qName.equalsIgnoreCase("timestamp")) {
annotationTimestamp = true;
}
if (qName.equalsIgnoreCase("note")) {
annotationNote = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName);
}
public void characters(char ch[], int start, int length)
throws SAXException {
if (audioFullPath) {
String filePath = new String(ch, start, length);
System.out.println("Full Path : " + filePath);
fileDirectory = filePath;
audioFullPath = false;
}
if (audioName) {
String fileName = new String(ch, start, length);
System.out.println("File Name : " + fileName);
allFileNames.add(fileName);
audioName = false;
}
if (annotationFile) {
String fileName = new String(ch, start, length);
currentAnnotation[0] = fileName;
annotationFile = false;
}
if (annotationTimestamp) {
String timestamp = new String(ch, start, length);
currentAnnotation[1] = timestamp;
annotationTimestamp = false;
}
if (annotationNote) {
String note = new String(ch, start, length);
currentAnnotation[2] = note;
annotationNote = false;
allAnnotations.add(currentAnnotation);
}
}
};
InputStream inputStream = getStream("http://www.example.com/example.xml");
Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");
InputSource xmlSource = new InputSource(xmlReader);
xmlSource.setEncoding("UTF-8");
playbackParser.parse(xmlSource, handler);
System.out.println(fileDirectory);
System.out.println(allFileNames);
System.out.println(allAnnotations);
} catch (Exception e) {
e.printStackTrace();
}
}
}
public Static InputStream getStream(String url)
{
try
{
connection = getConnection(url);
connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles"));
connection.setRequestProperty("Connection", "Keep-Alive");
connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8");
inputStream = connection.openInputStream();
return inputStream;
}
catch(Exception e)
{
System.out.println("NNNNNNN "+e.getMessage());
return null;
}
}
public HttpConnection getConnection(String url)
{
try
{
connection = (HttpConnection) Connector.open(url+getConnectionString());
}
catch(Exception e)
{
}
return connection;
}
但是当我传递给parse方法时,inputStream而不是inputSource,它会解析文件,但仍然存在阿拉伯字符之间的问题
playbackParser.parse(inputStream, handler);
答案 0 :(得分:1)
您展示的XML中包含未编码的阿拉伯字符。这违反了XML声明的编码,这意味着XML格式错误。 SAX解析器按顺序逐个处理数据,触发每个部分的事件。在它到达包含那些错误字符的部分之前,它不会检测到这样的编码错误。你无能为力。 XML需要由其原始作者修复。