我正在使用此方法来验证XML:
@param :
- Fs_Path_Xsd : XSD File path
- hadoopConfiguration : hadoop Conf
- XML_Path_File : Xml File path
@return :
- (error number, error message)
使用XSD验证XML的函数:
/**
 * Validates an XML file against an XSD schema; both files are opened through the
 * Hadoop `FileSystem` resolved from `hadoopConfiguration`.
 *
 * Every problem the SAX parser reports (warning, error or fatal error) is collected
 * rather than thrown, so a single call can report several problems.
 *
 * @param Fs_Path_Xsd         path of the XSD file
 * @param hadoopConfiguration Hadoop configuration used to obtain the file system
 * @param XML_Path_File       path of the XML file to validate
 * @return one `(index, message)` pair per reported problem, numbered from 1 in the
 *         order reported; the single pair `(0, "message valid")` when nothing was
 *         reported; `null` when a non-fatal exception prevented validation (the
 *         stack trace is printed). Callers must check for `null`.
 */
def XmlXsdValidate(Fs_Path_Xsd: String, hadoopConfiguration: Configuration, XML_Path_File: String): ArrayBuffer[(Int, String)] = {
  import scala.util.control.NonFatal

  try {
    // The FileSystem itself is deliberately not closed here: only the streams
    // opened by this method are owned by it.
    val fs = org.apache.hadoop.fs.FileSystem.get(hadoopConfiguration)

    // Build the schema from the XSD. newSchema consumes the source before
    // returning, so the stream can be closed right after.
    val schema = {
      val xsdStream = fs.open(new org.apache.hadoop.fs.Path(Fs_Path_Xsd))
      try {
        SchemaFactory
          .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
          .newSchema(new StreamSource(xsdStream))
      } finally {
        xsdStream.close()
      }
    }

    // Namespace-aware SAX parser that validates against the schema.
    val parserFactory = SAXParserFactory.newInstance()
    parserFactory.setNamespaceAware(true)
    parserFactory.setSchema(schema)
    val saxParser = parserFactory.newSAXParser()

    // Record every reported problem instead of aborting on the first one.
    val collected = ArrayBuffer[SAXParseException]()
    val handler: DefaultHandler = new DefaultHandler() {
      override def error(ex: SAXParseException): Unit = { collected += ex }
      override def warning(ex: SAXParseException): Unit = { collected += ex }
      override def fatalError(ex: SAXParseException): Unit = { collected += ex }
    }

    // Parse (and thereby validate) the XML file, always releasing its stream.
    val xmlStream = fs.open(new org.apache.hadoop.fs.Path(XML_Path_File))
    try {
      saxParser.parse(xmlStream, handler)
    } finally {
      xmlStream.close()
    }

    if (collected.isEmpty) {
      ArrayBuffer((0, "message valid"))
    } else {
      collected.zipWithIndex.map { case (ex, idx) =>
        (idx + 1, s"Error at Line: ${ex.getLineNumber} Column: ${ex.getColumnNumber} in $XML_Path_File Message: ${ex.getMessage}")
      }
    }
  } catch {
    // NonFatal lets OutOfMemoryError, InterruptedException, etc. propagate.
    case NonFatal(t) =>
      t.printStackTrace()
      // null is kept as the failure signal for backward compatibility with callers.
      null
  }
}
对于如何处理我想要拒绝的错误标签,大家有什么建议吗?