我需要根据XSD模式验证XML文件。该模式分散在多个XSD文件中(这些文件通过include和import指令相互引用)。
参考我在SO上找到的相关问题/答案,我得出了以下解决方案。
(请注意,以下代码只是一个快速开发的原型,而不是最终的解决方案。)
// Placeholder for the project root directory. It is concatenated directly with
// relative paths below, so the real value presumably has to end with a path
// separator — TODO confirm.
private static final String PROJECT_ROOT_DIR_PATH = "--- project-root-path ---";
// Placeholder for the schema root directory, appended to PROJECT_ROOT_DIR_PATH
// without any separator in between; the same trailing-separator assumption applies.
private static final String SCHEMAS_ROOT_PATH = "--- schemas root path ---";
/**
 * Parses the example XML file into a namespace-aware DOM tree and validates it
 * against the root XSD, delegating the lookup of included/imported schemas to
 * a {@code ResourceResolver}.
 *
 * @throws Exception if the document cannot be parsed, the schema cannot be
 *                   compiled, or the document is not valid against the schema
 */
private void validate() throws Exception
{
    // Step 1: read the XML instance into a DOM document (namespace-aware,
    // otherwise schema validation cannot match qualified names).
    final File xmlFile = new File(PROJECT_ROOT_DIR_PATH + "src\\test\\resources\\example.xml");
    final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setNamespaceAware(true);
    final Document xmlDocument = domFactory.newDocumentBuilder().parse(xmlFile);

    // Step 2: compile the schema; the resolver is consulted for every
    // include/import encountered while loading the root XSD.
    final String schemasDir = PROJECT_ROOT_DIR_PATH + SCHEMAS_ROOT_PATH;
    final SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    schemaFactory.setResourceResolver(new ResourceResolver(schemasDir));
    final File rootXsd = new File(schemasDir + "--- root-xsd-file-path\\root-schema.xsd ---");
    final Schema compiledSchema = schemaFactory.newSchema(rootXsd);

    // Step 3: validate the parsed document against the compiled schema.
    compiledSchema.newValidator().validate(new DOMSource(xmlDocument));
}
ResourceResolver.java:
public class ResourceResolver implements LSResourceResolver
{
@Override
public LSInput resolveResource(String type, String namespaceURI,
String publicId, String systemId, String baseURI)
{
if (!"http://www.w3.org/2001/XMLSchema".equals(type))
{
throw new IllegalArgumentException(
"Unexpected resource type [" + type + "]."
);
}
if (systemId == null)
{
throw new IllegalArgumentException(
"Unexpected resource system-id [" + systemId + "]."
);
}
System.out.println("base-uri: " + baseURI);
System.out.println("system-id: " + systemId);
URI targetURI = getTargetURI(baseURI, systemId);
System.out.println("target-uri: " + targetURI);
System.out.println("---");
Input input = null;
try {
input = new Input(baseURI, publicId, systemId, targetURI.toURL().openStream());
}
catch (Exception ex)
{
throw new RuntimeException(
"Could not open resource stream - " + ex.getMessage()
);
}
return input;
}
private static URI getTargetURI(String baseURI, String relativePath)
{
URI targetURI = null;
try {
targetURI = (new URI(baseURI)).resolve(relativePath);
}
catch (URISyntaxException ex)
{
throw new RuntimeException(
"Could not resolve target URI - " + ex.getMessage()
);
}
return targetURI;
}
}
Input.java:
/**
 * {@link LSInput} backed by an already-opened byte stream.
 *
 * <p>The content is exposed through {@link #getByteStream()} only, so the
 * consuming parser reads the raw bytes and can honour the encoding declared
 * in the document itself. No character stream, string data or explicit
 * encoding is provided.
 */
public class Input implements LSInput
{
    private BufferedInputStream inputStream;
    private String baseURI;
    private String publicId;
    private String systemId;

    /**
     * @param baseURI  base URI used to resolve a relative {@code sysId}
     * @param publicId public identifier of the resource, may be {@code null}
     * @param sysId    system identifier (location) of the resource
     * @param input    opened stream with the resource content; it is wrapped
     *                 in a {@link BufferedInputStream}
     */
    public Input(String baseURI, String publicId, String sysId, InputStream input)
    {
        this.baseURI = baseURI;
        this.publicId = publicId;
        this.systemId = sysId;
        this.inputStream = new BufferedInputStream(input);
    }

    @Override
    public String getPublicId()
    {
        return publicId;
    }

    @Override
    public void setPublicId(String publicId)
    {
        this.publicId = publicId;
    }

    @Override
    public String getBaseURI()
    {
        return baseURI;
    }

    /**
     * Returns the wrapped stream. Handing out the bytes (instead of a
     * pre-decoded string) avoids decoding with the platform default charset
     * and avoids returning empty content once the stream has been drained.
     */
    @Override
    public InputStream getByteStream()
    {
        return inputStream;
    }

    @Override
    public boolean getCertifiedText()
    {
        return false;
    }

    /** Always {@code null}: the content is offered as a byte stream only. */
    @Override
    public Reader getCharacterStream()
    {
        return null;
    }

    /** Always {@code null}: the encoding is left to the parser's detection. */
    @Override
    public String getEncoding()
    {
        return null;
    }

    /**
     * Always {@code null}. This method must not read from the stream: a
     * consumer may query every accessor, and draining the stream here would
     * leave {@link #getByteStream()} with nothing to deliver.
     */
    @Override
    public String getStringData()
    {
        return null;
    }

    @Override
    public void setBaseURI(String baseURI) {
        this.baseURI = baseURI;
    }

    /** Replaces the backing stream; {@code null} clears it. */
    @Override
    public void setByteStream(InputStream byteStream) {
        if (byteStream == null) {
            this.inputStream = null;
        } else if (byteStream instanceof BufferedInputStream) {
            this.inputStream = (BufferedInputStream) byteStream;
        } else {
            this.inputStream = new BufferedInputStream(byteStream);
        }
    }

    /** Ignored: certified text is not supported by this input. */
    @Override
    public void setCertifiedText(boolean certifiedText) {
    }

    /** Ignored: this input only carries a byte stream. */
    @Override
    public void setCharacterStream(Reader characterStream) {
    }

    /** Ignored: the encoding is left to the parser's detection. */
    @Override
    public void setEncoding(String encoding) {
    }

    /** Ignored: this input only carries a byte stream. */
    @Override
    public void setStringData(String stringData) {
    }

    @Override
    public String getSystemId() {
        return systemId;
    }

    @Override
    public void setSystemId(String systemId) {
        this.systemId = systemId;
    }

    /** Returns the backing stream with its concrete buffered type. */
    public BufferedInputStream getInputStream()
    {
        return inputStream;
    }

    public void setInputStream(BufferedInputStream inputStream)
    {
        this.inputStream = inputStream;
    }
}
通过观察此解决方案生成的日志,它似乎真正按深度优先搜索顺序处理include / import语句。
但是,在某些地方,某些import语句会被忽略(即根本没有被处理)。
我无法找出哪些语句会被处理、哪些语句不会被处理的规律。例如,在处理某个文件中的以下三行时,只有第一行和第三行被处理,尽管在我看来它们的形式完全相同。
<import namespace="schemas/src/x20130601" schemaLocation="../../x20130601/Personalnumber.xsd"/>
<import namespace="schemas/src/common/2008/01/03" schemaLocation="../../../contract/x20080103/Contractnumber.xsd"/>
<import namespace="schemas/src/20100504" schemaLocation="../../../system/x20100504/Contractidentification.xsd" />
忽略某些import语句会导致以下类型的异常。
Exception in thread "main" org.xml.sax.SAXParseException; systemId: file:/z:/--- project path ---/schemas//x20130504/Identification.xsd; lineNumber: 18; columnNumber: 61; src-resolve: Cannot resolve the name 'dat20080103:Contractnumber' to a(n) 'element declaration' component.
无法解析的声明位于被跳过的XSD文件中。
请告诉我在哪里寻找错误或要包含哪些其他信息。