我遇到了一个非常奇怪的问题:使用 C# 的 XDocument.Validate,或者使用配置了所需选项的 XmlReaderSettings,根据一个有效的 XSD 来验证 XML 文档。问题是:当 XML 文档中存在错误时,验证过程在某些条件下无法捕获所有错误,而我找不到这种异常行为的规律。
这是我的XSD:
<?xml version="1.0" encoding="utf-8"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified"
targetNamespace="http://www.somesite.com/somefolder/messages"
xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Message">
<xs:complexType>
<xs:sequence>
<xs:element name="Header">
<xs:complexType>
<xs:sequence>
<xs:element name="MessageId" type="xs:string" />
<xs:element name="MessageSource" type="xs:string" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="Body">
<xs:complexType>
<xs:sequence>
<xs:element name="Abc001">
<xs:complexType>
<xs:sequence>
<xs:element name="Abc002" type="xs:string" />
<xs:element name="Abc003" type="xs:string" minOccurs="0" />
<!--<xs:element name="Abc004" type="xs:string" />-->
<xs:element name="Abc004">
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:maxLength value="200"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="Abc005">
<xs:complexType>
<xs:sequence>
<xs:element name="Abc006" type="xs:unsignedShort" />
<xs:element name="Abc007">
<xs:complexType>
<xs:sequence>
<xs:element name="Abc008" type="xs:string"/>
<xs:element name="Abc009" type="xs:string" minOccurs="0"/>
<xs:element name="Abc010" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="Abc011" type="xs:date" />
<xs:element name="Abc012">
<xs:complexType>
<xs:sequence>
<xs:element name="Abc013" type="xs:string" />
<xs:element name="Abc014" type="xs:string" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>

以下是根据此 XSD 进行验证的 XML 文档:
<?xml version="1.0" encoding="utf-8"?>
<Message xmlns="http://www.somesite.com/somefolder/messages">
<Header>
<MessageId>Lorem</MessageId>
<MessageSource>Ipsum</MessageSource>
</Header>
<Body>
<Abc001>
<Abc002>dolor</Abc002>
<Abc003>sit amet</Abc003>
<Abc004>consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</Abc004>
<Abc005>
<Abc006>1234</Abc006>
<Abc007>
<Abc008>Ut enim</Abc008>
<Abc009>ad</Abc009>
<Abc010>minim</Abc010>
</Abc007>
<Abc011>1982-10-17</Abc011>
<Abc012>
<Abc013>veniam</Abc013>
<Abc014>nostrud</Abc014>
</Abc012>
</Abc005>
</Abc001>
</Body>
</Message>

现在,当我在 XML 中引入一些验证错误并根据 XSD 对其进行验证时,它确实找到了所有错误。这是包含错误的 XML(我已经用注释标出了引入错误的位置):
<?xml version="1.0" encoding="utf-8"?>
<Message xmlns="http://www.somesite.com/somefolder/messages">
<Header>
<MessageId>Lorem</MessageId>
<MessageSource>Ipsum</MessageSource>
</Header>
<Body>
<Abc001>
<Abc002>dolor</Abc002>
<Abc003>sit amet</Abc003>
<!--The value for Abc004 is increased beyond the allowed 200 characters-->
<Abc004>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</Abc004>
<Abc005>
<Abc006>1234</Abc006>
<Abc007>
<Abc008>Ut enim</Abc008>
<ABC009>AD</ABC009>
<!--<Abc010>minim</Abc010> Required element removed-->
</Abc007>
<!--Date format below is wrong-->
<Abc011>1982-10-37</Abc011>
<Abc012>
<Abc013>veniam</Abc013>
<Abc014>nostrud</Abc014>
</Abc012>
</Abc005>
<!--the element below is not allowed-->
<Abc15>Not allowed</Abc15>
</Abc001>
</Body>
</Message>

这是我生成的结果 XML,其中列出了所有错误:
<MessageResponse xmlns="http://www.somesite.com/somefolder/messages">
<Result>false</Result>
<Status>Failed</Status>
<FaultCount>4</FaultCount>
<Faults>
<Fault>
<FaultCode>ERR01</FaultCode>
<FaultMessage>The 'http://www.somesite.com/somefolder/messages:Abc004' element is invalid - The value 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' is invalid according to its datatype 'String' - The actual length is greater than the MaxLength value.</FaultMessage>
</Fault>
<Fault>
<FaultCode>ERR02</FaultCode>
<FaultMessage>The element 'Abc007' in namespace 'http://www.somesite.com/somefolder/messages' has invalid child element 'ABC009' in namespace 'http://www.somesite.com/somefolder/messages'. List of possible elements expected: 'Abc009, Abc010' in namespace 'http://www.somesite.com/somefolder/messages'.</FaultMessage>
</Fault>
<Fault>
<FaultCode>ERR03</FaultCode>
<FaultMessage>The 'http://www.somesite.com/somefolder/messages:Abc011' element is invalid - The value '1982-10-37' is invalid according to its datatype 'http://www.w3.org/2001/XMLSchema:date' - The string '1982-10-37' is not a valid Date value.</FaultMessage>
</Fault>
<Fault>
<FaultCode>ERR04</FaultCode>
<FaultMessage>The element 'Abc001' in namespace 'http://www.somesite.com/somefolder/messages' has invalid child element 'Abc15' in namespace 'http://www.somesite.com/somefolder/messages'.</FaultMessage>
</Fault>
</Faults>
</MessageResponse>

奇怪的地方在这里:当我在 "Abc001" 元素的开头引入一个错误,并保留其他所有已有的错误时,结果就完全不对了。以下是带有新引入错误的 XML:
<?xml version="1.0" encoding="utf-8"?>
<Message xmlns="http://www.somesite.com/somefolder/messages">
<Header>
<MessageId>Lorem</MessageId>
<MessageSource>Ipsum</MessageSource>
</Header>
<Body>
<Abc001>
<!--newly introduced error - removed the following element-->
<!--<Abc002>dolor</Abc002>-->
<Abc003>sit amet</Abc003>
<!--The value for Abc004 is increased beyond the allowed 200 characters-->
<Abc004>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</Abc004>
<Abc005>
<Abc006>1234</Abc006>
<Abc007>
<Abc008>Ut enim</Abc008>
<ABC009>AD</ABC009>
<!--<Abc010>minim</Abc010>-->
</Abc007>
<Abc011>1982-10-37</Abc011>
<Abc012>
<Abc013>veniam</Abc013>
<Abc014>nostrud</Abc014>
</Abc012>
</Abc005>
<!--the element below is not allowed-->
<Abc15>Not allowed</Abc15>
</Abc001>
</Body>
</Message>

最后,这是验证结果(只报告了一个错误):
<MessageResponse xmlns="http://www.somesite.com/somefolder/messages">
<Result>false</Result>
<Status>Failed</Status>
<FaultCount>1</FaultCount>
<Faults>
<Fault>
<FaultCode>ERR01</FaultCode>
<FaultMessage>The element 'Abc001' in namespace 'http://www.somesite.com/somefolder/messages' has invalid child element 'Abc003' in namespace 'http://www.somesite.com/somefolder/messages'. List of possible elements expected: 'Abc002' in namespace 'http://www.somesite.com/somefolder/messages'.</FaultMessage>
</Fault>
</Faults>
</MessageResponse>

以下是我用来验证的 C# 代码:
/// <summary>
/// Validates <paramref name="doc"/> against the schema stored at <c>~/message01.xsd</c> and
/// returns an acknowledgement listing every validation event raised while reading the document.
/// </summary>
/// <param name="doc">
/// The message to validate. The namespace of its root element is passed to
/// <c>XmlSchemaSet.Add</c> as the target namespace of the schema.
/// </param>
/// <returns>
/// An acknowledgement whose <c>Result</c>/<c>Status</c> are <c>true</c>/"Succeeded" when no
/// validation event was raised, <c>false</c>/"Failed" with one numbered fault (ERR01, ERR02, ...)
/// per event otherwise, or <c>false</c>/"Unknown Error" when schema loading or reading threw.
/// </returns>
/// <remarks>
/// The body contains no awaits, so the work runs synchronously; <c>async</c> is retained so that
/// a failure before the <c>try</c> block still surfaces through the returned task.
/// NOTE(review): according to the XmlValidatingReader documentation, once an element reports a
/// content-model error the rest of that element's content model is not validated, so a single
/// pass may not list every problem in the document.
/// </remarks>
public async Task<IMIDPreValidationAckMessage> ValidateXmlMessage( XDocument doc )
{
    var result = new PreValidationAckMessage();
    result.Result = true;
    result.Status = "Succeeded";
    var xsd = HttpContext.Current.Server.MapPath( "~/message01.xsd" );
    try
    {
        var uri = new System.Uri( xsd );
        var localPath = uri.LocalPath;
        var docNameSpace = doc.Root.Name.Namespace.NamespaceName;

        XmlSchemaSet schemas = new XmlSchemaSet();
        schemas.Add( docNameSpace, localPath );

        XmlReaderSettings xrs = new XmlReaderSettings();
        xrs.ValidationType = ValidationType.Schema;
        xrs.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
        xrs.Schemas = schemas;

        result.XSDNamespace = doc.Root.GetDefaultNamespace().NamespaceName;

        // Every validation event (errors and, because of the flag above, warnings) becomes a fault.
        var errCode = 1;
        xrs.ValidationEventHandler += ( s, e ) =>
        {
            result.Result = false;
            result.Status = "Failed";
            result.FaultCount++;
            result.Faults.Add( new Fault
            {
                FaultCode = "ERR" + errCode++.ToString().PadLeft( 2, '0' ),
                FaultMessage = e.Message
            } );
        };

        // Validation happens as a side effect of reading, so the document is simply read to the end.
        using ( XmlReader source = doc.CreateReader() )
        using ( XmlReader xr = XmlReader.Create( source, xrs ) )
        {
            while ( xr.Read() ) { }
        }
    }
    catch ( System.Exception ex )
    {
        // Keep the caller-visible contract ("Unknown Error") but do not lose the cause.
        System.Diagnostics.Trace.TraceError( "ValidateXmlMessage failed: {0}", ex );
        result.Result = false;
        result.Status = "Unknown Error";
    }
    return result;
}
有人可以告诉我这里有什么问题吗?
答案 0 :(得分:1)
看起来 XmlReader 在某个元素上第一次遇到错误后,就会停止对该元素的验证。以下内容摘自旧的(已过时的)XmlValidatingReader 文档:
如果某个元素报告了验证错误,则该元素内容模型的其余部分不会被验证,但其子元素仍会被验证。对于给定的元素,读取器只报告第一个错误。
常规的 XmlReader 似乎也是如此(尽管其文档没有明确提到这一点)。
在第一个示例中,错误位于最内层的元素(例如元素的文本值无效)或最后一个子元素中,因此它们都被报告,没有被跳过。但在最后一个示例中,您在外层 Abc001 元素的开头引入了错误,因此 Abc001 的其余内容以及其中的所有错误都被跳过了。
答案 1 :(得分:0)
我编写了这段代码,用于验证xml消息的每个元素,并尝试更正可以纠正的内容:
/// <summary>
/// Validates every element below the root of <paramref name="xDoc"/> against
/// <c>Messages_Schema.xsd</c> and repeatedly tries to repair what it can: misplaced elements are
/// moved, elements flagged for deletion are removed, and over-long values are truncated to the
/// schema's maxLength. The collected errors and applied fixes are written to the console.
/// </summary>
/// <param name="xDoc">The message to validate; it is modified in place by the repairs.</param>
/// <exception cref="ArgumentNullException"><paramref name="xDoc"/> is null.</exception>
/// <remarks>
/// Relies on the ErrorLogs, FixLogs, XElementsToDelete, XElementsToCut and XElementsInvalid
/// collections, which are declared outside this method and filled by ValidateElements.
/// NOTE(review): the loop runs for as long as a pass reports errors and applies at least one fix;
/// nothing bounds the number of passes, so a fix that never clears its error would repeat forever.
/// </remarks>
static void ValidateMessage(XDocument xDoc)
{
    if (xDoc == null)
        throw new ArgumentNullException(nameof(xDoc));

    var schemas = new XmlSchemaSet();
    schemas.Add("", @"Messages_Schema.xsd");
    schemas.Compile();

    // Find the global element declaration matching the document root.
    var rootName = xDoc.Root?.Name.LocalName;
    var schemaElements = schemas.GlobalElements.Values.OfType<XmlSchemaElement>()
        .FirstOrDefault(e => e.Name == rootName);

    // The compiled content particle is not necessarily a sequence (it may be a choice or a single
    // element), so it is not hard-cast: a sequence contributes its items, anything else is passed
    // on as a single particle.
    var rootParticle = (schemaElements?.ElementSchemaType as XmlSchemaComplexType)?.ContentTypeParticle;
    var rootSequence = rootParticle as XmlSchemaSequence;
    List<XmlSchemaObject> rootItems = null;
    if (rootSequence != null)
        rootItems = rootSequence.Items.OfType<XmlSchemaObject>().ToList();
    else if (rootParticle != null)
        rootItems = new List<XmlSchemaObject> { rootParticle };

    // NOTE(review): rootItems stays null when the root element is not declared in the schema;
    // the helper is then handed a null list, as before - confirm that failing here is intended.
    var elementNameList = new Dictionary<string, XmlSchemaObject>();
    AddElementsInDictionary(rootItems, elementNameList, new List<string>());

    var errorList = new List<string>();
    bool elementFixed;
    do
    {
        errorList.Clear();
        ValidateElements(xDoc.Root?.Elements().ToList(), elementNameList, schemas, new List<string>(), errorList);
        errorList.ForEach(e =>
        {
            if (!ErrorLogs.Contains(e))
                ErrorLogs.Add(e);
        });

        elementFixed = false;
        if (XElementsToDelete.Any() || XElementsToCut.Any() || XElementsInvalid.Any())
        {
            // 1) Elements reported as invalid children: try to move them to a valid position.
            XElementsInvalid.ForEach(xElement =>
            {
                var xParent = xElement.Parent;
                var xParentSchema =
                    (xParent?.GetSchemaInfo()?.SchemaType as XmlSchemaComplexType)?.ContentTypeParticle as
                    XmlSchemaSequence;

                // Without a sequence content model for the parent there is no order to restore.
                if (xParentSchema == null)
                    return;

                var elementNameLists = new List<string>();
                AddElementsInList(xParentSchema.Items.OfType<XmlSchemaObject>().ToList(), elementNameLists);
                var index = elementNameLists.IndexOf(xElement.Name.LocalName);
                if (index == 0)
                {
                    xElement.Remove();
                    var log = $" Move: {xElement.Name} element to the top of the sequence";
                    if (!FixLogs.Contains(log))
                        FixLogs.Add(log);
                    elementFixed = true;
                    xParent?.AddFirst(xElement);
                }
                else
                {
                    var xNextElement = xElement.NextNode as XElement;
                    var indexNext = elementNameLists.IndexOf(xNextElement?.Name.LocalName);
                    if (indexNext - index > 1)
                    {
                        // The gap to the next sibling suggests the previous sibling is the one out
                        // of place: move it behind the nearest schema predecessor that is present.
                        var xPreviousElement = xElement.PreviousNode as XElement;
                        var indexPrevious = elementNameLists.IndexOf(xPreviousElement?.Name.LocalName);

                        // The bound is checked before decrementing so a start index of 0 or -1
                        // (no previous element / unknown name) never indexes the list at -1 or -2.
                        while (indexPrevious > 0)
                        {
                            indexPrevious -= 1;
                            var xLastValidElement = xParent?.Element(elementNameLists[indexPrevious]);
                            if (xLastValidElement == null) continue;
                            xPreviousElement?.Remove();
                            var log = $" Move: {xPreviousElement?.Name} element after {xLastValidElement.Name}";
                            if (!FixLogs.Contains(log))
                                FixLogs.Add(log);
                            elementFixed = true;
                            xLastValidElement.AddAfterSelf(xPreviousElement);
                            break;
                        }
                    }
                    else
                    {
                        // Move the element itself behind the nearest schema predecessor present.
                        // As above, checking first keeps an unknown name (index -1) out of the list.
                        while (index > 0)
                        {
                            index -= 1;
                            var xLastValidElement = xParent?.Element(elementNameLists[index]);
                            if (xLastValidElement == null) continue;
                            xElement.Remove();
                            var log = $" Move: {xElement.Name} element after {xLastValidElement.Name}";
                            if (!FixLogs.Contains(log))
                                FixLogs.Add(log);
                            elementFixed = true;
                            xLastValidElement.AddAfterSelf(xElement);
                            break;
                        }
                    }
                }
            });

            // 2) Elements flagged for deletion.
            XElementsToDelete.ForEach(e =>
            {
                e.Remove();
                var log = $" Delete: {e.Name} element";
                if (!FixLogs.Contains(log))
                    FixLogs.Add(log);
                elementFixed = true;
            });

            // 3) Values longer than the schema's maxLength facet: truncate.
            XElementsToCut.ForEach(e =>
            {
                // Soft casts: the schema type may not be a simple-type restriction.
                var schemaType = e.GetSchemaInfo()?.SchemaType as XmlSchemaSimpleType;
                var restriction = schemaType?.Content as XmlSchemaSimpleTypeRestriction;
                var maxLengthFacet = restriction?.Facets.OfType<XmlSchemaMaxLengthFacet>().FirstOrDefault();
                if (maxLengthFacet != null)
                {
                    var maxLength = int.Parse(maxLengthFacet.Value,
                        System.Globalization.CultureInfo.InvariantCulture);

                    // Only cut when there is something to cut; Substring throws on a shorter value.
                    if (e.Value.Length > maxLength)
                    {
                        var log = $" Cut: {e.Name} value to maxLength: {maxLength}";
                        if (!FixLogs.Contains(log))
                            FixLogs.Add(log);
                        elementFixed = true;
                        e.Value = e.Value.Substring(0, maxLength);
                    }
                }
            });

            if (!elementFixed)
            {
                var log = " Cannot fix:";
                if (!FixLogs.Contains(log))
                    FixLogs.Add(log);
                errorList.ForEach(e =>
                {
                    if (!FixLogs.Contains($" {e}"))
                        FixLogs.Add($" {e}");
                });
            }
        }

        // The candidate lists are rebuilt from scratch by the next validation pass.
        XElementsToDelete.Clear();
        XElementsInvalid.Clear();
        XElementsToCut.Clear();
    } while (errorList.Count > 0 && elementFixed);

    Console.WriteLine($"Validating a {xDoc.Root?.Name.LocalName}");
    Console.WriteLine("");
    ErrorLogs.ForEach(e =>
    {
        Console.WriteLine(" {0}", e);
    });
    Console.WriteLine("");
    Console.WriteLine("Fixing");
    Console.WriteLine("");
    FixLogs.ForEach(e =>
    {
        Console.WriteLine("{0}", e);
    });
    Console.WriteLine("");
    Console.WriteLine("Message {0}",
        errorList.Count > 0 ? "did not validate" : "validated");
    Console.ReadKey();
}
/// <summary>
/// Appends the local names of the element declarations found in
/// <paramref name="schemaObjectList"/> to <paramref name="schemaObjectKeys"/>, in order of
/// appearance and without duplicates. Sequences, choices and group references are walked
/// recursively; complex types are not descended into, so only one nesting level of element
/// names is collected.
/// </summary>
/// <param name="schemaObjectList">Schema objects (particles) to scan.</param>
/// <param name="schemaObjectKeys">Receives the element names.</param>
private static void AddElementsInList(IEnumerable<XmlSchemaObject> schemaObjectList,
    ICollection<string> schemaObjectKeys)
{
    foreach (var schemaObject in schemaObjectList.ToList())
    {
        // Dispatch on the exact runtime type of the schema object.
        var kind = schemaObject.GetType();

        if (kind == typeof(XmlSchemaElement))
        {
            var declaration = (XmlSchemaElement)schemaObject;
            var localName = declaration.QualifiedName.Name;
            if (!schemaObjectKeys.Contains(localName))
                schemaObjectKeys.Add(localName);

            // The element's type is handed back to this method; complex types are skipped
            // below, so the names of grandchildren are never added.
            var declaredType = declaration.ElementSchemaType;
            if (declaredType != null)
                AddElementsInList(new List<XmlSchemaObject> { declaredType }, schemaObjectKeys);
        }
        else if (kind == typeof(XmlSchemaSequence))
        {
            var members = ((XmlSchemaSequence)schemaObject).Items.OfType<XmlSchemaObject>().ToList();
            AddElementsInList(members, schemaObjectKeys);
        }
        else if (kind == typeof(XmlSchemaChoice))
        {
            var alternatives = ((XmlSchemaChoice)schemaObject).Items.OfType<XmlSchemaObject>().ToList();
            AddElementsInList(alternatives, schemaObjectKeys);
        }
        else if (kind == typeof(XmlSchemaGroupRef))
        {
            var referenced = ((XmlSchemaGroupRef)schemaObject).Particle;
            if (referenced != null)
                AddElementsInList(new List<XmlSchemaObject> { referenced }, schemaObjectKeys);
        }
        // XmlSchemaComplexType and every other kind of schema object: nothing to collect.
    }
}
/// <summary>
/// Indexes every element declaration reachable from <paramref name="schemaObjectList"/> by its
/// slash-separated path of local names (for example <c>Header/MessageId</c>), relative to the
/// path already held in <paramref name="schemaObjectDictionaryKeys"/>.
/// </summary>
/// <param name="schemaObjectList">Schema objects (elements, types or particles) to walk.</param>
/// <param name="schemaObjectDictionary">Receives path-to-declaration entries; existing keys are kept.</param>
/// <param name="schemaObjectDictionaryKeys">
/// The current path segments; used as a stack during the walk and left as it was on return.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="schemaObjectList"/> is null.</exception>
/// <remarks>
/// A complex type that contains itself (directly or indirectly) is expanded only once per
/// branch, so a recursive schema no longer recurses without end; paths deeper than the first
/// repetition are therefore not indexed.
/// </remarks>
private static void AddElementsInDictionary(IEnumerable<XmlSchemaObject> schemaObjectList,
    IDictionary<string, XmlSchemaObject> schemaObjectDictionary, IList<string> schemaObjectDictionaryKeys)
{
    if (schemaObjectList == null)
        throw new ArgumentNullException(nameof(schemaObjectList));

    AddElementsInDictionaryCore(schemaObjectList, schemaObjectDictionary, schemaObjectDictionaryKeys,
        new HashSet<XmlSchemaComplexType>());
}

// Recursive worker: typesBeingExpanded holds the complex types on the current recursion branch.
private static void AddElementsInDictionaryCore(IEnumerable<XmlSchemaObject> schemaObjectList,
    IDictionary<string, XmlSchemaObject> schemaObjectDictionary, IList<string> schemaObjectDictionaryKeys,
    ISet<XmlSchemaComplexType> typesBeingExpanded)
{
    foreach (var se in schemaObjectList)
    {
        var element = se as XmlSchemaElement;
        if (element != null)
        {
            // Push the element name, record its path, walk its type, then pop the name again.
            schemaObjectDictionaryKeys.Add(element.QualifiedName.Name);
            var path = string.Join("/", schemaObjectDictionaryKeys);
            if (!schemaObjectDictionary.ContainsKey(path))
                schemaObjectDictionary.Add(path, element);

            var elementSchemaType = element.ElementSchemaType;
            if (elementSchemaType != null)
                AddElementsInDictionaryCore(new List<XmlSchemaObject> { elementSchemaType },
                    schemaObjectDictionary, schemaObjectDictionaryKeys, typesBeingExpanded);

            if (schemaObjectDictionaryKeys.Count > 0)
                schemaObjectDictionaryKeys.RemoveAt(schemaObjectDictionaryKeys.Count - 1);
            continue;
        }

        var complexType = se as XmlSchemaComplexType;
        if (complexType != null)
        {
            var complexTypeParticle = complexType.ContentTypeParticle;

            // Add() returns false when this type is already being expanded further up the
            // branch, i.e. the schema is recursive; stop there instead of descending forever.
            if (complexTypeParticle != null && typesBeingExpanded.Add(complexType))
            {
                try
                {
                    AddElementsInDictionaryCore(new List<XmlSchemaObject> { complexTypeParticle },
                        schemaObjectDictionary, schemaObjectDictionaryKeys, typesBeingExpanded);
                }
                finally
                {
                    typesBeingExpanded.Remove(complexType);
                }
            }
            continue;
        }

        // xs:sequence, xs:choice and xs:all all expose their members through Items.
        var group = se as XmlSchemaGroupBase;
        if (group != null)
        {
            AddElementsInDictionaryCore(group.Items.OfType<XmlSchemaObject>().ToList(),
                schemaObjectDictionary, schemaObjectDictionaryKeys, typesBeingExpanded);
            continue;
        }

        var groupRef = se as XmlSchemaGroupRef;
        if (groupRef?.Particle != null)
            AddElementsInDictionaryCore(new List<XmlSchemaObject> { groupRef.Particle },
                schemaObjectDictionary, schemaObjectDictionaryKeys, typesBeingExpanded);
    }
}
/// <summary>
/// Validates each element of <paramref name="xElementList"/> (and, recursively, its child
/// elements) against the schema declaration registered for its path, collecting the distinct
/// error messages and sorting the offending elements into the XElementsToDelete,
/// XElementsInvalid and XElementsToCut collections declared outside this method.
/// </summary>
/// <param name="xElementList">Sibling elements to validate.</param>
/// <param name="schemaObjectDictionary">Schema declarations keyed by slash-separated element path.</param>
/// <param name="schemas">The compiled schema set used for validation.</param>
/// <param name="schemaObjectDictionaryKeys">
/// Path segments of the parent of the elements in <paramref name="xElementList"/>; used as a
/// stack and left as it was on return.
/// </param>
/// <param name="errorList">Receives each distinct error message once.</param>
/// <exception cref="ArgumentNullException"><paramref name="xElementList"/> is null.</exception>
/// <remarks>
/// NOTE(review): the repair category is derived from fragments of the English validation
/// message text - confirm that messages are never localized where this runs.
/// </remarks>
private static void ValidateElements(List<XElement> xElementList,
    IReadOnlyDictionary<string, XmlSchemaObject> schemaObjectDictionary, XmlSchemaSet schemas,
    IList<string> schemaObjectDictionaryKeys, ICollection<string> errorList)
{
    if (xElementList == null)
        throw new ArgumentNullException(nameof(xElementList));

    foreach (var xElement in xElementList)
    {
        schemaObjectDictionaryKeys.Add(xElement.Name.LocalName);
        var path = string.Join("/", schemaObjectDictionaryKeys);

        XmlSchemaObject validateObject;
        if (schemaObjectDictionary.TryGetValue(path, out validateObject))
        {
            xElement.Validate(validateObject, schemas,
                (o, e) =>
                {
                    if (!errorList.Contains(e.Message))
                        errorList.Add(e.Message);

                    // The sender is not guaranteed to be an element (an attribute can raise the
                    // event too); such errors are recorded above but have no element to repair.
                    var source = o as XElement;
                    if (source == null)
                        return;

                    if (e.Message.Contains("has incomplete content"))
                    {
                        if (!XElementsToDelete.Contains(source))
                            XElementsToDelete.Add(source);
                    }
                    if (e.Message.Contains("has invalid child element"))
                    {
                        if (!XElementsInvalid.Contains(source))
                            XElementsInvalid.Add(source);
                    }
                    if (e.Message.Contains("actual length is greater than the MaxLength value"))
                    {
                        if (!XElementsToCut.Contains(source))
                            XElementsToCut.Add(source);
                    }
                }, true);

            if (xElement.HasElements)
                ValidateElements(xElement.Elements().ToList(), schemaObjectDictionary, schemas,
                    schemaObjectDictionaryKeys, errorList);
        }
        else
        {
            // No declaration for this path: the element is unknown, so deletion overrides any
            // other repair it was queued for while an ancestor was being validated.
            var log = $"The element '{xElement.Name.LocalName}' is unknown. It should be delete.";
            if (!errorList.Contains(log))
                errorList.Add(log);
            XElementsInvalid.Remove(xElement);
            XElementsToCut.Remove(xElement);
            if (!XElementsToDelete.Contains(xElement))
                XElementsToDelete.Add(xElement);
        }

        if (schemaObjectDictionaryKeys.Count > 0)
            schemaObjectDictionaryKeys.RemoveAt(schemaObjectDictionaryKeys.Count - 1);
    }
}