我正在尝试将 XHTML 文档加载到 XDocument 中,但遇到了“引用了未声明的实体”异常。我需要解析 &reg; 和 &raquo; 等实体。
我相信我的文档是格式良好的(well-formed),以下是文档的开头部分:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
当我执行 XDocument.Load(<StringReader>) 时,就会抛出这些异常。
答案 0 :(得分:10)
以下方案综合了 MSDN 文档和一篇博客文章中的做法。
// Parses the XHTML markup held in `output` into an XDocument.
// DTD processing is enabled and a custom resolver is supplied so that the DOCTYPE's
// external DTD can be resolved by LocalXhtmlXmlResolver.
XDocument document;
using (var stringReader = new StringReader(output))
{
    var settings = new XmlReaderSettings
    {
        // DtdProcessing.Parse replaces the obsolete "ProhibitDtd = false".
        DtdProcessing = DtdProcessing.Parse,
        // bool.Parse throws if the "CacheDTDs" app setting is missing or is not "true"/"false".
        XmlResolver = new LocalXhtmlXmlResolver(bool.Parse(ConfigurationManager.AppSettings["CacheDTDs"]))
    };

    // XmlReader is IDisposable; dispose it deterministically instead of leaving it to the GC.
    using (var xmlReader = XmlReader.Create(stringReader, settings))
    {
        document = XDocument.Load(xmlReader);
    }
}
/// <summary>
/// An <see cref="XmlUrlResolver"/> that maps well-known XHTML public identifiers to their
/// DTD URLs and can optionally fetch http resources through the default HTTP request cache policy.
/// </summary>
private class LocalXhtmlXmlResolver : XmlUrlResolver
{
    // Public identifier -> DTD location. Some keys are non-standard spellings and are
    // kept as-is for backward compatibility.
    private static readonly Dictionary<string, Uri> KnownUris = new Dictionary<string, Uri>
    {
        { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
        { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
        { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
        { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
        // Official Frameset public identifier (the entry above lacks the "//DTD" segment).
        { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
        { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
    };

    private readonly bool enableHttpCaching;
    private ICredentials credentials;

    /// <summary>Creates the resolver.</summary>
    /// <param name="enableHttpCaching">
    /// When true, http resources are requested via <see cref="WebRequest"/> with
    /// <see cref="HttpRequestCacheLevel.Default"/>; when false everything goes to the base resolver.
    /// </param>
    public LocalXhtmlXmlResolver(bool enableHttpCaching)
    {
        this.enableHttpCaching = enableHttpCaching;
    }

    /// <summary>
    /// Captures the credentials so the cached-request path in <see cref="GetEntity"/> can use them,
    /// and forwards them to the base resolver. Without this override the private field was never
    /// assigned and the credentials were silently ignored for cached requests.
    /// </summary>
    public override ICredentials Credentials
    {
        set
        {
            this.credentials = value;
            base.Credentials = value;
        }
    }

    /// <summary>
    /// Returns the mapped DTD URL when <paramref name="relativeUri"/> is a known public identifier;
    /// otherwise defers to <see cref="XmlUrlResolver.ResolveUri"/>.
    /// </summary>
    /// <param name="baseUri">Base URI passed through to the base resolver for unknown identifiers.</param>
    /// <param name="relativeUri">Public identifier or (relative) URI to resolve; may be null.</param>
    /// <returns>The resolved URI.</returns>
    public override Uri ResolveUri(Uri baseUri, string relativeUri)
    {
        Uri known;
        // A null key would make the dictionary throw; let the base class decide what null means.
        if (relativeUri != null && KnownUris.TryGetValue(relativeUri, out known))
        {
            return known;
        }

        Debug.WriteLine("Could not find: " + relativeUri);
        return base.ResolveUri(baseUri, relativeUri);
    }

    /// <summary>
    /// Fetches the entity at <paramref name="absoluteUri"/>. For http URIs, when caching is enabled
    /// and a stream (or no specific type) is requested, the request is issued with the default
    /// HTTP cache policy; all other cases use the base implementation.
    /// </summary>
    /// <param name="absoluteUri">Absolute URI of the entity.</param>
    /// <param name="role">Passed through to the base implementation.</param>
    /// <param name="ofObjectToReturn">Requested return type; null or <see cref="Stream"/> for the cached path.</param>
    /// <returns>A stream over the entity's content.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
    public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
    {
        if (absoluteUri == null)
        {
            throw new ArgumentNullException("absoluteUri");
        }

        // Resolve resources from the HTTP cache (if possible).
        if (absoluteUri.Scheme == "http" && this.enableHttpCaching && (ofObjectToReturn == null || ofObjectToReturn == typeof(Stream)))
        {
            var request = WebRequest.Create(absoluteUri);
            request.CachePolicy = new HttpRequestCachePolicy(HttpRequestCacheLevel.Default);
            if (this.credentials != null)
            {
                request.Credentials = this.credentials;
            }

            // The caller owns the returned stream; the response itself is not disposed here.
            var response = request.GetResponse();
            return response.GetResponseStream();
        }

        // Otherwise use the default behavior of the XmlUrlResolver class (resolve resources from source).
        return base.GetEntity(absoluteUri, role, ofObjectToReturn);
    }
}
答案 1 :(得分:8)
我遇到了和 Dave 一样的问题,并找到了这个对我帮助很大的问答。根据 Dave 的回答和 Pavel 的优化建议,我更新了这个类。现在,DTD 可以作为嵌入式资源存储,并在需要时加载。我知道这个帖子已经是几年前的了,但也许这能帮到其他人。
使用示例:
// Usage: turn on DTD parsing and plug in the resolver, then load the XHTML stream.
var readerSettings = new XmlReaderSettings();
readerSettings.DtdProcessing = DtdProcessing.Parse;
readerSettings.XmlResolver = new LocalXhtmlXmlResolver();

using (var reader = XmlReader.Create(xhtmlStream, readerSettings))
{
    var xhtml = XDocument.Load(reader);
...
}
LocalXhtmlXmlResolver类:
/// <summary>
/// An <see cref="XmlUrlResolver"/> that maps well-known XHTML public identifiers to their DTD URLs
/// and serves the DTD / entity files for those URLs from resources embedded in this assembly.
/// </summary>
public class LocalXhtmlXmlResolver : XmlUrlResolver
{
    // Prefix of the embedded resource names; replace with the namespace the files are embedded under.
    private const string ResourcePrefix = "Your.Namespace.Here.";

    // Absolute URI (as its original string) -> embedded resource name.
    private static readonly Dictionary<string, string> _knownDtds = new Dictionary<string, string>
    {
        { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", ResourcePrefix + "xhtml1-strict.dtd" },
        { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", ResourcePrefix + "xhtml1-transitional.dtd" },
        { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", ResourcePrefix + "xhtml1-frameset.dtd" },
        { "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", ResourcePrefix + "xhtml11.dtd" },
        { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Latin 1 for XHTML//EN", ResourcePrefix + "xhtml-lat1.ent" },
        { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Special for XHTML//EN", ResourcePrefix + "xhtml-special.ent" },
        { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Symbols for XHTML//EN", ResourcePrefix + "xhtml-symbol.ent" }
    };

    // Public identifier -> DTD location. Some keys are non-standard spellings and are
    // kept as-is for backward compatibility.
    private static readonly Dictionary<string, Uri> _knownUris = new Dictionary<string, Uri>
    {
        { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
        { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
        { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
        { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
        // Official Frameset public identifier (the entry above lacks the "//DTD" segment).
        { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
        { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
    };

    /// <summary>
    /// Returns the mapped DTD URL when <paramref name="relativeUri"/> is a known public identifier;
    /// otherwise defers to <see cref="XmlUrlResolver.ResolveUri"/>.
    /// </summary>
    /// <param name="baseUri">Base URI passed through to the base resolver for unknown identifiers.</param>
    /// <param name="relativeUri">Public identifier or (relative) URI to resolve; may be null.</param>
    /// <returns>The resolved URI.</returns>
    public override Uri ResolveUri(Uri baseUri, string relativeUri)
    {
        Uri known;
        // A null key would make the dictionary throw; let the base class decide what null means.
        if (relativeUri != null && _knownUris.TryGetValue(relativeUri, out known))
        {
            return known;
        }

        return base.ResolveUri(baseUri, relativeUri);
    }

    /// <summary>
    /// Returns the embedded resource stream for a known DTD/entity URI; any other URI is
    /// handled by the base implementation.
    /// </summary>
    /// <param name="absoluteUri">Absolute URI of the entity.</param>
    /// <param name="role">Passed through to the base implementation.</param>
    /// <param name="ofObjectToReturn">Passed through to the base implementation.</param>
    /// <returns>
    /// A stream over the entity. For a known URI whose resource is not embedded under the
    /// expected name, <c>GetManifestResourceStream</c> yields null and so does this method.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
    public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
    {
        if (absoluteUri == null)
        {
            throw new ArgumentNullException("absoluteUri");
        }

        string resourceName;
        if (_knownDtds.TryGetValue(absoluteUri.OriginalString, out resourceName))
        {
            // Resources are looked up in the assembly that defines this resolver.
            return typeof(LocalXhtmlXmlResolver).Assembly.GetManifestResourceStream(resourceName);
        }

        return base.GetEntity(absoluteUri, role, ofObjectToReturn);
    }
}