我找到了一种方法来清理含有无效字符的XML文件,它可以正常工作,但速度有点慢。清理大约需要10-20秒,用户对此很不满意。
先用StreamReader/StreamWriter生成一个干净的文件,然后再用XmlReader读取,似乎浪费了大量时间。是否可以在XmlReader读取的过程中直接清理每一行,或者至少把StreamReader作为XmlReader的输入,从而省去写临时文件的时间?
我试图让创建数据库的团队在上传之前创建干净的文件,但这是一个缓慢的过程......
// CheckCharacters = false turns off the reader's own legal-XML-character validation.
XmlReaderSettings settings = new XmlReaderSettings { CheckCharacters = false };
// Parse a sanitized copy of the database file rather than the original.
cleanDatabase = createCleanSDDB(database);
using (XmlReader sddbReader = XmlReader.Create(cleanDatabase, settings))
{
    // Parse XML...
}
/// <summary>
/// Copies the file at <paramref name="sddbPath"/> line by line into a new file,
/// passing each line through getCleanLine before it is written.
/// </summary>
/// <param name="sddbPath">Path of the file to read; it is decoded as UTF-8.</param>
/// <returns>Path of the file that was written.</returns>
private string createCleanSDDB(String sddbPath)
{
    // Destination path comes from getTmpFileName().
    string cleanPath = getTmpFileName();

    using (StreamReader source = new StreamReader(sddbPath, Encoding.UTF8))
    using (StreamWriter target = new StreamWriter(cleanPath))
    {
        // ReadLine returns null once the end of the input has been reached.
        for (string rawLine = source.ReadLine(); rawLine != null; rawLine = source.ReadLine())
        {
            target.WriteLine(getCleanLine(rawLine));
        }
    }

    return cleanPath;
}
// Matches every UTF-16 code unit that is not part of a legal XML 1.0 character:
//   1. anything other than TAB, LF, CR and U+0020..U+FFFD (surrogates are inside that
//      range on purpose; alternatives 2 and 3 decide their fate),
//   2. a high surrogate that is not followed by a low surrogate,
//   3. a low surrogate that is not preceded by a high surrogate.
// Well-formed surrogate pairs (U+10000..U+10FFFF) are therefore left untouched.
// Note: in .NET regular expressions \x takes exactly two hex digits, so four-digit
// code points must be written with \u.
// Built once and compiled because the method is called for every line of the file.
private static readonly Regex InvalidXmlCharsRegex = new Regex(
    @"[^\x09\x0A\x0D\x20-\uFFFD]" +
    @"|[\uD800-\uDBFF](?![\uDC00-\uDFFF])" +
    @"|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]",
    RegexOptions.Compiled | RegexOptions.CultureInvariant);

/// <summary>
/// Removes every character that is not allowed by the XML 1.0 <c>Char</c> production
/// from <paramref name="dirtyLine"/>.
/// </summary>
/// <param name="dirtyLine">The text to sanitize.</param>
/// <returns>
/// <paramref name="dirtyLine"/> with illegal control characters, U+FFFE, U+FFFF and
/// unpaired surrogates deleted; all other characters are kept in their original order.
/// </returns>
private string getCleanLine(string dirtyLine)
{
    return InvalidXmlCharsRegex.Replace(dirtyLine, string.Empty);
}