ColdFusion XML 字符编码(第 2 部分)

时间:2011-04-24 15:32:10

标签: xml coldfusion character-encoding

我有一个 UTF-8 编码的 XML 文件,其中包含 &#xC3;&#x192;&#xC2;&#xA7; 之类的字符引用(用于表示带软音符的 ç 等字符)。我编写了下面的代码段,将它们删除或替换为可接受的值。1. 有更好的方法吗? 2. 在一些大型 XML 文件(> 50MB)上运行时,可能会出现内存不足(OOM)错误。如果没有更好的方法,我该如何优化这段代码以避免 OOM 错误?

<!---
    Reads ./xs.xml into memory, rewrites a fixed list of numeric character
    reference sequences to plain ASCII stand-ins, strips every remaining
    numeric character reference, and writes the result to ./xs_new.xml.

    The source file is UTF-8, so the charset is stated explicitly on both the
    read and the write; without it cffile uses the JVM default encoding and
    non-ASCII characters can be corrupted on the round trip.

    NOTE: the whole file is held in one string and each ReReplace call builds
    another full copy, so very large inputs can exhaust the heap.
--->
<cffile
    action="read"
    file="#ExpandPath('./xs.xml')#"
    charset="utf-8"
    variable="myfile"/>

<!--- Sequences starting with xC2. --->
<cfset myfile = ReReplace(myfile,'&##xC2;&##x2013;','.','all')/>
<cfset myfile = ReReplace(myfile,'&##xC2;&##x2019;','''','all')/>
<cfset myfile = ReReplace(myfile,'&##xC2;&##x201D;','"','all')/>

<!--- Sequences starting with xC3. The four-reference pattern must run before
      the shorter xC3 patterns that follow it. --->
<cfset myfile = ReReplace(myfile,'&##xC3;&##x192;&##xC2;&##xA7;','c','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##xA7;','c','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##xA9;','e','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##x201A;&##xC2;&##x2022;','(*)','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##x192;&##xC2;&##x201A;\?','(*)','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##x201A;&##xC2;&##xB7;','-','all')/>
<cfset myfile = ReReplace(myfile,'&##xC3;&##x201A;&##xC2;&##x2018;','''','all')/>
<cfset myfile = ReReplace(myfile,' &##xC3;&##x201A;&##xC2;&##x201C;',' "','all')/>

<!--- Sequences starting with xE2. Order matters: the variants anchored on a
      leading or trailing space become a double quote first; whatever is left
      of the same sequence then becomes a dash. --->
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##x201C;','-','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##x2122;','''','all')/>
<cfset myfile = ReReplace(myfile,' &##xE2;&##x20AC;&##x153;',' "','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##x153;','-','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##xFFFD; ','" ','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##xFFFD;','-','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x201E;&##xA2;','(TM)','all')/>
<cfset myfile = ReReplace(myfile,'&##xE2;&##x20AC;&##xA2;','(*)','all')/>

<!--- Sequence starting with xEF. --->
<cfset myfile = ReReplace(myfile,'&##xEF;&##x201A;&##xA7;','(*)','all')/>

<!--- Catch-all: drop any numeric character reference not handled above.
      Must stay last, otherwise it would erase the sequences mapped above. --->
<cfset myfile = ReReplace(myfile,'(&##[^;]*;)','','all')/>

<cffile action="write"
     file="#ExpandPath('./xs_new.xml')#"
     charset="utf-8"
     output="#myfile#"/>

感谢

1 个答案:

答案 0 :(得分:1)

使用ColdFusion的文件函数一次处理一行,而不是将整个内容读入内存:

<cfscript>
// Streams ./xs.xml to ./xs_new.xml one line at a time so that only a single
// line is held in memory, rewriting a fixed list of numeric character
// reference sequences to plain ASCII stand-ins and stripping any remaining
// numeric character references.
//
// Both handles are opened as UTF-8 explicitly (the input is a UTF-8 XML file);
// without the charset argument the JVM default encoding would be used.
myfile = FileOpen(ExpandPath('./xs.xml'), "read", "utf-8");
myNewFile = FileOpen(ExpandPath('./xs_new.xml'), "write", "utf-8");

while (NOT FileIsEOF(myfile)) {
    // FileReadLine returns the line without its terminator.
    line = FileReadLine(myfile);

    // Sequences starting with xC2.
    line = ReReplace(line,'&##xC2;&##x2013;','.','all');
    line = ReReplace(line,'&##xC2;&##x2019;','''','all');
    line = ReReplace(line,'&##xC2;&##x201D;','"','all');

    // Sequences starting with xC3. The four-reference pattern must run
    // before the shorter xC3 patterns that follow it.
    line = ReReplace(line,'&##xC3;&##x192;&##xC2;&##xA7;','c','all');
    line = ReReplace(line,'&##xC3;&##xA7;','c','all');
    line = ReReplace(line,'&##xC3;&##xA9;','e','all');
    line = ReReplace(line,'&##xC3;&##x201A;&##xC2;&##x2022;','(*)','all');
    line = ReReplace(line,'&##xC3;&##x192;&##xC2;&##x201A;\?','(*)','all');
    line = ReReplace(line,'&##xC3;&##x201A;&##xC2;&##xB7;','-','all');
    line = ReReplace(line,'&##xC3;&##x201A;&##xC2;&##x2018;','''','all');
    line = ReReplace(line,' &##xC3;&##x201A;&##xC2;&##x201C;',' "','all');

    // Sequences starting with xE2. Order matters: the variants anchored on a
    // leading or trailing space become a double quote first; whatever is
    // left of the same sequence then becomes a dash.
    line = ReReplace(line,'&##xE2;&##x20AC;&##x201C;','-','all');
    line = ReReplace(line,'&##xE2;&##x20AC;&##x2122;','''','all');
    line = ReReplace(line,' &##xE2;&##x20AC;&##x153;',' "','all');
    line = ReReplace(line,'&##xE2;&##x20AC;&##x153;','-','all');
    line = ReReplace(line,'&##xE2;&##x20AC;&##xFFFD; ','" ','all');
    line = ReReplace(line,'&##xE2;&##x20AC;&##xFFFD;','-','all');
    line = ReReplace(line,'&##xE2;&##x201E;&##xA2;','(TM)','all');
    line = ReReplace(line,'&##xE2;&##x20AC;&##xA2;','(*)','all');

    // Sequence starting with xEF.
    line = ReReplace(line,'&##xEF;&##x201A;&##xA7;','(*)','all');

    // Catch-all: drop any numeric character reference not handled above.
    // Must stay last, otherwise it would erase the sequences mapped above.
    line = ReReplace(line,'(&##[^;]*;)','','all');

    // Write to the open output handle. FileWriteLine appends the line
    // terminator that FileReadLine removed, so the line structure is kept.
    FileWriteLine(myNewFile, line);
}

FileClose(myfile);
FileClose(myNewFile);
</cfscript>