我正在尝试使用StAX替换xml中的特殊字符(变音字符)。
当input和output xml文件不同时,我能够实现此目的。但100MB文件的处理时间约为10分钟。我认为这是由于逐行写入新文件的IO操作比较耗时。
是否可以通过StAX在同一个xml文件中读取和写入替换的字符串,这样可以节省IO操作?
任何帮助或线索我都将不胜感激。
public class StAXXMLFileDemo {
// Shared StAX event factory; main uses it to build the replacement character events.
static XMLEventFactory m_eventFactory = XMLEventFactory.newInstance();
// Characters to look for in XML text content. Parallel to replacementList:
// the entry at index i here is replaced by the entry at index i there.
static String[] searchList = { "Ä", "ä", "Ö", "ö", "Ü", "ü", "ß" };
// ASCII transliterations, in the same order as searchList.
static String[] replacementList = { "Ae", "ae", "Oe", "oe", "Ue", "ue", "ss" };
/**
 * Transliterates the characters in {@code searchList} to their {@code replacementList}
 * counterparts in the text content of every {@code .xml} file directly inside an input
 * directory, writing each result under the same file name into an output directory.
 *
 * <p>The output directory is emptied before processing starts. Files are processed one
 * by one; a failure on one file is reported and does not stop the remaining files.
 *
 * @param args {@code args[0]} is the input directory, {@code args[1]} the output
 *             directory; both must already exist and must be different directories
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: StAXXMLFileDemo <inputDirectory> <outputDirectory>");
        return;
    }
    File inputfileDirectory = new File(args[0]);
    File outputfileDirectory = new File(args[1]);

    // Validate BEFORE cleaning: cleaning a path that is not a directory fails, and
    // cleaning the input directory itself would delete the files we are about to read.
    if (!inputfileDirectory.isDirectory() || !outputfileDirectory.isDirectory()) {
        System.err.println("Both arguments must be existing directories");
        return;
    }
    if (inputfileDirectory.getAbsoluteFile().equals(outputfileDirectory.getAbsoluteFile())) {
        System.err.println("Input and output directory must differ: the output directory is emptied first");
        return;
    }

    try {
        FileUtils.cleanDirectory(outputfileDirectory);
    } catch (IOException e1) {
        // Best effort, as before: stale files may remain but processing continues.
        e1.printStackTrace();
        System.out.println("Exception in deleting output directory files");
    }

    File[] files = inputfileDirectory.listFiles();
    if (files == null) {
        // listFiles() returns null when the directory cannot be read.
        System.err.println("Unable to list files in " + inputfileDirectory);
        return;
    }

    // The factories are reusable; create them once instead of once per file.
    XMLInputFactory factory = XMLInputFactory.newInstance();
    XMLOutputFactory outFactory = XMLOutputFactory.newInstance();

    for (File file : files) {
        if (file.isDirectory() || !file.getAbsolutePath().endsWith(".xml")) {
            continue;
        }
        File target = new File(outputfileDirectory, file.getName());
        try {
            transliterateFile(file, target, factory, outFactory);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
}

/**
 * Copies one XML document from {@code source} to {@code target} event by event,
 * transliterating its character data on the way. Both files are treated as UTF-8.
 *
 * @param source        XML file to read
 * @param target        file to create or overwrite with the transliterated document
 * @param inputFactory  factory used to create the event reader
 * @param outputFactory factory used to create the event writer
 * @throws IOException        if a file cannot be opened or closed
 * @throws XMLStreamException if the document cannot be parsed or written
 */
private static void transliterateFile(File source, File target,
        XMLInputFactory inputFactory, XMLOutputFactory outputFactory)
        throws IOException, XMLStreamException {
    // Buffer both sides so parsing and serialising are not tied to small file reads/writes.
    InputStream in = new java.io.BufferedInputStream(new FileInputStream(source));
    try {
        OutputStream out = new java.io.BufferedOutputStream(new FileOutputStream(target));
        try {
            XMLEventReader eventReader = inputFactory.createXMLEventReader(in, "UTF-8");
            XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(out, "UTF-8");
            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                if (event.getEventType() == XMLStreamConstants.CHARACTERS) {
                    eventWriter.add(transliterate(event.asCharacters()));
                } else {
                    // Everything else (including comments, DTD, entity references and
                    // the end-of-document event) is passed through unchanged.
                    eventWriter.add(event);
                }
            }
            eventWriter.flush();
            eventWriter.close();
            eventReader.close();
        } finally {
            // Closing the StAX writer does not close the underlying stream.
            out.close();
        }
    } finally {
        in.close();
    }
}

/**
 * Returns an event carrying the transliterated text of {@code characters}, or
 * {@code characters} itself when nothing had to be replaced.
 *
 * @param characters character event read from the input document
 * @return the event to write to the output document
 */
private static XMLEvent transliterate(javax.xml.stream.events.Characters characters) {
    // getData() is the raw text; toString() is implementation-defined and must not
    // be used as the content.
    String original = characters.getData();
    if (original == null) {
        return characters;
    }
    // A single pass is enough: no replacement string contains a search string.
    // Text split across several events is fine too, since every search string is
    // a single character.
    String replaced = StringUtils.replaceEach(original, searchList, replacementList);
    if (replaced.equals(original)) {
        // Reuse the original event so its whitespace/CDATA flags are kept as-is.
        return characters;
    }
    if (characters.isCData()) {
        return m_eventFactory.createCData(replaced);
    }
    return m_eventFactory.createCharacters(replaced);
}