我希望将大型 XML 文件拆分成多个小文件。目前我使用 VTDGen 进行拆分,它适用于小于 2 GB 的文件。但 VTD-XML 是在内存中解析 XML 的,而我不想把整个 XML 加载到内存中,所以我尝试改用 VTDGenHuge 的内存映射(MEM_MAPPED)模式。
下面的代码使用 VTDGen 时可以正常工作,但改用 VTDGenHuge 后就无法工作了。
// Splits the /Employees/Employee elements of a very large XML file into several
// smaller files holding `splitBy` employees each, using the extended
// (memory-mapped) VTD-XML API so the document is never loaded onto the heap.
//
// Every output file is wrapped in its own XML declaration and <Employees> root.
String prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<Employees>\n";
String suffix = "\n</Employees>\n";
FileOutputStream fos = null;
try {
    VTDGenHuge vg = new VTDGenHuge();
    if (vg.parseFile("C:\\Users\\abc\\Desktop\\latestxml\\Input_1.xml", true, VTDGenHuge.MEM_MAPPED)) {
        // Number of Employee elements per output file. The original snippet left
        // this value blank; 3 matches the desired output shown in the question.
        int splitBy = 3;
        System.out.println("Started time" + new Date());
        VTDNavHuge vn = vg.getNav();
        AutoPilotHuge ap = new AutoPilotHuge(vn);
        ap.selectXPath("/Employees/Employee");

        // In MEM_MAPPED mode the document is backed by memory-mapped pages, not a
        // single byte[], so getXML().getBytes() yields null. Keep the IByteBuffer
        // and let it copy byte ranges straight to the output stream instead.
        // NOTE(review): writeToFileOutputStream(FileOutputStream, long, long) is
        // taken from the extended VTD-XML IByteBuffer interface - confirm against
        // the library version in use.
        com.ximpleware.extended.IByteBuffer xml = vn.getXML();

        int k = 0; // index of the current Employee element
        while (ap.evalXPath() != -1) {
            // The huge navigator reports a fragment as {offset, length} in a
            // long[2] rather than as one packed 64-bit value.
            long[] fragment = vn.getElementFragment();

            if (k % splitBy == 0) {
                // Finish the previous chunk (if any) before starting a new file.
                if (fos != null) {
                    fos.write(suffix.getBytes("UTF-8"));
                    fos.close();
                    fos = null;
                }
                fos = new FileOutputStream("C:\\Users\\abc\\Desktop\\Test\\xml\\" + "out" + k + ".xml");
                fos.write(prefix.getBytes("UTF-8"));
            }

            fos.write("\n".getBytes("UTF-8"));
            xml.writeToFileOutputStream(fos, fragment[0], fragment[1]);
            k++;
        }

        // Close the last, possibly partially filled, chunk.
        if (fos != null) {
            fos.write(suffix.getBytes("UTF-8"));
            fos.close();
            fos = null;
        }
    }
} catch (Exception e) {
    e.printStackTrace();
} finally {
    // Only reached with an open stream when an exception interrupted the loop;
    // release the file handle so the partial output is not left locked.
    if (fos != null) {
        try {
            fos.close();
        } catch (Exception closeError) {
            closeError.printStackTrace();
        }
    }
}
问题出在 `byte[] xml = vn.getXML().getBytes();` 这一行:我得到的是 null。用 System.out.println 打印 vn.getXML() 时可以看到它是一个有效的对象,但 getBytes() 却返回 null,我不知道为什么。不过如果调用 byteAt(x)(x 为任意 long 值),它能够正常返回对应的字节。
我的xml文件是:
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
</Employees>
我希望得到如下输出(每个文件包含 3 个 Employee 元素):
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
</Employees>
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
</Employees>
答案 0 :(得分:1)
我认为,与标准版 vtd-xml 不同,扩展版 vtd-xml 的 vn.getXML() 返回的是一个 IByteBuffer 接口对象。你可以调用该接口上名为 writeOutputToFile() 的方法,并把偏移量和长度参数传给它。抱歉,这部分的文档比较欠缺,但这就是最基本的底层用法。
答案 1 :(得分:-1)
试试这个
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication1
{
    /// <summary>
    /// Splits a large <c>Employees</c> XML file into smaller files that each hold
    /// at most <see cref="OUTPUT_ELEMENTS"/> <c>Employee</c> elements.
    /// The input is streamed with an <see cref="XmlReader"/>, so only the chunk
    /// currently being assembled is held in memory.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";
        const int OUTPUT_ELEMENTS = 3;

        /// <summary>
        /// Reads <see cref="FILENAME"/> and writes the chunks to
        /// <c>c:\temp\test1.xml</c>, <c>c:\temp\test2.xml</c>, and so on.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int count = 0;
            XDocument doc = null;

            // Dispose the reader so the input file handle is released even on failure.
            using (XmlReader reader = XmlReader.Create(FILENAME, new XmlReaderSettings() { IgnoreWhitespace = true }))
            {
                reader.ReadToFollowing("Employee");
                while (!reader.EOF)
                {
                    if (reader.NodeType == XmlNodeType.Element && reader.Name == "Employee")
                    {
                        if (doc == null)
                        {
                            doc = CreateEmptyDocument();
                        }

                        // ReadFrom consumes the whole element and leaves the reader on
                        // the node that follows it, so no extra Read() is needed here.
                        doc.Root.Add(XNode.ReadFrom(reader));
                        count += 1;

                        if (count % OUTPUT_ELEMENTS == 0)
                        {
                            doc.Save(OutputPath(count / OUTPUT_ELEMENTS));
                            doc = null;
                        }
                    }
                    else
                    {
                        // Skip anything that is not an Employee start tag (end tags,
                        // comments, other elements) until the end of the input.
                        reader.Read();
                    }
                }
            }

            // Flush the last, partially filled chunk. It gets the next free index so
            // it neither overwrites nor shares a number with the last full chunk.
            if (doc != null)
            {
                doc.Save(OutputPath(count / OUTPUT_ELEMENTS + 1));
            }
        }

        /// <summary>Creates an output document containing only an empty <c>Employees</c> root.</summary>
        private static XDocument CreateEmptyDocument()
        {
            return new XDocument(
                new XDeclaration("1.0", "UTF-8", null),
                new XElement("Employees"));
        }

        /// <summary>Builds the output file path for the chunk with the given 1-based index.</summary>
        private static string OutputPath(int index)
        {
            return string.Format(@"c:\temp\test{0}.xml", index);
        }
    }
}