我是 Docx4j 的新手,需要帮助:如何在 Java 中使用 docx4j 根据某个字符串拆分 docx 文件,并将输出写入多个文件。
我曾尝试使用 Apache POI 实现同样的功能并得到了输出,但在将输出转换为 HTML 时出现了样式缺失的问题;之后我补充了样式复制的代码,仍然遇到同样的问题。
以下是使用apache poi的代码:
// Index of the element most recently appended to a destination document;
// main() recomputes it after every createParagraph()/createTable() call.
public static int pos = 0;
// Not referenced by any code visible in this snippet.
public static int posc = 0;
// Split flag: "n" while elements still belong to the first (product) document,
// switched to "y" once a paragraph containing "CONSTRUCTION DETAILS:" is seen.
public static String ind = "n";
// Not referenced by any code visible in this snippet.
final static int DEFAULT_FONT_SIZE = 10;
/**
 * Splits each input document into a "-Product" and a "-Component" document.
 *
 * <p>Every entry {@code X} of the directory named by the {@code INPUT_DIR}
 * property is expected to contain {@code X/X.docx}. Body elements up to (but
 * not including) the first paragraph containing "CONSTRUCTION DETAILS:" are
 * written to {@code X/X-Product.docx}; that paragraph and everything after it
 * go to {@code X/X-Component.docx}.
 *
 * @param args unused
 * @throws FileNotFoundException if the input directory cannot be listed or an
 *         output file cannot be created
 * @throws IOException if reading or writing a document fails
 * @throws XmlException declared for compatibility with existing callers
 */
public static void main(String[] args) throws FileNotFoundException,
        IOException, XmlException {
    String inputDir = PropertyUtils.getProperty("INPUT_DIR");
    File dir = new File(inputDir);
    String[] files = dir.list();
    if (files == null) {
        // list() returns null when the path is not a readable directory.
        throw new FileNotFoundException(
                "Input directory not found or not readable: " + dir);
    }
    if (files.length == 0) {
        System.out.println("The directory is empty");
        return;
    }
    for (String aFile : files) {
        System.out.println(aFile);
        String base = inputDir + aFile + "/" + aFile;
        File file = new File(base + ".docx");
        if (!file.isFile()) {
            // Entries that do not follow the X/X.docx layout are reported and skipped.
            System.out.println("Skipping " + aFile + ": " + file + " not found");
            continue;
        }
        splitDocument(file, new File(base + "-Product.docx"),
                new File(base + "-Component.docx"));
    }
}

/**
 * Splits one source document at the "CONSTRUCTION DETAILS:" paragraph and
 * writes the two halves to the given output files.
 */
private static void splitDocument(File file, File outfilep, File outfilec)
        throws IOException {
    XWPFDocument doc;
    FileInputStream in = new FileInputStream(file);
    try {
        doc = new XWPFDocument(in);
    } finally {
        in.close();
    }
    XWPFDocument destDoc = new XWPFDocument();
    // NOTE(review): page layout is copied into the product document only,
    // as in the original code - confirm whether the component document needs it too.
    copyLayout(doc, destDoc);
    XWPFDocument destDocc = new XWPFDocument();

    // Reset the split flag so a previous document cannot leak its state.
    ind = "n";
    for (IBodyElement bodyElement : doc.getBodyElements()) {
        BodyElementType elementType = bodyElement.getElementType();
        if (elementType == BodyElementType.PARAGRAPH) {
            XWPFParagraph pr = (XWPFParagraph) bodyElement;
            if (pr.getText().contains("CONSTRUCTION DETAILS:")) {
                ind = "y";
                System.out.println("ind is Y++++++++++++");
            }
            appendParagraph(doc, "n".equals(ind) ? destDoc : destDocc, pr);
        } else if (elementType == BodyElementType.TABLE) {
            appendTable(doc, "n".equals(ind) ? destDoc : destDocc,
                    (XWPFTable) bodyElement);
        }
    }

    // Open each output only when there is something to write, and always close it.
    OutputStream out = new FileOutputStream(outfilep);
    try {
        destDoc.write(out);
    } finally {
        out.close();
    }
    OutputStream outc = new FileOutputStream(outfilec);
    try {
        destDocc.write(outc);
    } finally {
        outc.close();
    }
}

/** Looks up a style of the source document by id; returns null when it has no styles. */
private static XWPFStyle lookupStyle(XWPFDocument srcDoc, String styleId) {
    if (srcDoc.getStyles() == null) {
        return null;
    }
    return srcDoc.getStyles().getStyle(styleId);
}

/** Appends a source paragraph (with normalised spacing) and its style to the target. */
private static void appendParagraph(XWPFDocument srcDoc, XWPFDocument target,
        XWPFParagraph pr) {
    copyStyle(srcDoc, target, lookupStyle(srcDoc, pr.getStyleID()));
    // Create a placeholder paragraph, then overwrite it with the source paragraph.
    target.createParagraph().createRun();
    pos = target.getParagraphs().size() - 1;

    CTPPr ppr = pr.getCTP().getPPr();
    if (ppr == null) {
        ppr = pr.getCTP().addNewPPr();
    }
    CTSpacing spacing = ppr.isSetSpacing() ? ppr.getSpacing() : ppr.addNewSpacing();
    spacing.setAfter(BigInteger.valueOf(0));
    spacing.setBefore(BigInteger.valueOf(0));
    spacing.setLineRule(STLineSpacingRule.AUTO);
    spacing.setLine(BigInteger.valueOf(240));

    target.setParagraph(pr, pos);
}

/** Appends a source table and its style to the target. */
private static void appendTable(XWPFDocument srcDoc, XWPFDocument target,
        XWPFTable table) {
    copyStyle(srcDoc, target, lookupStyle(srcDoc, table.getStyleID()));
    // Create a placeholder table, then overwrite it with the source table.
    target.createTable();
    pos = target.getTables().size() - 1;
    target.setTable(pos, table);
}
/**
 * Copies a table or paragraph style, together with the styles it depends on,
 * from the source document into the destination document.
 *
 * <p>Styles whose id already exists in the destination are skipped, so calling
 * this once per copied paragraph does not pile up duplicate definitions.
 *
 * @param srcDoc  document the style belongs to
 * @param destDoc document receiving the styles; nothing happens when null
 * @param style   style to copy; nothing happens when null
 */
private static void copyStyle(XWPFDocument srcDoc, XWPFDocument destDoc,
        XWPFStyle style) {
    if (destDoc == null || style == null) {
        return;
    }
    if (srcDoc == null || srcDoc.getStyles() == null) {
        // No style table to resolve the dependency chain from.
        return;
    }
    if (destDoc.getStyles() == null) {
        destDoc.createStyles();
    }
    List<XWPFStyle> usedStyleList = srcDoc.getStyles().getUsedStyleList(style);
    for (XWPFStyle xwpfStyle : usedStyleList) {
        String styleId = xwpfStyle.getStyleId();
        if (styleId != null && destDoc.getStyles().styleExist(styleId)) {
            // Already copied by an earlier call.
            continue;
        }
        destDoc.getStyles().addStyle(xwpfStyle);
    }
}
/**
 * Copies the page margins and page size of the source document's body-level
 * section properties into the destination document.
 *
 * <p>The destination gets a single section-properties element, which is reused
 * if it already exists. Missing section properties, margins, page size or
 * individual attributes in the source are skipped rather than causing a
 * NullPointerException.
 *
 * @param srcDoc  document to read the layout from
 * @param destDoc document to apply the layout to
 */
private static void copyLayout(XWPFDocument srcDoc, XWPFDocument destDoc)
{
    if (srcDoc.getDocument().getBody().getSectPr() == null) {
        // Source defines no body-level section properties: nothing to copy.
        return;
    }
    // The body may hold only one sectPr, so create it at most once.
    if (!destDoc.getDocument().getBody().isSetSectPr()) {
        destDoc.getDocument().getBody().addNewSectPr();
    }

    CTPageMar pgMar = srcDoc.getDocument().getBody().getSectPr().getPgMar();
    if (pgMar != null) {
        CTPageMar destPgMar = destDoc.getDocument().getBody().getSectPr().isSetPgMar()
                ? destDoc.getDocument().getBody().getSectPr().getPgMar()
                : destDoc.getDocument().getBody().getSectPr().addNewPgMar();
        BigInteger bottom = pgMar.getBottom();
        BigInteger footer = pgMar.getFooter();
        BigInteger gutter = pgMar.getGutter();
        BigInteger header = pgMar.getHeader();
        BigInteger left = pgMar.getLeft();
        BigInteger right = pgMar.getRight();
        BigInteger top = pgMar.getTop();
        if (bottom != null) {
            destPgMar.setBottom(bottom);
        }
        if (footer != null) {
            destPgMar.setFooter(footer);
        }
        if (gutter != null) {
            destPgMar.setGutter(gutter);
        }
        if (header != null) {
            destPgMar.setHeader(header);
        }
        if (left != null) {
            destPgMar.setLeft(left);
        }
        if (right != null) {
            destPgMar.setRight(right);
        }
        if (top != null) {
            destPgMar.setTop(top);
        }
    }

    CTPageSz pgSzSrc = srcDoc.getDocument().getBody().getSectPr().getPgSz();
    if (pgSzSrc != null) {
        CTPageSz destPgSz = destDoc.getDocument().getBody().getSectPr().isSetPgSz()
                ? destDoc.getDocument().getBody().getSectPr().getPgSz()
                : destDoc.getDocument().getBody().getSectPr().addNewPgSz();
        BigInteger code = pgSzSrc.getCode();
        BigInteger h = pgSzSrc.getH();
        Enum orient = pgSzSrc.getOrient();
        BigInteger w = pgSzSrc.getW();
        if (code != null) {
            destPgSz.setCode(code);
        }
        if (h != null) {
            destPgSz.setH(h);
        }
        if (orient != null) {
            destPgSz.setOrient(orient);
        }
        if (w != null) {
            destPgSz.setW(w);
        }
    }
}
答案 0 :(得分:1)
分割docx很容易以某种方式进行蛮力:你可以删除你不想要的内容(段落等),然后保存结果。
这样,原始关系将保持不变,但是您的docx容器可能比必要的大,因为它可能具有不再使用的图像等。
通过这种方式,您仍需要注意以下事项:
显然,您可以编写代码来解决此类问题。
或者,使用我们的商业企业版 docx4j,您可以使用其中的 "merge" 代码:只需指定您想要段落 X 到 Y,它就会给您一个仅包含这些内容的 docx(即 docx 容器中不会有无关的图像、被拆开的书签等)。
答案 1 :(得分:0)
我希望这能解决问题。
/**
 * Splits docx files in two at the "CONSTRUCTION DETAILS:" paragraph using docx4j.
 */
public class SplitUsingDocx4j {

    /** Text of the paragraph at which a document is split (compared ignoring case). */
    private static final String SPLIT_MARKER = "CONSTRUCTION DETAILS:";

    /**
     * Splits every {@code X/X.docx} found under the directory named by the
     * {@code INPUT_DIR} property.
     *
     * @param args unused
     * @throws Docx4JException if a document cannot be loaded or saved
     * @throws FileNotFoundException if the input directory cannot be listed
     */
    public static void main(String[] args) throws Docx4JException,
            FileNotFoundException {
        String inputDir = PropertyUtils.getProperty("INPUT_DIR");
        File dir = new File(inputDir);
        String[] files = dir.list();
        if (files == null) {
            // list() returns null when the path is not a readable directory.
            throw new FileNotFoundException(
                    "Input directory not found or not readable: " + dir);
        }
        if (files.length == 0) {
            System.out.println("The directory is empty");
            return;
        }
        for (String aFile : files) {
            System.out.println(aFile);
            File file = new File(inputDir + aFile + "/" + aFile + ".docx");
            if (!file.isFile()) {
                // Entries that do not follow the X/X.docx layout are reported and skipped.
                System.out.println("Skipping " + aFile + ": " + file + " not found");
                continue;
            }
            split(file);
        }
    }

    /**
     * Splits one document: content up to and including the marker paragraph is
     * saved as {@code <name>-1.docx}, the rest as {@code <name>-2.docx}, both
     * relative to the current working directory.
     */
    private static void split(File file) throws Docx4JException {
        // Creating new documents
        WordprocessingMLPackage doc1 = WordprocessingMLPackage.createPackage();
        WordprocessingMLPackage doc2 = WordprocessingMLPackage.createPackage();

        // loading existing document
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(file);
        MainDocumentPart tempDocPart = wordMLPackage.getMainDocumentPart();
        List<Object> obj = tempDocPart.getContent();

        // for copying styles from existing doc to new docs
        StyleDefinitionsPart sdp = tempDocPart.getStyleDefinitionsPart();
        if (sdp != null) {
            // NOTE(review): both outputs share the source's Styles object, as in
            // the original code - confirm no later step mutates it per document.
            Styles tempStyle = sdp.getJaxbElement();
            doc1.getMainDocumentPart().getStyleDefinitionsPart()
                    .setJaxbElement(tempStyle);
            doc2.getMainDocumentPart().getStyleDefinitionsPart()
                    .setJaxbElement(tempStyle);
        }

        boolean flag = false;
        for (Object object : obj) {
            if (!flag) {
                if (object.toString().equalsIgnoreCase(SPLIT_MARKER)) {
                    flag = true;
                }
                // The marker element itself still belongs to the first document.
                doc1.getMainDocumentPart().addObject(object);
            } else {
                doc2.getMainDocumentPart().addObject(object);
            }
        }

        String fileName = file.getName().replace(".docx", "");
        doc1.save(new File(fileName + "-1.docx"));
        doc2.save(new File(fileName + "-2.docx"));
    }
}