我有一个项目从html转换为带有java的docx,在html文档中我有2个段落,带有2个标题作为标题,但是当它们转换为docx格式时,只有一个段落成功转换,但另一个段落即使具有相同的标记也不会转换。看下面的图片
代码看起来像这样
import java.io.File;
import java.io.FileOutputStream;
import java.util.List;
import java.util.Set;
import static org.apache.poi.hslf.model.textproperties.TextPropCollection.TextPropType.paragraph;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.zwobble.mammoth.DocumentConverter;
import org.zwobble.mammoth.Result;
/**
*
* @author Alwan
*/
public class TestWord {
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
// TODO code application logic here
try
{
File file = new File("src/test/TEST.docx");
DocumentConverter converter = new DocumentConverter();
Result<String> result = converter.extractRawText(file);
String html = result.getValue(); // The generated HTML
Set<String> warnings = result.getWarnings(); // Any warnings during conversion
String[] part = html.split("<p>");
String[] part2 = html.split("<h1>");
FileOutputStream out = new FileOutputStream(new File("testformat.docx"));
XWPFDocument doc = new XWPFDocument();
XWPFParagraph paragraph = doc.createParagraph();
XWPFRun paragraphOneRunOne = paragraph.createRun();
XWPFRun paragraphOneRunThree = paragraph.createRun();
for (int i = 0; i < html.length(); i++)
{
if (i % 2 != 0)
{
paragraphOneRunOne.setBold(true);
paragraphOneRunOne.setItalic(true);
paragraphOneRunOne.setText(part[i].trim());
paragraphOneRunOne.addBreak();
paragraphOneRunThree.setStrike(true);
paragraphOneRunThree.setFontSize(20);
paragraphOneRunThree.setSubscript(VerticalAlign.SUBSCRIPT);
paragraphOneRunThree.setText(part2[i].trim());
System.out.println(part2[i].trim());
System.out.println(part[i].trim());
doc.write(out);
out.close();
}
System.out.println("testformat.docx written successully");
}
System.out.println("Success");
} catch(Exception e) {
e.printStackTrace();
}
}
}
问题是,当它具有相同的标签时,如何将html中的所有段落转换为docx格式?谢谢你的关注。抱歉我的英文不好