解析包含标记的HTML文本不会在
之后重置大小和字体。除了之后,我的代码运行良好。
在之前blablabla ..文本大小为11。我期望在之后,文本大小被重置为11,但仍保持为9。
当然,我误解了如何使用jsoup。我最好使用CSS,但我不知道该怎么做。
感谢帮助。
package test;
import java.awt.Color;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblLayoutType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTblLayoutType;
public class ReadHtml
{
protected static java.util.Vector<String> contenu = null;
org.apache.xmlbeans.XmlCursor cursor = null;
class WStyle
{
protected String police = "Times New Roman";
protected int taille = 11;
protected Color couleur = Color.black;
protected boolean gras = false;
protected boolean italique = false;
public WStyle() // constructeur
{
super();
}
protected String getPolice() {return police;}
protected int getTaille() {return taille;}
protected Color getCouleur() {return couleur;}
protected boolean getGras() {return gras;}
protected boolean getItalique() {return italique;}
protected void setPolice(String p) {police=p;}
protected void setTaille(int t) {taille=t;}
protected void setCouleur(Color c) {couleur=c;}
protected void setGras(boolean g) {gras=g;}
protected void setItalique(boolean i) {italique=i;}
}
public ReadHtml()
{
super();
contenu = new java.util.Vector<String>();
createWordFile();
}
private XWPFParagraph getTableParagraph(XWPFTableCell cell, String html)
{
cell.removeParagraph(0);
XWPFParagraph paragraph = cell.addParagraph();
paragraph.setSpacingAfterLines(0);
paragraph.setSpacingAfter(0);
Document htmlDocument = Jsoup.parse(html);
Elements htmlParagraphs = htmlDocument.select("p");
for(Element htmlParagraph : htmlParagraphs)
{
System.out.println(htmlParagraph);
ParagraphNodeVisitor nodeVisitor = new ParagraphNodeVisitor(paragraph);
NodeTraversor.traverse(nodeVisitor, htmlParagraph);
}
return paragraph;
}
private void createWordFile()
{
XWPFParagraph para = null;
try
{
XWPFDocument document = new XWPFDocument();
FileOutputStream out = new FileOutputStream(new File("./", "NewTable.docx"));
XWPFTable table = document.createTable();
CTTblLayoutType type = table.getCTTbl().getTblPr().addNewTblLayout();
type.setType(STTblLayoutType.FIXED);
table.getCTTbl().addNewTblGrid().addNewGridCol().setW(BigInteger.valueOf(1670));
table.getCTTbl().getTblGrid().addNewGridCol().setW(BigInteger.valueOf(6000));
String myTexte = "<html><head</head><body><p><font face=\"Verdana\" size=11>Good Morning</font> <font size=9 face=\"Times\"> " +
"<i><b>how are you today </b></i></font> Not so bad.<br>Thanks";
// first line
XWPFTableRow tableRow= table.getRow(0);
para = getTableParagraph(tableRow.getCell(0), "<p>Row #1, Col. #1");
tableRow .getCell(0).setParagraph(para);
XWPFTableCell cell = tableRow.createCell();
para = getTableParagraph(cell, myTexte); // Row #1, Col. #2
tableRow .getCell(1).setParagraph(para);
// seconde line
tableRow= table.createRow();
para = getTableParagraph(tableRow.getCell(0), "<p>Row #2, Col. #1");
tableRow .getCell(0).setParagraph(para);
para = getTableParagraph(tableRow.getCell(1), "<p>Row #2, Col. #2");
tableRow.getCell(1).setParagraph(para);
document.write(out);
document.close();
out.close();
System.out.println("NewTable.docx written successully");
}
catch (FileNotFoundException e) {System.out.println("File exception --> " + e.toString()); }
catch (IOException e) {System.out.println("I/O exception --> " + e.toString()); }
catch (Exception e) {System.out.println("Other exception --> " + e.toString()); }
}
public class ParagraphNodeVisitor implements NodeVisitor
{
String nodeName;
String fontFace;
String fontType;
boolean needNewRun;
boolean isItalic;
boolean isBold;
boolean isUnderlined;
int fontSize;
String fontColor;
VerticalAlign align = VerticalAlign.BASELINE ;
XWPFParagraph paragraph;
XWPFRun run;
ParagraphNodeVisitor(XWPFParagraph paragraph)
{
this.paragraph = paragraph;
this.run = paragraph.createRun();
this.nodeName = "";
this.needNewRun = false;
this.isItalic = false;
this.isBold = false;
this.isUnderlined = false;
this.fontSize = 11;
this.fontColor = "000000";
this.fontFace="Times";
}
@Override
public void head(Node node, int depth)
{
nodeName = node.nodeName();
needNewRun = false;
if ("#text".equals(nodeName))
{
run.setText(((TextNode)node).text());
needNewRun = true; //after setting the text in the run a new run is needed
}
else if ("i".equals(nodeName)) {isItalic = true;}
else if ("b".equals(nodeName)) {isBold = true;}
else if ("sup".equals(nodeName)){align = VerticalAlign.SUPERSCRIPT ;}
else if ("u".equals(nodeName)) {isUnderlined = true;}
else if ("br".equals(nodeName)) {run.addBreak();}
else if ("p".equals(nodeName)) {run.addBreak();}
else if ("font".equals(nodeName))
{
fontColor = (!"".equals(node.attr("color")))?node.attr("color").substring(1):"000000";
fontSize = (!"".equals(node.attr("size")))?Integer.parseInt(node.attr("size")):11;
fontFace = (!"".equals(node.attr("face")))?node.attr("face"):"Times";
}
if (needNewRun) run = paragraph.createRun();
needNewRun = false;
run.setItalic(isItalic);
run.setBold(isBold);
if (isUnderlined) run.setUnderline(UnderlinePatterns.SINGLE);
else run.setUnderline(UnderlinePatterns.NONE);
run.setColor(fontColor);
run.setFontSize(fontSize);
run.setFontFamily(fontFace);
run.setSubscript(align);
}
@Override
public void tail(Node node, int depth)
{
nodeName = node.nodeName();
System.out.println("Node=" + nodeName);
if ("i".equals(nodeName)) {isItalic = false;}
else if ("b".equals(nodeName)) {isBold = false;}
else if ("u".equals(nodeName)) {isUnderlined = false;}
else if ("sup".equals(nodeName)) {align= VerticalAlign.BASELINE ;}
else if ("font".equals("nodeName"))
{
fontColor = "000000";
fontSize = 11;
fontFace="Times";
System.out.println("Family=" + fontFace + " Taille=" + fontSize);
}
if (needNewRun) run = paragraph.createRun();
needNewRun = false;
run.setItalic(isItalic);
run.setBold(isBold);
if (isUnderlined) run.setUnderline(UnderlinePatterns.SINGLE); else run.setUnderline(UnderlinePatterns.NONE);
run.setColor(fontColor);
run.setFontSize(fontSize);
run.setFontFamily(fontFace);
run.setSubscript(align);
}
}
public static void main(String[] args)
{
new ReadHtml() ;
}
}
答案 0 :(得分:1)
请在您的tail
方法中更改以下行,
else if ("font".equals("nodeName"))
到
else if ("font".equals(nodeName))
您已经比较了两个字符串文字,而不是将字符串文字与变量进行比较。由于输入错误,该条件始终为false
,因此fontSize
永远不会重置。