是否有可以执行此操作的库?我查看了docx4j,但(我认为)它没有统计docx文件单词数的功能。
答案 0 :(得分:1)
最合适的Java库是Apache POI。
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
public class NewDocReader {
public static void main(String args[]) throws FileNotFoundException, IOException
{
File docFile=new File("c:\\multi\\multi.docx"); // file object was created
FileInputStream finStream=new FileInputStream(docFile.getAbsolutePath()); // file input stream with docFile
HWPFDocument doc=new HWPFDocument(finStream);// throws IOException and need to import org.apache.poi.hwpf.HWPFDocument;
WordExtractor wordExtract=new WordExtractor(doc); // import org.apache.poi.hwpf.extractor.WordExtractor
String [] dataArray =wordExtract.getParagraphText();
// dataArray stores the each line from the document
int pozicijaBlankoMesta;
for(int i=0;i<dataArray.length;i++)
{ .............
}
}