我需要阅读 pdf文件中的表格,然后在 HTML格式或控制台中以PDF格式打印表格。我有一个示例代码,用于读取表格中的文本,但是我需要按照列中的方式逐行读取并按照我们在Image.I中使用PDFBox的方式进行打印。请参阅此示例图像
import org.pdfbox.cos.COSDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
import org.pdfbox.util.PDFTextStripper;
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintWriter;
public class PDFreader {
PDFParser parser;
String parsedText;
PDFTextStripper pdfStripper;
PDDocument pdDoc;
COSDocument cosDoc;
PDDocumentInformation pdDocInfo;
// PDFTextParser Constructor
public PDFTextParser() {
}
// Extract text from PDF Document
String pdftoText(String fileName) {
System.out.println("Parsing text from PDF file " + fileName +"....");
File f = new File(fileName);
if (!f.isFile()) {
System.out.println("File " + fileName + " does not exist.");
return null;
}
try {
parser = new PDFParser(new FileInputStream(f));
} catch (Exception e) {
System.out.println("Unable to open PDF Parser.");
return null;
}
try {
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
parsedText = pdfStripper.getText(pdDoc);
} catch (Exception e) {
System.out.println("An exception occured in parsing the PDF Document.");
e.printStackTrace();
try {
if (cosDoc != null) cosDoc.close();
if (pdDoc != null) pdDoc.close();
} catch (Exception e1) {
e.printStackTrace();
}
return null;
}
System.out.println("Done.");
return parsedText;
}
// Write the parsed text from PDF to a file
void writeTexttoFile(String pdfText, String fileName) {
System.out.println("\nWriting PDF text to output text file " + fileName + "....");
try {
PrintWriter pw = new PrintWriter(fileName);
pw.print(pdfText);
pw.close();
} catch (Exception e) {
System.out.println("An exception occured in writing the pdf text to file.");
e.printStackTrace();
}
System.out.println("Done.");
}
public static void main(String args[]) {
String fileList[] = {"SO115638.pdf","New_Document.txt"};
if (fileList.length != 2) {
System.out.println("Usage: java PDFTextParser <InputPDFFilename> <OutputTextFile>");
System.exit(1);
}
PDFTextParser pdfTextParserObj = new PDFTextParser();
String pdfToText = pdfTextParserObj.pdftoText(fileList[0]);
if (pdfToText == null) {
System.out.println("PDF to Text Conversion failed.");
}
else {
System.out.println("\nThe text parsed from the PDF Document....\n" + pdfToText);
pdfTextParserObj.writeTexttoFile(pdfToText, fileList[1]);
}
} }
答案 0 :(得分:1)
只需在代码中进行简单编辑即可。 您应该给出该文件名的绝对路径,而不是文件名。 例如:
String fileList[] = {"E:\\JavaApplication14\\src\\javaapplication14\\p10071.pdf", "E:\\JavaApplication14\\src\\javaapplication14\\newTextDocument.txt"};
将jar包含在项目类路径中。
commons-logging-1.1.1.jar
fontbox-1.4.0.jar
pdfbox-1.2.0.jar
完整的代码就像(在运行之前更改您的类名和构造函数):
public class PDFTextParser {
PDFParser parser;
String parsedText;
PDFTextStripper pdfStripper;
PDDocument pdDoc;
COSDocument cosDoc;
// PDDocumentInformation pdDocInfo;
// PDFTextParser Constructor
public PDFTextParser() {
}
// Extract text from PDF Document
String pdftoText(String fileName) {
System.out.println("Parsing text from PDF file " + fileName + "....");
File f = new File(fileName);
if (!f.isFile()) {
System.out.println("File " + fileName + " does not exist.");
return null;
}
try {
parser = new PDFParser(new FileInputStream(f));
} catch (Exception e) {
System.out.println("Unable to open PDF Parser.");
return null;
}
try {
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
parsedText = pdfStripper.getText(pdDoc);
} catch (Exception e) {
System.out.println("An exception occured in parsing the PDF Document.");
e.printStackTrace();
try {
if (cosDoc != null) {
cosDoc.close();
}
if (pdDoc != null) {
pdDoc.close();
}
} catch (Exception e1) {
e.printStackTrace();
}
return null;
}
System.out.println("Done.");
return parsedText;
}
// Write the parsed text from PDF to a file
void writeTexttoFile(String pdfText, String fileName) {
System.out.println("\nWriting PDF text to output text file " + fileName + "....");
try {
PrintWriter pw = new PrintWriter(fileName);
pw.print(pdfText);
pw.close();
} catch (Exception e) {
System.out.println("An exception occured in writing the pdf text to file.");
e.printStackTrace();
}
System.out.println("Done.");
}
public static void main(String args[]) {
String fileList[] = {"E:\\JavaApplication14\\src\\javaapplication14\\p10071.pdf", "E:\\JavaApplication14\\src\\javaapplication14\\newTextDocument.txt"};
if (fileList.length != 2) {
System.out.println("Usage: java PDFTextParser <InputPDFFilename> <OutputTextFile>");
System.exit(1);
}
PDFTextParser pdfTextParserObj = new PDFTextParser();
String pdfToText = pdfTextParserObj.pdftoText(fileList[0]);
if (pdfToText == null) {
System.out.println("PDF to Text Conversion failed.");
} else {
System.out.println("\nThe text parsed from the PDF Document....\n" + pdfToText);
pdfTextParserObj.writeTexttoFile(pdfToText, fileList[1]);
}
}
}