我在 Visual Studio 中创建了一个 C# 控制台应用程序,想借助 IKVM 和 PDFBox 把 PDF 的全部内容提取到一个文本文件中,以便之后使用 Microsoft Visual Studio 提供的 Coded UI(编码的 UI 测试,C#)功能来验证该 PDF。运行时,程序在 “return stripper.getText(doc)” 这一行抛出了 NullReferenceException(空引用异常)。有人能帮我看看吗?代码如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using org.pdfbox.pdmodel;
using org.pdfbox.util;
using System.IO;
using System.Windows.Forms;
using System.Collections;
using java.io;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace ConsoleApplication1
{
/// <summary>
/// Console entry point that extracts the text of a PDF with PDFBox (via IKVM)
/// and writes it to a plain text file.
/// </summary>
class program
{
    /// <summary>
    /// Extracts the text of the hard-coded input PDF into the hard-coded output
    /// file, reports I/O failures on the console and waits for Enter before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string path = @"C:\Trishna Chopade\Packages\";
        string fileIn = path + "JAVA.pdf";
        string fileOut = path + "Trish.txt";
        try
        {
            // The output file is created by ReadFile1, so it does not have to exist
            // beforehand; opening it here only made the run fail on a fresh machine.
            System.Text.Encoding fileInEnc = GetFileEncoding(fileIn);

            // Read from PDF.
            ReadFile1(fileIn, fileOut, fileInEnc);
        }
        catch (System.IO.IOException ioe)
        {
            Console.WriteLine("error " + ioe);
        }
        catch (java.io.IOException jioe)
        {
            // PDFBox reports load/parse failures through the Java exception
            // hierarchy, which is unrelated to System.IO.IOException.
            Console.WriteLine("error " + jioe);
        }
        Console.ReadLine();
    }

    /// <summary>
    /// Guesses the text encoding of a file from its byte order mark (BOM).
    /// </summary>
    /// <param name="fileIn">Path of the file to inspect.</param>
    /// <returns>
    /// The encoding that matches the BOM, or <see cref="Encoding.ASCII"/> when the
    /// file has no recognised BOM (including files too short to hold one).
    /// </returns>
    private static Encoding GetFileEncoding(string fileIn)
    {
        // Read the BOM, remembering how many bytes the file really supplied so the
        // zero padding of the buffer is never mistaken for BOM bytes.
        var bom = new byte[4];
        int read = 0;
        using (var file = new FileStream(fileIn, FileMode.Open, FileAccess.Read))
        {
            int count;
            while (read < bom.Length && (count = file.Read(bom, read, bom.Length - read)) > 0)
            {
                read += count;
            }
        }

        // Analyze the BOM. The UTF-32 checks come first because the UTF-32 LE BOM
        // (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE).
        if (read >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
        {
            return Encoding.UTF32; // UTF-32LE
        }
        if (read >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
        {
            // Encoding.UTF32 is little-endian, so the big-endian variant is built explicitly.
            return new System.Text.UTF32Encoding(true, true); // UTF-32BE
        }
        if (read >= 3 && bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
        {
            return Encoding.UTF7;
        }
        if (read >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
        {
            return Encoding.UTF8;
        }
        if (read >= 2 && bom[0] == 0xff && bom[1] == 0xfe)
        {
            return Encoding.Unicode; // UTF-16LE
        }
        if (read >= 2 && bom[0] == 0xfe && bom[1] == 0xff)
        {
            return Encoding.BigEndianUnicode; // UTF-16BE
        }
        return Encoding.ASCII;
    }

    /// <summary>
    /// Extracts the text of a PDF and writes it to a text file, replacing any
    /// previous content of that file.
    /// </summary>
    /// <param name="fileIn">Path of the PDF to read.</param>
    /// <param name="fileOut">Path of the text file to create or overwrite.</param>
    /// <param name="fileInEnc">
    /// Not used; kept so existing callers keep compiling. The output is always
    /// written as UTF-8 without a BOM.
    /// </param>
    public static void ReadFile1(string fileIn, string fileOut, System.Text.Encoding fileInEnc)
    {
        // Extract first: if extraction fails, an existing output file is left untouched.
        string text = ParseUsingPDFBox(fileIn);

        // FileMode.Create truncates, so a shorter result cannot leave stale bytes
        // behind. A StreamWriter emits only the characters, whereas
        // BinaryWriter.Write(string) prepends a length prefix to the text.
        using (FileStream fs = new FileStream(fileOut, FileMode.Create, FileAccess.Write))
        {
            using (System.IO.StreamWriter writer = new System.IO.StreamWriter(fs, new System.Text.UTF8Encoding(false)))
            {
                writer.Write(text);
            }
        }
    }

    /// <summary>
    /// Loads a PDF with PDFBox and returns the text produced by a default
    /// <c>PDFTextStripper</c>.
    /// </summary>
    /// <param name="input">Path of the PDF file.</param>
    /// <returns>The text returned by <c>PDFTextStripper.getText</c>.</returns>
    private static string ParseUsingPDFBox(string input)
    {
        PDDocument doc = PDDocument.load(input);
        try
        {
            // NOTE(review): a NullReferenceException raised inside getText cannot be
            // explained by this code alone; presumably a companion assembly or
            // resource of the PDFBox/IKVM build is missing at run time - TODO confirm
            // that every DLL shipped with PDFBox is copied next to the executable.
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(doc);
        }
        finally
        {
            // Release the document's underlying file handle and buffers.
            if (doc != null)
            {
                doc.close();
            }
        }
    }
}
}