我尝试读取已经OCR和可搜索的PDF文件。我使用itextSharp库和C#。我尝试在特定的位置/盒子上重新调整。但是,我找不到方法。因此,我选择删除不需要的PDF部分。我使用PDF压模。问题是,当我阅读加盖印章的PDF文件时,我的应用程序仍会读取已经加盖印章的信息。看起来压模仅隐藏内容,而不删除。如何删除内容?
string newFileName = "process_temp.pdf";
string processPdf = filePdf.DirectoryName + "\\" + newFileName;
PdfReader pdfOriginal = new PdfReader(filePdf.FullName);
PdfStamper stamper = new PdfStamper(pdfOriginal, new FileStream(processPdf, FileMode.Create, FileAccess.Write));
for (int page = 1; page <= pdfOriginal.NumberOfPages; page++)
{
List<PdfCleanUpLocation> cleanUpLocations = new List<PdfCleanUpLocation>();
const float dpi = 72;
if (page == 1)
{
cleanUpLocations.Add(new PdfCleanUpLocation(1, new iTextSharp.text.Rectangle((float)(3.5 * 72), 0f, 8 * dpi, 8 * dpi), iTextSharp.text.BaseColor.WHITE));
cleanUpLocations.Add(new PdfCleanUpLocation(1, new iTextSharp.text.Rectangle(0f, (float)(5.5 * 72), 8 * dpi, 10 * dpi), iTextSharp.text.BaseColor.WHITE));
}
else
{
cleanUpLocations.Add(new PdfCleanUpLocation(page, new iTextSharp.text.Rectangle((float)(3.5 * 72), 0f, 8 * dpi, 8 * dpi), iTextSharp.text.BaseColor.WHITE));
cleanUpLocations.Add(new PdfCleanUpLocation(page, new iTextSharp.text.Rectangle(0f, 7 * 72, 8 * dpi, 10 * dpi), iTextSharp.text.BaseColor.WHITE));
}
PdfCleanUpProcessor cleaner = new PdfCleanUpProcessor(cleanUpLocations, stamper);
cleaner.CleanUp();
}
stamper.Close();
pdfOriginal.Close();
PdfReader pdfProcess = new PdfReader(filePdf.FullName);
for (int page = 1; page <= pdfProcess.NumberOfPages; page++)
{
ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();
string currentText = PdfTextExtractor.GetTextFromPage(pdfProcess, page, strategy);
currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
text.Append(currentText);
}
pdfProcess.Close();
您能帮我解决这个问题吗?预先感谢。