有人能帮忙说明如何获取文本的坐标吗?这可行吗?我想做一个窗体应用程序:用户在文本框中输入一个单词,程序使用 iTextSharp 读取现有的 PDF,如果找到匹配的单词就将其高亮显示,然后把带有高亮文本的 PDF 保存下来。到目前为止,我几乎完成了所有工作,包括绘制黄色矩形,唯一欠缺的是如何获取匹配文本的坐标,以便高亮显示它们。提前感谢!(顺便说明:sb 是搜索文本框,tb 是用来显示 PDF 文本的富文本框。)
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using iTextSharp.text;
using System.Text.RegularExpressions;
namespace manipulatePDF
{
public partial class Form1 : Form
{
// Path of the PDF chosen in open_Click; save_Click reads the source PDF from it.
string oldFile;
// iTextSharp document shared by the handlers; open_Click sets its page size from page 1 of the opened PDF.
Document document = new Document();
// Accumulates the text extracted from the PDF pages; open_Click shows it in the tb rich text box.
StringBuilder text = new StringBuilder();
/// <summary>
/// Creates the form by calling <c>InitializeComponent</c>, which is defined
/// in another part of this partial class.
/// </summary>
public Form1()
{
    this.InitializeComponent();
}
/// <summary>
/// Handles the Open button: lets the user pick a PDF, extracts the text of
/// every page and shows it in the <c>tb</c> rich text box.
/// </summary>
/// <param name="sender">Control that raised the event; forwarded to <c>reset_Click</c>.</param>
/// <param name="e">Event data; forwarded to <c>reset_Click</c>.</param>
private void open_Click(object sender, EventArgs e)
{
    reset_Click(sender, e);
    openFileDialog1.Filter = "PDF Files (.pdf)|*.pdf";
    openFileDialog1.FilterIndex = 1;
    if (openFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    label1.Text = "File Location: " + openFileDialog1.FileName;
    oldFile = openFileDialog1.FileName;

    // Start from an empty buffer so text of a previously opened file does not pile up.
    text.Length = 0;

    // open the reader
    PdfReader reader = new PdfReader(oldFile);
    try
    {
        iTextSharp.text.Rectangle size = reader.GetPageSizeWithRotation(1);
        document.SetPageSize(size);
        for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            // The extracted value is already a .NET string. It is appended as-is:
            // a round trip through Encoding.Default would replace every character
            // outside the ANSI code page with '?'.
            text.Append(PdfTextExtractor.GetTextFromPage(reader, cPage, strategy));
        }
    }
    finally
    {
        // Close once, after all pages were read (or extraction failed),
        // never between pages.
        reader.Close();
    }

    tb.Text = text.ToString();
}
/// <summary>
/// Handles the Save button: marks the matches of the search text in the
/// <c>tb</c> rich text box, then copies every page of the opened PDF into a
/// new PDF, drawing a yellow rectangle for each match found on a page.
/// </summary>
/// <remarks>
/// The rectangle is still drawn at a fixed placeholder position; locating the
/// real coordinates of the matched text is not implemented here.
/// </remarks>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void save_Click(object sender, EventArgs e)
{
    // Nothing to copy until a source PDF has been opened.
    if (string.IsNullOrEmpty(oldFile))
    {
        MessageBox.Show("Open a PDF file before saving.");
        return;
    }

    saveFileDialog1.InitialDirectory = @"C:\";
    saveFileDialog1.Title = "Save the PDF File";
    saveFileDialog1.Filter = "PDF files (*.pdf)|*.pdf";
    if (saveFileDialog1.ShowDialog() != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    string textToSearch = sb.Text;
    HighlightMatchesInTextBox(textToSearch);

    PdfReader reader = new PdfReader(oldFile);
    try
    {
        using (FileStream fs = new FileStream(saveFileDialog1.FileName, FileMode.Create, FileAccess.Write))
        {
            // A fresh Document per save: a Document that was closed by an
            // earlier save cannot be opened again.
            Document output = new Document(reader.GetPageSizeWithRotation(1));
            PdfWriter writer = PdfWriter.GetInstance(output, fs);
            output.Open();

            // the pdf content
            PdfContentByte cb = writer.DirectContent;

            // Fill opacity must lie in the range 0..1.
            PdfGState highlightState = new PdfGState();
            highlightState.FillOpacity = 0.5f;

            int count = 0; // running total of matches over all pages processed so far
            for (int cPage = 1; cPage <= reader.NumberOfPages; cPage++)
            {
                if (cPage > 1)
                {
                    // Give each output page the size of its source page.
                    output.SetPageSize(reader.GetPageSizeWithRotation(cPage));
                    output.NewPage();
                }

                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string currentText = PdfTextExtractor.GetTextFromPage(reader, cPage, strategy);

                // An empty pattern "matches" at every index and would run the
                // search past the end of the string, so it is skipped.
                if (textToSearch.Length > 0)
                {
                    int matchIndex = currentText.IndexOf(textToSearch, 0, StringComparison.CurrentCulture);
                    while (matchIndex != -1)
                    {
                        count++;

                        // TODO: replace the fixed rectangle with the real bounds of the match.
                        // Save/restore keeps the opacity and fill colour from leaking
                        // into the imported page content drawn afterwards.
                        cb.SaveState();
                        cb.SetGState(highlightState);
                        cb.SetColorFill(new CMYKColor(0f, 0f, 1f, 0f));
                        cb.Rectangle(output.PageSize.Width - 500f, 600f, 100f, 100f);
                        cb.Fill();
                        cb.RestoreState();

                        matchIndex = currentText.IndexOf(textToSearch, matchIndex + 1, StringComparison.CurrentCulture);
                    }
                }

                if (count != 0 && contain.Checked)
                {
                    label2.Text = "Number of pages: " + cPage + " - " + textToSearch + " found " + count + " times. \n";
                }
                // TODO (exact.Checked): count only the words that are bounded by a
                // space or a dot and report that number instead.

                // copy the source page onto the current output page
                PdfImportedPage page = writer.GetImportedPage(reader, cPage);
                cb.AddTemplate(page, 0, 0);
            }

            // Closing the document finishes the PDF and closes the writer.
            output.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}

/// <summary>
/// Gives every occurrence of <paramref name="needle"/> in the <c>tb</c> rich
/// text box a gold background. Uses a case-sensitive search when
/// <c>contain</c> is checked and a whole-word search when <c>exact</c> is
/// checked; does nothing when neither is checked or the needle is empty.
/// </summary>
/// <param name="needle">Text to look for.</param>
private void HighlightMatchesInTextBox(string needle)
{
    if (string.IsNullOrEmpty(needle))
    {
        return;
    }

    RichTextBoxFinds options;
    if (contain.Checked)
    {
        options = RichTextBoxFinds.MatchCase;
    }
    else if (exact.Checked)
    {
        options = RichTextBoxFinds.WholeWord;
    }
    else
    {
        return;
    }

    int start = 0;
    while (start < tb.TextLength)
    {
        // Find selects the match and returns its index, or -1 when there is none.
        int found = tb.Find(needle, start, tb.TextLength, options);
        if (found < 0)
        {
            break;
        }

        tb.SelectionBackColor = Color.Gold;
        start = found + 1; // always advances, so the loop terminates
    }
}
/// <summary>
/// Handles the Reset button: clears the text shown in the <c>tb</c> rich text
/// box together with the buffer that text is built from.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void reset_Click(object sender, EventArgs e)
{
    tb.Text = "";
    // Empty the buffer as well; otherwise the next opened file would be
    // appended to the text of the previous one.
    text.Length = 0;
}
}
答案 0 :(得分:3)
好的,我添加了一个用 VB.NET 2010 编写的可下载示例,它完全符合你的需求;你可以在 Chris 引用的同一个帖子里的另一篇回答中找到它。该代码适用于任何字体类型和字号,会返回所搜索的单词/句子的所有匹配项,并把每个匹配项作为带有 x/y 位置的矩形返回给 UI,最后将它们高亮显示并保存到新的 PDF 中。你只需提供几个初始参数,例如搜索词、区域性比较类型、源 PDF 路径和目标 PDF 路径。唯一没有实现的是所搜索的单词/句子跨越多行的特殊情况,不过由于可以使用 TextChunk 类中的 SameLine() 方法,这应该只需对代码做简单的修改。