getting-coordinates-of-string-using-itextextractionstrategy-and-locationtextextr
我正在使用此代码。根据示例,此代码工作绝对正常。但是当我在我的pdf中使用时,它没有检测到字符串坐标。每次var t在我的pdf中都是空的。我的pdf包含英语和另一种语言。
var t = new MyLocationTextExtractionStrategy("Address");
我正在寻找地址坐标。但每次我运行此代码时都不会检测到任何坐标。
在 MyLocationTextExtractionStrategy 类中:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf.parser;
namespace adharCardByMYR
{
#region string location
/*
//Our test file
var testFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "test.pdf");
//Create our test file, nothing special
using (var fs = new FileStream(testFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (var doc = new Document()) {
using (var writer = PdfWriter.GetInstance(doc, fs)) {
doc.Open();
doc.Add(new Paragraph("This is my sample file"));
doc.Close();
}
}
}
//Create an instance of our strategy
var t = new MyLocationTextExtractionStrategy("sample");
//Parse page 1 of the document above
using (var r = new PdfReader(testFile)) {
var ex = PdfTextExtractor.GetTextFromPage(r, 1, t);
}
//Loop through each chunk found
foreach (var p in t.myPoints) {
Console.WriteLine(string.Format("Found text {0} at {1}x{2}", p.Text, p.Rect.Left, p.Rect.Bottom));
}
*/
/// <summary>
/// Location-based text extraction strategy that, in addition to the normal text
/// extraction done by the base class, records a bounding rectangle for every
/// occurrence of <see cref="TextToSearchFor"/> found inside a rendered text chunk.
/// </summary>
/// <remarks>
/// Matching is performed separately on the text of each <see cref="RenderText"/> call.
/// A search string that is delivered split across several calls is therefore never
/// matched by this class, and <see cref="myPoints"/> stays empty for it.
/// </remarks>
public class MyLocationTextExtractionStrategy : LocationTextExtractionStrategy
{
    //Hold each coordinate
    public List<RectAndText> myPoints = new List<RectAndText>();

    //The string that we're searching for
    public String TextToSearchFor { get; set; }

    //How to compare strings
    public System.Globalization.CompareOptions CompareOptions { get; set; }

    /// <summary>
    /// Creates a strategy that searches for <paramref name="textToSearchFor"/>.
    /// </summary>
    /// <param name="textToSearchFor">The text whose coordinates should be recorded.</param>
    /// <param name="compareOptions">Options passed to the culture-sensitive search.</param>
    public MyLocationTextExtractionStrategy(String textToSearchFor, System.Globalization.CompareOptions compareOptions = System.Globalization.CompareOptions.None)
    {
        this.TextToSearchFor = textToSearchFor;
        this.CompareOptions = compareOptions;
    }

    /// <summary>
    /// Forwards the chunk to the base strategy, then records one
    /// <see cref="RectAndText"/> per non-overlapping occurrence of
    /// <see cref="TextToSearchFor"/> inside the chunk's text.
    /// </summary>
    /// <param name="renderInfo">The text chunk being rendered.</param>
    public override void RenderText(TextRenderInfo renderInfo)
    {
        base.RenderText(renderInfo);

        var needle = this.TextToSearchFor;
        var text = renderInfo.GetText();

        //Nothing to search for (or in): a null needle would throw inside IndexOf and an
        //empty one would "match" at 0 with zero characters, making First() throw below
        if (string.IsNullOrEmpty(needle) || string.IsNullOrEmpty(text))
        {
            return;
        }

        var compareInfo = System.Globalization.CultureInfo.CurrentCulture.CompareInfo;

        //NOTE(review): the code below assumes one character render info per char of
        //GetText(), so string offsets can be used as list offsets - confirm for fonts
        //whose glyphs decode to more than one char
        var charInfos = renderInfo.GetCharacterRenderInfos();

        var searchFrom = 0;
        while (searchFrom < text.Length)
        {
            //See if the rest of the current chunk contains the text
            var startPosition = compareInfo.IndexOf(text, needle, searchFrom, this.CompareOptions);

            //If not found bail
            if (startPosition < 0)
            {
                break;
            }

            //Grab the individual characters
            var chars = charInfos.Skip(startPosition).Take(needle.Length).ToList();

            //Fewer character infos than the match offset implies: no box can be built
            if (chars.Count == 0)
            {
                break;
            }

            //Grab the first and last character
            var firstChar = chars.First();
            var lastChar = chars.Last();

            //Get the bounding box for the matched text
            var bottomLeft = firstChar.GetDescentLine().GetStartPoint();
            var topRight = lastChar.GetAscentLine().GetEndPoint();

            //Create a rectangle from it
            var rect = new iTextSharp.text.Rectangle(
                bottomLeft[Vector.I1],
                bottomLeft[Vector.I2],
                topRight[Vector.I1],
                topRight[Vector.I2]
            );

            //Add this to our main collection
            this.myPoints.Add(new RectAndText(rect, needle));

            //Continue after this match so repeated occurrences in one chunk are found too
            searchFrom = startPosition + needle.Length;
        }
    }
}
//Helper class that stores our rectangle and text
/// <summary>
/// Simple pair of a page rectangle and the text associated with it.
/// </summary>
public class RectAndText
{
    /// <summary>The rectangle supplied at construction.</summary>
    public iTextSharp.text.Rectangle Rect;

    /// <summary>The text supplied at construction.</summary>
    public string Text;

    /// <summary>
    /// Stores the given rectangle and text unchanged.
    /// </summary>
    /// <param name="rect">Rectangle to store in <see cref="Rect"/>.</param>
    /// <param name="text">Text to store in <see cref="Text"/>.</param>
    public RectAndText(iTextSharp.text.Rectangle rect, string text)
    {
        Text = text;
        Rect = rect;
    }
}
#endregion
}
在我的主窗体中:
// Text whose position on the page we want to locate
string address = "Address:";

// Strategy that collects a rectangle for each chunk containing the search text
var t = new MyLocationTextExtractionStrategy(address);

// Extract page 1 with the strategy attached; t.myPoints is read afterwards
// (`reader` is created outside this snippet)
var ex = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, 1, t);

// Build one message per recorded point, appended back to back without a separator
var report = new System.Text.StringBuilder();
foreach (var p in t.myPoints)
{
    report.AppendFormat("Found text {0} at {1}x{2}", p.Text, p.Rect.Left, p.Rect.Bottom);
}

// Show the combined result in the form's label
string abc = report.ToString();
label1.Text = abc;