Question

使用 ITextExtractionStrategy 和 LocationTextExtractionStrategy 获取字符串的坐标

我正在使用下面的代码。对于示例文件，这段代码工作完全正常；但是用在我自己的 PDF 上时，它检测不到字符串的坐标，每次运行后 var t（即 t.myPoints）都是空的。我的 PDF 同时包含英语和另一种语言。

//Create the strategy; its constructor stores "Address" as the text to search for
var t = new MyLocationTextExtractionStrategy("Address");

我想获取“Address”这个字符串的坐标，但每次运行这段代码都检测不到任何坐标。

MyLocationTextExtractionStrategy 类的代码如下：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf.parser;

namespace adharCardByMYR
{
    #region string location

    /*
     //Our test file
    var testFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "test.pdf");

    //Create our test file, nothing special
    using (var fs = new FileStream(testFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
        using (var doc = new Document()) {
            using (var writer = PdfWriter.GetInstance(doc, fs)) {
                doc.Open();
                doc.Add(new Paragraph("This is my sample file"));
                doc.Close();
            }
        }
    }

    //Create an instance of our strategy
    var t = new MyLocationTextExtractionStrategy("sample");

    //Parse page 1 of the document above
    using (var r = new PdfReader(testFile)) {
        var ex = PdfTextExtractor.GetTextFromPage(r, 1, t);
    }

    //Loop through each chunk found
    foreach (var p in t.myPoints) {
        Console.WriteLine(string.Format("Found text {0} at {1}x{2}", p.Text, p.Rect.Left, p.Rect.Bottom));
    }
     */


    /// <summary>
    /// Location-based text extraction strategy that additionally records a rectangle
    /// for every occurrence of <see cref="TextToSearchFor"/> found inside a text chunk.
    /// </summary>
    /// <remarks>
    /// Matching is done separately on each chunk passed to <see cref="RenderText"/>.
    /// If the PDF emits the searched text split across several chunks (for example one
    /// chunk per word or per glyph), no single chunk contains it and nothing is recorded.
    /// </remarks>
    public class MyLocationTextExtractionStrategy : LocationTextExtractionStrategy
    {
        //Hold each coordinate
        public List<RectAndText> myPoints = new List<RectAndText>();

        //The string that we're searching for
        public String TextToSearchFor { get; set; }

        //How to compare strings
        public System.Globalization.CompareOptions CompareOptions { get; set; }

        /// <summary>
        /// Creates a strategy that searches each rendered chunk for the given text.
        /// </summary>
        /// <param name="textToSearchFor">The text to locate. A null or empty value records no matches.</param>
        /// <param name="compareOptions">Options passed to the culture-sensitive search (defaults to <c>None</c>).</param>
        public MyLocationTextExtractionStrategy(String textToSearchFor, System.Globalization.CompareOptions compareOptions = System.Globalization.CompareOptions.None)
        {
            this.TextToSearchFor = textToSearchFor;
            this.CompareOptions = compareOptions;
        }

        /// <summary>
        /// Automatically called for each chunk of text in the PDF. Forwards the chunk to the
        /// base strategy, then adds one <see cref="RectAndText"/> to <see cref="myPoints"/>
        /// for every non-overlapping occurrence of <see cref="TextToSearchFor"/> in the chunk.
        /// </summary>
        /// <param name="renderInfo">The chunk being rendered.</param>
        public override void RenderText(TextRenderInfo renderInfo)
        {
            base.RenderText(renderInfo);

            //An empty search string "matches" at index 0 with zero characters, which would
            //leave no first/last character to build a rectangle from; a null one cannot be searched
            if (String.IsNullOrEmpty(this.TextToSearchFor))
            {
                return;
            }

            var chunkText = renderInfo.GetText();
            if (String.IsNullOrEmpty(chunkText))
            {
                return;
            }

            var compareInfo = System.Globalization.CultureInfo.CurrentCulture.CompareInfo;
            var charInfos = renderInfo.GetCharacterRenderInfos();
            var searchLength = this.TextToSearchFor.Length;

            //Walk through the chunk so that repeated occurrences are all recorded
            var searchFrom = 0;
            while (searchFrom < chunkText.Length)
            {
                //See if the rest of the chunk contains the text
                var startPosition = compareInfo.IndexOf(chunkText, this.TextToSearchFor, searchFrom, this.CompareOptions);

                //If not found bail
                if (startPosition < 0)
                {
                    break;
                }

                //Grab the individual characters of this occurrence
                var chars = charInfos.Skip(startPosition).Take(searchLength).ToList();

                //Fewer character infos than string positions: nothing to measure here or further on
                if (chars.Count == 0)
                {
                    break;
                }

                //Grab the first and last character
                var firstChar = chars[0];
                var lastChar = chars[chars.Count - 1];

                //Get the bounding box for the occurrence
                var bottomLeft = firstChar.GetDescentLine().GetStartPoint();
                var topRight = lastChar.GetAscentLine().GetEndPoint();

                //Create a rectangle from it
                var rect = new iTextSharp.text.Rectangle(
                                                        bottomLeft[Vector.I1],
                                                        bottomLeft[Vector.I2],
                                                        topRight[Vector.I1],
                                                        topRight[Vector.I2]
                                                        );

                //Add this to our main collection
                this.myPoints.Add(new RectAndText(rect, this.TextToSearchFor));

                //Continue after this occurrence (searchLength >= 1, so the loop always advances)
                searchFrom = startPosition + searchLength;
            }
        }

    }

    //Simple holder pairing a rectangle with the text it was recorded for
    public class RectAndText
    {
        //Rectangle passed to the constructor
        public iTextSharp.text.Rectangle Rect;

        //Text passed to the constructor
        public String Text;

        //Stores both arguments as given; no validation or copying is done
        public RectAndText(iTextSharp.text.Rectangle rect, String text)
        {
            Text = text;
            Rect = rect;
        }
    }
    #endregion
}

在我的主窗体（main form）中：

//Text whose position on the page we want to locate
string address = "Address:";
var t = new MyLocationTextExtractionStrategy(address);

//Extract page 1 through the strategy; matches are read back from t.myPoints below
//NOTE(review): `reader` is declared outside this snippet - presumably an open PdfReader; confirm
var ex = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, 1, t);

//Concatenate one message per recorded match (no separator between messages)
var message = new System.Text.StringBuilder();
foreach (var p in t.myPoints)
{
    message.AppendFormat("Found text {0} at {1}x{2}", p.Text, p.Rect.Left, p.Rect.Bottom);
}
string abc = message.ToString();
label1.Text = abc;

使用 C# 的 iTextSharp

0 个答案: