如何在C#中搜索PDF文件中的文本(完全匹配)

时间:2012-05-14 09:06:06

标签: c# visual-studio-2008 .net-3.5 adobe-reader axacropdf

我在开发一个应用程序时卡住了。我想在PDF中搜索文本(完全匹配)并获取其位置,然后通过C#中的AxAcroPDF控件在PDF中高亮显示该文本。我该怎样才能做到这一点?我搜索了许多相关主题,但都没有帮助。我怎样才能实现这个目标?有没有可用的代码或DLL?请给我一些示例代码。

感谢

1 个答案:

答案 0 :(得分:0)

using Bytescout.PDFExtractor;

       // Lets the user pick a PDF, shows it in the AxAcroPDF viewer, displays basic
       // file information, and lists every occurrence of each entry of Keywords
       // (page number + bounding rectangle) in treeView1.
       // Any failure is reported to the user in a message box.
       try
        {
            if (openFileDialog1.ShowDialog() == DialogResult.OK)
            {
                FileName = openFileDialog1.FileName;

                // Show only the file name (no directory) in the text box.
                txtFileName.Text = Path.GetFileName(FileName);

                axAcroPDF1.LoadFile(FileName);

                FileInfo fi = new FileInfo(FileName);
                double fileSizeMb = (fi.Length / 1024F / 1024F);

                lblVersion.Text = "File Name         : " + fi.Name
                    + "\nSize                  : " + fileSizeMb.ToString("0.00 MB")
                    + "\nCreation Date   : " + fi.CreationTime.ToString()
                    + "\nModified Date   : " + fi.LastWriteTime.ToString();

                // Dispose the extractor when done (also on exceptions) so the
                // loaded document is released.
                using (TextExtractor extractor = new TextExtractor())
                {
                    extractor.RegistrationName = "Demo";
                    extractor.RegistrationKey = "Demo";
                    extractor.LoadDocumentFromFile(FileName);

                    int pageCount = extractor.GetPageCount();

                    foreach (string keyword in Keywords)
                    {
                        // Skip blank (or missing) keywords.
                        if (string.IsNullOrEmpty(keyword))
                        {
                            continue;
                        }

                        // One top-level node per keyword; its children are the hits.
                        TreeNode keywordNode = treeView1.Nodes.Add(keyword);

                        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                        {
                            RectangleF location;

                            // The third argument (false) is passed through unchanged from
                            // the original call. NOTE(review): presumably the case-sensitivity
                            // flag — confirm against the Bytescout documentation.
                            if (!extractor.Find(pageIndex, keyword, false, out location))
                            {
                                continue;
                            }

                            do
                            {
                                // Page numbers are shown 1-based; the extractor is queried 0-based.
                                keywordNode.Nodes.Add((pageIndex + 1).ToString() + "     " + location.ToString());

                                // NOTE: highlighting the hit in the viewer is not performed here.
                                // The original snippet left a call to
                                // axAcroPDF1.setCurrentHighlight(x, y, width, height) commented out.
                            }
                            while (extractor.FindNext(out location));
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message, "Exception", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }