在 C# 中使用 iTextSharp 读取马拉地语(Marathi)PDF 时,无法得到 PDF 中的正确文本?

时间:2014-01-07 05:46:50

标签: c# pdf

我试图在 C# 中使用 iTextSharp 读取马拉地语(Marathi)PDF,但无法正确获取数据。通过 C# 读取马拉地语 PDF 时,显示的并不是正确的数据。

    /// <summary>
    /// Extracts the text of every page of a PDF, copies selected fields into a
    /// spreadsheet saved at <c>F:/finalexel.xls</c>, shows the extracted text in
    /// <c>textBox1</c>, and returns it.
    /// </summary>
    /// <param name="fileName">Path of the PDF file to read.</param>
    /// <returns>
    /// The concatenated text of all pages, trimmed; an empty string when
    /// <paramref name="fileName"/> does not exist.
    /// </returns>
    public string ReadPdfFile(string fileName)
    {
        StringBuilder text = new StringBuilder();

        if (!System.IO.File.Exists(fileName))
        {
            return text.ToString().Trim();
        }

        PdfReader pdfReader = new PdfReader(fileName);
        try
        {
            Workbook workbook = new Workbook();
            Worksheet sheet = workbook.Worksheets[0];

            // Header row (Marathi column titles).
            sheet.Range["A1"].Text = "ओळखपत्र क्रमांक";
            sheet.Range["B1"].Text = "मतदाराचे पुर्ण नाव";
            sheet.Range["C1"].Text = "वडिलांचे नाव";
            sheet.Range["D1"].Text = "घर क्रमांक";
            sheet.Range["E1"].Text = "वय";
            sheet.Range["F1"].Text = "लिंग ";

            // Next free row per column. These live outside the page loop so that
            // rows from later pages are appended instead of overwriting the rows
            // written for earlier pages.
            int idRow = 2;
            int voterNameRow = 2;
            int fatherNameRow = 2;
            int houseNumberRow = 2;
            int ageGenderRow = 2;

            for (int page = 1; page <= pdfReader.NumberOfPages; page++)
            {
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                // The extractor already returns a .NET (UTF-16) string; re-encoding it
                // through byte arrays cannot repair the text and can corrupt it, so the
                // string is used as-is.
                string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                foreach (string line in currentText.Split('\n'))
                {
                    if (!line.Contains(":"))
                    {
                        // Lines without a "label:value" separator go to column A,
                        // except those containing the marker below.
                        if (!line.Contains("नाळ"))
                        {
                            sheet.Range["a" + idRow].Text = line;
                            idRow++;
                        }

                        continue;
                    }

                    // Contains(":") guarantees at least two parts here.
                    string[] parts = line.Split(':');
                    string label = parts[0];
                    string value = parts[1];

                    if (value.Length <= 1)
                    {
                        continue;
                    }

                    // NOTE(review): several labels below are not valid Marathi words;
                    // presumably they match the text exactly as the extractor returns
                    // it for this PDF's font. Confirm before "correcting" them.
                    if (label.Contains("मतदार"))
                    {
                        sheet.Range["B" + voterNameRow].Text = value;
                        voterNameRow++;
                    }

                    if (label.Contains("ळडडऱांचे नाळ"))
                    {
                        sheet.Range["C" + fatherNameRow].Text = value;
                        fatherNameRow++;
                    }

                    if (label.Contains("घर क्रमांक"))
                    {
                        sheet.Range["D" + houseNumberRow].Text = value;
                        houseNumberRow++;
                    }

                    if (label.Contains("ळय"))
                    {
                        // Column E takes the third space-separated token of the value
                        // and column F takes the text after the second ':'. Both are
                        // bounds-checked so a line with a different shape is skipped
                        // instead of throwing IndexOutOfRangeException.
                        string[] tokens = value.Split(' ');
                        if (tokens.Length > 2)
                        {
                            sheet.Range["E" + ageGenderRow].Text = tokens[2];
                        }

                        if (parts.Length > 2)
                        {
                            sheet.Range["F" + ageGenderRow].Text = parts[2];
                        }

                        // Always advance so columns E and F stay on the same row.
                        ageGenderRow++;
                    }
                }

                text.Append(currentText);
            }

            // Save once after all pages are processed rather than once per page.
            workbook.SaveToFile("F:/finalexel.xls");
            textBox1.Text = text.ToString();
        }
        finally
        {
            // Release the PDF file handle even if extraction or saving fails.
            pdfReader.Close();
        }

        return text.ToString().Trim();
    }

0 个答案:

没有答案