使用iTextSharp处理PDF文件中的书签和链接

时间:2014-01-16 13:47:34

标签: c# asp.net itextsharp

我有几个与书签和链接相关的问题

BookMarks

  1. 获取书签之后,是否有办法在页面内容中添加书签、修改书签文本,并修改PDF文件树形结构中已存在的书签?

  2. 是否有办法在页面内容中搜索书签?我需要考虑的因素包括空格、拼写等差异。

  3. 是否有办法同时编辑PDF页面内容中的书签文本和书签树中的书签?

  4. 是否有任何选项可以在页面内容中找到H1,H2

  5. 是否有任何选项可以找到书签指向正确的页面。

  6. 链接

    1. 是否有找到链接的选项。并检查链接是否指向正确的URL。
    2. 这是我的代码

      /// <summary>
      /// Walks every bookmark (outline entry) of the PDF at <paramref name="fileName"/>, at any
      /// nesting depth, and appends one row to the result table for each bookmark whose title
      /// occurs in the extracted text of the page it points to. The table is loaded from and
      /// stored back into ViewState["Append"], then bound to GVResult.
      /// </summary>
      /// <param name="fileName">Path of the PDF file. When the file does not exist no rows are added.</param>
      /// <param name="CompareText">Not used by this method; kept so existing callers keep compiling.</param>
      public void ReadPdfFile(string fileName, string CompareText)
      {
          // Reuse the table accumulated by earlier calls, if any.
          System.Data.DataTable dtResult = ViewState["Append"] as System.Data.DataTable;
          if (dtResult == null)
          {
              dtResult = new System.Data.DataTable();
          }

          if (!dtResult.Columns.Contains("BookMarks"))
          {
              dtResult.Columns.Add(new DataColumn("BookMarks"));
          }
          if (!dtResult.Columns.Contains("Exists"))
          {
              dtResult.Columns.Add(new DataColumn("Exists"));
          }

          if (File.Exists(fileName))
          {
              PdfReader pdfReader = new PdfReader(fileName);
              try
              {
                  // GetBookmark yields null (not an empty list) when the document has no outlines;
                  // the helper treats null as "nothing to do".
                  IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
                  AppendMatchingBookmarks(pdfReader, bookmarks, dtResult, new Dictionary<int, string>());
              }
              finally
              {
                  // Close the reader even when extraction throws.
                  pdfReader.Close();
              }
          }

          ViewState["Append"] = dtResult;
          GVResult.DataSource = dtResult;
          GVResult.DataBind();
      }

      // Pre-order (parent before children) walk over a bookmark list and all nested "Kids" lists.
      // Adds a row for every bookmark whose "Title" is contained in the text of its target page.
      // pageTextCache maps page number -> extracted text so each page is extracted at most once.
      private static void AppendMatchingBookmarks(
          PdfReader pdfReader,
          IList<Dictionary<string, object>> bookmarks,
          System.Data.DataTable dtResult,
          Dictionary<int, string> pageTextCache)
      {
          if (bookmarks == null)
          {
              return;
          }

          foreach (Dictionary<string, object> bookmark in bookmarks)
          {
              // Look entries up by key: the dictionary may also carry other keys (or lack "Page"),
              // so positional access to its values is unreliable.
              object titleValue;
              object pageValue;
              if (bookmark.TryGetValue("Title", out titleValue) && titleValue != null
                  && bookmark.TryGetValue("Page", out pageValue) && pageValue != null)
              {
                  string title = titleValue.ToString();

                  // The page entry is space separated; its first token is read as the page number.
                  string pageToken = pageValue.ToString().Split(' ')[0];
                  int pageNumber;
                  bool hasPage = int.TryParse(
                      pageToken,
                      System.Globalization.NumberStyles.Integer,
                      System.Globalization.CultureInfo.InvariantCulture,
                      out pageNumber);

                  if (hasPage && pageNumber >= 1 && pageNumber <= pdfReader.NumberOfPages)
                  {
                      string pageText;
                      if (!pageTextCache.TryGetValue(pageNumber, out pageText))
                      {
                          // The extracted string is used as-is: round-tripping it through
                          // Encoding.Default would replace characters outside the ANSI code page.
                          pageText = PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber);
                          pageTextCache[pageNumber] = pageText;
                      }

                      if (pageText.Contains(title))
                      {
                          DataRow dr = dtResult.NewRow();
                          dr["BookMarks"] = title;
                          // NOTE(review): "No" is written when the title IS found on the page, which
                          // looks inverted for a column named "Exists" - confirm the intended value.
                          dr["Exists"] = "No";
                          dtResult.Rows.Add(dr);
                          dtResult.AcceptChanges();
                      }
                  }
              }

              // Recurse into children regardless of whether this entry itself had a usable page.
              object kidsValue;
              if (bookmark.TryGetValue("Kids", out kidsValue))
              {
                  AppendMatchingBookmarks(
                      pdfReader,
                      kidsValue as IList<Dictionary<string, object>>,
                      dtResult,
                      pageTextCache);
              }
          }
      }
      

0 个答案:

没有答案