我有几个与书签和链接相关的问题。
BookMarks
获取到书签之后，是否有办法在页面内容中添加书签、修改书签文本，以及修改PDF文件树形结构中已存在的书签？
是否有办法在页面内容中搜索书签文本？我需要考虑空格、拼写等方面的差异。
是否有办法同时编辑PDF页面内容中的书签文本和书签本身？
是否有办法在页面内容中找到H1、H2等标题？
是否有办法检查书签是否指向了正确的页面？
链接
这是我的代码
/// <summary>
/// Walks the bookmark (outline) tree of a PDF file and, for every bookmark whose title
/// text occurs in the text of the page it points to, appends a row to the result table.
/// The table is carried across calls in ViewState["Append"] and bound to GVResult.
/// </summary>
/// <param name="fileName">Path of the PDF file. When the file does not exist no bookmarks are read,
/// but the (possibly empty) table is still stored and bound.</param>
/// <param name="CompareText">Not used by this method; kept so existing callers keep compiling.</param>
public void ReadPdfFile(string fileName, string CompareText)
{
    // Reuse the table accumulated by earlier calls; start fresh when ViewState holds
    // nothing (or something that is not a DataTable).
    System.Data.DataTable dtResult = ViewState["Append"] as System.Data.DataTable;
    if (dtResult == null)
    {
        dtResult = new System.Data.DataTable();
    }
    if (!dtResult.Columns.Contains("BookMarks"))
    {
        dtResult.Columns.Add(new DataColumn("BookMarks"));
    }
    if (!dtResult.Columns.Contains("Exists"))
    {
        dtResult.Columns.Add(new DataColumn("Exists"));
    }

    if (File.Exists(fileName))
    {
        PdfReader pdfReader = new PdfReader(fileName);
        try
        {
            IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
            // GetBookmark may return null for a document that has no outline.
            if (bookmarks != null)
            {
                // Several bookmarks usually point at the same page; extract each page's text once.
                Dictionary<int, string> pageTextCache = new Dictionary<int, string>();
                AppendMatchingBookmarks(pdfReader, bookmarks, dtResult, pageTextCache);
                dtResult.AcceptChanges();
            }
        }
        finally
        {
            // Release the file even when text extraction throws.
            pdfReader.Close();
        }
    }

    ViewState["Append"] = dtResult;
    GVResult.DataSource = dtResult;
    GVResult.DataBind();
}

// Visits one level of the bookmark tree in document order and recurses into each entry's "Kids",
// adding a row to dtResult for every bookmark whose title occurs in its target page's text.
private void AppendMatchingBookmarks(
    PdfReader pdfReader,
    IList<Dictionary<string, object>> bookmarks,
    System.Data.DataTable dtResult,
    Dictionary<int, string> pageTextCache)
{
    foreach (Dictionary<string, object> bookmark in bookmarks)
    {
        object titleValue;
        object pageValue;
        // Look entries up by key: the order of Dictionary.Values is not guaranteed, and
        // bookmarks that do not target a page (no "Page" entry) cannot be checked.
        if (bookmark.TryGetValue("Title", out titleValue) && titleValue != null
            && bookmark.TryGetValue("Page", out pageValue) && pageValue != null)
        {
            string title = titleValue.ToString();
            int pageNumber;
            if (TryGetBookmarkPageNumber(pageValue.ToString(), pdfReader.NumberOfPages, out pageNumber))
            {
                string pageText;
                if (!pageTextCache.TryGetValue(pageNumber, out pageText))
                {
                    // The extracted text is already a .NET (UTF-16) string. It is deliberately not
                    // round-tripped through Encoding.Default, which would replace every character
                    // outside the ANSI code page with '?'.
                    pageText = PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber);
                    pageTextCache[pageNumber] = pageText;
                }

                if (pageText.Contains(title))
                {
                    DataRow dr = dtResult.NewRow();
                    dr["BookMarks"] = title;
                    // NOTE(review): "No" is written when the title WAS found on the page, which looks
                    // inverted for a column named "Exists". Kept as in the original - confirm intent.
                    dr["Exists"] = "No";
                    dtResult.Rows.Add(dr);
                }
            }
        }

        // Recurse so that bookmarks at every nesting depth are visited.
        object kidsValue;
        if (bookmark.TryGetValue("Kids", out kidsValue))
        {
            IList<Dictionary<string, object>> kids = kidsValue as IList<Dictionary<string, object>>;
            if (kids != null)
            {
                AppendMatchingBookmarks(pdfReader, kids, dtResult, pageTextCache);
            }
        }
    }
}

// Parses the leading page number of a bookmark "Page" entry (the part before the first space)
// and reports whether it is a valid 1-based page of a document with pageCount pages.
private static bool TryGetBookmarkPageNumber(string pageEntry, int pageCount, out int pageNumber)
{
    string[] parts = pageEntry.Split(' ');
    return int.TryParse(
               parts[0],
               System.Globalization.NumberStyles.Integer,
               System.Globalization.CultureInfo.InvariantCulture,
               out pageNumber)
           && pageNumber >= 1
           && pageNumber <= pageCount;
}