我需要将PDF文件转换为HTML文件。我正在使用iTextSharp库进行开发。我找到了一段相关的代码,但是这段代码只能提取PDF中的文本。我需要转换页面上的所有内容(图像、表格和其他元素),我该怎么做?
这是我的代码:
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Extracts the text of a PDF document with iTextSharp and writes it to an output file.
/// </summary>
/// <remarks>
/// Only the text returned by <c>PdfTextExtractor.GetTextFromPage</c> is written;
/// no HTML markup is generated and no images or other page content are emitted.
/// </remarks>
public class PdfToHtmlManager
{
    /// <summary>
    /// Reads every page of <paramref name="inputFileName"/> and appends the extracted
    /// text to <paramref name="outputFileName"/> using UTF-8 encoding.
    /// </summary>
    /// <param name="inputFileName">Path of the PDF file to read.</param>
    /// <param name="outputFileName">Path of the file that receives the extracted text.</param>
    /// <returns>
    /// <c>"File name error"</c> when either path is null or empty,
    /// <c>"File is not exist"</c> when the input file cannot be found,
    /// otherwise <c>"success"</c> after all pages have been written.
    /// </returns>
    public string ConvertPdf(string inputFileName, string outputFileName)
    {
        if (string.IsNullOrEmpty(inputFileName) || string.IsNullOrEmpty(outputFileName))
        {
            return "File name error";
        }

        if (!File.Exists(inputFileName))
        {
            return "File is not exist";
        }

        // Open the document once and reuse it for every page instead of
        // re-parsing the whole PDF on each loop iteration.
        PdfReader reader = new PdfReader(inputFileName);
        try
        {
            int pageCount = reader.NumberOfPages;

            // The 'true' argument opens the output in append mode, so an existing
            // output file is extended rather than overwritten (unchanged behaviour).
            using (StreamWriter outFile = new StreamWriter(outputFileName, true, System.Text.Encoding.UTF8))
            {
                // PDF page numbers are 1-based.
                for (int page = 1; page <= pageCount; page++)
                {
                    // A fresh strategy per page, as in the original code, so each
                    // call starts from an empty extraction state.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                    outFile.Write(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
                }
            }
        }
        finally
        {
            // Release the reader even when extraction or writing throws.
            reader.Close();
        }

        return "success";
    }
}