Open XML:将段落转换为简单的HTML

时间:2012-08-13 20:44:12

标签: openxml openxml-sdk

我有一个Paragraph对象,希望将它包含的内部文本转换为HTML片段。

我使用的是Microsoft的Open XML SDK 2.0。

1 个答案:

答案 0 :(得分:0)

    /// <summary>
    /// Writes one HTML fragment to the console for every paragraph that is a
    /// direct child of the body of the document at <c>documentFileName</c>.
    /// Each paragraph is cloned into its own temporary in-memory document,
    /// which is then passed to <c>GetHTMLOfDoc</c>.
    /// </summary>
    [Test]
    public void GetHTMLOutOfParagraphsWithoutHeadingInformation()
    {
        // Open the file read-only since we don't need to change it
        // (second argument is the "editable" flag, so it must be false).
        using (var wordprocessingDocument = WordprocessingDocument.Open(documentFileName, false))
        {
            var sourceMainPart = wordprocessingDocument.MainDocumentPart;
            var paragraphs = sourceMainPart.Document.Body
                .OfType<Paragraph>().ToList();

            // Null when the source document carries no style definitions part.
            StyleDefinitionsPart styles = sourceMainPart.StyleDefinitionsPart;

            foreach (var p in paragraphs)
            {
                // Both the stream and the temporary package are disposed after
                // each paragraph; the package is released first.
                using (var memoryStream = new MemoryStream())
                using (var doc = WordprocessingDocument.Create(memoryStream, WordprocessingDocumentType.Document))
                {
                    var targetMainPart = doc.AddMainDocumentPart();

                    // Only carry the styles over when the source has them;
                    // passing null to AddPart would throw.
                    // NOTE(review): confirm the HTML converter copes with a
                    // document that has no styles part.
                    if (styles != null)
                    {
                        targetMainPart.AddPart(styles);
                    }

                    targetMainPart.Document = new Document();
                    targetMainPart.Document.Body = new Body();

                    // Clone deeply: the paragraph still belongs to the source
                    // document tree and cannot be appended to a second parent.
                    targetMainPart.Document.Body.Append(p.CloneNode(true));
                    targetMainPart.Document.Save();

                    Console.WriteLine(GetHTMLOfDoc(doc));
                }
            }
        }
    }

    /// <summary>
    /// Converts <paramref name="doc"/> to HTML with <c>HtmlConverter</c> and
    /// returns the markup of the resulting <c>body</c> element.
    /// </summary>
    /// <param name="doc">The word-processing document to convert.</param>
    /// <returns>
    /// The <c>body</c> element rendered through
    /// <c>ToStringNewLineOnAttributes</c>.
    /// </returns>
    string GetHTMLOfDoc(WordprocessingDocument doc)
    {
        var converterSettings = new HtmlConverterSettings();
        converterSettings.PageTitle = "Test Title";
        converterSettings.CssClassPrefix = "Pt";
        converterSettings.Css = "";
        converterSettings.ConvertFormatting = false;

        XElement convertedHtml = HtmlConverter.ConvertToHtml(doc, converterSettings);

        // Match on local-name() so the body element is found regardless of
        // the XML namespace the converter puts it in.
        XElement bodyElement = convertedHtml.XPathSelectElement("//*[local-name() = 'body']");

        return bodyElement.ToStringNewLineOnAttributes();
    }
}