iTextSharp:Base64嵌入图像在页眉中没有被解析/显示

时间:2016-06-29 07:55:47

标签: vb.net base64 pdf-generation itext html-to-pdf

背景

我有一个编辑器,可以用HTML编辑文档的页眉和页脚,然后将它们合并到主文档中。Base64嵌入式图像在主文档内容中可以完美地工作,但是在页眉或页脚中它们会消失(所以如果我有一个带有image1.png的主文档,并且该文档有一个带有image_header.png的页眉,则image1会显示,而image_header不会显示)。我的看法是标签处理器没有应用于PageHeader html中的元素。

我创建了一个自定义ImageTagProcessor(下面)

''' <summary>
''' IMG tag processor that resolves two kinds of <c>src</c> values itself:
''' base64 <c>data:image/...</c> URIs and paths that exist relative to the
''' application base directory. Any other source is delegated to the default
''' XMLWorker image processor.
''' </summary>
Public Class CustomImageTagProcessor
    Inherits iTextSharp.tool.xml.html.Image

    ''' <summary>
    ''' Called when the closing of an IMG tag is reached; produces the elements for that tag.
    ''' </summary>
    ''' <param name="ctx">Worker context used to look up the HTML pipeline context.</param>
    ''' <param name="tag">The IMG tag being processed; its <c>src</c> attribute selects the image.</param>
    ''' <param name="currentContent">Content collected for the tag; only forwarded to the base processor.</param>
    ''' <returns>
    ''' An empty list when <c>src</c> is missing or empty, a single-chunk list for data URIs and
    ''' local files, otherwise whatever the base processor returns.
    ''' </returns>
    ''' <remarks>
    ''' A data URI whose payload is not valid base64 makes <c>Convert.FromBase64String</c>
    ''' throw a <c>FormatException</c>, which is not caught here.
    ''' </remarks>
    Public Overrides Function [End](ctx As IWorkerContext, tag As Tag, currentContent As IList(Of IElement)) As IList(Of IElement)
        Dim src As String = String.Empty
        If Not tag.Attributes.TryGetValue(iTextSharp.tool.xml.html.HTML.Attribute.SRC, src) OrElse String.IsNullOrEmpty(src) Then
            Return New List(Of IElement)(1)
        End If

        If src.StartsWith("data:image/", StringComparison.InvariantCultureIgnoreCase) Then
            ' data:[<MIME-type>][;charset=<encoding>][;base64],<data>
            ' Everything after the first comma is the payload. The Char overload of IndexOf
            ' is an ordinal search, so the result does not depend on the current culture.
            Dim base64Data As String = src.Substring(src.IndexOf(","c) + 1)
            Dim decoded As Byte() = Convert.FromBase64String(base64Data)
            Return WrapImage(ctx, tag, iTextSharp.text.Image.GetInstance(decoded))
        End If

        ' Resolve the path once; the image is loaded directly from that path, so there is
        ' no need to read the file into memory separately beforehand.
        Dim localPath As String = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, src)
        If File.Exists(localPath) Then
            Return WrapImage(ctx, tag, iTextSharp.text.Image.GetInstance(localPath))
        End If

        Return MyBase.[End](ctx, tag, currentContent)
    End Function

    ''' <summary>
    ''' Applies the tag's CSS to the image, wraps it in a chunk, applies the CSS to the
    ''' chunk as well and returns it as a one-element list.
    ''' </summary>
    Private Function WrapImage(ctx As IWorkerContext, tag As Tag, image As iTextSharp.text.Image) As IList(Of IElement)
        Dim htmlPipelineContext As pipeline.html.HtmlPipelineContext = GetHtmlPipelineContext(ctx)
        Dim styledImage As iTextSharp.text.Image = DirectCast(GetCssAppliers().Apply(image, tag, htmlPipelineContext), iTextSharp.text.Image)

        Dim list As New List(Of IElement)()
        list.Add(GetCssAppliers().Apply(New Chunk(styledImage, 0, 0, True), tag, htmlPipelineContext))
        Return list
    End Function
End Class

并通过主PDF生成器库中的以下代码连接

' Wires the custom IMG tag processor, the CSS resolver and the font provider into
' the XMLWorker pipeline that writes to the main document.
Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
tagProcessors.RemoveProcessor(HTML.Tag.IMG) ' remove the default processor
tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor()) ' use our new processor

' Set up CSS: take XMLWorker's default resolver and add the application's pdf.css to it.
Dim cssResolver As ICSSResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(True)
cssResolver.AddCssFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/css/pdf.css"), True)
' Set up fonts: register the fonts shipped under assets/fonts/.
' NOTE(review): DONTLOOKFORFONTS presumably disables scanning of system font directories - confirm.
Dim xmlFontProvider As XMLWorkerFontProvider = New XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS)
xmlFontProvider.RegisterDirectory(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/fonts/"))

' The HTML context carries the CSS appliers (backed by the font provider above)
' and the tag factory that contains the custom IMG processor.
Dim cssAppliers As CssAppliers = New CssAppliersImpl(xmlFontProvider)
Dim htmlContext As HtmlPipelineContext = New HtmlPipelineContext(cssAppliers)
htmlContext.SetAcceptUnknown(True)
htmlContext.SetTagFactory(tagProcessors)

' Set up the pipelines; each one is handed the next stage in its constructor:
' CSS resolver -> HTML -> PDF writer (document/writer).
Dim pdf As PdfWriterPipeline = New PdfWriterPipeline(document, writer)
Dim htmlp As HtmlPipeline = New HtmlPipeline(htmlContext, pdf)
Dim css As CssResolverPipeline = New CssResolverPipeline(cssResolver, htmlp)

我的HeaderFooter类(继承自PdfPageEventHelper)的构造函数如下,在创建该类的新实例时执行:

''' <summary>
''' Parses the header and footer HTML into element lists and builds the tables
''' that are drawn on each page.
''' </summary>
''' <param name="headerHTML">HTML markup for the page header.</param>
''' <param name="footerHTML">HTML markup for the page footer.</param>
Public Sub New(ByVal headerHTML As String, ByVal footerHTML As String)
    MyBase.New()

    '< Other code not related >

    ' NOTE(review): XMLWorkerHelper's default parsing is used here, so a custom IMG
    ' tag processor registered on another pipeline is not applied to this markup.

    ' Use one handler per section. NOTE(review): PdfElementsHandler is not shown here;
    ' if its "elements" member is a single list that keeps growing, sharing one handler
    ' would make the header and footer reference the same combined list - confirm.
    Dim headerHandler As New PdfElementsHandler()
    ' A missing HTML string is parsed as empty markup instead of making StringReader throw.
    Using sr As TextReader = New StringReader(If(Me.HeaderHTML, String.Empty))
        XMLWorkerHelper.GetInstance().ParseXHtml(headerHandler, sr)
    End Using
    headerElements = headerHandler.elements

    Dim footerHandler As New PdfElementsHandler()
    Using sr As TextReader = New StringReader(If(Me.FooterHTML, String.Empty))
        XMLWorkerHelper.GetInstance().ParseXHtml(footerHandler, sr)
    End Using
    footerElements = footerHandler.elements

    ' BuildElements returns a fresh table, so no placeholder table is created first.
    headerTable = BuildElements(headerElements, "header")
    footerTable = BuildElements(footerElements, "footer")

End Sub

''' <summary>
''' Wraps the parsed elements of a header or footer in a single-cell,
''' borderless, full-width table.
''' </summary>
''' <param name="tableElements">Elements to place inside the table's only cell.</param>
''' <param name="type">"header" or "footer"; selects which HTML string decides whether a row is added.</param>
''' <returns>
''' The holder table. It contains one row when the HTML for the requested section
''' is non-empty and no rows otherwise (including for any other <paramref name="type"/>).
''' </returns>
Private Function BuildElements(tableElements As ElementList, type As String) As PdfPTable

    Dim wrapperTable As New PdfPTable({1})
    wrapperTable.HorizontalAlignment = Element.ALIGN_LEFT
    wrapperTable.WidthPercentage = 100

    Dim wrapperCell As New PdfPCell()
    wrapperCell.Padding = 0
    wrapperCell.UseBorderPadding = False
    wrapperCell.Border = 0

    ' Pick the markup that belongs to the requested section; unknown types get none.
    Dim sectionHtml As String = Nothing
    Select Case type
        Case "header"
            sectionHtml = HeaderHTML
        Case "footer"
            sectionHtml = FooterHTML
    End Select

    ' Without markup for the section the table is returned without any row.
    If String.IsNullOrEmpty(sectionHtml) Then
        Return wrapperTable
    End If

    For Each item As IElement In tableElements
        wrapperCell.AddElement(item)
    Next

    wrapperTable.Rows.Add(New PdfPRow({wrapperCell}))

    Return wrapperTable
End Function

调试时,在ParseXHtml执行之后逐步检查headerElements,结果显示:

1 Table (correct)
1 Row (correct)
2 Cells (correct)
Cell[0] Empty (not correct, there should be an image Element in here, parsed from an <img src="... html element
Cell[1] Contains composite text elements (correct)

我的OnEndPage事件如下:

''' <summary>
''' Draws the header and footer tables onto the page that has just ended.
''' </summary>
''' <param name="writer">Writer whose direct content receives the header/footer.</param>
''' <param name="document">Document being written; supplies the page size and margins.</param>
''' <remarks>
''' headerTable and footerTable are built outside this method, so no HTML parsing
''' or tag-processor set-up happens here.
''' </remarks>
Public Overrides Sub OnEndPage(ByVal writer As PdfWriter, ByVal document As Document)
    document.SetMargins(MarginLeft, MarginRight, MarginTop, MarginBottom)

    If Not (Me.UsesHeader OrElse Me.UsesFooter) Then
        Return
    End If

    Dim ct As New ColumnText(writer.DirectContent)

    If Me.UsesHeader Then
        'Create the header rectangle
        Dim headerRect As New Rectangle(0, document.PageSize.Height, document.PageSize.Width, CalculatedHeaderHeight)

        headerRect.Left += MarginLeft
        headerRect.Right -= MarginRight
        headerRect.Top += MarginTop
        headerRect.Bottom -= MarginBottom

        ' A "first page only" header is drawn on page 1; every other header type on all pages.
        If HeaderType <> EnumHeaderDisplayType.FirstPageOnly OrElse writer.PageNumber = 1 Then
            ct.SetSimpleColumn(headerRect)
            ct.AddElement(headerTable)
            ct.Go()
        End If
    End If

    If Me.UsesFooter Then
        Dim pageSize As Rectangle = document.PageSize

        Dim footerRect As New Rectangle(0, 0, pageSize.Width, CalculatedFooterHeight)
        footerRect.BorderWidth = 0
        footerRect.Left += document.LeftMargin
        footerRect.Right -= document.RightMargin
        footerRect.Top += CalculatedFooterHeight
        footerRect.Bottom += document.BottomMargin

        ct.SetSimpleColumn(footerRect)
        ct.AddElement(footerTable)
        ct.Go()
    End If
End Sub

所以我认为自定义图像标签处理器需要在这个阶段应用,但我无法看到OnEndPage中我可以使用它。

1 个答案:

答案 0 :(得分:0)

所以我想出了答案。主文档的管道处理不适用于构建页眉和页脚的编写器。

我还必须在pageevent中设置标记处理程序,cssresolvers,字体和管道。完成此操作后,处理图像并将处理过的图像的字节应用于每个pageevent(或pageend)上的主文档

所以我的代码变成了

' Rebuild the header table: SetTable parses Me.HeaderHTML into the fresh element
' list and returns the table; the height of its first row is stored as the header height.
If Me.UsesHeader Then
            headerElements = New ElementList() ' starts empty; passed to SetTable together with the HTML
            headerTable = SetTable(headerElements, Me.HeaderHTML)
            CalculatedHeaderHeight = headerTable.Rows(0).MaxHeights
        End If

        ' Same steps for the footer, using Me.FooterHTML.
        If Me.UsesFooter Then
            footerElements = New ElementList() ' starts empty; passed to SetTable together with the HTML
            footerTable = SetTable(footerElements, Me.FooterHTML)
            CalculatedFooterHeight = footerTable.Rows(0).MaxHeights
        End If

其中footerElements和headerElements的类型为ElementList,并且SetTable函数是:

''' <summary>
''' Parses <paramref name="htmlcode"/> through an XMLWorker pipeline that uses the custom
''' IMG tag processor, the application's CSS and its fonts, collects the resulting elements
''' in <paramref name="elements"/> and wraps them in a single-cell, borderless, full-width table.
''' </summary>
''' <param name="elements">List that receives the parsed elements; its contents are added to the table cell.</param>
''' <param name="htmlcode">HTML markup to parse. Nothing or an empty string is treated as "no content".</param>
''' <returns>A table with exactly one row holding one cell with the parsed elements.</returns>
Public Function SetTable(ByVal elements As ElementList, ByVal htmlcode As String) As PdfPTable

    Dim tagProcessors As DefaultTagProcessorFactory = CType(Tags.GetHtmlTagProcessorFactory(), DefaultTagProcessorFactory)
    tagProcessors.RemoveProcessor(HTML.Tag.IMG) ' remove the default processor
    tagProcessors.AddProcessor(HTML.Tag.IMG, New CustomImageTagProcessor()) ' use our new processor

    Dim cssResolver As ICSSResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(True)
    cssResolver.AddCssFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/css/pdf.css"), True)

    'Setup Fonts
    Dim xmlFontProvider As XMLWorkerFontProvider = New XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS)
    xmlFontProvider.RegisterDirectory(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "assets/fonts/"))

    Dim cssAppliers As CssAppliers = New CssAppliersImpl(xmlFontProvider)

    Dim htmlContext As HtmlPipelineContext = New HtmlPipelineContext(cssAppliers)
    htmlContext.SetAcceptUnknown(True)
    htmlContext.SetTagFactory(tagProcessors)

    ' Pipeline chain: CSS resolver -> HTML -> element handler that fills "elements".
    Dim pdf As ElementHandlerPipeline = New ElementHandlerPipeline(elements, Nothing)
    Dim htmlp As HtmlPipeline = New HtmlPipeline(htmlContext, pdf)
    Dim css As CssResolverPipeline = New CssResolverPipeline(cssResolver, htmlp)

    Dim worker As XMLWorker = New XMLWorker(css, True)
    Dim p As XMLParser = New XMLParser(worker)

    Dim holderTable As PdfPTable = New PdfPTable({1})
    holderTable.HorizontalAlignment = Element.ALIGN_LEFT
    holderTable.WidthPercentage = 100

    Dim holderCell As New PdfPCell()
    holderCell.Padding = 0
    holderCell.UseBorderPadding = False
    holderCell.Border = 0

    ' Feed the markup to the parser as text. Converting it to ASCII bytes first would
    ' replace every non-ASCII character with "?", and the reader is disposed when done.
    If Not String.IsNullOrEmpty(htmlcode) Then
        Using reader As TextReader = New StringReader(htmlcode)
            p.Parse(reader)
        End Using
    End If

    For Each el As IElement In elements
        holderCell.AddElement(el)
    Next

    Dim holderRow As New PdfPRow({holderCell})
    holderTable.Rows.Add(holderRow)

    Return holderTable

End Function

这解决了我的问题