在word文档中替换具有特定样式的文本并不适用于所有OpenXmlPart:s

时间:2015-01-15 13:44:05

标签: .net vb.net openxml openxml-sdk

我正在尝试更换一堆"字段"在存储在数据库中的Word文档中。 "字段"实际上只是用样式格式化的文本(我相信它在Word中称为快速样式),具有我们选择的特定名称。

此类适用于所有页眉部分和页脚部分,但由于某种原因它不适用于正文(MainDocumentPart)。我可以看到,当我调试时,在主体中找到样式,就像它们在页眉部分和页脚部分中一样,并且插入了文本但是当我之后检查Word文档时,只更新了页眉和页脚。正文仍包含旧值。

Word文档中的XML可能如下所示:

  <w:p w:rsidR="00394599" w:rsidRPr="00162F1F" w:rsidRDefault="00394599" w:rsidP="000663BC">
    <w:pPr>
      <w:pStyle w:val="NovaIssuedBy"/>
    </w:pPr>
    <w:r>
      <w:t>NovaIssuedBy</w:t>
    </w:r>
  </w:p>

当然是w:t元素中的文本NovaIssuedBy应该被替换,正如我所说的,这个代码适用于类似的&#34;字段&#34;在页眉和页脚中。

子UpdateNOVAFieldsInternal遍历文档中的所有部分(我认为),所有标题以及正文和页脚。如果它包含某些样式并且在需要时替换文本,则检查每个部分(在此函数中称为部分)。

子CheckSection检查一个部分是否有我们预定义的所有样式,并在需要时替换文本。

子FindStyleReplaceTextInSection完成魔术,它找到标有StyleName样式的所有部分,并用参数文本中的文本替换它们。

有没有人知道为什么这个代码适用于页眉部分和页脚部分,但不适用于正文(MainDocumentPart)?有没有人有更好的方法来解决这个问题&#34;更新Word文档中特定位置的某些文本(不只是一次,但重复),而不是像我们在此解决方案中那样使用样式和样式名称?

Option Strict On
Option Infer On

Imports Nova.Datasets
Imports DocumentFormat.OpenXml.Packaging
Imports DocumentFormat.OpenXml.Wordprocessing
Imports DocumentFormat.OpenXml
Imports System.Collections.Generic
Imports System.Xml
Imports System.IO
Imports System.Text
Imports System.Xml.Linq
Imports System.Linq

Imports <xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">

Public Class NovaFields
    Private m_Document As EmptyDocument.Data_DocumentRow = Nothing
    Private m_Data As Byte()

    Public Sub New(ByRef document As EmptyDocument.Data_DocumentRow)
        m_Document = document

        With m_Document
            If Not .FileExtension.ToUpper() = "DOCX" Then
                'Exception!
                'This is not a DOCX file!
                Throw New ApplicationException("This is not a DOCX file!")
            End If

            m_Data = .FileData
        End With

    End Sub

    Public Sub UpdateNOVAFields(ByVal parameters As NovaParameters)
        UpdateNOVAFieldsInternal(parameters)

        m_Document.FileData = m_Data
    End Sub

    ''' <summary>
    ''' This will replace all "fields" that are set in parameters in the document in m_data
    ''' </summary>
    ''' <param name="parameters"></param>
    ''' <remarks></remarks>
    Private Sub UpdateNOVAFieldsInternal(ByVal parameters As NovaParameters)
        Using documentStream As New MemoryStream()
            ' Read all the bytes, except the last Zero-byte that "closes the file", hence the -1
            documentStream.Write(m_Data, 0, m_Data.Length - 1)

            Using document As WordprocessingDocument = WordprocessingDocument.Open(documentStream, True)
                ' Assign a reference to the existing document body. 
                Dim body As Body = document.MainDocumentPart.Document.Body

                Dim headerPart As OpenXmlPart
                Dim footerPart As OpenXmlPart

                ' Check each Header-part
                For Each headerPart In document.MainDocumentPart.HeaderParts
                    CheckSection(parameters, headerPart)
                Next headerPart

                ' Check the Body-part
                CheckSection(parameters, document.MainDocumentPart)

                ' Check each Footer-part
                For Each footerPart In document.MainDocumentPart.FooterParts
                    CheckSection(parameters, footerPart)
                Next footerPart

                ' Close and save the document
                document.Close()
            End Using

            ' We must add an extra Zero-byte at the end of the stream to "close the file"
            documentStream.Position = documentStream.Length
            documentStream.WriteByte(0)
            m_Data = documentStream.ToArray()

        End Using
    End Sub
    ''' <summary>
    ''' Check the section provided for all parameters(styles)
    ''' </summary>
    ''' <param name="parameters">The parameters to use</param>
    ''' <param name="section">The section to check</param>
    ''' <remarks></remarks>
    Private Sub CheckSection(parameters As NovaParameters, ByRef section As OpenXmlPart)
        ' A bunch of if-statements like the one below are removed just to shorten the text

        ' IssuedBy
        If (parameters.IssuedBySet) Then
            FindStyleReplaceTextInSection(parameters.IssuedByStyleName, parameters.IssuedBy, section)
        End If

    End Sub

    ''' <summary>
    ''' This function will replace the text in a section formatted with a style called styleName in the section provided
    ''' </summary>
    ''' <param name="styleName">The name of the style to replace the text in</param>
    ''' <param name="text">The new text that will be replacing the old text in the document</param>
    ''' <param name="section">The section to scan for a style with the name styleName</param>
    ''' <remarks></remarks>
    Private Sub FindStyleReplaceTextInSection(styleName As String, text As String, ByRef section As OpenXmlPart)
        Try
            Dim xDoc As XDocument = XDocument.Load(XmlReader.Create(section.GetStream()))

            ' Get all Style elements with an attribute that starts with styleName (sometimes Word adds "Char" after the style name)
            Dim foundStyles As IEnumerable(Of XElement) = _
            From element In xDoc.Root.Descendants() _
            Where Not String.IsNullOrEmpty(element.@w:val) AndAlso element.@w:val.StartsWith(styleName) _
            Select element

            Dim w As XNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

            For Each item In foundStyles
                ' Get the Style-elements parents parent
                Dim parent As XElement = item.Parent.Parent

                ' Check if it is a Run element or Paragraph element
                If parent.Name.LocalName = "r" Then
                    ' Run

                    ' Remove old text elements
                    parent...<w:t>.Remove()
                    ' Add a new text element with the text provided
                    parent.Add(<w:t><%= text %></w:t>)
                Else
                    ' Paragraph, has an extra layer around the Run element

                    ' Remove old text elements
                    parent...<w:t>.Remove()

                    ' Tried different ways of doing it here

                    ' First way of doing it, seems to work only for Header and Footer
                    Dim run As XElement = parent.Element(w + "r")
                    run.Add(<w:t><%= text %></w:t>)

                    ' Second way of doing it, this works too for Header and Footer
                    'parent.<w:r>.FirstOrDefault().Add(<w:t><%= text %></w:t>)
                End If
            Next

            ' Save the XML into the package.
            Using writer As XmlWriter = XmlWriter.Create(section.GetStream(FileMode.Create, FileAccess.Write))
                xDoc.Save(writer)
            End Using
        Catch ex As Exception
            Debug.Print("Error in FindStyleReplaceTextInSection!")
        End Try
    End Sub
End Class

编辑:Visual Studio 2010 + Framework 3.5

1 个答案:

答案 0 :(得分:1)

由于某种原因,必须在页眉和页脚之前检查正文部分。我刚刚在Header-part之前移动了Body-part,现在它可以工作了!

' Check the Body-part
CheckSection(parameters, document.MainDocumentPart)

' Check each Header-part
For Each headerPart In document.MainDocumentPart.HeaderParts
    CheckSection(parameters, headerPart)
Next headerPart

' Check each Footer-part
For Each footerPart In document.MainDocumentPart.FooterParts
    CheckSection(parameters, footerPart)
Next footerPart