我正在尝试按照另一篇文章中的示例将多个文档合并为一个文档。
我正在使用 AltChunk altChunk = new AltChunk()。
合并文档时,似乎不会保留每个文档各自的页眉。合并后的文档只包含第一个文档的页眉。如果要合并的第一个文档不包含页眉,那么合并后文档中其余所有文档也都不会有页眉,反之亦然。
我的问题是,如何保留要合并的各个文档各自不同的页眉?
Merge multiple word documents into one Open Xml
using System;
using System.IO;
using System.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace WordMergeProject
{
    /// <summary>
    /// Console sample that appends one Word document to another by embedding the second
    /// document as an altChunk (alternative format import part) behind a page break.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Reads <c>word1.docx</c> and <c>word2.docx</c>, merges them and writes the
        /// result to <c>word3.docx</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            byte[] word1 = File.ReadAllBytes(@"..\..\word1.docx");
            byte[] word2 = File.ReadAllBytes(@"..\..\word2.docx");
            byte[] result = Merge(word1, word2);
            File.WriteAllBytes(@"..\..\word3.docx", result);
        }

        /// <summary>
        /// Appends the document in <paramref name="src"/> to the document in
        /// <paramref name="dest"/> and returns the merged package as a byte array.
        /// </summary>
        /// <param name="dest">Bytes of the .docx package that receives the content.</param>
        /// <param name="src">Bytes of the .docx package to append.</param>
        /// <returns>The bytes of the merged .docx package.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dest"/> or <paramref name="src"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The destination package has no main document body.
        /// </exception>
        private static byte[] Merge(byte[] dest, byte[] src)
        {
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }

            if (src == null)
            {
                throw new ArgumentNullException("src");
            }

            // A GUID keeps the relationship id unique even when Merge is called several
            // times in quick succession on the same destination (clock ticks can repeat).
            string altChunkId = "AltChunkId" + Guid.NewGuid().ToString("N");

            using (var memoryStreamDest = new MemoryStream())
            {
                // Copy into an expandable stream; a MemoryStream wrapping the array directly cannot grow.
                memoryStreamDest.Write(dest, 0, dest.Length);
                memoryStreamDest.Seek(0, SeekOrigin.Begin);

                using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStreamDest, true))
                {
                    MainDocumentPart mainPart = doc.MainDocumentPart;
                    if (mainPart == null || mainPart.Document == null || mainPart.Document.Body == null)
                    {
                        throw new InvalidOperationException("The destination document has no main document body.");
                    }

                    Body body = mainPart.Document.Body;

                    AlternativeFormatImportPart altPart =
                        mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, altChunkId);
                    using (var memoryStreamSrc = new MemoryStream(src))
                    {
                        altPart.FeedData(memoryStreamSrc);
                    }

                    var altChunk = new AltChunk { Id = altChunkId };

                    // Insert a page break so the appended document starts on a new page.
                    var pageBreakP = new Paragraph(new Run(new Break { Type = BreakValues.Page }));

                    // Append after the most recently added chunk, or after the last
                    // body-level paragraph when this is the first merge.
                    DocumentFormat.OpenXml.OpenXmlElement lastElem = body.Elements<AltChunk>().LastOrDefault();
                    if (lastElem == null)
                    {
                        lastElem = body.Elements<Paragraph>().LastOrDefault();
                    }

                    if (lastElem != null)
                    {
                        lastElem.InsertAfterSelf(pageBreakP);
                    }
                    else
                    {
                        // Empty body: put the content in front of any trailing section properties.
                        body.PrependChild(pageBreakP);
                    }

                    pageBreakP.InsertAfterSelf(altChunk);

                    mainPart.Document.Save();
                }

                // Read the bytes only after the package has been disposed, so that all
                // pending changes have been flushed into the stream.
                return memoryStreamDest.ToArray();
            }
        }
    }
}
答案 0 :(得分:1)
几年前我也遇到过这个问题,并为此花了很多时间。最后我写了一篇博客文章,其中附有示例文件的链接。使用 AltChunk 合并带有页眉和页脚的文件并非易事。我将在这里尝试介绍要点。根据页眉和页脚所包含的内容类型(并假设 Microsoft 尚未修复我最初遇到的那些问题),可能无法完全依赖 AltChunk。
(另请注意,可能有一些工具/API 可以处理此问题——我并不清楚,而且在本站询问这类问题属于离题。)
在解决问题之前,它有助于了解Word如何处理不同的页眉和页脚。要感受一下,请启动Word ...
分节符/取消页眉/页脚的链接
Page Layout
标签因此,规则是:
必须使用分节符,并使用未链接的页眉(和/或页脚), 以便在文档中具有不同的页眉/页脚内容。
主/子文档
Word具有一个(著名的)功能,称为“主文档”,它可以将外部(“子”)文档链接到“主”文档中。这样做会自动添加必要的分节符并取消页眉/页脚的链接,从而保留原始文件。
请注意,这里插入了两个分节符,其中一个是“下一页”类型,另一个是“连续”类型。第一个插入到传入的文件中;第二个位于“主”文档中。
插入文件时,必须有两个分节符,因为最后一个段落标记(其中包含文档末尾的分节符)不会被带入目标文档。目标文档中的分节符携带了必要的信息,用于使传入文件的页眉与目标文档中已有的页眉断开链接。
保存母版后,关闭并重新打开时,子文档处于“折叠”状态(文件名是超链接而不是内容)。可以通过返回到“大纲”视图并单击“展开”按钮来展开它们。要将子文档完全合并到文档中,请单击子文档左上方的图标,然后单击“取消链接”。
那么,这是Open XML SDK在合并需要保留其页眉和页脚的文件时需要创建的环境类型。从理论上讲,这两种方法都应该起作用。实际上,我在仅使用分节符时遇到了问题;我从未测试过在Word Open XML中使用“主文档”功能。
插入分节符
这是在使用 AltChunk 引入文件之前插入分节符并取消页眉链接的基本代码。
根据我以前的帖子和文章,只要不涉及复杂的页码,它就可以工作:
/// <summary>
/// Creates <c>C:\Test\mergedDoc.docx</c> and embeds every file found in
/// <c>C:\Test\MergeDocs\</c> into it as an altChunk, in enumeration order.
/// </summary>
/// <remarks>
/// Source files that have header or footer parts are modified on disk: a copy of their
/// final section properties is written into their last paragraph so the section break
/// (and with it the header/footer references) is part of the imported content.
/// </remarks>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnMergeWordDocs_Click(object sender, EventArgs e)
{
    string sourceFolder = @"C:\Test\MergeDocs\";
    string targetFolder = @"C:\Test\";
    string altChunkIdBase = "acID";
    int altChunkCounter = 1;

    using (WordprocessingDocument wdPkgTarget = WordprocessingDocument.Create(targetFolder + "mergedDoc.docx", DocumentFormat.OpenXml.WordprocessingDocumentType.Document, true))
    {
        //Will create document in 2007 Compatibility Mode.
        //In order to make it 2010 a Settings part must be created and a CompatMode element for the Office version set.
        MainDocumentPart wdDocTargetMainPart = wdPkgTarget.MainDocumentPart;
        if (wdDocTargetMainPart == null)
        {
            wdDocTargetMainPart = wdPkgTarget.AddMainDocumentPart();
            Document wdDoc = new Document(
                new Body(
                    new Paragraph(
                        new Run(new Text() { Text = "First Para" })),
                    new Paragraph(new Run(new Text() { Text = "Second para" })),
                    new SectionProperties(
                        new SectionType() { Val = SectionMarkValues.NextPage },
                        new PageSize() { Code = 9 },
                        new PageMargin() { Gutter = 0, Bottom = 1134, Top = 1134, Left = 1318, Right = 1318, Footer = 709, Header = 709 },
                        new Columns() { Space = "708" },
                        new TitlePage())));
            wdDocTargetMainPart.Document = wdDoc;
        }

        Document docTarget = wdDocTargetMainPart.Document;

        // A section break must live in the paragraph properties of the paragraph that
        // ends the section, so close the target's own section before importing anything.
        CopyFinalSectionBreakIntoLastParagraph(docTarget.Body);

        // Each chunk is inserted after the previous one so the files keep their
        // enumeration order; inserting every chunk after the same paragraph would
        // reverse them.
        DocumentFormat.OpenXml.OpenXmlElement insertAfter = docTarget.Body.Elements<Paragraph>().LastOrDefault();

        //Process the individual files in the source folder.
        //Note that this process will permanently change the files by adding a section break.
        var di = new System.IO.DirectoryInfo(sourceFolder);
        foreach (System.IO.FileInfo fi in di.EnumerateFiles())
        {
            using (WordprocessingDocument pkgSourceDoc = WordprocessingDocument.Open(fi.FullName, true))
            {
                MainDocumentPart sourceMainPart = pkgSourceDoc.MainDocumentPart;

                //If the source document has headers or footers we want to retain them.
                //This requires inserting a section break at the end of the document,
                //because the document-level section properties are not imported.
                bool hasHeaderOrFooter = sourceMainPart.GetPartsOfType<HeaderPart>().Any()
                    || sourceMainPart.GetPartsOfType<FooterPart>().Any();
                if (hasHeaderOrFooter)
                {
                    CopyFinalSectionBreakIntoLastParagraph(sourceMainPart.Document.Body);
                }

                sourceMainPart.Document.Save();
            }

            //Insert the source file into the target file using AltChunk
            string altChunkId = altChunkIdBase + altChunkCounter.ToString();
            altChunkCounter += 1;

            AlternativeFormatImportPart chunk =
                wdDocTargetMainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, altChunkId);

            // Dispose the stream so the source file is not left locked.
            using (var fsSourceDocument = new System.IO.FileStream(fi.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
            {
                chunk.FeedData(fsSourceDocument);
            }

            //Create the chunk and link it to the part
            var ac = new AltChunk { Id = altChunkId };
            if (insertAfter == null)
            {
                // No body-level paragraph to anchor on: keep the chunk ahead of any trailing section properties.
                docTarget.Body.PrependChild(ac);
            }
            else
            {
                insertAfter.InsertAfterSelf(ac);
            }

            insertAfter = ac;
            docTarget.Save();
        }
    }
}

/// <summary>
/// Clones the last section properties found in <paramref name="body"/> into the
/// paragraph properties of the last paragraph, turning the document-final section
/// definition into an explicit section break.
/// </summary>
/// <remarks>
/// Does nothing when the body has no section properties or no paragraph. An existing
/// section break on the last paragraph is replaced, so repeated runs do not accumulate
/// duplicate section breaks.
/// </remarks>
/// <param name="body">The document body to modify.</param>
private static void CopyFinalSectionBreakIntoLastParagraph(Body body)
{
    SectionProperties finalSection = body.Descendants<SectionProperties>().LastOrDefault();
    Paragraph lastParagraph = body.Descendants<Paragraph>().LastOrDefault();
    if (finalSection == null || lastParagraph == null)
    {
        return;
    }

    ParagraphProperties paragraphProperties = lastParagraph.ParagraphProperties;
    if (paragraphProperties == null)
    {
        // The typed setter places w:pPr as the first child, as the schema requires.
        paragraphProperties = new ParagraphProperties();
        lastParagraph.ParagraphProperties = paragraphProperties;
    }

    // The typed setter puts w:sectPr at its schema position and replaces any existing one.
    paragraphProperties.SectionProperties = (SectionProperties)finalSection.CloneNode(true);
}
如果页面编号复杂(引自我的博客文章):
不幸的是,Word 应用程序在将 Word 文档“块”(chunk)集成到主文档时存在一个错误。这个过程有个讨厌的习惯:不会保留若干 SectionProperties 设置,其中包括设置某一节是否“首页不同”的那个(<w:titlePg>),以及在某一节中重新开始页码编号的那个(<w:pgNumType>)。只要您的文档不需要管理这类页眉和页脚,就可以使用“altChunk”方法。
但是,如果您确实需要处理复杂的页眉和页脚,目前唯一可用的方法就是把每个文档完整地、逐个部件(part)地复制进来。这是一项相当艰巨的任务,因为可能关联的部件类型很多——它们不仅可以与主文档正文关联,还可以与每个页眉和页脚部件关联。
...或尝试使用主/子文档方法。
主/子文档
这种方法肯定会保留所有信息,它将作为主文档打开,但是,需要Word API(用户代码或自动化代码)“取消链接”子文档以将其转换为单个文档,集成文档。
在“ Open XML SDK生产率工具”中打开主文档文件表明,将子文档插入主文档是相当简单的过程:
带有一个子文档的文档的基本Word Open XML:
<!-- Body of a master document that references one sub-document. -->
<w:body xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<!-- Paragraph styled "Heading1" whose only content is the sub-document reference. -->
<w:p>
<w:pPr>
<w:pStyle w:val="Heading1" />
</w:pPr>
<!-- w:subDoc refers to the sub-document through relationship id rId6. -->
<w:subDoc r:id="rId6" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" />
</w:p>
<!-- Body-level section properties: default header reference (rId7) and a continuous section type. -->
<w:sectPr>
<w:headerReference w:type="default" r:id="rId7" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" />
<w:type w:val="continuous" />
<w:pgSz w:w="11906" w:h="16838" />
<w:pgMar w:top="1417" w:right="1417" w:bottom="1134" w:left="1417" w:header="708" w:footer="708" w:gutter="0" />
<w:cols w:space="708" />
<w:docGrid w:linePitch="360" />
</w:sectPr>
</w:body>
和代码:
/// <summary>
/// Builds the body of a master document that references a single sub-document.
/// </summary>
public class GeneratedClass
{
    /// <summary>
    /// Creates a <see cref="Body"/> holding one "Heading1" paragraph with a sub-document
    /// reference (relationship id "rId6"), followed by continuous section properties
    /// that reference the default header (relationship id "rId7").
    /// </summary>
    /// <returns>The newly created body with its children attached.</returns>
    public Body GenerateBody()
    {
        // Paragraph: style first, then the sub-document reference.
        var headingProperties = new ParagraphProperties(
            new ParagraphStyleId { Val = "Heading1" });
        var subDocumentParagraph = new Paragraph(
            headingProperties,
            new SubDocumentReference { Id = "rId6" });

        // Section properties; children are listed in the order they are appended.
        var finalSection = new SectionProperties(
            new HeaderReference { Type = HeaderFooterValues.Default, Id = "rId7" },
            new SectionType { Val = SectionMarkValues.Continuous },
            new PageSize { Width = (UInt32Value)11906U, Height = (UInt32Value)16838U },
            new PageMargin
            {
                Top = 1417,
                Right = (UInt32Value)1417U,
                Bottom = 1134,
                Left = (UInt32Value)1417U,
                Header = (UInt32Value)708U,
                Footer = (UInt32Value)708U,
                Gutter = (UInt32Value)0U
            },
            new Columns { Space = "708" },
            new DocGrid { LinePitch = 360 });

        return new Body(subDocumentParagraph, finalSection);
    }
}