无法使用OpenxmlSDK从DOCX文件读取形状

时间:2018-07-24 15:33:20

标签: c# ms-word openxml openxml-sdk

我有一个要求,我必须解析DOCX文件并提取所有文本和图像。我正在使用OpenxmlSDK 2.5实现此目的。我能够解析图像和文本,但是DOCX也有一组Shapes,我试图解析它们并将其转换为Drawing图像,这给了我错误的结果。

Here是我要解析的示例docx文件。

我提到了这个Stack overflow discussion,并尝试了相同的方法,但是没有运气。

我使用以下代码创建的resulting DOCX没有解析的图像。

using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Drawing;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml.Vml;
using DocumentFormat.OpenXml;

namespace ReadGroupShape
{

    class Program
    {
        static List<Bitmap> images = new List<Bitmap>();

        static void Main(string[] args)
        {
            MainDocumentPart mainPart = null;
            Body content = null;

            WordprocessingDocument newDoc = WordprocessingDocument.Create("NewDocx.docx", WordprocessingDocumentType.Document);
            MainDocumentPart newMainPart = newDoc.AddMainDocumentPart();
            newMainPart.Document = new Document();
            Body newbody = newMainPart.Document.AppendChild(new Body());

            byte[] docBytes = File.ReadAllBytes("SampleDoc.docx");
            using (MemoryStream ms = new MemoryStream())
            {
                ms.Write(docBytes, 0, docBytes.Length);

                using (WordprocessingDocument wpDoc = WordprocessingDocument.Open(ms, true))
                {
                    mainPart = wpDoc.MainDocumentPart;
                    content = mainPart.Document.Body;

                    foreach (Paragraph par in content.Descendants<Paragraph>())
                    {
                        Paragraph npar = newbody.AppendChild(new Paragraph());

                        foreach (Run run in par.Descendants<Run>())
                        {
                            Run nrun = npar.AppendChild(new Run());
                            DocumentFormat.OpenXml.Drawing.Blip pic = run.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();

                            ImageData imageData = run.Descendants<ImageData>().FirstOrDefault();

                            if (pic == null && imageData == null) 
                            {
                                nrun.InsertAfterSelf(run.CloneNode(true));

                            }                            
                            else
                            {
                                if (pic != null)
                                {

                                    nrun.InsertAfterSelf(CreateImageFromBlip(wpDoc, run, newMainPart, pic));    
                                }
                                else if (imageData != null)
                                {
                                    nrun.InsertAfterSelf(CreateImageFromShape(wpDoc, run, newMainPart, imageData));
                                }
                            }

                        }
                    }

                    mainPart.Document.Save();
                }

            }

            newMainPart.Document.Save();
            newDoc.Close();
        }

        private static Run CreateImageFromShape(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, ImageData imageData)
        {
            ImagePart p = sourceDoc.MainDocumentPart.GetPartById(imageData.RelationshipId) as ImagePart;
            return CreateImageRun(sourceDoc, sourceRun, mainpart, p);
        }

        private static Run CreateImageFromBlip(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, DocumentFormat.OpenXml.Drawing.Blip blip)
        {
            ImagePart newPart = mainpart.AddImagePart(ImagePartType.Png);

            ImagePart p = sourceDoc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;

            Bitmap image = new Bitmap(p.GetStream());

            using (Stream s = p.GetStream())
            {
                s.Position = 0;
                newPart.FeedData(s);
            }

            string partId = mainpart.GetIdOfPart(newPart);

            Drawing newImage = CreateImage(partId);

            return new Run(newImage);
        }

        private static Run CreateImageRun(WordprocessingDocument sourceDoc, Run sourceRun, MainDocumentPart mainpart, ImagePart p)
        {
            ImagePart newPart = mainpart.AddImagePart(ImagePartType.Png);


            using (Stream s = p.GetStream())
            {
                s.Position = 0;
                newPart.FeedData(s);
            }

            string partId = mainpart.GetIdOfPart(newPart);

            Drawing newImage = CreateImage(partId);

            return new Run(newImage);
        }


        private static Drawing CreateImage(string relationshipId)
        {
            // Define the reference of the image.
            return new Drawing(
                               new DocumentFormat.OpenXml.Drawing.Wordprocessing.Inline(
                                   new DocumentFormat.OpenXml.Drawing.Wordprocessing.Extent() { Cx = 990000L, Cy = 792000L },
                                   new DocumentFormat.OpenXml.Drawing.Wordprocessing.EffectExtent()
                                   {
                                       LeftEdge = 0L,
                                       TopEdge = 0L,
                                       RightEdge = 0L,
                                       BottomEdge = 0L
                                   },
                                   new DocumentFormat.OpenXml.Drawing.Wordprocessing.DocProperties()
                                   {
                                       Id = (UInt32Value)1U,
                                       Name = "Picture 1"
                                   },
                                   new DocumentFormat.OpenXml.Drawing.Wordprocessing.NonVisualGraphicFrameDrawingProperties(
                                       new DocumentFormat.OpenXml.Drawing.GraphicFrameLocks() { NoChangeAspect = true }),
                                   new DocumentFormat.OpenXml.Drawing.Graphic(
                                       new DocumentFormat.OpenXml.Drawing.GraphicData(
                                           new DocumentFormat.OpenXml.Drawing.Picture(
                                               new DocumentFormat.OpenXml.Drawing.NonVisualPictureProperties(
                                                   new DocumentFormat.OpenXml.Drawing.NonVisualDrawingProperties()
                                                   {
                                                       Id = (UInt32Value)0U,
                                                       Name = "New Bitmap Image.jpg"
                                                   },
                                                   new DocumentFormat.OpenXml.Drawing.NonVisualPictureDrawingProperties()),
                                               new DocumentFormat.OpenXml.Drawing.BlipFill(
                                                   new DocumentFormat.OpenXml.Drawing.Blip(
                                                       new DocumentFormat.OpenXml.Drawing.BlipExtensionList(
                                                           new DocumentFormat.OpenXml.Drawing.BlipExtension()
                                                           {
                                                               Uri =
                                                                 "{28A0092B-C50C-407E-A947-70E740481C1C}"
                                                           })
                                                   )
                                                   {
                                                       Embed = relationshipId,
                                                       CompressionState =
                                                       DocumentFormat.OpenXml.Drawing.BlipCompressionValues.Print
                                                   },
                                                   new DocumentFormat.OpenXml.Drawing.Stretch(
                                                       new DocumentFormat.OpenXml.Drawing.FillRectangle())),
                                               new DocumentFormat.OpenXml.Drawing.ShapeProperties(
                                                   new DocumentFormat.OpenXml.Drawing.Transform2D(
                                                       new DocumentFormat.OpenXml.Drawing.Offset() { X = 0L, Y = 0L },
                                                       new DocumentFormat.OpenXml.Drawing.Extents() { Cx = 990000L, Cy = 792000L }),
                                                   new DocumentFormat.OpenXml.Drawing.PresetGeometry(
                                                       new DocumentFormat.OpenXml.Drawing.AdjustValueList()
                                                   )
                                                   { Preset = DocumentFormat.OpenXml.Drawing.ShapeTypeValues.Rectangle }))
                                       )
                                       { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" })
                               )
                               {
                                   DistanceFromTop = (UInt32Value)0U,
                                   DistanceFromBottom = (UInt32Value)0U,
                                   DistanceFromLeft = (UInt32Value)0U,
                                   DistanceFromRight = (UInt32Value)0U,
                                   EditId = "50D07946"
                               });
        }



    }
}

我想念的是什么?有人可以帮我解析形状和图像吗?

谢谢。

0 个答案:

没有答案