pdf内容流解析

时间:2011-08-05 01:05:32

标签: c# pdf-generation pdfsharp pdf-parsing

我需要帮助解析一个 PDF。这个 PDF 是在 Illustrator 中制作的,共有 4 个图层,每个图层包含一个图形路径对象。我想做的是获取全部 4 个图形路径,并把它们绘制到另一个与此 PDF 宽高相同的 PDF 文件中,而且绘制位置保持不变。这是我已经开始编写的代码:

    /// <summary>
    /// Splits the content stream of <paramref name="page"/> into consecutive chunks,
    /// each ending with the marked-content terminator "EMC ", and builds one
    /// <see cref="PDFMask"/> per entry of the page's /Resources /Properties dictionary.
    /// </summary>
    /// <param name="page">Page whose /Contents stream and /Resources/Properties are read.</param>
    /// <returns>
    /// The masks that could be extracted, in the order of the /Properties keys. The list is
    /// shorter than the key count when the stream holds fewer "EMC " terminated chunks.
    /// </returns>
    public static List<PDFMask> GetMasksFromPage(PdfPage page)
    {
        List<PDFMask> masks = new List<PDFMask>();

        // NOTE(review): this assumes /Contents is a single stream dictionary; a page whose
        // /Contents is an array of streams is not handled here - confirm against the input files.
        PdfDictionary contents = page.Elements.GetDictionary("/Contents");
        PdfDictionary.PdfStream contentsStream = contents.Stream;

        // Read the decoded bytes once instead of on every loop iteration.
        byte[] content = contentsStream.UnfilteredValue;

        PdfDictionary resources = page.Elements.GetDictionary("/Resources");
        PdfDictionary properties = resources.Elements.GetDictionary("/Properties");
        PdfName[] keys = properties.Elements.KeyNames;

        byte[] endMarker = Encoding.ASCII.GetBytes("EMC ");
        int dataStartPointer = 0;

        // NOTE(review): the i-th key is paired with the i-th "EMC "-terminated chunk purely by
        // position; the chunk's own "/Layer /MCn BDC" tag is not consulted - verify the ordering.
        for (int i = 0; i < keys.Length; i++)
        {
            if (dataStartPointer >= content.Length)
                break; // stream exhausted before all keys were consumed

            // Presumably Utils.Bytes.IndexOf returns a negative value when the pattern is absent.
            int dataEndPointer = Utils.Bytes.IndexOf(content, endMarker, dataStartPointer);
            if (dataEndPointer < 0)
                break; // no further marked-content section

            // Chunk runs from the current start through the end of the "EMC " marker.
            int dataCount = dataEndPointer + endMarker.Length - dataStartPointer;

            PdfDictionary mc = properties.Elements.GetDictionary(keys[i].Value);

            PDFMask mask = new PDFMask();
            mask.name = mc.Elements.GetString("/Title");
            mask.key = keys[i].Value;

            // Copy into the local buffer; mask.data is still null at this point, so it
            // cannot be used as the destination of the copy.
            byte[] data = new byte[dataCount];
            Array.Copy(content, dataStartPointer, data, 0, dataCount);

            mask.parseData(data);
            masks.Add(mask);

            // Skip the single byte (line break) that follows "EMC ".
            dataStartPointer += dataCount + 1;
        }

        return masks;
    }

上面的代码用于获取所有图层的数据,并将其拆分为 4 个对象。

     // Stream attached to the page's /Contents dictionary; according to the surrounding
     // text it carries the raw graphics data of all 4 layers.
     PdfDictionary.PdfStream contentsStream = contents.Stream;

这一行给出了 4 个图层的图形(graphics)二进制数据。下面是表示单个图层的 PDFMask 类:

/// <summary>
/// Holds the data extracted for a single layer: its display name, its key in the
/// page's /Properties dictionary and the raw content-stream bytes.
/// </summary>
public class PDFMask
{
    /// <summary>Layer title.</summary>
    public string name;

    /// <summary>Key of the layer entry.</summary>
    public string key;

    /// <summary>Raw bytes handed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>
    /// Stores the given bytes on this mask. No interpretation of the bytes happens yet.
    /// </summary>
    /// <param name="data">Raw content-stream bytes of the layer.</param>
    public void parseData(byte[] data)
    {
        // Open question from the author: how to turn these bytes into a drawable
        // graphics object (presumably PDFsharp's XGraphics - confirm).
        this.data = data;
    }
}

现在这就是数据源的样子:

   /Layer /MC0 BDC 
   0.75 0.68 0.67 0.902 k
   /GS0 gs
   q 1 0 0 1 396.4473 1835.6143 cm
   0 0 m
   76.497 -132.515 l
   -17.184 -159.051 l
   76.496 -185.607 l
   -0.003 -318.119 l
   -72.563 -252.047 l
   -50.486 -349.178 l
   -202.179 -349.182 l
   -180.097 -252.046 l
   -252.658 -318.116 l
   -329.154 -185.603 l
   -235.473 -159.048 l
   -329.154 -132.511 l
   -252.654 0.002 l
   -180.094 -66.07 l
   -202.175 31.087 l
   -50.482 31.081 l
   -72.563 -66.072 l
   h
   f
   Q
   EMC 

我正在寻找一个解析器(最好是 PDFsharp 的解析器),能够把这些数据解析成某种图形对象,以便我在另一个 PDF 文档中使用。

1 个答案:

答案 0 :(得分:0)

好的,我解决这个问题的办法是按照自己的需求编写了一个解析器。我把代码贴在这里,相信总有一天会对别人有所帮助……

/// <summary>
/// One parsed path directive. <c>t</c> is the operator character taken from the end of the
/// line ('m', 'l' or 'c' when produced by <see cref="PDFMask.parseData"/>). For 'c' lines the
/// first four operands go to <c>a</c>, <c>b</c>, <c>c</c>, <c>d</c> and the last two to
/// <c>x</c>, <c>y</c>; for every other operator only <c>x</c>, <c>y</c> are filled.
/// </summary>
public struct GD { public double x, y, a, b, c, d; public byte t; }

/// <summary>
/// The six numeric operands of a "q ... cm" line, stored in the order they appear:
/// locX, locY, oriX, oriY, xAxis, yAxis.
/// NOTE(review): the field names do not obviously match the usual a b c d e f matrix roles
/// (in the sample data the translation lands in xAxis/yAxis) - confirm with consumers.
/// </summary>
public struct Coordinate { public double locX, locY, oriX, oriY, xAxis, yAxis; }

/// <summary>
/// A single layer extracted from a PDF content stream, together with the path directives
/// and the "q ... cm" operands parsed from its raw bytes.
/// </summary>
public class PDFMask
{
    private string _name;

    /// <summary>The part of the assigned name starting at the first '{', if there was one.</summary>
    public string fun;

    /// <summary>
    /// Layer name. When the assigned value contains '{', the text before it becomes the name
    /// and the remainder (brace included) is stored in <see cref="fun"/>. Otherwise the value
    /// is stored as is and <see cref="fun"/> is left untouched. A null value is stored as null.
    /// </summary>
    public string name
    {
        get
        {
            return _name;
        }
        set
        {
            // Ordinal char search; also avoids dereferencing a null title.
            int brace = value == null ? -1 : value.IndexOf('{');
            if (brace >= 0)
            {
                _name = value.Substring(0, brace);
                fun = value.Substring(brace);
            }
            else
            {
                _name = value;
            }
        }
    }

    /// <summary>Key of the layer entry.</summary>
    public string key;

    /// <summary>Raw bytes last passed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>Directives found by the last <see cref="parseData"/> call, in stream order.</summary>
    public GD[] graphicsDirectives;

    /// <summary>Operands of the last "q ... cm" line seen by <see cref="parseData"/>.</summary>
    public Coordinate coordinate;

    /// <summary>
    /// Scans <paramref name="data"/> line by line ('\n' separated, an optional '\r' before it
    /// is ignored) and collects every line ending in " m", " l" or " c" into
    /// <see cref="graphicsDirectives"/>; a line starting with 'q' and ending in "cm" updates
    /// <see cref="coordinate"/>. All other lines are skipped. A last line without a trailing
    /// line break is processed too.
    /// </summary>
    /// <param name="data">ASCII content-stream bytes of one layer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="FormatException">A recognised line has missing or non-numeric operands.</exception>
    public void parseData(byte[] data)
    {
        if (data == null)
            throw new ArgumentNullException("data");

        this.data = data;

        // Growable list: the number of path segments is not bounded.
        System.Collections.Generic.List<GD> directives = new System.Collections.Generic.List<GD>();

        int lineStart = 0;
        for (int i = 0; i <= data.Length; i++)
        {
            // i == data.Length flushes the final line, which may lack a '\n'.
            if (i < data.Length && data[i] != (byte)'\n')
                continue;

            int lineLength = i - lineStart;
            if (lineLength > 0 && data[lineStart + lineLength - 1] == (byte)'\r')
                lineLength--; // CRLF line ending

            parseLine(data, lineStart, lineLength, directives);
            lineStart = i + 1;
        }

        graphicsDirectives = directives.ToArray();
    }

    /// <summary>
    /// Parses the first <paramref name="count"/> bytes of <paramref name="bytes"/> as a
    /// "q n n n n n n cm" line and returns the six numbers.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line.</param>
    /// <param name="count">Number of valid bytes in <paramref name="bytes"/>.</param>
    /// <returns>The six operands in stream order.</returns>
    /// <exception cref="FormatException">Fewer than six operands follow the first token, or one is not a number.</exception>
    public Coordinate parseDataWriteCoordinate(byte[] bytes, int count)
    {
        return parseCoordinateLine(Encoding.ASCII.GetString(bytes, 0, count));
    }

    /// <summary>
    /// Parses the first <paramref name="count"/> bytes of <paramref name="bytes"/> as a path
    /// directive line whose last token is the operator.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line.</param>
    /// <param name="count">Number of valid bytes in <paramref name="bytes"/>.</param>
    /// <returns>The directive with its operator character and operands.</returns>
    /// <exception cref="FormatException">The line has too few operands, or one is not a number.</exception>
    public GD parseDataWriteGD(byte[] bytes, int count)
    {
        return parseGDLine(Encoding.ASCII.GetString(bytes, 0, count));
    }

    // Classifies one line of the stream and dispatches it to the matching parser.
    private void parseLine(byte[] source, int offset, int length, System.Collections.Generic.List<GD> directives)
    {
        if (length <= 2)
            return;

        byte last = source[offset + length - 1];
        byte beforeLast = source[offset + length - 2];

        if (beforeLast == ' ' && (last == 'c' || last == 'l' || last == 'm'))
            directives.Add(parseGDLine(Encoding.ASCII.GetString(source, offset, length)));
        else if (length > 3 && source[offset] == 'q' && last == 'm' && beforeLast == 'c')
            coordinate = parseCoordinateLine(Encoding.ASCII.GetString(source, offset, length));
    }

    // Splits a line into its space separated tokens.
    private static string[] tokenize(string line)
    {
        return line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // PDF numbers always use '.' as decimal separator, so parsing must not follow the thread culture.
    private static double parseNumber(string token)
    {
        return double.Parse(token, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture);
    }

    // Parses "q n n n n n n cm": token 0 is skipped, tokens 1-6 are the numbers.
    private static Coordinate parseCoordinateLine(string line)
    {
        string[] values = tokenize(line);
        if (values.Length < 7)
            throw new FormatException("Expected six numeric operands in transformation line: '" + line + "'.");

        Coordinate c = new Coordinate();
        c.locX = parseNumber(values[1]);
        c.locY = parseNumber(values[2]);
        c.oriX = parseNumber(values[3]);
        c.oriY = parseNumber(values[4]);
        c.xAxis = parseNumber(values[5]);
        c.yAxis = parseNumber(values[6]);

        return c;
    }

    // Parses "<operands> <operator>": six operands for 'c', two for anything else.
    private static GD parseGDLine(string line)
    {
        string[] values = tokenize(line);
        if (values.Length == 0)
            throw new FormatException("Empty path directive line.");

        GD gd = new GD();
        gd.t = (byte)values[values.Length - 1][0];

        int operandCount = gd.t == 'c' ? 6 : 2;
        if (values.Length < operandCount + 1)
            throw new FormatException("Expected " + operandCount + " numeric operands in path directive: '" + line + "'.");

        if (gd.t == 'c')
        {
            gd.a = parseNumber(values[0]);
            gd.b = parseNumber(values[1]);
            gd.c = parseNumber(values[2]);
            gd.d = parseNumber(values[3]);
            gd.x = parseNumber(values[4]);
            gd.y = parseNumber(values[5]);
        }
        else
        {
            gd.x = parseNumber(values[0]);
            gd.y = parseNumber(values[1]);
        }

        return gd;
    }
}