使用 iTextSharp 从 PDF 中删除水印

时间:2016-02-20 17:47:42

标签: pdf itextsharp watermark

我已经试过这里(here)所建议的解决方案,但我的问题有点不同。在上述链接提供的解决方案中,只有当水印是用 iTextSharp 添加的时候才能将其删除。而就我而言,有些水印是用 Microsoft Word 添加的。使用以下代码时,水印确实从 PDF 中消失了,但当我把 PDF 转换为 Word 时,水印又以图像的形式出现。据我理解,下面的代码只是把水印的不透明度值改为 0,所以水印只是变得不可见了。

/// <summary>
/// Hides semi-transparent page content by forcing the non-stroking alpha
/// (<c>ca</c>) of every page-level ExtGState entry whose <c>ca</c> differs
/// from 1 down to 0, then writes the modified document to a new file.
/// </summary>
/// <remarks>
/// The content is only made invisible; the drawing instructions themselves
/// stay in the page content streams.
/// </remarks>
/// <param name="watermarkedFile">Path of the PDF to read.</param>
/// <param name="unwatermarkedFile">Path of the PDF to create (overwritten if present).</param>
private static void removeWatermark(string watermarkedFile, string unwatermarkedFile)
{
    // NOTE(review): presumably lets iText open files protected by an owner password - confirm.
    PdfReader.unethicalreading = true;

    // Dispose the reader when done, matching the usage sample further down this page.
    using (PdfReader reader = new PdfReader(watermarkedFile))
    {
        reader.RemoveUnusedObjects();
        int pageCount = reader.NumberOfPages;
        for (int i = 1; i <= pageCount; i++)
        {
            PdfDictionary page = reader.GetPageN(i);

            // A page is not guaranteed to carry a resources dictionary.
            PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
            if (resources == null)
                continue;

            PdfDictionary extGStates = resources.GetAsDict(PdfName.EXTGSTATE);
            if (extGStates == null)
                continue;

            foreach (PdfName name in extGStates.Keys)
            {
                // GetAsDict / GetAsNumber return null instead of throwing an
                // InvalidCastException when an entry has an unexpected type.
                PdfDictionary extGStateObject = extGStates.GetAsDict(name);
                if (extGStateObject == null)
                    continue;

                PdfNumber caNumber = extGStateObject.GetAsNumber(PdfName.ca);
                if (caNumber == null)
                    continue;

                if (caNumber.FloatValue != 1f)
                {
                    // Put replaces the existing entry, so no prior Remove is needed.
                    extGStateObject.Put(PdfName.ca, new PdfNumber(0f));
                }
            }
        }

        using (FileStream fs = new FileStream(unwatermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None))
        using (PdfStamper stamper = new PdfStamper(reader, fs))
        {
            // Leaving the using block closes the stamper and thereby writes the file.
            stamper.SetFullCompression();
        }
    }
}

有没有办法通过修改代码来删除这个水印?

2 个答案:

答案 0 :(得分:2)

正如OP已经提到的,如果你能完全控制最初创建水印的过程,就可以按照 @ChrisHaas 在 OP 所引用的那个问题的回答(his answer)中解释的方式来做。

另一方面,如果您创建水印的工具以自己的方式进行,则需要为这些水印定制的方法。

这种方法通常需要编辑某些内容流。顺便说一下,@ChrisHaas 的解决方案也是如此。

为了简化这一过程,首先应该创建一个通用的内容流编辑功能,然后只使用此功能来编辑这些水印。

因此,下面首先给出一个通用内容流编辑器类的示例,然后给出基于该类的、用于编辑OP示例水印的解决方案。

通用内容流编辑器类

这个 PdfContentStreamEditor 类逐条指令地解析原始内容流,同时跟踪部分图形状态;这些指令会被转发给它的 Write 方法,该方法默认按接收顺序把它们原样写回,从而有效地创建出与原始流相同或至少等效的副本。

要实际编辑流,只需覆盖此Write方法,并仅将结果流中所需的说明转发到基本Write方法。

/**
 * A content stream processor that re-emits every parsed instruction through
 * {@link #Write}, so that subclasses can edit a content stream by overriding
 * that single method.
 */
public class PdfContentStreamEditor : PdfContentStreamProcessor
{
    /**
     * This method edits the immediate contents of a page, i.e. its content stream.
     * It explicitly does not descend into form xobjects, patterns, or annotations.
     *
     * The page's existing content entry is removed and the edited instructions
     * are written to the stamper's under-content canvas of the same page.
     */
    public void EditPage(PdfStamper pdfStamper, int pageNum)
    {
        PdfReader pdfReader = pdfStamper.Reader;
        PdfDictionary page = pdfReader.GetPageN(pageNum);
        // Read the original bytes before the content entry is removed from the page.
        byte[] pageContentInput = ContentByteUtils.GetContentBytesForPage(pdfReader, pageNum);
        page.Remove(PdfName.CONTENTS);
        EditContent(pageContentInput, page.GetAsDict(PdfName.RESOURCES), pdfStamper.GetUnderContent(pageNum));
    }

    /**
     * This method processes the content bytes and outputs to the given canvas.
     * It explicitly does not descend into form xobjects, patterns, or annotations.
     *
     * The previously active canvas is restored afterwards, even if processing throws.
     */
    public void EditContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
    {
        PdfContentByte previousCanvas = this.canvas;
        this.canvas = canvas;
        try
        {
            ProcessContent(contentBytes, resources);
        }
        finally
        {
            // Never leave a stale canvas behind after a failure.
            this.canvas = previousCanvas;
        }
    }

    /**
     * This method writes content stream operations to the target canvas. The default
     * implementation writes them as they come, so it essentially generates identical
     * copies of the original instructions the {@link ContentOperatorWrapper} instances
     * forward to it.
     *
     * Every entry of the operand list is serialized in order; entries are separated
     * by a space and the last one is followed by a line feed. The operator argument
     * itself is not written separately (the list is presumably expected to end with
     * the operator - confirm against the parser).
     *
     * Override this method to achieve some fancy editing effect.
     */
    protected virtual void Write(PdfContentStreamProcessor processor, PdfLiteral operatorLit, List<PdfObject> operands)
    {
        int index = 0;

        foreach (PdfObject pdfObject in operands)
        {
            pdfObject.ToPdf(canvas.PdfWriter, canvas.InternalBuffer);
            canvas.InternalBuffer.Append(operands.Count > ++index ? (byte) ' ' : (byte) '\n');
        }
    }

    //
    // constructor giving the parent a dummy listener to talk to
    //
    public PdfContentStreamEditor() : base(new DummyRenderListener())
    {
    }

    //
    // Overrides of PdfContentStreamProcessor methods
    //

    /**
     * Registers the given operator wrapped in a {@link ContentOperatorWrapper} and
     * returns the operator formerly registered under that name, unwrapped if it
     * had been wrapped by this class.
     */
    public override IContentOperator RegisterContentOperator(String operatorString, IContentOperator newOperator)
    {
        ContentOperatorWrapper wrapper = new ContentOperatorWrapper();
        wrapper.OriginalOperator = newOperator;
        IContentOperator formerOperator = base.RegisterContentOperator(operatorString, wrapper);

        ContentOperatorWrapper formerWrapper = formerOperator as ContentOperatorWrapper;
        return formerWrapper != null ? formerWrapper.OriginalOperator : formerOperator;
    }

    /**
     * Makes the resources available to subclasses while the content is processed.
     * The previously active resources are restored afterwards, even if processing throws.
     */
    public override void ProcessContent(byte[] contentBytes, PdfDictionary resources)
    {
        PdfDictionary previousResources = this.resources;
        this.resources = resources;
        try
        {
            base.ProcessContent(contentBytes, resources);
        }
        finally
        {
            this.resources = previousResources;
        }
    }

    //
    // members holding the output canvas and the resources
    //
    protected PdfContentByte canvas = null;
    protected PdfDictionary resources = null;

    //
    // A content operator class to wrap all content operators to forward the invocation to the editor
    //
    class ContentOperatorWrapper : IContentOperator
    {
        // The operator this wrapper stands in for; may be null.
        public IContentOperator OriginalOperator { get; set; }

        public void Invoke(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
        {
            // "Do" is deliberately not forwarded to the original operator, so that
            // processing does not descend into xobjects; it is still written out below.
            if (OriginalOperator != null && !"Do".Equals(oper.ToString()))
            {
                OriginalOperator.Invoke(processor, oper, operands);
            }
            ((PdfContentStreamEditor)processor).Write(processor, oper, operands);
        }
    }

    //
    // A dummy render listener to give to the underlying content stream processor to feed events to
    //
    class DummyRenderListener : IRenderListener
    {
        public void BeginTextBlock() { }

        public void RenderText(TextRenderInfo renderInfo) { }

        public void EndTextBlock() { }

        public void RenderImage(ImageRenderInfo renderInfo) { }
    }
}

某些背景

此类从iTextSharp解析器命名空间扩展PdfContentStreamProcessor。该类最初旨在仅解析内容流以返回文本,图像或图形提取的信息。我们利用它来跟踪图形状态的一部分,更准确地说是与文本提取相关的图形状态参数。

如果某项编辑任务还需要预处理后的信息,例如当前指令所绘制的文本,可以使用自定义的 IRenderListener 实现来获取这些信息,而不是使用此处会忽略这些信息的 DummyRenderListener。

此类架构的灵感来自 iTextSharp.xtra 额外库中的PdfCleanUpProcessor

用于隐藏OP水印的编辑器

正如OP已经发现的那样,他的水印可以这样识别:它是文档中唯一使用了透明度的部分,而该透明度是在 ExtGState 对象中以 ca 值定义的。为了隐藏水印,我们必须

  • 识别关于该值的图形状态更改和
  • 当识别出的当前 ca 值小于1时,不会绘制任何内容。

实际上,该水印是用矢量图形操作构建的,因此我们可以把编辑范围限制在这些操作上。我们甚至可以只修改最终的绘制指令("描边"/"填充"/"填充并描边"及其若干变体),让它不执行会生成透明内容的那一部分(填充或描边)。

/**
 * A content stream editor that suppresses the transparent part of path painting
 * operations: whenever the tracked stroking or non-stroking alpha is below 1, the
 * painting operator is replaced by a variant that skips the stroke and/or the fill.
 *
 * Only the ExtGState entries ca and CA are considered, and graphics state
 * save / restore operations are not tracked.
 */
public class TransparentGraphicsRemover : PdfContentStreamEditor
{
    /**
     * Tracks alpha changes made by "gs" and downgrades path painting operators
     * while transparency is active; everything is then written by the base class.
     *
     * Note that the last entry of the operand list is overwritten in place with
     * the replacement operator.
     */
    protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
    {
        String operatorString = oper.ToString();
        if ("gs".Equals(operatorString))
        {
            // Only act on a well-formed "gs" whose first operand is a name;
            // anything else is passed through unchanged instead of throwing.
            PdfName gsName = operands.Count > 0 ? operands[0] as PdfName : null;
            if (gsName != null)
            {
                updateTransparencyFrom(gsName);
            }
        }

        // Single lookup instead of Keys.Contains followed by the indexer.
        PdfLiteral[] mapping;
        if (operatorMapping.TryGetValue(operatorString, out mapping))
        {
            // Downgrade the drawing operator if transparency is involved
            // For details cf. the comment before the operatorMapping declaration
            int index = 0;
            if (strokingAlpha < 1)
                index |= 1;
            if (nonStrokingAlpha < 1)
                index |= 2;

            oper = mapping[index];
            operands[operands.Count - 1] = oper;
        }

        base.Write(processor, oper, operands);
    }

    // The current transparency values; beware: save and restore state operations are ignored!
    float strokingAlpha = 1;
    float nonStrokingAlpha = 1;

    // Updates the tracked alpha values from the named ExtGState entry;
    // values the entry does not define are left as they are.
    void updateTransparencyFrom(PdfName gsName)
    {
        PdfDictionary extGState = getGraphicsStateDictionary(gsName);
        if (extGState != null)
        {
            PdfNumber number = extGState.GetAsNumber(PdfName.ca);
            if (number != null)
                nonStrokingAlpha = number.FloatValue;
            number = extGState.GetAsNumber(PdfName.CA);
            if (number != null)
                strokingAlpha = number.FloatValue;
        }
    }

    // Looks up the named entry in the ExtGState dictionary of the current resources.
    // Returns null when there are no resources, no ExtGState dictionary, or no such entry.
    PdfDictionary getGraphicsStateDictionary(PdfName gsName)
    {
        if (resources == null)
            return null;

        PdfDictionary extGStates = resources.GetAsDict(PdfName.EXTGSTATE);
        return extGStates == null ? null : extGStates.GetAsDict(gsName);
    }

    //
    // Map from an operator name to an array of operations it becomes depending
    // on the current graphics state:
    //
    // * [0] the operation in case of no transparency
    // * [1] the operation in case of stroking transparency
    // * [2] the operation in case of non-stroking transparency
    // * [3] the operation in case of stroking and non-stroking transparency
    //
    readonly Dictionary<String, PdfLiteral[]> operatorMapping = new Dictionary<String, PdfLiteral[]>();

    public TransparentGraphicsRemover()
    {
        PdfLiteral _S = new PdfLiteral("S");
        PdfLiteral _s = new PdfLiteral("s");
        PdfLiteral _f = new PdfLiteral("f");
        PdfLiteral _fStar = new PdfLiteral("f*");
        PdfLiteral _B = new PdfLiteral("B");
        PdfLiteral _BStar = new PdfLiteral("B*");
        PdfLiteral _b = new PdfLiteral("b");
        PdfLiteral _bStar = new PdfLiteral("b*");
        PdfLiteral _n = new PdfLiteral("n");

        operatorMapping["S"] = new PdfLiteral[]{ _S, _n, _S, _n };
        operatorMapping["s"] = new PdfLiteral[]{ _s, _n, _s, _n };
        operatorMapping["f"] = new PdfLiteral[]{ _f, _f, _n, _n };
        operatorMapping["F"] = new PdfLiteral[]{ _f, _f, _n, _n };
        operatorMapping["f*"] = new PdfLiteral[]{ _fStar, _fStar, _n, _n };
        operatorMapping["B"] = new PdfLiteral[]{ _B, _f, _S, _n };
        operatorMapping["B*"] = new PdfLiteral[]{ _BStar, _fStar, _S, _n };
        operatorMapping["b"] = new PdfLiteral[]{ _b, _f, _s, _n };
        operatorMapping["b*"] = new PdfLiteral[]{ _bStar, _fStar, _s, _n };
    }
}

小心: 此示例编辑器非常简单:

  • 它只考虑由 ExtGState 参数 ca 和 CA 产生的透明度,特别是忽略了遮罩(mask)。
  • 它不会查找保存或恢复图形状态的操作。

这些限制不难解除,但所需的代码量超出了一个 Stack Overflow 回答适合容纳的篇幅。

将此编辑器应用于OP的示例文件,如下所示

// Sample usage: run the TransparentGraphicsRemover over every page of the
// source document and write the result to the destination file.
string source = @"test3.pdf";
string dest = @"test3-noTransparency.pdf";

// The output stream gets its own using statement so that it is released
// even if constructing the stamper fails.
using (PdfReader pdfReader = new PdfReader(source))
using (FileStream output = new FileStream(dest, FileMode.Create, FileAccess.Write))
using (PdfStamper pdfStamper = new PdfStamper(pdfReader, output))
{
    PdfContentStreamEditor editor = new TransparentGraphicsRemover();

    // Page numbers are 1-based.
    for (int i = 1; i <= pdfReader.NumberOfPages; i++)
    {
        editor.EditPage(pdfStamper, i);
    }
}

会生成没有水印的PDF文件。

我没有 OP 用来导出内容的工具(NitroPDF 和 Foxit),所以无法进行最终测试。不过至少 Adobe Acrobat(版本 9.5)在导出到 Word 时不再包含水印。

如果OP的工具在导出的Word文件中仍然有水印痕迹,则可以轻松改进此类,以便在透明度处于活动状态时实际删除路径创建和绘制操作。

Java中的相同

我一开始是用 Java 版的 iText 来实现这个功能的,后来才意识到 OP 想要的是 .NET 版的 iTextSharp。以下是等效的 Java 类:

/**
 * A content stream processor that re-emits every parsed instruction through
 * {@link #write}, so that subclasses can edit a content stream by overriding
 * that single method.
 */
public class PdfContentStreamEditor extends PdfContentStreamProcessor
{
    /**
     * This method edits the immediate contents of a page, i.e. its content stream.
     * It explicitly does not descend into form xobjects, patterns, or annotations.
     * <p>
     * The page's existing content entry is removed and the edited instructions
     * are written to the stamper's under-content canvas of the same page.
     * </p>
     */
    public void editPage(PdfStamper pdfStamper, int pageNum) throws IOException
    {
        PdfReader pdfReader = pdfStamper.getReader();
        PdfDictionary page = pdfReader.getPageN(pageNum);
        // Read the original bytes before the content entry is removed from the page.
        byte[] pageContentInput = ContentByteUtils.getContentBytesForPage(pdfReader, pageNum);
        page.remove(PdfName.CONTENTS);
        editContent(pageContentInput, page.getAsDict(PdfName.RESOURCES), pdfStamper.getUnderContent(pageNum));
    }

    /**
     * This method processes the content bytes and outputs to the given canvas.
     * It explicitly does not descend into form xobjects, patterns, or annotations.
     * <p>
     * The previously active canvas is restored afterwards, even if processing throws.
     * </p>
     */
    public void editContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
    {
        PdfContentByte previousCanvas = this.canvas;
        this.canvas = canvas;
        try
        {
            processContent(contentBytes, resources);
        }
        finally
        {
            // Never leave a stale canvas behind after a failure.
            this.canvas = previousCanvas;
        }
    }

    /**
     * <p>
     * This method writes content stream operations to the target canvas. The default
     * implementation writes them as they come, so it essentially generates identical
     * copies of the original instructions the {@link ContentOperatorWrapper} instances
     * forward to it.
     * </p>
     * <p>
     * Every entry of the operand list is serialized in order; entries are separated
     * by a space and the last one is followed by a line feed. The operator argument
     * itself is not written separately (the list is presumably expected to end with
     * the operator - confirm against the parser).
     * </p>
     * <p>
     * Override this method to achieve some fancy editing effect.
     * </p>
     */
    protected void write(PdfContentStreamProcessor processor, PdfLiteral operator, List<PdfObject> operands) throws IOException
    {
        int index = 0;

        for (PdfObject object : operands)
        {
            object.toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
            canvas.getInternalBuffer().append(operands.size() > ++index ? (byte) ' ' : (byte) '\n');
        }
    }

    //
    // constructor giving the parent a dummy listener to talk to
    //
    public PdfContentStreamEditor()
    {
        super(new DummyRenderListener());
    }

    //
    // Overrides of PdfContentStreamProcessor methods
    //

    /**
     * Registers the given operator wrapped in a {@link ContentOperatorWrapper} and
     * returns the operator formerly registered under that name, unwrapped if it
     * had been wrapped by this class.
     */
    @Override
    public ContentOperator registerContentOperator(String operatorString, ContentOperator operator)
    {
        ContentOperatorWrapper wrapper = new ContentOperatorWrapper();
        wrapper.setOriginalOperator(operator);
        ContentOperator formerOperator = super.registerContentOperator(operatorString, wrapper);
        return formerOperator instanceof ContentOperatorWrapper ? ((ContentOperatorWrapper)formerOperator).getOriginalOperator() : formerOperator;
    }

    /**
     * Makes the resources available to subclasses while the content is processed.
     * The previously active resources are restored afterwards, even if processing throws.
     */
    @Override
    public void processContent(byte[] contentBytes, PdfDictionary resources)
    {
        PdfDictionary previousResources = this.resources;
        this.resources = resources;
        try
        {
            super.processContent(contentBytes, resources);
        }
        finally
        {
            this.resources = previousResources;
        }
    }

    //
    // members holding the output canvas and the resources
    //
    protected PdfContentByte canvas = null;
    protected PdfDictionary resources = null;

    //
    // A content operator class to wrap all content operators to forward the invocation to the editor
    //
    class ContentOperatorWrapper implements ContentOperator
    {
        public ContentOperator getOriginalOperator()
        {
            return originalOperator;
        }

        public void setOriginalOperator(ContentOperator originalOperator)
        {
            this.originalOperator = originalOperator;
        }

        @Override
        public void invoke(PdfContentStreamProcessor processor, PdfLiteral operator, ArrayList<PdfObject> operands) throws Exception
        {
            // "Do" is deliberately not forwarded to the original operator, so that
            // processing does not descend into xobjects; it is still written out below.
            if (originalOperator != null && !"Do".equals(operator.toString()))
            {
                originalOperator.invoke(processor, operator, operands);
            }
            write(processor, operator, operands);
        }

        // The operator this wrapper stands in for; may be null.
        private ContentOperator originalOperator = null;
    }

    //
    // A dummy render listener to give to the underlying content stream processor to feed events to
    //
    static class DummyRenderListener implements RenderListener
    {
        @Override
        public void beginTextBlock() { }

        @Override
        public void renderText(TextRenderInfo renderInfo) { }

        @Override
        public void endTextBlock() { }

        @Override
        public void renderImage(ImageRenderInfo renderInfo) { }
    }
}

PdfContentStreamEditor.java

/**
 * A content stream editor that suppresses the transparent part of path painting
 * operations: whenever the tracked stroking or non-stroking alpha is below 1, the
 * painting operator is replaced by a variant that skips the stroke and/or the fill.
 * <p>
 * Only the ExtGState entries ca and CA are considered, and graphics state
 * save / restore operations are not tracked.
 * </p>
 */
public class TransparentGraphicsRemover extends PdfContentStreamEditor
{
    /**
     * Tracks alpha changes made by "gs" and downgrades path painting operators
     * while transparency is active; everything is then written by the base class.
     * <p>
     * Note that the last entry of the operand list is overwritten in place with
     * the replacement operator.
     * </p>
     */
    @Override
    protected void write(PdfContentStreamProcessor processor, PdfLiteral operator, List<PdfObject> operands) throws IOException
    {
        String operatorString = operator.toString();
        // Only act on a well-formed "gs" whose first operand is a name;
        // anything else is passed through unchanged instead of throwing.
        if ("gs".equals(operatorString) && !operands.isEmpty() && operands.get(0) instanceof PdfName)
        {
            updateTransparencyFrom((PdfName) operands.get(0));
        }

        PdfLiteral[] mapping = operatorMapping.get(operatorString);

        if (mapping != null)
        {
            // Downgrade the drawing operator if transparency is involved
            // For details cf. the comment before the operatorMapping declaration
            int index = 0;
            if (strokingAlpha < 1)
                index |= 1;
            if (nonStrokingAlpha < 1)
                index |= 2;

            operator = mapping[index];
            operands.set(operands.size() - 1, operator);
        }

        super.write(processor, operator, operands);
    }

    // The current transparency values; beware: save and restore state operations are ignored!
    float strokingAlpha = 1;
    float nonStrokingAlpha = 1;

    // Updates the tracked alpha values from the named ExtGState entry;
    // values the entry does not define are left as they are.
    void updateTransparencyFrom(PdfName gsName)
    {
        PdfDictionary extGState = getGraphicsStateDictionary(gsName);
        if (extGState != null)
        {
            PdfNumber number = extGState.getAsNumber(PdfName.ca);
            if (number != null)
                nonStrokingAlpha = number.floatValue();
            number = extGState.getAsNumber(PdfName.CA);
            if (number != null)
                strokingAlpha = number.floatValue();
        }
    }

    // Looks up the named entry in the ExtGState dictionary of the current resources.
    // Returns null when there are no resources, no ExtGState dictionary, or no such entry.
    PdfDictionary getGraphicsStateDictionary(PdfName gsName)
    {
        if (resources == null)
            return null;

        PdfDictionary extGStates = resources.getAsDict(PdfName.EXTGSTATE);
        return extGStates == null ? null : extGStates.getAsDict(gsName);
    }

    //
    // Map from an operator name to an array of operations it becomes depending
    // on the current graphics state:
    //
    // * [0] the operation in case of no transparency
    // * [1] the operation in case of stroking transparency
    // * [2] the operation in case of non-stroking transparency
    // * [3] the operation in case of stroking and non-stroking transparency
    //
    static final Map<String, PdfLiteral[]> operatorMapping = new HashMap<String, PdfLiteral[]>();
    static
    {
        PdfLiteral _S = new PdfLiteral("S");
        PdfLiteral _s = new PdfLiteral("s");
        PdfLiteral _f = new PdfLiteral("f");
        PdfLiteral _fStar = new PdfLiteral("f*");
        PdfLiteral _B = new PdfLiteral("B");
        PdfLiteral _BStar = new PdfLiteral("B*");
        PdfLiteral _b = new PdfLiteral("b");
        PdfLiteral _bStar = new PdfLiteral("b*");
        PdfLiteral _n = new PdfLiteral("n");

        operatorMapping.put("S", new PdfLiteral[]{ _S, _n, _S, _n });
        operatorMapping.put("s", new PdfLiteral[]{ _s, _n, _s, _n });
        operatorMapping.put("f", new PdfLiteral[]{ _f, _f, _n, _n });
        operatorMapping.put("F", new PdfLiteral[]{ _f, _f, _n, _n });
        operatorMapping.put("f*", new PdfLiteral[]{ _fStar, _fStar, _n, _n });
        operatorMapping.put("B", new PdfLiteral[]{ _B, _f, _S, _n });
        operatorMapping.put("B*", new PdfLiteral[]{ _BStar, _fStar, _S, _n });
        operatorMapping.put("b", new PdfLiteral[]{ _b, _f, _s, _n });
        operatorMapping.put("b*", new PdfLiteral[]{ _bStar, _fStar, _s, _n });
    }
}

TransparentGraphicsRemover.java

/**
 * Runs the TransparentGraphicsRemover over every page of the test3.pdf resource
 * and writes the result to test3-noTransparency.pdf in the result folder.
 */
@Test
public void testRemoveTransparentGraphicsTest3() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("test3.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "test3-noTransparency.pdf")))
    {
        PdfReader pdfReader = new PdfReader(resource);
        try
        {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, result);
            PdfContentStreamEditor editor = new TransparentGraphicsRemover();

            // Page numbers are 1-based.
            for (int i = 1; i <= pdfReader.getNumberOfPages(); i++)
            {
                editor.editPage(pdfStamper, i);
            }

            pdfStamper.close();
        }
        finally
        {
            // Release the reader even when editing fails, as the C# sample does via using.
            pdfReader.close();
        }
    }
}

(摘自 EditPageContent.java)

答案 1 :(得分:0)

这是更改颜色(超链接等)的代码。

            PdfCanvasEditor editor = new PdfCanvasEditor() {
        // Names of the operators whose operands are inspected below.
        final String SET_FILL_RGB = "rg";
        final String SET_STROKE_RGB = "RG";

        /**
         * Replaces an "rg" / "RG" instruction whose three color operands are
         * approximately 0, 0, 1 by a "g" / "G" instruction with operand 0;
         * every other instruction is written unchanged.
         */
        @Override
        protected void write(PdfCanvasProcessor processor, PdfLiteral operator, List<PdfObject> operands)
        {
            String operatorString = operator.toString();
            // Three color components plus the trailing operator entry.
            boolean hasRgbOperandCount = operands.size() == 4;

            if (hasRgbOperandCount && SET_FILL_RGB.equals(operatorString) && hasBlueComponents(operands)) {
                super.write(processor, new PdfLiteral("g"), Arrays.asList(new PdfNumber(0), new PdfLiteral("g")));
                return;
            }

            if (hasRgbOperandCount && SET_STROKE_RGB.equals(operatorString) && hasBlueComponents(operands)) {
                super.write(processor, new PdfLiteral("G"), Arrays.asList(new PdfNumber(0), new PdfLiteral("G")));
                return;
            }

            super.write(processor, operator, operands);
        }

        // True when the first three operands are approximately 0, 0 and 1.
        boolean hasBlueComponents(List<PdfObject> operands) {
            return isApproximatelyEqual(operands.get(0), 0)
                    && isApproximatelyEqual(operands.get(1), 0)
                    && isApproximatelyEqual(operands.get(2), 1);
        }

        // True when the object is a number within 0.01 of the reference value.
        boolean isApproximatelyEqual(PdfObject number, float reference) {
            if (!(number instanceof PdfNumber)) {
                return false;
            }
            float distance = Math.abs(reference - ((PdfNumber)number).floatValue());
            return distance < 0.01f;
        }
    };
    // Apply the editor to every page; page numbers are 1-based.
    int pageTotal = pdfDocument.getNumberOfPages();
    for (int pageNumber = 1; pageNumber <= pageTotal; pageNumber++) {
        editor.editPage(pdfDocument, pageNumber);
    }
}