使用iSharpText从PDF删除链接不会删除所有链接

时间:2019-05-31 20:07:18

标签: vb.net pdf hyperlink itext

从我在网上发现的内容以及在其他类似问题中读到的内容来看,删除链接的方法不将其写在注释中,基本上用空注释替换了包含链接的注释。这在某种程度上有效,但不会删除所有链接。

我似乎能够写出指向.txt文件的所有链接,但无法删除它们。下面是我的代码,我只是以字节为单位读取.pdf,然后从返回的字节中写回文件,以进行测试。

即使访问PDF的用户使用的“从URL创建链接”选项设置为true的PDF阅读器,我如何删除或禁用所有超链接,mailto链接等?

  Public Function RemoveHyperLinks(bytes As Byte()) As Byte()
            Dim reader As PdfReader = New PdfReader(bytes)
            Try
                Dim pageCount As Integer = reader.NumberOfPages
                For i As Integer = 1 To pageCount
                    Try
                        Dim pageDictionary As PdfDictionary = reader.GetPageN(i)
                        Dim annots As PdfArray = pageDictionary.GetAsArray(PdfName.ANNOTS)
                        Dim newAnnots As New PdfArray()
                        If annots IsNot Nothing AndAlso annots.Length > 0 AndAlso annots.ArrayList.Count > 0 Then
                            If Not ContainsHyperLinks(annots) Then
                                reader.Dispose()
                                Return bytes
                            End If
                            For Each a As PdfObject In annots.ArrayList
                                Dim annotDict As PdfDictionary = CType(PdfReader.GetPdfObject(a), PdfDictionary)
                                If annotDict.Get(PdfName.SUBTYPE) IsNot Nothing AndAlso Not annotDict.Get(PdfName.SUBTYPE).Equals(PdfName.LINK) Then
                                    newAnnots.Add(annotDict)
                                End If
                            Next
                            pageDictionary.Put(PdfName.ANNOTS, newAnnots)
                        End If
                    Catch ex As Exception
                        Return bytes
                    End Try
                Next
            Catch ex As Exception
            End Try

            Using ms As New MemoryStream()
                Using Doc As New iTextSharp.text.Document()
                    Using writer As New PdfCopy(Doc, ms)
                        Doc.Open()
                        Dim i As Integer = 1
                        While i <= reader.NumberOfPages
                            writer.AddPage(writer.GetImportedPage(reader, i))
                            System.Math.Max(System.Threading.Interlocked.Increment(i), i - 1)
                        End While
                        Doc.Close()
                    End Using
                End Using
                reader.Dispose()
                Return ms.ToArray()
            End Using
            Return bytes
        End Function

        Private Function ContainsHyperLinks(list As PdfArray) As Boolean
            Dim t As Boolean
            Try
                For Each a As PdfObject In list.ArrayList
                    Dim annotDict As PdfDictionary = CType(PdfReader.GetPdfObject(a), PdfDictionary)

                    If annotDict.Get(PdfName.SUBTYPE).Equals(PdfName.LINK) OrElse annotDict.Get(PdfName.A) IsNot Nothing Then
                        Dim annotAction As PdfDictionary = annotDict.GetAsDict(PdfName.A)
                        If annotAction.Get(PdfName.S).Equals(PdfName.URI) Then
                            Dim uriLink As PdfString = annotAction.GetAsString(PdfName.URI)
                            If annotAction.GetAsString(PdfName.URI) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", uriLink.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                        If annotAction.Get(PdfName.S).Equals(PdfName.URL) Then
                            Dim urlLink As PdfString = annotAction.GetAsString(PdfName.URL)
                            If annotAction.GetAsString(PdfName.URL) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", urlLink.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                        If annotAction.Get(PdfName.S).Equals(PdfName.GOTOR) Then
                            Dim gotorLink As PdfString = annotAction.GetAsString(PdfName.GOTOR)
                            If annotAction.GetAsString(PdfName.GOTOR) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", gotorLink.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                        If annotAction.Get(PdfName.S).Equals(PdfName.GOTOE) Then
                            Dim gotoeLink As PdfString = annotAction.GetAsString(PdfName.GOTOE)
                            If annotAction.GetAsString(PdfName.GOTOE) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", gotoeLink.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                        If annotAction.Get(PdfName.S).Equals(PdfName.GOTO) Then
                            Dim gotoLink As PdfString = annotAction.GetAsString(PdfName.GOTO)
                            If annotAction.GetAsString(PdfName.GOTO) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", gotoLink.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                        If annotAction.Get(PdfName.S).Equals(PdfName.JAVASCRIPT) Then
                            Dim js As PdfString = annotAction.GetAsString(PdfName.JAVASCRIPT)
                            If annotAction.GetAsString(PdfName.JAVASCRIPT) IsNot Nothing Then
                                File.AppendAllText("C:\pdfTests\links.txt", js.ToString & " " & vbNewLine)
                                t = True
                            End If
                        End If
                    End If
                Next
            Catch ex As Exception
                File.AppendAllText("C:\pdfTests\errors.txt", ex.ToString)
            Return False
        End Try
        Return t
    End Function

0 个答案:

没有答案