问题:如何下载嵌入到Excel中的pdf文件?
这个问题已经问了很多遍了,但是我在任何地方都没有看到一个有效的答案。
因此,这是一种尝试自我回答问题的尝试。该代码有效,并且不依赖于不可靠的.Verb Verb:=xlPrimary
方法。
答案 0 :(得分:9)
注意:这仅适用于pdf文件。如果混合嵌入文件,则将无法正常工作。
基本准备工作
假设我们的Excel文件C:\Users\routs\Desktop\Sample.xlsx
嵌入了2个Pdf文件,如下所示。
出于测试目的,我们将在桌面C:\Users\routs\Desktop\Temp
上创建一个临时文件夹。
逻辑:
Excel将oleObjects
保存在\xl\embeddings\
文件夹中。如果您将Excel文件重命名为zip并用Winzip打开,则可以看到以下内容
如果您提取bin文件并将其重命名为pdf,则可以在Microsoft Edge
中打开pdf,但不能在任何其他pdf查看器中打开。为了使其与任何其他pdf查看器兼容,我们将必须进行一些Binary
阅读和编辑。
如果在任何十六进制编辑器中打开bin文件,则会看到以下内容。我使用了在线十六进制编辑器https://hexed.it/
我们必须删除%PDF
之前的所有内容
我们将尝试找到%PDF
...或更具体地说%
,P
,D
和F
的8位无符号值< / p>
如果在十六进制编辑器中向下滚动,您将获得这四个值
现在我们要做的就是读取二进制文件并删除%PDF
之前的所有内容,并以.Pdf
扩展名保存文件。
代码:
Option Explicit
Const TmpPath As String = "C:\Users\routs\Desktop\Temp"
Const ExcelFile As String = "C:\Users\routs\Desktop\Sample.xlsx"
Const ZipName As String = "C:\Users\routs\Desktop\Sample.zip"
Sub ExtractPDF()
Dim tmpPdf As String
Dim oApp As Object
Dim i As Long
'~~> Deleting any previously created files. This is
'~~> usually helpful from 2nd run onwards
On Error Resume Next
Kill ZipName
Kill TmpPath & "\*.*"
On Error GoTo 0
'~~> Copy and rename the Excel file as zip file
FileCopy ExcelFile, ZipName
Set oApp = CreateObject("Shell.Application")
'~~> Extract the bin file from xl\embeddings\
For i = 1 To oApp.Namespace(ZipName).items.Count
oApp.Namespace(TmpPath).CopyHere oApp.Namespace(ZipName).items.Item("xl\embeddings\oleObject" & i & ".bin")
tmpPdf = TmpPath & "\oleObject" & i & ".bin"
'~~> Read and Edit the Bin File
If Dir(tmpPdf) <> "" Then ReadAndWriteExtractedBinFile tmpPdf
Next i
MsgBox "Done"
End Sub
'~~> Read and ReWrite Bin File
Sub ReadAndWriteExtractedBinFile(s As String)
Dim intFileNum As Long, bytTemp As Byte
Dim MyAr() As Long, NewAr() As Long
Dim fileName As String
Dim i As Long, j As Long, k As Long
j = 1
intFileNum = FreeFile
'~~> Open the bing file
Open s For Binary Access Read As intFileNum
'~~> Get the number of lines in the bin file
Do While Not EOF(intFileNum)
Get intFileNum, , bytTemp
j = j + 1
Loop
'~~> Create an array to store the filtered results of the bin file
'~~> We will use this to recreate the bin file
ReDim MyAr(1 To j)
j = 1
'~~> Go to first record
If EOF(intFileNum) Then Seek intFileNum, 1
'~~> Store the contents of bin file in an array
Do While Not EOF(intFileNum)
Get intFileNum, , bytTemp
MyAr(j) = bytTemp
j = j + 1
Loop
Close intFileNum
'~~> Check for the #PDF and Filter out rest of the data
For i = LBound(MyAr) To UBound(MyAr)
If i = UBound(MyAr) - 4 Then Exit For
If Val(MyAr(i)) = 37 And Val(MyAr(i + 1)) = 80 And _
Val(MyAr(i + 2)) = 68 And Val(MyAr(i + 3)) = 70 Then
ReDim NewAr(1 To j - i + 2)
k = 1
For j = i To UBound(MyAr)
NewAr(k) = MyAr(j)
k = k + 1
Next j
Exit For
End If
Next i
intFileNum = FreeFile
'~~> Decide on the new name of the pdf file
'~~> Format(Now, "ddmmyyhhmmss") This method will awlays ensure that
'~~> you will get a unique filename
fileName = TmpPath & "\" & Format(Now, "ddmmyyhhmmss") & ".pdf"
'~~> Write the new binary file
Open fileName For Binary Lock Read Write As #intFileNum
For i = LBound(NewAr) To UBound(NewAr)
Put #intFileNum, , CByte(NewAr(i))
Next i
Close #intFileNum
End Sub
输出