统计PDF文件页数的代码改进

时间:2013-01-23 22:31:20

标签: algorithm pdf vbscript wsh

如何获取某个文件夹中所有PDF文件的总页数?我还想将输出写入.txt文件。

因此,我需要知道如何统计每个PDF文件的页数,以及如何把PDF文件名和对应的页数按列输出。

我希望得到如下格式的输出:

PDF file name          Number of page
-------------------------------------
firstpdffile           30 pages
secondpdffile          25 pages
thirdpdffile           10 pages
fourthpdffile           5 pages
-------------------------------------
Total                  70 pages

Option Explicit

' Returns the page count of the PDF file located at sPath.
'
' The whole file is read as text and every occurrence of a "/Type /Page"
' style entry (but not "/Type /Pages") is counted.
'
' Parameters:
'   sPath - path of the file to inspect.
' Returns:
'   The number of matches found, or 1 when nothing matched.
Private Function getPdfPgCnt(ByVal sPath)
    Dim strTStr

    With CreateObject("Adodb.Stream")
        .Open
        .Charset = "x-ansi"         ' single-byte charset for reading the raw file content as text
        .LoadFromFile sPath
        strTStr = .ReadText(-1)     ' -1 = read everything (adReadAll)
        .Close                      ' release the stream explicitly instead of relying on object teardown
    End With

    With (New RegExp)
        ' "Type", optional whitespace, "/Page", then any character except "s"
        ' (case-insensitive), so the "/Pages" tree nodes are not counted.
        .Pattern = "Type\s*/Page[^s]"
        .IgnoreCase = True
        .Global = True
        getPdfPgCnt = .Execute(strTStr).Count
    End With

    ' No match found: report a single page rather than zero.
    If getPdfPgCnt = 0 Then getPdfPgCnt = 1
End Function

'--------------------------------
' Report the page count of every PDF file stored next to this script.
Dim oFso, iFile, sScriptDir
Set oFso = CreateObject("Scripting.FileSystemObject")

' Folder that contains the running .vbs file.
sScriptDir = oFso.GetParentFolderName(WScript.ScriptFullName)

For Each iFile In oFso.GetFolder(sScriptDir).Files
    ' Compare the extension case-insensitively so ".PDF" is accepted too.
    If LCase(oFso.GetExtensionName(iFile.Path)) = "pdf" Then
        WScript.Echo iFile.Path & " has " & getPdfPgCnt(iFile.Path) & " pages."
    End If
Next

Set oFso = Nothing
'--------------------------------

我使用此.bat文件来运行脚本

@echo off
REM Runs pdfpagecount.vbs in the current directory and appends a cleaned-up
REM report to output.txt. The current directory prefix and all backslashes are
REM stripped from every line by BatchSubstitute.bat, leaving bare file names.
color 0A
title PDF page counter

REM Remove leftovers from an earlier run. Errors are discarded so that missing
REM files do not print a Could Not Find message.
del temp1.txt temp2.txt temp3.txt output.txt 2>nul

REM //nologo suppresses the Windows Script Host banner, so no text filter is
REM needed afterwards and file names containing Microsoft are not dropped.
cscript //nologo pdfpagecount.vbs > temp1.txt

REM CALL is required: starting another batch file without it does not return
REM control to the lines that follow in this script.
call BatchSubstitute.bat "%cd%" "" temp1.txt > temp2.txt
call BatchSubstitute.bat "\" "" temp2.txt > temp3.txt
type temp3.txt >> output.txt

del temp1.txt temp2.txt temp3.txt 2>nul

REM Leave this script only, without closing an interactive console session.
exit /b

@echo off
REM -- Prepare the Command Processor --
SETLOCAL ENABLEEXTENSIONS
SETLOCAL DISABLEDELAYEDEXPANSION

REM NOTE: the double-colon lines below are printed verbatim as the help text
REM by the findstr call further down, so they are program output as well as
REM comments. Editing them changes what the script prints.
::BatchSubstitude - parses a File line by line and replaces a substring"
::syntax: BatchSubstitude.bat OldStr NewStr File
::          OldStr [in] - string to be replaced
::          NewStr [in] - string to replace with
::          File   [in] - file to be parsed
:$changed 20100115
:$source http://www.dostips.com
REM Without a first argument: print the double-colon help lines of this very
REM file and stop.
if "%~1"=="" findstr "^::" "%~f0"&GOTO:EOF
REM Main loop over the third argument, the input file.
REM  - find /n /v with an empty search string emits every line prefixed with
REM    its line number in square brackets.
REM  - The for /f splits on the closing bracket, so the second token holds the
REM    text after the number prefix.
REM    NOTE review: leading closing brackets in a source line are presumably
REM    swallowed by the delimiter handling - verify if such input matters.
REM  - A non-empty line gets OldStr replaced by NewStr through a CALL SET
REM    string substitution, and the result is stored with an echo. prefix.
REM  - The inner for /f then picks up that stored command and the do clause
REM    executes it, which prints the substituted text.
REM  - An empty line is reproduced as an empty output line.
for /f "tokens=1,* delims=]" %%A in ('"type %3|find /n /v """') do (
    set "line=%%B"
    if defined line (
        call set "line=echo.%%line:%~1=%~2%%"
        for /f "delims=" %%X in ('"echo."%%line%%""') do %%~X
    ) ELSE echo.
)

1 个答案:

答案 0 :(得分:2)

下面的回答只关注两点:

  1. 类似控制台脚本输出的表格(表头、表体、表尾)
  2. 对数值求和
  3. 为此你需要:

    1. 类似 .NET sprintf 的格式化功能(cFormat 类)
    2. 一个求和变量:在循环之前初始化为0,在循环中累加,在循环结束之后输出/使用
    3. 具体代码如下:

      ' 14490628.vbs
      ' http://stackoverflow.com/questions/14490628/code-improvements-for-counting-the-pages-of-pdf-files
      
      Option Explicit
      
      ' Helper objects: file system access and the .NET-style formatter.
      Dim oFS, oFmt
      Set oFS  = CreateObject("Scripting.FileSystemObject")
      Set oFmt = New cFormat

      ' Folder to list and the column layout of the table.
      Dim sDir, nFWidth, nPWidth, nLWidth
      sDir    = "..\..\10041057\data"
      nFWidth = 25                    ' width of the file name column
      nPWidth = 15                    ' width of the page count column
      nLWidth = nFWidth + nPWidth     ' width of a whole line

      ' Horizontal rules spanning the full table width.
      Dim sDoubleRule, sSingleRule
      sDoubleRule = String(nLWidth, "=")
      sSingleRule = String(nLWidth, "-")

      ' Format strings for header, body row and footer; the @F/@P markers are
      ' replaced with the column widths by insertWidth.
      Dim sHeadFmt, sRowFmt, sFootFmt
      sHeadFmt = insertWidth("{4}{2}{0,-@F}{1,@P}{2}{3}", nFWidth, nPWidth)
      sRowFmt  = insertWidth("{0,-@F}{1,@P}", nFWidth, nPWidth)
      sFootFmt = insertWidth("{3}{2}{0,-@F}{1,@P}{2}{4}", nFWidth, nPWidth)

      ' Header: double rule, column titles, single rule.
      WScript.Echo oFmt.formatArray(sHeadFmt, _
          Array("PDF file name", "Number of pages", vbCrLf, sSingleRule, sDoubleRule))

      ' Body: one row per file, accumulating the total while iterating.
      Dim oEntry, nPages, nTotal
      nTotal = 0
      For Each oEntry In oFS.GetFolder(sDir).Files
          nPages = getPdfPgCnt(oEntry.Path)
          WScript.Echo oFmt.formatArray(sRowFmt, Array(oEntry.Name, nPages))
          nTotal = nTotal + nPages
      Next

      ' Footer: single rule, total line, double rule.
      WScript.Echo oFmt.formatArray(sFootFmt, _
          Array("Total", nTotal, vbCrLf, sSingleRule, sDoubleRule))
      
      Function getPdfPgCnt(sFSpec)
        ' Placeholder page counter: ignores sFSpec and returns a pseudo-random
        ' whole number between LOWEST and HIGHEST (formula taken from the
        ' VBScript documentation for Rnd).
        Const LOWEST  = 1
        Const HIGHEST = 1000
        getPdfPgCnt = Int((HIGHEST - LOWEST + 1) * Rnd + LOWEST)
      End Function
      
      Function insertWidth(sFmt, nFWidth, nPWidth)
        ' Returns sFmt with every "@F" marker replaced by nFWidth and every
        ' "@P" marker replaced by nPWidth.
        Dim sWithFileWidth
        sWithFileWidth = Replace(sFmt, "@F", nFWidth)
        insertWidth = Replace(sWithFileWidth, "@P", nPWidth)
      End Function
      
      ' stolen from http://stackoverflow.com/a/11262441/603855
      Class cFormat
        ' Thin wrapper that exposes .NET composite formatting ("{0,-10}" etc.)
        ' to VBScript through a System.Text.StringBuilder COM object.
        Private m_oSB

        ' Creates the StringBuilder that is reused for every format call.
        Private Sub Class_Initialize()
          Set m_oSB = CreateObject("System.Text.StringBuilder")
        End Sub ' Class_Initialize

        ' Formats a single value vElm with the format string sFmt.
        Public Function formatOne(sFmt, vElm)
          m_oSB.AppendFormat sFmt, vElm
          formatOne = takeBuffer()
        End Function ' formatOne

        ' Formats the values of array aElms with the format string sFmt.
        ' The parentheses around aElms pass the array by value.
        ' NOTE(review): AppendFormat_4 is presumably the COM-visible overload
        ' accepting an object array - confirm against the .NET version in use.
        Public Function formatArray(sFmt, aElms)
          m_oSB.AppendFormat_4 sFmt, (aElms)
          formatArray = takeBuffer()
        End Function ' formatArray

        ' Returns the text collected so far and empties the builder so the
        ' next call starts from a blank buffer.
        Private Function takeBuffer()
          takeBuffer = m_oSB.ToString()
          m_oSB.Length = 0
        End Function ' takeBuffer
      End Class ' cFormat
      

      输出:

      ========================================
      PDF file name            Number of pages
      ----------------------------------------
      xpl.txt                              706
      1.abc                                534
      xpl.vbs                              580
      1.txt                                290
      xx.txt                               302
      ----------------------------------------
      Total                               2412
      ========================================