使用Windows批处理比较文件夹中的文件内容

时间:2014-08-18 06:50:11

标签: batch-file compare

我一直在努力解决这个问题……某个文件夹中有很多txt文件,文件名是根据服务器时间戳随机生成的,但该文件夹中任意两个文件的内容都不能相同!有什么想法吗?我唯一的选择是使用Windows批处理。

3 个答案:

答案 0 :(得分:2)

下面的解决方案只处理一次文件名列表,因此它应该运行得更快。

@echo off
rem Report files with identical content among the *.txt files of the current
rem directory. File names are bucketed by size in size[N] variables, so a file
rem is only ever compared against earlier files of exactly the same size.
rem NOTE: delayed expansion is enabled, so file names that contain exclamation
rem marks are not handled correctly.
setlocal EnableDelayedExpansion

for %%a in (*.txt) do (
   if not defined size[%%~Za] (
      rem First file seen with this size: start the bucket with its quoted name
      set size[%%~Za]="%%a"
   ) else (
      rem Assume the file is unique until a match is found in its bucket
      set newName="%%a"
      for %%b in (!size[%%~Za]!) do (
         rem Binary mode gives an exact byte for byte comparison. The default
         rem text mode of fc is line oriented and expands tabs, so it is not
         rem a reliable identity test.
         fc /b "%%a" %%b >NUL
         if not errorlevel 1 (
            echo "%%a" and %%b are identical
            set "newName="
         )
      )
      rem Only files that matched nothing are added to the bucket, so a
      rem duplicate is never used as a comparison target for later files
      if defined newName set "size[%%~Za]=!size[%%~Za]! !newName!"
   )
)

如果两个文件相同,则第二个文件的名称不会保存到列表中,因此它不会再与其他相同大小的文件进行比较。如果最多只可能有两个文件相同,则可以修改此方法,在发现重复时把第一个文件的名称也从列表中删除(在echo ... are identical命令之后),这样会更快。

如果您可以下载计算MD5校验和的第三方程序,则可以使用它来检查两个文件是否相同而不是foxidrive建议的fc命令。这会更快,因为每个文件的MD5校验和只计算一次并存储在另一个数组中(文件名作为索引)。

答案 1 :(得分:1)

@ECHO OFF
REM List every pair of files in sourcedir whose contents are identical.
REM Adjust sourcedir and the *.* file mask to suit your situation.
SETLOCAL
SET "sourcedir=U:\sourcedir"
PUSHD "%sourcedir%"
FOR %%a IN (*.*) DO (
 REM The name test visits each unordered pair once and never pairs a file with itself.
 REM The size test skips the FC run for files that cannot be equal.
 FOR %%c IN (*.*) DO IF /i "%%~nxa" lss "%%~nxc" IF "%%~za"=="%%~zc" (
  REM Binary mode gives an exact byte comparison and also works for non-text files
  FC /b "%%a" "%%c" >NUL
  IF NOT ERRORLEVEL 1 ECHO "%%a" and "%%c" are identical
 )
)
REM Balance the PUSHD above so the directory stack entry, and any temporary
REM drive letter created for a UNC path, is released
POPD

GOTO :EOF

您需要更改sourcedir和文件掩码*.*的设置以适合您的具体情况。


修订版:对每个文件只报告一个与之重复的文件

@ECHO OFF
REM Revision: report at most one duplicate for each file in sourcedir.
REM Adjust sourcedir and the *.* file mask to suit your situation.
SETLOCAL
SET "sourcedir=U:\sourcedir"
PUSHD "%sourcedir%"
FOR %%a IN (*.*) DO (
 REM Cleared for every outer file and set once a duplicate has been reported
 SET "reported="
 REM IF NOT DEFINED is evaluated at run time, so it sees the flag set inside the loop
 FOR %%c IN (*.*) DO IF NOT DEFINED reported IF /i "%%~nxa" lss "%%~nxc" IF "%%~za"=="%%~zc" (
  REM Binary comparison so that non-text files are handled as well
  FC /b "%%a" "%%c" >NUL
  IF NOT ERRORLEVEL 1 (
   ECHO "%%a" and "%%c" are identical
   SET "reported=Y"
  )
 )
)
REM Balance the PUSHD above so the directory stack entry, and any temporary
REM drive letter created for a UNC path, is released
POPD

GOTO:EOF

我还将/b添加到fc以允许非文本文件。

答案 2 :(得分:1)

由于此代码使用certutil,因此仅适用于Windows Vista或更高版本的操作系统。这将检查相同大小的文件中的重复项,并且只会读取每个涉及的文件一次。

@echo off

    rem Lists duplicate files in the target folder. Files are grouped by size
    rem and only files that share their size with another file are hashed
    rem with certutil. Every duplicate is printed on one line, followed by
    rem an equals sign and the first file seen with the same size and hash.

    rem Configure environment
    setlocal enableextensions disabledelayedexpansion

    rem Where to search for files
    set "folder=%cd%"

    rem We need a temporary file to hold the size sorted list of files.
    rem Its name is built from the script name and three random numbers.
    set "tempFile=%temp%\%~nx0.%random%%random%%random%.tmp"

    rem Change to target folder and work from here
    pushd "%folder%"

    rem Retrieve the list of files with its size and set an environment variable
    rem named as the size of the file. The value of this variable will hold the
    rem number of files with this size. Every line written to the temporary
    rem file has the form \size\name\
    (for /f "delims=" %%a in ('dir /a-d /b /os *') do (
        echo \%%~za\%%a\
        set /a "sz_%%~za+=1"
    )) >"%tempFile%"

    rem Retrieve the list of sizes that happen more than one time.
    rem Token a is the size taken from the variable name, token b is the
    rem number of files with that size. Sizes that are not greater than 0
    rem are skipped.
    for /f "tokens=2,3 delims=_=" %%a in ('set sz_') do if %%b gtr 1 if %%a gtr 0 (
        rem Retrieve the list of files with the indicated size.
        rem Token c is the size and token d is the file name.
        setlocal 
        for /f "tokens=1,2 delims=\" %%c in ('findstr /l /b /c:"\%%a\\" "%tempFile%"') do (
            rem The hash variable is only a flag here: it is set on the first
            rem processed line so that later certutil output lines are ignored
            set "hash="
            for /f "skip=1 delims=" %%e in ('certutil -hashfile "%%d"') do if not defined hash (
                rem For each file, take its hash, presumably the first line
                rem after the skipped header of the certutil output. This
                rem hash, wrapped in quotes, is used as a variable name.
                rem If the variable is defined, a previous file has the same
                rem size and hash so it is a duplicate
                set "hash=1"
                if defined "%%e" (
                    rem Print the duplicate name without a line break, then
                    rem enable delayed expansion just to read the variable
                    rem named as the quoted hash, which holds the earlier file
                    <nul set /p ".=%%d = "
                    setlocal enabledelayedexpansion
                    echo(!"%%e"!
                    endlocal
                ) else (
                    rem Store the name of the file in a variable named as the hash of the file
                    set ""%%e"=%%d"
                )
            )

        )
        endlocal
        rem This inner setlocal/endlocal ensures there is no collision between hashes for
        rem files with different sizes
    ) 

    rem Cleanup: restore the directory, remove the temporary file and
    rem discard the variables created by this script
    popd
    del /q "%tempFile%" >nul 2>nul
    endlocal

已编辑:改为不使用临时文件的简化版本(列表在内存中创建),仍然只读取确实需要读取的文件,并且输出更易读。

再次编辑:修正了同一文件大小下存在多组不同重复文件时输出不正确的问题。

@echo off

    rem Lists groups of duplicate files without using a temporary file.
    rem The folder to scan is the first argument, or the current directory
    rem when no argument is given. Files are grouped by size, files sharing
    rem a size are hashed with certutil, and each group of equal files is
    rem printed with the first name on its own line and the remaining names
    rem prefixed by an equals sign.
    setlocal enableextensions disabledelayedexpansion

    set "folder=%~1"
    if not defined folder set "folder=%cd%"

    pushd "%folder%"
    rem For every file: count it in sz_SIZE and append its quoted name to the
    rem fl_SIZE list. Delayed expansion is enabled only to read the current
    rem list; the for variable b carries that value across the endlocal so
    rem the updated list is stored in the outer environment.
    for /f "delims=" %%a in ('dir /a-d /b /os *') do (
        set /a "sz_%%~za+=1"
        setlocal enabledelayedexpansion
        for /f "delims=" %%b in ("!fl_%%~za! ") do (endlocal & set "fl_%%~za=%%b "%%a"")
    )

    rem Process every size shared by more than one file.
    rem Token a is the size and token b is the number of files of that size.
    for /f "tokens=2,3 delims=_=" %%a in ('set sz_') do if %%b gtr 1 (
        rem The first setlocal scopes the hash_ variables to this size and is
        rem closed by the endlocal at the end of this block. The second one
        rem only serves to read the file list and is closed right below.
        setlocal  & setlocal enabledelayedexpansion
        for /f "delims=" %%c in ("!fl_%%a!") do ( 
            endlocal 
            rem Iterate over the quoted file names stored in the list
            for %%d in (%%~c) do (
                rem Files of size 0 get the fixed hash 0 without running
                rem certutil. For other files the hash is the first line
                rem after the one skipped line of the certutil output.
                if %%a equ 0 ( set "hash=0" ) else ( 
                    set "hash="
                    for /f "skip=1 delims=" %%e in ('certutil -hashfile "%%~d"') do if not defined hash set "hash=%%e"
                )

                rem Group file names by hash in variables named hash_ plus the
                rem quoted hash. Values are again carried across endlocal by
                rem for variables.
                setlocal enabledelayedexpansion
                for /f "delims=" %%e in ("!hash!") do if defined hash_"%%~e" (
                    rem Hash already seen: append this name followed by a semicolon
                    for /f "delims=" %%z in ("!hash_"%%~e"!") do (endlocal & set "hash_"%%~e"=%%z"%%~d";")
                ) else (
                    rem First file with this hash: store its name followed by an equals sign
                    endlocal & set "hash_"%%~e"="%%~d"="
                )
            )
        )
        rem Only values that contain a semicolon hold more than one file.
        rem Token c is the variable name and token d is the list of names.
        for /f "tokens=1,* delims==" %%c in ('set hash_ 2^>nul^|find ";"') do (
            rem The first name of a group is printed as is and the others
            rem are printed with a leading equals sign
            set "first=1"
            for %%e in (%%d) do if defined first (set "first=" & echo(%%e) else (echo( = %%e)
        )
        endlocal 
    )
    popd
    endlocal
    exit /b