在一个文件夹中,我有10000个文件,文件名的结构如下:
File1_0, File1_1, File1_2, File1_3 File2_0, File2_1, File2_2 File3_0, File3_1, File3_2, File3_3 ... File1000_0, File1000_1
我想删除每个文件的旧修订版本,只保留最新的修订版本。处理后,上述文件应变为:
File1_3 File2_2 File3_3 .... File1000_1
我目前的思路是:把去掉 _xx 后缀的名称放入数组中,然后删除重复项。但我认为这种做法并不正确,目前仍卡在应该如何实现的逻辑上。
这是代码:
Option Explicit

' Lists the distinct file-name prefixes (base name without its trailing
' "_<revision>" part) found in sourcefolder, one numbered line per prefix.
' Nothing is deleted here; this only reports which file series exist.

Dim fso, folder, sourcefolder, file
Dim i       ' loop counter for the final listing
Dim x       ' base name (no extension) of the file being examined
Dim y       ' "@"-terminated list of prefixes handed to RemoveDuplicates
Dim z       ' array of unique prefixes returned by RemoveDuplicates
Dim pos     ' position of the last underscore in x (0 when there is none)
Dim prefix  ' x with the "_<revision>" suffix removed

sourcefolder = "C:\test"
Set fso = CreateObject("Scripting.FileSystemObject")
Set folder = fso.GetFolder(sourcefolder)

For Each file In folder.Files
    x = fso.GetBaseName(file)

    ' Cut at the LAST underscore so revisions of any width (_3, _10, _123)
    ' are stripped correctly; a fixed "drop two characters" only works for
    ' single-digit revisions and fails on very short names.
    pos = InStrRev(x, "_")
    If pos > 1 Then
        prefix = Left(x, pos - 1)
    Else
        ' No usable underscore: treat the whole base name as the prefix.
        prefix = x
    End If

    ' Skip empty names so the only empty entry is the one created by the
    ' trailing "@" separator.
    If prefix <> "" Then
        y = y & prefix & "@"
    End If
Next

z = RemoveDuplicates(y)

' y ends with "@", so the last element of z is an empty string; stopping at
' UBound(z) - 1 leaves that placeholder out of the listing.
For i = 0 To UBound(z) - 1
    WScript.Echo i + 1 & " " & z(i)
Next
Function RemoveDuplicates(str)
    ' Splits str on "@" and returns an array holding each distinct piece once,
    ' in order of first appearance. Comparison is case-insensitive and the
    ' spelling of the first occurrence is the one that is kept.
    ' A blank or whitespace-only str yields an empty array.
    ' Empty pieces (for example from a trailing "@") are kept as an element.
    Dim seen, pieces, idx

    If Trim(str) = "" Then
        RemoveDuplicates = Array()
        Exit Function
    End If

    Set seen = CreateObject("Scripting.Dictionary")
    seen.CompareMode = vbTextCompare   ' keys compare case-insensitively

    pieces = Split(str, "@")
    For idx = LBound(pieces) To UBound(pieces)
        If Not seen.Exists(pieces(idx)) Then
            seen.Add pieces(idx), True
        End If
    Next

    RemoveDuplicates = seen.Keys
End Function
答案 0 :(得分:1)
解决问题的一种方法是建立一个字典,将文件的基本名称映射到最高修订版本号:
' Pass 1: build a dictionary mapping each file-name prefix to the highest
' revision number seen for it. Uses fso and sourcefolder set up earlier.
Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = vbTextCompare   ' prefixes compare case-insensitively

' A Folder object itself cannot be enumerated; iterate its Files collection.
For Each f In fso.GetFolder(sourcefolder).Files
    basename = fso.GetBaseName(f.Name)
    a = Split(basename, "_")

    ' Only consider names of the form <prefix>_<number>; anything else would
    ' raise a type mismatch on the numeric conversion below.
    If UBound(a) > 0 Then
        If IsNumeric(a(UBound(a))) Then
            revision = CLng(a(UBound(a)))
            prefix = a(0)

            If Not d.Exists(prefix) Then
                ' First file of this series: record its revision explicitly.
                d(prefix) = revision
            ElseIf d(prefix) < revision Then
                d(prefix) = revision
            End If
        End If
    End If
Next
然后运行第二个循环,删除所有修订版本号低于该最高版本号的文件:
' Pass 2: delete every file whose revision is lower than the highest revision
' recorded for its prefix in dictionary d (filled by the previous loop).
' A Folder object itself cannot be enumerated; iterate its Files collection.
For Each f In fso.GetFolder(sourcefolder).Files
    basename = fso.GetBaseName(f.Name)
    a = Split(basename, "_")

    ' Ignore files that do not follow the <prefix>_<number> naming scheme.
    If UBound(a) > 0 Then
        If IsNumeric(a(UBound(a))) Then
            revision = CLng(a(UBound(a)))
            prefix = a(0)

            ' Nested Ifs instead of "And": VBScript evaluates both operands of
            ' And, and reading d(prefix) for a missing key would add that key.
            If d.Exists(prefix) Then
                If revision < d(prefix) Then
                    f.Delete
                End If
            End If
        End If
    End If
Next
请注意,此代码假定基本名称中只有一个下划线,即分隔前缀和修订版本号的那个下划线。如果文件名包含多个下划线(例如 foo_bar_1.txt),则需要调整前缀和修订版本号的提取方式来处理这种情况。
话虽如此,我**强烈**建议不要通过文件名来进行修订管理。请使用版本控制系统(Git、Mercurial、Subversion 等),它们正是为此而发明的。
答案 1 :(得分:1)
Option Explicit

' Single-pass cleanup: for every series of files named <base>_<number>.<ext>
' in sourceFolder, keep only the file with the highest number and delete the
' rest. Files whose base name does not end in "_<digits>" are left untouched.

' Folder to process
Dim sourceFolder : sourceFolder = "."

Dim fileSystem
Set fileSystem = WScript.CreateObject("Scripting.FileSystemObject")

' Pattern that splits a base name into (series name)_(sequence digits)
Dim namePattern
Set namePattern = New RegExp
namePattern.Pattern = "^(.*)_([0-9]+)$"

' newest maps a series key to Array(highest sequence so far, full path of
' the file carrying that sequence).
Dim newest
Set newest = WScript.CreateObject("Scripting.Dictionary")

Dim entry, hits, parts, seriesKey, seqNo

For Each entry In fileSystem.GetFolder(sourceFolder).Files

    ' Attempt to split the base name into series name and sequence
    Set hits = namePattern.Execute(fileSystem.GetBaseName(entry.Name))

    ' Names that do not match the pattern are not part of any series
    If hits.Count > 0 Then
        Set parts = hits.Item(0).SubMatches

        ' The extension is part of the key so that e.g. a_1.txt and a_1.doc
        ' are treated as different series; the key is lower-cased.
        seriesKey = LCase(parts(0) & "." & fileSystem.GetExtensionName(entry.Name))

        ' Numeric conversion, so zero-padded sequences compare by value
        seqNo = CLng(parts(1))

        If Not newest.Exists(seriesKey) Then
            ' First file of this series: remember its sequence and path
            newest.Add seriesKey, Array(seqNo, entry.Path)

        ElseIf seqNo < newest.Item(seriesKey)(0) Then
            ' Older than the remembered file: delete the current file
            entry.Delete

        ElseIf seqNo > newest.Item(seriesKey)(0) Then
            ' Newer than the remembered file: delete the remembered one,
            ' then remember the current file instead
            fileSystem.DeleteFile newest.Item(seriesKey)(1)
            newest.Item(seriesKey) = Array(seqNo, entry.Path)
        End If
        ' Equal sequence numbers fall through: both files are kept.
    End If
Next