我用os.listdir读取文件夹中的文件名。其中一类文件名匹配正则表达式r'^[1-9\w]{2}_[1-9\w]{4}[1][7][\d\w]+\.[\d\w]+'
,另一类文件名匹配类似的正则表达式r'^[1-9\w]{2}_[1-9\w]{4}[1][8]+'
。比较的条件是:当两个文件名的前七个字符相同时,执行os.remove(os.path.join(dir_name, each))
。一些示例文件名:bh_txbh171002.xml,bh_txbh180101.xml,ce_txce170101.xml ...
据我所知,这里不能使用match:因为没有可供比较的字符串,它会返回None,而且它只是把文件名与正则表达式进行比较。我在考虑使用类似if folder.itself(file) and file.startswith("......."):这样的条件,
但我不知道如何指定文件名的前七个字符应该与什么进行比较。
老实说,我之前在另一个提问中贴过更糟糕的代码,从那以后我又学到了一些东西:(链接——点击查看)
答案 0 :(得分:0)
正则表达式并不是解决这个问题的合适工具。我没有你的文件,所以生成了一些随机的演示数据:
import random
import string
random.seed(42) # fixed seed so the random demo data is the same on every run
def generateFileNames(amount):
    """Yield ``2 * amount`` random name prefixes of the form ``XX_XXXX``.

    Every ``X`` is drawn from the lowercase ASCII letters and the digits.
    Names are produced in pairs: the second name of a pair repeats the first
    one in 7 out of 10 cases, so the output deliberately contains duplicates.

    Args:
        amount: Number of name pairs to generate.

    Yields:
        Seven-character strings with an underscore at index 2.
    """

    def rndName():
        """Return one random ``XX_XXXX`` name as a list of characters."""
        characters = string.ascii_lowercase + string.digits
        return (random.choices(characters, k=2) + ['_']
                + random.choices(characters, k=4))

    for _ in range(amount):
        name = ''.join(rndName())
        yield name
        # randint(1, 10) > 3 is true for 7 of the 10 possible values, which
        # makes a repeated name more likely than a fresh one.
        if random.randint(1, 10) > 3:
            yield name
        else:
            yield ''.join(rndName())
def generateNumberParts(amount):
    """Yield ``2 * amount`` six-digit strings, produced in pairs.

    The first string of each pair is ``"18"`` followed by four random digits.
    The second one is, in 5 out of 10 cases, ``"17"`` followed by the same
    four digits; otherwise it is six freshly drawn random digits.

    Args:
        amount: Number of pairs to generate.

    Yields:
        Strings of exactly six decimal digits.
    """

    def rndNums(nr):
        """Return ``nr`` random decimal digits as a list of 1-char strings."""
        return random.choices(string.digits, k=nr)

    for _ in range(amount):
        choi = ''.join(rndNums(4))
        # The 18xxxx value is yielded first on purpose, to demonstrate that
        # sorting later on really reorders the pair.
        yield '18' + choi
        if random.randint(1, 10) > 5:
            yield '17' + choi  # same four digits, older prefix
        else:
            yield ''.join(rndNums(6))  # something unrelated
# Number of pairs to generate; each generator yields 2 * m items.
m = 10

# Build the demo file names by pairing each name prefix with a number part
# and appending the ".xml" extension.
filenames = [
    ''.join(x) + '.xml'
    for x in zip(generateFileNames(m), generateNumberParts(m))
]
现在文件名已经保存在一个列表中,可以开始找出哪些是重复项,以及其中哪个的时间戳较新:
# Group the file names by their first seven characters: each key maps to the
# list of all names that start with that prefix.
fileDict = {}
for names in filenames:
    # setdefault creates the empty list the first time a prefix is seen
    fileDict.setdefault(names[0:7], []).append(names)

for k, v in fileDict.items():
    print(k, " ", v)

# Collect the files to delete: for every prefix that occurs more than once,
# all entries except the one that sorts last.
filesToDelete = []
for v in fileDict.values():
    if len(v) == 1:  # only one file with this prefix, nothing to do
        continue
    print(v, " to ", end="")  # debugging output
    # Characters 7-8 of the name are compared as an integer (17 vs. 18 in the
    # real data), so the entry with the highest value ends up last.
    v.sort(key=lambda x: int(x[7:9]))
    print(v)  # debugging output
    filesToDelete.extend(v[:-1])  # everything but the last entry

print("")
print(filesToDelete)
输出:
# the created filenames in your dict by "key [values]"
xa_ji0y ['xa_ji0y188040.xml', 'xa_ji0y501652.xml']
v3_a3zm ['v3_a3zm181930.xml']
mm_jbqe ['mm_jbqe171930.xml']
ck_w5ng ['ck_w5ng180679.xml', 'ck_w5ng348136.xml']
zy_cwti ['zy_cwti184296.xml', 'zy_cwti174296.xml']
41_iblj ['41_iblj182983.xml', '41_iblj172983.xml']
5x_ff0t ['5x_ff0t187453.xml']
sd_bdw2 ['sd_bdw2177453.xml']
vn_vqjt ['vn_vqjt189618.xml', 'vn_vqjt179618.xml']
ep_q85j ['ep_q85j185198.xml', 'ep_q85j175198.xml']
vf_1t2t ['vf_1t2t180309.xml', 'vf_1t2t089040.xml']
11_ertj ['11_ertj188425.xml', '11_ertj363842.xml']
# sorting the names by its integer at 8/9 position of name
['xa_ji0y188040.xml','xa_ji0y501652.xml'] to ['xa_ji0y188040.xml','xa_ji0y501652.xml']
['ck_w5ng180679.xml','ck_w5ng348136.xml'] to ['ck_w5ng180679.xml','ck_w5ng348136.xml']
['zy_cwti184296.xml','zy_cwti174296.xml'] to ['zy_cwti174296.xml','zy_cwti184296.xml']
['41_iblj182983.xml','41_iblj172983.xml'] to ['41_iblj172983.xml','41_iblj182983.xml']
['vn_vqjt189618.xml','vn_vqjt179618.xml'] to ['vn_vqjt179618.xml','vn_vqjt189618.xml']
['ep_q85j185198.xml','ep_q85j175198.xml'] to ['ep_q85j175198.xml','ep_q85j185198.xml']
['vf_1t2t180309.xml','vf_1t2t089040.xml'] to ['vf_1t2t089040.xml','vf_1t2t180309.xml']
['11_ertj188425.xml','11_ertj363842.xml'] to ['11_ertj188425.xml','11_ertj363842.xml']
# list of files to delete
['xa_ji0y188040.xml', 'ck_w5ng180679.xml', 'zy_cwti174296.xml', '41_iblj172983.xml',
'vn_vqjt179618.xml', 'ep_q85j175198.xml', 'vf_1t2t089040.xml', '11_ertj188425.xml']
答案 1 :(得分:0)
我不明白我的代码哪里有问题。我先用某个文件夹的内容定义了一个列表,这样就可以处理每个文件名字符串,对吧?然后我应用了过滤条件,并进一步选出要删除的文件。
import os

# Directory whose file names are scanned for duplicates.
dir_name = "/Python/Test_folder/Schems"
filenames = os.listdir(dir_name)

# os.listdir() returns a plain list, which has neither setdefault() nor
# items(); the grouping therefore needs a dictionary of its own.
fileDict = {}
for names in filenames:
    # Ignore entries without decimal digits at positions 7-8: int() in the
    # sort key below would raise ValueError on them.
    if not names[7:9].isdecimal():
        continue
    fileDict.setdefault(names[0:7], []).append(names)

# Created once, before the loop, so results from all groups accumulate.
filesToDelete = []
for v in fileDict.values():
    if len(v) == 1:  # no second file with this prefix
        continue
    # Sort ascending by the number at positions 7-8 so the highest is last.
    v.sort(key=lambda x: int(x[7:9]))
    filesToDelete.extend(v[:-1])  # everything but the last entry