在删除Python中特定列表中具有值的文件后,如何查找唯一文件?

时间:2019-01-24 06:18:30

标签: python regex glob itertools python-os

我的文件夹中包含以下文件:

"ABC"
"ABC 10"
"ABC 22"
"ABC 30"
"ABC L1"
"ABC L2"
"ABC 10 L1"
"ABC 10 L2"
"ABC 22 L1"
"ABC 22 L2"
"ABC 30 L1"
"ABC 30 L2"
"PQR"
"PQR 10"
"PQR 22"
"PQR 30"
"PQR X3"
"PQR X4"
"PQR 10 X3"
"PQR 10 X4"
"PQR 22 X3"
"PQR 22 X4"
"PQR 30 X3"
"PQR 30 X4"

现在,我需要得到该文件夹中去掉某些索引(本例中为 10、22、30)之后的唯一文件名列表。也就是说,我的输出列表应该是:

['ABC', 'ABC L1', 'ABC L2', 'PQR', 'PQR X3', 'PQR X4' ]

以下是MWE:

import os
import random
import errno
import itertools
from itertools import repeat
import re

#--------------------------------------
# Create random folders and files

# tzot's forced directory create hack https://stackoverflow.com/a/600612/4576447
def mkdir_p(path):
    """Create *path* and any missing parents, tolerating an existing directory.

    Any OSError other than "already exists as a directory" is re-raised,
    including the case where *path* exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        directory_already_present = (
            exc.errno == errno.EEXIST and os.path.isdir(path)
        )
        if not directory_already_present:
            raise

# Build the sandbox: ./input_folder holding up to four randomly numbered
# sub-folders (random numbers may collide, which mkdir_p tolerates).
if not os.path.isdir('./input_folder'):
    os.makedirs('input_folder')
for _ in range(4):
    mkdir_p('./input_folder/folder_ABC_' + str(random.randint(100,399)))

# File-name stems, in the order the random suffix digits are drawn.
# Each stem is completed with a random digit in 0..3 plus '.dat'.
FIXTURE_STEMS = (
    'ABC 10 L',
    'ABC 22 L',
    'ABC 30 L',
    'PQR 10 X',
    'PQR 22 X',
    'PQR 30 X',
    'ABC ',
    'PQR ',
)

for root, dirs, files in os.walk('./input_folder'):
    for folder in dirs:
        # Four rounds per folder; repeated names simply truncate the file again.
        for _ in repeat(None, 4):
            for stem in FIXTURE_STEMS:
                file_name = stem + str(random.randint(0, 3)) + '.dat'
                # The context manager closes the handle right away; the
                # files only need to exist, they carry no content.
                with open(os.path.join(root, folder, file_name), 'w'):
                    pass
#--------------------------------------
# Main rename code


# Index values that should be stripped from the file names.
remove = [10, 22, 30]

# Visit every sub-folder below ./input_folder and collect the names of its
# '.dat' files; output_files is rebound for each folder visited.
for root, dirs, files in os.walk('./input_folder'):
    for subdir in dirs:
        print (subdir)
        subdir_path = os.path.join(root, subdir)
        output_files = list(
            filter(lambda entry: entry.endswith('.dat'), os.listdir(subdir_path))
        )

在去掉文件名中属于特定列表(本例中为 `remove`)的值之后,如何找出唯一的文件名?

3 个答案:

答案 0 :(得分:0)

这是一种使用 re 和列表推导式的方法。

例如:

import re

output_files = ['ABC', 'ABC 10', 'ABC 22', 'ABC 30', 'ABC L1', 'ABC L2', 'ABC 10 L1', 'ABC 10 L2', 'ABC 22 L1', 'ABC 22 L2', 'ABC 30 L1', 'ABC 30 L2', 'PQR', 'PQR 10', 'PQR 22', 'PQR 30', 'PQR X3', 'PQR X4', 'PQR 10 X3', 'PQR 10 X4', 'PQR 22 X3', 'PQR 22 X4', 'PQR 30 X3', 'PQR 30 X4']
remove = ["10", "22", "30"]

# Match an index only when it stands alone as a token: the look-arounds
# reject a neighbouring word character, so "ABC 100" or "ABC L10" are not
# mistaken for index 10. re.escape keeps any regex metacharacters in the
# remove list literal.
pat = re.compile(r"(?<!\w)(?:" + "|".join(map(re.escape, remove)) + r")(?!\w)")
print( [i for i in output_files if not pat.search(i)])

输出:

['ABC', 'ABC L1', 'ABC L2', 'PQR', 'PQR X3', 'PQR X4']

答案 1 :(得分:0)

您可以像这样使用正则表达式方法

\s+(?:[13]0|22)

需要将找到的匹配项替换为空字符串 '',请参见 regex101.com 上的演示。

答案 2 :(得分:0)

import re

# Capture the three-letter prefix and, when present, the trailing
# letter+digit suffix; a numeric index sitting between them is consumed by
# the pattern but not captured.
regex = re.compile(r'([A-Z]{3})(?:\s+(?:\d+\s+)?([A-Z]\d))?')

files = ['ABC', 'ABC 10', 'ABC 22', 'ABC 30', 'ABC L1', 'ABC L2', 'ABC 10 L1', 'ABC 10 L2', 'ABC 22 L1', 'ABC 22 L2', 'ABC 30 L1', 'ABC 30 L2', 'PQR', 'PQR 10', 'PQR 22', 'PQR 30', 'PQR X3', 'PQR X4', 'PQR 10 X3', 'PQR 10 X4', 'PQR 22 X3', 'PQR 22 X4', 'PQR 30 X3', 'PQR 30 X4']

result = []
for item in files:
    # findall yields (prefix, suffix) tuples; the suffix is '' when absent,
    # so empty captures are dropped before joining.
    captured = regex.findall(item)[0]
    result.append(' '.join(filter(None, captured)))

print(result)

# output
['ABC', 'ABC', 'ABC', 'ABC', 'ABC L1', 'ABC L2', 'ABC L1', 'ABC L2', 'ABC L1', 'ABC L2', 'ABC L1', 'ABC L2', 'PQR', 'PQR', 'PQR', 'PQR', 'PQR X3', 'PQR X4', 'PQR X3', 'PQR X4', 'PQR X3', 'PQR X4', 'PQR X3', 'PQR X4']

# Collapse duplicates; sorting gives the unique names a stable order.
result = list(set(result))
result.sort()
print(result)
# output
['ABC', 'ABC L1', 'ABC L2', 'PQR', 'PQR X3', 'PQR X4']