如何跨所有目录执行python脚本

时间:2013-03-05 07:53:26

标签: python regex

我编写了以下脚本来匿名化txt文件中的电子邮件地址:

import io, os, sys
import re

def main(path='emails.txt'):
    """Mask the local part of e-mail addresses in a text file, in place.

    The whole file is read into memory, every regex match is replaced by
    the literal ``xxxx`` and the result is written back to the same path.

    Args:
        path: File to rewrite. Defaults to ``emails.txt``, resolved against
            the current working directory.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    try:
        # Read everything first so the handle is closed before the same
        # path is reopened (and truncated) for writing.
        with open(path, 'r') as source:
            content = source.read()

        # NOTE: ``.+`` is greedy and ``.`` matches anything but a newline, so
        # on each line everything before the last '@' that is followed by
        # '.com' or '.edu' is replaced, not just the address's local part.
        content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

        with open(path, 'w') as target:
            target.write(content)
    except IOError:
        print('An error occured trying to read the file.')
    except Exception:
        # Still best-effort, but unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        print('An error occured.')


if __name__ == '__main__':
    main()

我想知道如何让这个脚本处理某个目录及其所有子目录中的全部文件。

2 个答案:

答案 0 :(得分:1)

os.walk() 正是你需要的。我修改了你的代码片段来演示它的用法:

#!/usr/bin/env python

import re
from os import walk
from os.path import join

def main(root='/path/to/root'):
    """Mask e-mail local parts in every file under *root*, recursively.

    Walks *root* with ``os.walk`` and, for each file found in it or in any
    subdirectory, replaces every regex match with the literal ``xxxx`` and
    writes the result back to the same file.

    Args:
        root: Directory to start from. Defaults to the placeholder
            ``/path/to/root``.

    Returns:
        None. Errors raised while opening, reading or writing a file are
        not caught here.
    """
    # Compiled once; the same pattern is applied to every file.
    # NOTE: ``.+`` is greedy, so on each line everything before the last
    # '@' that is followed by '.com' or '.edu' is replaced.
    pattern = re.compile(r'.+(?=@.+\.(com|edu))')

    for dirpath, _, filenames in walk(root):
        for filename in filenames:
            # Build the path to the current file.
            path_to_file = join(dirpath, filename)

            with open(path_to_file, 'r') as myfile:
                # Parenthesised form works as a Python 2 statement and as
                # the Python 3 function.
                print('Reading {0}'.format(path_to_file))
                content = myfile.read()

            masked = pattern.sub("xxxx", content)
            if masked == content:
                # Nothing matched: leave the file untouched rather than
                # rewriting identical content.
                continue

            with open(path_to_file, 'w') as myfile:
                myfile.write(masked)


if __name__ == '__main__':
    main()

答案 1 :(得分:0)

使用 glob.glob(下面的代码用的是它的迭代器版本 glob.iglob)。注意:示例中的 `*` 模式只匹配根目录下的直接条目,不会递归进入子目录。

import io, os, sys
import re
import glob

def main(root='/path/to/root'):
    """Mask e-mail local parts in the files directly inside *root*.

    Every regular file matched by ``<root>/*`` is read, each regex match is
    replaced by the literal ``xxxx`` and the result is written to a path
    derived from the file name: each ``.txt`` in the name becomes
    ``.new.txt``. A file whose name contains no ``.txt`` therefore maps to
    its own path and is overwritten in place. Subdirectories are skipped,
    not descended into.

    Args:
        root: Directory whose entries are processed. Defaults to the
            placeholder ``/path/to/root``.

    Returns:
        None. Failures are reported on stdout per file and do not stop the
        remaining files from being processed.
    """
    for f in glob.iglob(os.path.join(root, '*')):
        if not os.path.isfile(f):
            continue

        # One try per file so a single unreadable file does not abort the
        # whole run.
        try:
            with open(f, 'r') as myfile:
                content = myfile.read()

            # NOTE: ``.+`` is greedy, so on each line everything before the
            # last '@' that is followed by '.com' or '.edu' is replaced.
            content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

            # Rename only the file name; replacing on the full path would
            # also rewrite a '.txt' inside a directory name and point at a
            # directory that does not exist.
            directory, name = os.path.split(f)
            target = os.path.join(directory, name.replace('.txt', '.new.txt'))

            with open(target, 'w') as myfile:
                myfile.write(content)
        except IOError:
            print('An error occured trying to read the file.')
        except Exception:
            # Still best-effort, but unlike a bare ``except`` this lets
            # KeyboardInterrupt and SystemExit propagate.
            print('An error occured.')


if __name__ == '__main__':
    main()