我编写了以下脚本来匿名化txt文件中的电子邮件地址:
import io, os, sys
import re
def main(path='emails.txt'):
    """Anonymize the e-mail addresses stored in a text file, in place.

    The local part (the text before the ``@``) of every address whose
    domain contains ``.com`` or ``.edu`` is replaced with ``xxxx``. The
    domain and all surrounding text are left untouched.

    Args:
        path: File to rewrite. Defaults to ``emails.txt`` in the current
            working directory.

    Failures are reported on stdout rather than raised.
    """
    # Match only characters that are valid in an address's local part. A
    # greedy ``.+`` would also swallow any text (and any earlier address)
    # that precedes the last ``@`` on the line.
    local_part = re.compile(
        r"[\w.!#$%&'*+/=?^`{|}~-]+(?=@[\w.-]+\.(?:com|edu))")
    try:
        # ``with`` guarantees the read handle is closed before the file is
        # reopened for writing.
        with open(path, 'r') as myfile:
            content = myfile.read()
        content = local_part.sub("xxxx", content)
        with open(path, 'w') as myfile:
            myfile.write(content)
    except IOError:
        print('An error occured trying to read the file.')
    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C and SystemExit must
        # still be able to stop the script.
        print('An error occured.')


if __name__ == '__main__':
    main()
我想知道如何才能让这个脚本对某个目录及其子目录中的所有文件都起作用。
答案 0 :(得分:1)
os.walk() 正是你需要的。我修改了你的代码片段来演示它的用法:
#!/usr/bin/env python
import re
from os import walk
from os.path import join
def main(root='/path/to/root'):
    """Anonymize e-mail addresses in every file under *root*, recursively.

    Each file found by walking *root* is read, and the local part (the text
    before the ``@``) of every address whose domain contains ``.com`` or
    ``.edu`` is replaced with ``xxxx``. A file is rewritten only when its
    content actually changed.

    Args:
        root: Directory to walk. Defaults to ``/path/to/root``.

    Files that cannot be read, decoded or written are reported on stdout
    and skipped, so one bad file does not abort the rest of the walk.
    """
    # Compiled once, outside the loops. Matching only local-part characters
    # keeps the text before an address intact, unlike a greedy ``.+``.
    local_part = re.compile(
        r"[\w.!#$%&'*+/=?^`{|}~-]+(?=@[\w.-]+\.(?:com|edu))")
    for (dirpath, _, filenames) in walk(root):
        for filename in filenames:
            # Build the path to the current file.
            path_to_file = join(dirpath, filename)
            try:
                with open(path_to_file, 'r') as myfile:
                    # Parenthesised form prints the same text on Python 2
                    # and is valid syntax on Python 3.
                    print('Reading {0}'.format(path_to_file))
                    content = myfile.read()
                anonymized = local_part.sub("xxxx", content)
                # Leave files without matching addresses untouched.
                if anonymized != content:
                    with open(path_to_file, 'w') as myfile:
                        myfile.write(anonymized)
            except (IOError, OSError, UnicodeDecodeError) as exc:
                print('Skipping {0}: {1}'.format(path_to_file, exc))


if __name__ == '__main__':
    main()
答案 1 :(得分:0)
使用 glob.glob(下面的代码用的是它的迭代器版本 glob.iglob;注意模式 `*` 只匹配根目录下的直接条目,不会递归进入子目录):
import io, os, sys
import re
import glob
def main(root='/path/to/root'):
    """Write anonymized copies of the files directly inside *root*.

    Only regular files matched by ``<root>/*`` are processed; the search
    does not descend into subdirectories. In each file the local part (the
    text before the ``@``) of every address whose domain contains ``.com``
    or ``.edu`` is replaced with ``xxxx``. The result is written next to
    the source file, with ``.txt`` in the file name replaced by
    ``.new.txt``. A file whose name contains no ``.txt`` is therefore
    overwritten in place.

    Args:
        root: Directory whose files are processed. Defaults to
            ``/path/to/root``.

    Failures are reported on stdout rather than raised; the first failure
    stops the processing of the remaining files.
    """
    # Compiled once, outside the loop. Matching only local-part characters
    # keeps the text before an address intact, unlike a greedy ``.+``.
    local_part = re.compile(
        r"[\w.!#$%&'*+/=?^`{|}~-]+(?=@[\w.-]+\.(?:com|edu))")
    try:
        for f in glob.iglob(os.path.join(root, '*')):
            if not os.path.isfile(f):
                continue
            # ``with`` closes the read handle; rebinding the name to the
            # write handle would otherwise leak one descriptor per file.
            with open(f, 'r') as myfile:
                content = myfile.read()
            content = local_part.sub("xxxx", content)
            # Rename only the file name. Replacing ``.txt`` in the whole
            # path would also alter directory names and point the output at
            # a directory that does not exist.
            dirname, basename = os.path.split(f)
            target = os.path.join(
                dirname, basename.replace('.txt', '.new.txt'))
            with open(target, 'w') as myfile:
                myfile.write(content)
    except IOError:
        print('An error occured trying to read the file.')
    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C and SystemExit must
        # still be able to stop the script.
        print('An error occured.')


if __name__ == '__main__':
    main()