我正在尝试读取目录中所有文件的打印搜索,并将每个文件中的内容存储在要使用的列表中。
我的问题是当我使用print来调试文件是否存在时,它会打印出当前文件或列表中的第一个文件。但是,当我尝试从这个文件中读取时,它抱怨找不到文件
import re
import os
# Program to extract emails from text files
def path_file():
#path = raw_input("Please enter path to file:\n> ")
path = '/home/holy/thinker/leads/'
return os.listdir('/home/holy/thinker/leads') # returns a list like ["file1.txt", 'image.gif'] # need to remove trailing slashes
# read a file as 1 big string
def in_file():
print path_file()
content = []
for a_file in path_file(): # ['add.txt', 'email.txt']
print a_file
fin = open(a_file, 'r')
content.append(fin.read()) # store content of each file
print content
fin.close()
return content
print in_file()
# this is the error i get
""" ['add.txt', 'email.txt']
add.txt
Traceback (most recent call last):
File "Extractor.py", line 24, in <module>
print in_file()
File "Extractor.py", line 17, in in_file
fin = open(a_file, 'r')
IOError: [Errno 2] No such file or directory: 'add.txt'
"""
我得到的错误是充实的
答案 0 :(得分:1)
os.listdir
只返回您的文件名。您必须在该文件名之前输入目录名称。
尝试在您运行程序的同一目录中打开add.txt
。请在文件名前添加目录名。
def path_file():
#path = raw_input("Please enter path to file:\n> ")
path = '/home/holy/thinker/leads/'
return [os.path.join(path, x) for x in os.listdir(path)]
答案 1 :(得分:0)
您应该使用您想要阅读的文件的完整路径。
所以请fin = open(os.path.join(r'/home/holy/thinker/leads/', a_file), 'r')
答案 2 :(得分:0)
这里使用glob
重写以限制考虑哪些文件;
import glob
import os
import re
import sys
if sys.hexversion < 0x3000000:
# Python 2.x
inp = raw_input
else:
# Python 3.xrange
inp = input
def get_dir(prompt):
while True:
dir_name = inp(prompt)
dir_name = os.path.join(os.getcwd(), dir_name)
if os.path.isdir(dir_name):
return dir_name
else:
print("{} does not exist or is not a directory".format(dir_name))
def files_in_dir(dir_name, file_spec="*.txt"):
return glob.glob(os.path.join(dir_name, file_spec))
def file_iter(files):
for fname in files:
with open(fname) as inf:
yield fname, inf.read()
def main():
email_dir = get_dir("Please enter email directory: ")
email_files = files_in_dir(email_dir, "*.eml")
print(email_files)
content = [txt for fname,txt in file_iter(email_files)]
print(content)
if __name__=="__main__":
main()
并且试运行看起来像
Please enter email directory: c:\temp
['c:\\temp\\file1.eml', 'c:\\temp\\file2.eml']
['file1 line one\nfile1 line two\nfile1 line three',
'file2 line one\nfile2 line two']