我有下面这段代码:
from os import path
# Glob-style pattern describing the input files (C:\texts\*.txt).
base_path = "C:\\texts\\*.txt"
# NOTE(review): `files` is not defined in this snippet — presumably the list of
# paths matching base_path; confirm against the surrounding code.
for file in files:
    # os.path.join discards earlier components when a later one is an absolute
    # path, so if `file` is absolute, base_path is ignored here and the output
    # is written right next to the input file.
    with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
        data = in_file.readlines()
        for line in data:
            # Collapse runs of whitespace and re-join the tokens with commas.
            words = line.split()
            str1 = ','.join(words)
            out_file.write(str1)
            out_file.write("\n")
它会在读取文件的同一目录中生成标记化后的文件。如何把这些输出文件写到另一个目录中,例如 "C:\\texts\\Tokenized"?
我知道可以在这些新文件生成之后再把它们移动到其他目录,但我想知道的是:有没有办法让上面的代码在生成这些文件的同时,直接把它们输出到其他目录?
答案 0 :(得分:0)
这是我输出到任意目录中文件的方式:
import os

# Directory that will receive the output file.
dir_name = "../some_dir"
# Create the directory (and any missing parents). exist_ok=True makes this a
# no-op when it is already there, avoiding a separate check-then-create step.
os.makedirs(dir_name, exist_ok=True)
# Build the path with os.path.join instead of hand-written separators.
out_file_name = os.path.join(dir_name, 'out.txt')
# The handle is left open for the code that follows; close it when done
# (or wrap the writing code in a `with open(...)` block).
out_file = open(out_file_name, 'w')
编辑:
# Name the output file after whatever was tokenized.
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
# Make sure the target directory exists before anything is written into it.
os.makedirs(dir_name, exist_ok=True)
# Join with a proper path separator: plain `dir_name + file_name` would glue
# the file name onto the directory name and create the file beside the
# directory rather than inside it.
out_file_name = os.path.join(dir_name, file_name)
编辑:
我刚试过,在我这里可以正常运行。你只需要两个路径:一个指向源目录,另一个指向目标目录。希望这对你有帮助。
import os
from os import path
# Input file names, resolved relative to base_path (the source directory).
files = ["in.txt", "out.txt"]
base_path = "."
# Destination directory for the tokenized copies.
dir_name = "./some_dir"
# Create the destination if needed; a no-op when it already exists.
os.makedirs(dir_name, exist_ok=True)
for file in files:
    source_name = path.join(base_path, file)
    # Output keeps the full input name as a prefix, e.g. "in.txt_tokenized.txt".
    target_name = path.join(dir_name, "%s_tokenized.txt" % file)
    # Both handles are closed automatically when the `with` block exits.
    with open(source_name) as in_file, open(target_name, "w") as out_file:
        # Stream line by line instead of loading the whole file into memory.
        for line in in_file:
            # Split on any whitespace and re-join the tokens with commas.
            out_file.write(','.join(line.split()) + "\n")
答案 1 :(得分:0)
这就是你要找的东西:
import os
import glob
# Glob pattern selecting the input files, and the directory that receives
# the tokenized copies.
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'

# open(..., 'w') does not create missing directories, so make sure the
# destination exists before writing; a no-op when it is already there.
os.makedirs(output_directory, exist_ok=True)

# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
    # Build the output filename: "<name>_tokenized<ext>" inside output_directory
    base, ext = os.path.splitext(os.path.basename(input_file))
    output_file = os.path.join(output_directory, base + '_tokenized' + ext)
    with open(input_file) as in_file, open(output_file, 'w') as out_file:
        for line in in_file:
            # Split on any whitespace and re-join the tokens with commas.
            out_file.write(','.join(line.split()) + '\n')