例如,我想用 Python 从一个文本文件中取出前 30% 的数据。
下面是我为生成两个新文件而写的一些代码,但我不知道如何按一定比例取出数据并写入这两个文件。
代码如下:
import sys
def NewFile(FileName):
    """Create the two output files that accompany ``FileName``.

    The outputs are named ``'A' + FileName`` and ``'B' + FileName``. They are
    opened in append mode, so a missing file is created empty and an existing
    file keeps its content. No data is copied from ``FileName`` here.

    Args:
        FileName: Name of the existing source text file.

    Raises:
        OSError: If ``FileName`` itself cannot be opened for reading.
        SystemExit: With status 0, after printing ``'~~~~~~'``, if either
            output file cannot be opened.
    """
    # Open and immediately close the source: a missing or unreadable input
    # still fails here, but the handle is no longer leaked.
    with open(FileName, 'r'):
        pass
    print('Creating new text file')
    # A for 70% training data
    AFileName = 'A' + FileName
    # B for 30% testing data
    BFileName = 'B' + FileName
    try:
        # The context manager closes both files even if the second open fails.
        with open(AFileName, 'a'), open(BFileName, 'a'):
            pass
    except OSError:
        print('~~~~~~')
        # sys.exit, not the non-existent sys.exist, so the handler really exits.
        sys.exit(0)
答案 0 :(得分:0)
这样应该可以:
import sys
def NewFile(FileName):
    """Split ``FileName`` line by line into a 70% file and a 30% file.

    The first 70% of the lines (rounded down) are appended to
    ``'A' + FileName`` and the remaining lines to ``'B' + FileName``.
    Both outputs are opened in append mode, so existing content is kept
    and the new lines are added after it.

    Args:
        FileName: Name of the existing source text file.

    Raises:
        OSError: If ``FileName`` itself cannot be opened for reading.
        SystemExit: With status 0, after printing ``'~~~~~~'``, if an
            output file cannot be opened or written.
    """
    # Read everything up front and close the source handle right away.
    with open(FileName, 'r') as source:
        lines = source.readlines()
    print('Creating new text file')
    # Number of leading lines that go to the A file. Truncating selects the
    # same lines as counting from 1 while the counter is <= 0.7 * len(lines).
    num_lines_a = int(0.7 * len(lines))
    # A for 70% training data
    AFileName = 'A' + FileName
    # B for 30% testing data
    BFileName = 'B' + FileName
    try:
        # The context manager closes both outputs even if a write fails.
        with open(AFileName, 'a') as afile, open(BFileName, 'a') as bfile:
            afile.writelines(lines[:num_lines_a])
            bfile.writelines(lines[num_lines_a:])
    except OSError:
        print('~~~~~~')
        # sys.exit, not the non-existent sys.exist, so the handler really exits.
        sys.exit(0)
答案 1 :(得分:0)
听起来你想要的是类似下面这样的代码:filename
是要读取的文件,tofile 和 othertofile 是两个输出文件,proportion
是要写入第一个文件的行数比例(0 到 1 之间的小数,例如 0.3):
def split_file(filename, tofile, othertofile, proportion):
    """Split the lines of ``filename`` between two output files.

    The first ``int(line_count * proportion)`` lines are written to
    ``tofile`` and the remaining lines to ``othertofile``. Both outputs are
    opened in write mode, so any existing content is replaced.

    Args:
        filename: Path of the text file to read.
        tofile: Path that receives the leading portion.
        othertofile: Path that receives the rest.
        proportion: Fraction of the lines for ``tofile``, expected to lie
            between 0 and 1 (e.g. ``0.3`` for the first 30%).
    """
    with open(filename) as source:
        content = source.readlines()
    # Slice indices must be integers; truncate the fractional line count.
    split_at = int(len(content) * proportion)
    # Each line already ends with its own newline, so write the lines as-is
    # instead of joining them with an extra "\n".
    with open(tofile, "w") as first_file:
        first_file.writelines(content[:split_at])
    with open(othertofile, "w") as second_file:
        second_file.writelines(content[split_at:])
答案 2 :(得分:0)
另一种方法:
from itertools import islice
def new_file(old_filename):
    """Write a 70/30 line split of ``old_filename`` to two new files.

    The first 70% of the lines (rounded down) go to ``'A' + old_filename``
    and the remaining lines go to ``'B' + old_filename``. Both outputs are
    opened in write mode, so existing content is replaced.

    Args:
        old_filename: Name of the text file to split.
    """
    with open(old_filename, 'r') as source:
        all_lines = source.readlines()
    cut = int(len(all_lines) * 0.7)
    # Training file first, then testing file: the same order of creation.
    for prefix, chunk in (('A', all_lines[:cut]), ('B', all_lines[cut:])):
        with open(prefix + old_filename, 'w') as out_file:
            out_file.writelines(chunk)