从文件树中复制随机文件

时间:2014-08-02 14:39:48

标签: python file-io random

我遇到了与此处链接的问题相同的情况,但现在我想改用 Python 来完成同样的事情,因为它更适合这项任务。

我从这开始:

import os
import shutil
import random
import glob


# Source tree to scan and the folder that should receive the copies.
root_dir = '/home/leonardo/Desktop/python_script/rfe'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
# Threshold: folders holding more than `ref` entries take the 20% branch below.
ref = 200

# Names of the entries directly under root_dir (each one is treated as a folder below).
folders_root_dir = os.listdir(root_dir)
print folders_root_dir

count = len(folders_root_dir)
print  count

for i in xrange(count):
    # Path of the i-th entry, built by joining with '/' by hand.
    folder_inside = root_dir + '/' + folders_root_dir[i]
    print folder_inside
    # Counts every entry returned by os.listdir; files and sub-folders are not told apart.
    number_files_folder_inside = len(os.listdir(folder_inside))
    print  number_files_folder_inside

    if number_files_folder_inside > ref:
        # 20% of the entry count; the result is not converted to int here.
        ref_copy = round(0.2*number_files_folder_inside)
        print ref_copy
        # here I have to copy 20% of the files in this folder to the output folder 
    else:
        # here I have to copy all files from the folder to the output_dir
        # NOTE: this branch has no statement yet, so the snippet is incomplete as written.

我尝试使用 os.walk(),但我是 Python 新手,在该函数遍历目录的过程中挑选文件对我来说非常困难。

6 个答案:

答案 0 :(得分:2)

您需要导入以下内容:

import os
import shutil
import random

您可以获取目录中的所有文件:

# Keep only the regular files found directly in `dir`; sub-directories are skipped.
files = [
    entry
    for entry in os.listdir(dir)
    if os.path.isfile(os.path.join(dir, entry))
]

然后使用条件:

# Small folders are copied in full; larger ones contribute a random subset.
if len(files) < 200:
    chosen = files
else:
    # Amount of random files you'd like to select
    random_amount = 1000
    # random.sample never returns the same file twice; the cap keeps it from
    # raising ValueError when the folder holds fewer files than requested.
    chosen = random.sample(files, min(random_amount, len(files)))

for name in chosen:
    # shutil.copy accepts a directory as destination and keeps the file name,
    # whereas shutil.copyfile needs the full destination file path.
    # Both branches now write to `dst` (one of them used `outputdir` before).
    shutil.copy(os.path.join(dir, name), dst)

答案 1 :(得分:1)

import os
import shutil
import random

root_dir = '/home/leonardo/Desktop/python_script/qar'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
# Folders with more than `ref` files are sampled (20%); the others are copied whole.
ref = 1

for root, dirs, files in os.walk(root_dir):
    # Count only the files reported by os.walk so that sub-folders do not
    # inflate the total or get drawn as a "file" to copy.
    number_of_files = len(files)
    if number_of_files > ref:
        ref_copy = int(round(0.2 * number_of_files))
        # Sampling without replacement: exactly ref_copy distinct files.
        selected = random.sample(files, ref_copy)
    else:
        selected = files

    for file_name in selected:
        file_to_copy = os.path.join(root, file_name)
        # os.walk also lists non-regular entries (e.g. broken symlinks); skip them.
        if os.path.isfile(file_to_copy):
            shutil.copy(file_to_copy, output_dir)
            print(file_to_copy)
print('Finished !')

最终的代码就是上面这个样子。谢谢你们的帮助!干杯!

答案 2 :(得分:0)

可能是这样(未经测试):

    import os
    import shutil

    # Maximum number of files taken from each folder.
    THRESHOLD = 200
    # Placeholder paths; forward slashes avoid the invalid "\h" escape sequence.
    root_dir = "/home..."
    output_dir = "/home....."

    # Walk the tree and copy at most THRESHOLD files out of every folder.
    for top, dirs, nondirs in os.walk(root_dir):
        for name in nondirs[:THRESHOLD]:
            path = os.path.join(top, name)
            destination = os.path.join(output_dir, name)
            # Copy instead of os.rename: rename would move the file out of the
            # source tree and fails when source and target are on different filesystems.
            shutil.copy(path, destination)

答案 3 :(得分:0)

import random
import shutil
import os

rootdir = '/home/leonardo/Desktop/python_script/qar'
outdir = '/home/leonardo/Desktop/python_script/output_folder'

# Folders with more than `ref` files are sampled (20%); the others are copied whole.
ref = 200

dirsAndFiles = {}   # structure: {folder: [file1, file2], folder2: [file3, file4]}
dirs = [entry[0] for entry in os.walk(rootdir)]  # rootdir plus every sub-folder below it

for folder in dirs:
    dirsAndFiles[folder] = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    ]

# .items() works on Python 2 and 3 alike (iteritems exists only on Python 2).
for folder, files in dirsAndFiles.items():
    if len(files) > ref:
        # 20% of the files, drawn without replacement and without mutating `files`.
        selected = random.sample(files, int(0.2 * len(files)))
    else:
        selected = files  # copy all files
    for name in selected:
        shutil.copy(os.path.join(folder, name), outdir)

答案 4 :(得分:0)

一种更紧凑的解决方案(另请注意:除非指定了目标文件名,否则 copyfile 无法正确完成复制):

import os
import shutil
import random

def get_file_list(input_dir):
    """Return the names of the regular files located directly in input_dir."""
    names = []
    for entry in os.listdir(input_dir):
        # Sub-directories and other non-file entries are left out.
        if os.path.isfile(os.path.join(input_dir, entry)):
            names.append(entry)
    return names

def get_random_files(file_list, N):
    """Return up to N distinct entries of file_list, chosen at random.

    When N exceeds the number of available entries, every entry is returned
    (in random order) instead of letting random.sample raise ValueError.
    A negative N still raises ValueError.
    """
    return random.sample(file_list, min(N, len(file_list)))

def copy_files(random_files, input_dir, output_dir):
    """Copy every file named in random_files from input_dir into output_dir."""
    sources = (os.path.join(input_dir, name) for name in random_files)
    for source in sources:
        shutil.copy(source, output_dir)

def main(input_dir, output_dir, N):
    """Pick N random files from input_dir and copy them into output_dir."""
    candidates = get_file_list(input_dir)
    copy_files(get_random_files(candidates, N), input_dir, output_dir)

答案 5 :(得分:0)

我希望这样做是为了拆分我的数据集以进行训练,测试和验证。

这是我的代码:

import os
import shutil
import random
import numpy as np


# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# code referring to this configuration variable keeps working.
dir = r'E:\down\imgs'
train_dir = r'E:/train_test_split/train'
test_dir = r'E:/train_test_split/test'
valid_dir = r'E:/train_test_split/validation'

# Regular files directly inside the source folder.
files = [name for name in os.listdir(dir) if os.path.isfile(os.path.join(dir, name))]
print("len(files)", len(files))

# 50% train / 30% test; validation receives whatever remains (about 20%).
# Float literals keep the ratios correct even under Python 2 integer division.
train_count = int(np.round(0.5 * len(files)))
test_count = int(np.round(0.3 * len(files)))

# A random permutation of the file indices.
rndnums = random.sample(range(len(files)), len(files))

# Consecutive, non-overlapping slices of the permutation. The slice bounds no
# longer carry a "+1", which used to hand one extra file to the training set
# at the expense of the validation set.
train_file_name = [files[i] for i in rndnums[:train_count]]
test_file_name = [files[i] for i in rndnums[train_count:train_count + test_count]]
valid_file_name = [files[i] for i in rndnums[train_count + test_count:]]

# Copy each subset into its own folder; the target folders must already exist.
for target_dir, names in (
    (train_dir, train_file_name),
    (test_dir, test_file_name),
    (valid_dir, valid_file_name),
):
    for name in names:
        shutil.copyfile(os.path.join(dir, name), os.path.join(target_dir, name))