在下面的代码中,所有输出文件都被写入同一个 T1 文件夹。如何把这些输出文件按原始 csv 文件所在的子文件夹分开存放,也就是在 T1 下创建与原始子文件夹同名的子文件夹?谢谢
import pandas as pd
import numpy as np
import glob
import os
# Root folder passed to os.walk below; the CSV files in its subfolders are processed.
path = '/root/Desktop/TT1/'
# Width of each 'Time' window used by data_splitter (same units as the 'Time' column).
mystep = 0.4
#define the function
def data_splitter(df, name, step=None, output_dir="/root/Desktop/T1/"):
    """Split *df* into consecutive ``Time`` windows and write one CSV per window.

    Window ``k`` holds the rows whose ``Time`` falls in
    ``[k * step, (k + 1) * step)``.  One file named ``{name}_{k}.csv`` is
    written to *output_dir* for every window from 0 up to the one containing
    the largest ``Time`` value; windows without rows still produce a file.
    Rows with a negative or missing ``Time`` are not written to any file.

    Args:
        df: DataFrame with a numeric ``Time`` column.
        name: Prefix used for the output file names.
        step: Window width; defaults to the module-level ``mystep``.
        output_dir: Folder that receives the files (must already exist).

    Raises:
        ValueError: If the window width is not positive.
    """
    if step is None:
        step = mystep  # fall back to the script-wide window width
    if step <= 0:
        raise ValueError("step must be positive")

    # Assign every row its window index directly.  Building the thresholds
    # with np.arange(0, max_time, step) excludes the end point, which dropped
    # the row(s) with the largest Time whenever it was a multiple of the step.
    window = np.floor(df['Time'] / step)
    last_window = window.max()
    if pd.isna(last_window):
        return  # empty frame or no usable Time values: nothing to write

    for k in range(int(last_window) + 1):
        temp = df[window == k]
        temp.to_csv(os.path.join(output_dir, "{}_{}.csv".format(name, k)))
# os.walk visits every directory below `path`; each of its child folders is
# handled exactly once, as a child of the directory currently being visited.
for parent, subdirs, _ in os.walk(path):
    for subdir in subdirs:
        # Glob pattern matching the CSV files directly inside this subfolder.
        csv_pattern = os.path.join(parent, subdir, '*.csv')
        for csv_file in glob.glob(csv_pattern):
            frame = pd.read_csv(csv_file)
            # The file name (extension included) becomes the output prefix.
            data_splitter(frame, os.path.basename(csv_file))
答案 0 :(得分:0)
这应该有帮助
演示:
as.factor(cutree(X,k=X))
答案 1 :(得分:0)
下面这种做法应该可以解决这个问题:
import pandas as pd
import numpy as np
import glob
import os
# Root of the folder tree that is walked for input CSV files.
input_root = '/root/Desktop/TT1'
# Root folder under which the split CSV files are written.
output_root = '/root/Desktop/T1'
# Width of each 'Time' window used when splitting a file.
mystep = 0.4
#define the function
def data_splitter(input_file, output_path, output_basename, step=None):
    """Split one CSV file into consecutive ``Time`` windows, one CSV per window.

    Window ``k`` holds the rows whose ``Time`` falls in
    ``[k * step, (k + 1) * step)``.  One file named
    ``{output_basename}_{k}.csv`` is written into *output_path* for every
    window from 0 up to the one containing the largest ``Time`` value;
    windows without rows still produce a file.  Rows with a negative or
    missing ``Time`` are not written to any file.

    Args:
        input_file: Path of the CSV file to read; it needs a ``Time`` column.
        output_path: Existing folder that receives the output files.
        output_basename: Prefix of the output file names.
        step: Window width; defaults to the module-level ``mystep``.

    Raises:
        ValueError: If the window width is not positive.
    """
    if step is None:
        step = mystep  # fall back to the script-wide window width
    if step <= 0:
        raise ValueError("step must be positive")

    df = pd.read_csv(input_file)

    # Assign every row its window index directly.  Building the thresholds
    # with np.arange(0, max_time, step) excludes the end point, which dropped
    # the row(s) with the largest Time whenever it was a multiple of the step.
    window = np.floor(df['Time'] / step)
    last_window = window.max()
    if pd.isna(last_window):
        return  # empty file or no usable Time values: nothing to write

    for k in range(int(last_window) + 1):
        temp = df[window == k]
        temp.to_csv(os.path.join(output_path, f"{output_basename}_{k}.csv"))
# Walk the whole input tree and mirror its folder structure under output_root:
# every CSV file is split into the output folder that corresponds to the
# folder it was found in.
for dirpath, dirnames, filenames in os.walk(input_root):
    # relpath (instead of slicing by len(input_root) + 1) stays correct when
    # input_root ends with a path separator; normpath removes the "." that
    # relpath returns for the root folder itself.
    sub_folders = os.path.relpath(dirpath, input_root)
    output_path = os.path.normpath(os.path.join(output_root, sub_folders))
    for filename in filenames:
        if not filename.lower().endswith('.csv'):
            continue
        input_file = os.path.join(dirpath, filename)
        os.makedirs(output_path, exist_ok=True)  # Ensure the output folder exists
        # Pass a bare file name: data_splitter joins it onto output_path, so
        # a full path here would be doubled whenever output_root is relative.
        # The resulting files are still named "<name>.csv_<k>.csv".
        output_basename = os.path.splitext(filename)[0] + '.csv'
        data_splitter(input_file, output_path, output_basename)
这样就会在输出根文件夹中重新创建与输入文件夹相同的子文件夹结构。