我大约有631个目录,每个目录中有20个文件。我想对所有目录执行以下代码,使其在每次迭代时把dir_1依次替换为dir_2、dir_3,直到dir_631。我尝试过使用两层for循环,但没有成功。预先感谢。
library(TeachingDemos)
# Run ulfasQTL::findSqtl() on every file in data/split_1000/dir_1, stack the
# per-file results, and save them as a tab-separated table. Console output is
# recorded to a transcript between txtStart() and txtStop().
txtStart("command_split_1000/dir_1.txt")

# NOTE(review): `pattern` is a regular expression, not a glob, so "x*" matches
# every file name; use "^x" if only names starting with "x" are intended.
files <- list.files(path = "data/split_1000/dir_1", pattern = "x*", full.names = TRUE)

total.coefs <- data.frame()
for (x in files) {
  message("Running: ", x)
  # Echo the path on stdout before the call. This used to be passed as a stray
  # positional argument to tryCatch(), where it was registered as a handler
  # for an empty condition class and only printed as a side effect.
  print(x)
  output <- tryCatch(
    ulfasQTL::findSqtl(x, geneObjectName = "gene_after_preprocess", snpFileCate = 1),
    error = function(e) {
      # Report the failure and carry on with the next file.
      cat("ERROR :", conditionMessage(e), "\n")
      NULL
    }
  )
  # A NULL `output` (failed file) adds no rows.
  total.coefs <- rbind(total.coefs, output)
  # The output file is rewritten after every input file, so it always holds
  # the results accumulated so far.
  write.table(total.coefs, file = "output_split_1000/dir_1", sep = "\t")
}

txtStop()
答案 0 :(得分:1)
请考虑将list.files循环嵌套在list.dirs循环之中。另外,避免在循环内使用rbind,因为它会导致内存中的过度复制(请参见Patrick Burns的《R Inferno》:第2圈——增长对象)。请改用lapply构建数据框列表,然后在循环之外用rbind一次性合并。
# For each sub-directory of data/split_1000: record a transcript, run
# ulfasQTL::findSqtl() on every file in it, row-bind the successful results,
# and write them to output_split_1000/<dir name>.txt.
# Requires TeachingDemos to be attached for txtStart()/txtStop().

# RETRIEVE ALL NEEDED DIRECTORIES
# recursive = FALSE is required: with the default (recursive = TRUE) the
# result also contains the root path itself, which would then be processed
# as if it were one of the data directories.
dirs <- list.dirs(path = "data/split_1000", recursive = FALSE)

for (d in dirs) {
  txtStart(paste0("command_split_1000/", basename(d), ".txt"))

  # RETRIEVE ALL FILES IN CURRENT DIRECTORY
  message("Directory: ", d)
  # NOTE(review): `pattern` is a regular expression, not a glob, so "x*"
  # matches every file name; use "^x" if only names starting with "x" are
  # intended.
  files <- list.files(path = d, pattern = "x*", full.names = TRUE)

  # LIST OF DATA FRAMES (NULL FOR FILES THAT FAILED)
  df_list <- lapply(files, function(x) {
    message("--Running: ", x)
    # Echo the path on stdout before the call. This used to be passed as a
    # stray positional argument to tryCatch(), where it was registered as a
    # handler for an empty condition class and only printed as a side effect.
    print(x)
    tryCatch(
      ulfasQTL::findSqtl(x, geneObjectName = "gene_after_preprocess", snpFileCate = 1),
      error = function(e) {
        # Report the failure and carry on with the next file.
        cat("ERROR :", conditionMessage(e), "\n")
        NULL
      }
    )
  })

  # ROW BIND ALL NON-NULL DF ELEMENTS
  # NROW() is 0 for NULL and for empty results, so Filter() drops both.
  df_list <- Filter(NROW, df_list)
  # total.coefs is NULL when no file produced any rows.
  total.coefs <- do.call(rbind, df_list)

  # SAVE OUTPUT WITH BASE NAME OF CURRENT DIRECTORY
  out_path <- paste0("output_split_1000/", basename(d), ".txt")
  write.table(total.coefs, file = out_path, sep = "\t")

  txtStop()

  # RELEASE RESOURCES
  rm(df_list, files, total.coefs)
  gc()
}