我的这段代码效率很低,想把它写得更好。每个df是一个文件名向量,其中的文件与对应的term(例如df6对应term6)所给的模式相匹配。
有人能告诉我怎样把它改写成高效且易于理解的代码吗?用循环是最好的办法吗?
# For each year 2006-2011: a regular expression for the wanted file names,
# followed by the matching file names found directly in that year's folder.
# Despite the "df" prefix, each dfN is a character vector of file names.

term6 <- "Casos de Malaria|MALARIA8|Casos según Entidades"
df6 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2006",
  pattern = term6,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)

term7 <- "Casos según Entidades|MALARIA8"
df7 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2007",
  pattern = term7,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)

term8 <- "Distrito Capital10|hasta"
df8 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2008",
  pattern = term8,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)

term9 <- "NA12"
df9 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2009",
  pattern = term9,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)

term10 <- "hasta la semana|NA12|NA16|Entidades Federales16"
df10 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2010",
  pattern = term10,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)

term11 <- "NA19|Cuadro|Malaria16"
df11 <- list.files(
  path = "J:\\GBD2017\\Venezuela\\xlsx\\2011",
  pattern = term11,
  recursive = FALSE,
  full.names = FALSE,
  ignore.case = TRUE
)
答案 0 :(得分:2)
# One regular expression per year folder; terms[i] is searched in paths[i].
terms <- c(
  "Casos de Malaria|MALARIA8|Casos según Entidades",
  "Casos según Entidades|MALARIA8",
  "Distrito Capital10|hasta",
  "NA12",
  "hasta la semana|NA12|NA16|Entidades Federales16",
  "NA19|Cuadro|Malaria16"
)

# Year folders 2006-2011 under the common root.
paths <- paste0("J:\\GBD2017\\Venezuela\\xlsx\\", 2006:2011)

# The two vectors are paired by position, so fail fast if they drift apart
# instead of silently recycling or indexing past the end.
stopifnot(length(terms) == length(paths))

# Search each folder with its own pattern, then flatten all hits into a
# single character vector of file names.
unlist(
  mapply(
    function(path, pattern) {
      list.files(
        path = path,
        pattern = pattern,
        recursive = FALSE,
        # Bare file names only; generally not a good idea, TRUE returns
        # paths that can be opened directly.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    path = paths,
    pattern = terms,
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  ),
  use.names = FALSE
)
但是,我建议把搜索模式和路径放在一起保存。这样一来,您可以把这些路径搜索的元数据存放在一个TSV文件中(用制表符而不是逗号分隔),同时也能保证两个向量长度相等这一约束。
它同样易于使用:
# Search metadata kept as one table: each row pairs a file-name pattern
# (term) with the folder (path) it should be searched in, so the two can
# never end up with different lengths.
file_search_df <- data.frame(
  term = c(
    "Casos de Malaria|MALARIA8|Casos según Entidades",
    "Casos según Entidades|MALARIA8",
    "Distrito Capital10|hasta",
    "NA12",
    "hasta la semana|NA12|NA16|Entidades Federales16",
    "NA19|Cuadro|Malaria16"
  ),
  path = c(
    "J:\\GBD2017\\Venezuela\\xlsx\\2006",
    "J:\\GBD2017\\Venezuela\\xlsx\\2007",
    "J:\\GBD2017\\Venezuela\\xlsx\\2008",
    "J:\\GBD2017\\Venezuela\\xlsx\\2009",
    "J:\\GBD2017\\Venezuela\\xlsx\\2010",
    "J:\\GBD2017\\Venezuela\\xlsx\\2011"
  ),
  stringsAsFactors = FALSE
)

# Run one search per row and flatten the hits into a single character
# vector. Iterating over the columns (rather than 1:nrow()) also behaves
# correctly when the table has zero rows.
unlist(
  mapply(
    function(path, pattern) {
      list.files(
        path = path,
        pattern = pattern,
        recursive = FALSE,
        # Bare file names only; generally not a good idea, TRUE returns
        # paths that can be opened directly.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    path = file_search_df$path,
    pattern = file_search_df$term,
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  ),
  use.names = FALSE
)
而且,如果您使用TSV方法,则更加紧凑:
# Load the search metadata from a tab-separated file. The code below reads
# its "path" and "term" columns, so the file must provide both.
file_search_df <- read.delim(
  "path-to-metadata-file.tsv",
  stringsAsFactors = FALSE
)

# Run one search per row and flatten the hits into a single character
# vector. Iterating over the columns (rather than 1:nrow()) also behaves
# correctly when the metadata file has no data rows.
unlist(
  mapply(
    function(path, pattern) {
      list.files(
        path = path,
        pattern = pattern,
        recursive = FALSE,
        # Bare file names only; generally not a good idea, TRUE returns
        # paths that can be opened directly.
        full.names = FALSE,
        ignore.case = TRUE
      )
    },
    path = file_search_df$path,
    pattern = file_search_df$term,
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  ),
  use.names = FALSE
)
答案 1 :(得分:0)
以下内容应使您的代码更加整洁:
#myfilepaths <- paste0(rep(getwd(),6))
# Year folders 2006-2011 under the common root.
myfilepaths <- paste0("J:\\GBD2017\\Venezuela\\xlsx\\", 2006:2011)

# One pattern per folder. The first entry is the placeholder "test" used
# for the demonstration output shown below.
allterms <- c(
  "test",
  "Casos según Entidades|MALARIA8",
  "Distrito Capital10|hasta",
  "NA12",
  "hasta la semana|NA12|NA16|Entidades Federales16",
  "NA19|Cuadro|Malaria16"
)

# Folders and patterns are paired by position; stop if they are out of sync.
stopifnot(length(myfilepaths) == length(allterms))

searchtree <- data.frame(myfilepaths, allterms, stringsAsFactors = FALSE)

# One list element per row of searchtree. mapply(SIMPLIFY = FALSE) always
# returns a list, whereas apply() collapses the result to a vector or
# matrix whenever every folder yields the same number of matches.
result <- mapply(
  function(folder, pattern) {
    list.files(
      folder,
      pattern = pattern,
      recursive = FALSE,
      full.names = FALSE,
      ignore.case = TRUE
    )
  },
  folder = searchtree$myfilepaths,
  pattern = searchtree$allterms,
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
)
想法是:1)把年份范围粘贴到根目录后面来生成文件夹列表,然后把每个路径及其对应的模式放进一个整洁的数据框中;2)对这个数据框的每一行调用list.files函数。这样会得到一个包含6个元素的列表。
> result
[[1]]
[1] "ctest.R" "ExcelTest.csv" "test.csv" "test.xls"
[[2]]
character(0)
[[3]]
character(0)
[[4]]
character(0)
[[5]]
character(0)
[[6]]
character(0)
或者您也可以使用这样的循环,产生完全相同的结果:
# Preallocate one slot per row of searchtree, then fill each slot with the
# file names found in that row's folder for that row's pattern. Looping over
# the rows of searchtree (not over allterms) keeps every row covered even
# when searchtree repeats a folder with several patterns.
result <- vector("list", nrow(searchtree))
for (i in seq_len(nrow(searchtree))) {
  result[[i]] <- list.files(
    searchtree[i, "myfilepaths"],
    pattern = searchtree[i, "allterms"],
    recursive = FALSE,
    full.names = FALSE,
    ignore.case = TRUE
  )
}
result
编辑:如果某个文件夹需要搜索多个模式,例如要在2006文件夹中同时搜索"test"和"Casos según Entidades|MALARIA8",则应像下面这样创建searchtree数据框(data.frame):
# Repeat the first folder so it is paired with both the first and the second
# pattern; the remaining folders keep their single pattern. Columns are named
# directly in the constructor.
searchtree <- data.frame(
  myfilepaths = myfilepaths[c(1, 1:6)],
  allterms = allterms[c(1:2, 2:6)],
  stringsAsFactors = FALSE
)
该函数会在每一行给出的文件夹中搜索同一行给出的模式。
searchtree
myfilepaths allterms
1 J:\\GBD2017\\Venezuela\\xlsx\\2006 test
2 J:\\GBD2017\\Venezuela\\xlsx\\2006 Casos según Entidades|MALARIA8
3 J:\\GBD2017\\Venezuela\\xlsx\\2007 Casos según Entidades|MALARIA8
4 J:\\GBD2017\\Venezuela\\xlsx\\2008 Distrito Capital10|hasta
5 J:\\GBD2017\\Venezuela\\xlsx\\2009 NA12
6 J:\\GBD2017\\Venezuela\\xlsx\\2010 hasta la semana|NA12|NA16|Entidades Federales16
7 J:\\GBD2017\\Venezuela\\xlsx\\2011 NA19|Cuadro|Malaria16
或者您可以手动管理它:
# Hand-written folder/pattern pairs: element i of allfolders is searched
# with element i of allpatterns.
allfolders <- c("folder1", "folder1", "folder2")
allpatterns <- c("pattern1", "pattern2", "pattern2")

# Build the table from the vectors defined just above, keeping the column
# names (myfilepaths, allterms) that the search code expects.
searchtree <- data.frame(
  myfilepaths = allfolders,
  allterms = allpatterns,
  stringsAsFactors = FALSE
)
searchtree
myfilepaths allterms
1 folder1 pattern1
2 folder1 pattern2
3 folder2 pattern2