Question

我有5个含有代谢物的文件（不同细菌模型的详细信息）。我正在编写一个函数来追加指定数量的文件。文件名如下所示。

[1]“01_iAPECO1_1312_metabolites.csv”“02_iB21_1397_metabolites.csv”
  [3]“03_iBWG_1329_metabolites.csv”“04_ic_1306_metabolites.csv”
  [5]“05_iE2348C_1286_metabolites.csv”

以下是我的功能。

strat = 3 # defines the starting position of the range
end = 5 # defines the ending position of the range
type = "metabolites" # two types of files - for metabolites and reactions
files <- NULL

if (type == "metabolites"){
  files <- list.files(pattern = "*metabolites\\.csv$")
}else if(type == "reactions"){
  files <- list.files(pattern = "*reactions\\.csv$")
}

#reading each file within the range and append them to create one file
for (i in start:end){
  temp_df <- data.frame(ModelName = character(), Object = character(),stringsAsFactors = F)

  #reading the current file

  temp = rbind(one,temp_df)

}
#writing the appended file  
write.csv(temp,"appended.csv",row.names = F,quote = F)
temp_df <- NULL

例如，如果我指定start = 3和end = 5，则代码应该读取文件03,04和05并附加它们。注意：文件名开头的两个整数用于获取范围引用的文件。我无法使用正则表达式在for循环中选择所需的文件。当我指定它拾取的数字但我正在寻找带有 i 的通用版本。

currentFile = grep（“01。+”，文件）

感谢任何帮助。

Answer 1

对于下面显示的测试数据，这将返回一个向量，其中包含以02,03,04和05开头并以"reactions.csv"结尾的文件的文件名

# create some test files
for(i in 1:5) cat(file = sprintf("%02djunkreactions[.]csv", i))

# test input
start <- 2
end <- 5
type <- "reactions"

list.files(pattern = paste(sprintf("^%02d.*%s[.]csv$", start:end, type), collapse = "|"))

，并提供：

[1] "02junkreactions.csv" "03junkreactions.csv" "04junkreactions.csv"
[4] "05junkreactions.csv"

注意：如果start和end都是一位数，那么可以进行简化：

list.files(pattern = sprintf("^0[%d-%d].*%s.csv$", start, end, type))

Answer 2

您可以使用交叉连接执行此操作。

library(dplyr)
library(stringi)

start = 3
end = 5

type = "metabolites"

all_files = data_frame(file = list.files() )

desired_files = data_frame(
  number = start:end,
  regex = sprintf("^%02.f.*%s", number, file_type) )

all_files %>%
  merge(desired_files) %>%
  filter(stri_detect_regex(file, regex)) %>%
  group_by(number) %>%
  do(read.csv(.$file) ) %>%
  write.csv("appended.csv", row.names = F, quote = F)

Answer 3

你在找这样的东西吗？

files <- c("01_iAPECO1_1312_metabolites.csv", "02_iB21_1397_metabolites.csv","03_iBWG_1329_metabolites.csv", "04_ic_1306_metabolites.csv","05_iE2348C_1286_metabolites.csv")

for(i in 2:4) print(grep(sprintf("^(%02d){1}_",i),files,value=T))

如何将变量值包含在R

3 个答案: