Question

我有一个包含大量csv文件的目录。我想将数据加载到R中并将函数应用于目录中每个可能的csv文件对组合，然后将输出写入文件。

我想要应用的函数是来自Biobase库的matchpt()，它比较两个数据帧之间的位置。

这是我想要做的一个例子（虽然我有比这更多的文件）：

目录中的三个文件：A，B和C
对每个成对组合执行matchpt： nn1 = matchpt(A,B) nn2 = matchpt(A,C) nn3 = matchpt(B,C)
将nn1，nn2和nn3写入csv文件。

我还没有找到任何解决方案，并希望得到任何建议。我真的不知道从哪里开始，但我假设某种嵌套for循环需要以某种方式顺序循环通过所有成对的文件组合。下面是一个开头，但这只是将第一个文件与目录中的所有其他文件进行比较，所以不起作用！

library("Biobase")

# NOTE: `dir` must be a character path defined earlier in the script.
# List the csv files in the directory. Both vectors hold the same listing; they
# are kept separate only to mirror the "file 1" / "file 2" roles used below.
filenames1 <- list.files(
  path = dir, pattern = "csv$", full.names = FALSE, recursive = FALSE
)
filenames2 <- filenames1

if (length(filenames1) == 0) {
  stop("No csv files found in: ", dir)
}

# Known limitation of this attempt: only the FIRST file is ever used as the
# left-hand side, so file 1 is compared against every file in the directory
# instead of every pairwise combination being visited.

# Load the first data frame once (it does not change between iterations).
# list.files() returned bare names, so prepend the directory when reading.
file1 <- filenames1[1]
df1 <- read.csv(file.path(dir, file1), header = TRUE, stringsAsFactors = FALSE)
# isolate the relevant columns and extract the id code (text before the first
# underscore) used to name the output file
dat1 <- as.matrix(df1[, c("lat", "long")])
code1 <- strsplit(file1, "_")[[1]][1]

for (i in seq_along(filenames2)) {
  # load the second data frame from list 2
  file2 <- filenames2[i]
  df2 <- read.csv(file.path(dir, file2), header = TRUE, stringsAsFactors = FALSE)
  dat2 <- as.matrix(df2[, c("lat", "long")])

  # run the matchpt function on the two coordinate matrices
  nn <- matchpt(dat1, dat2)

  # build the output name "<code1>_<code2>_nn.csv" and write the result there
  code2 <- strsplit(file2, "_")[[1]][1]
  outname <- paste(code1, code2, sep = "_")
  outfile <- paste0(outname, "_nn.csv")
  write.csv(nn, file = outfile, row.names = FALSE)
}

如何解决这个问题的任何建议将不胜感激。非常感谢！

Answer 1

您可以执行以下操作：

# combn() hands each combination to FUN as ONE length-2 character vector, so it
# has to be unpacked into two arguments before calling matchpt().
# simplify = FALSE keeps every result as its own list element, which is the
# form do.call(rbind, ...) needs.
# NOTE(review): this still assumes matchpt() accepts two file names; if it
# needs the loaded coordinates instead, read both files inside the wrapper.
out <- combn(
  list.files(), 2,
  FUN = function(pair) matchpt(pair[1], pair[2]),
  simplify = FALSE
)
# Stack all pairwise results and write them to a single csv file.
write.table(do.call(rbind, out), file = "output.csv", sep = ",")

这假设matchpt期望2个字符串包含文件名，并且结果每次都是相同的结构，以便rbind有意义。

您还可以编写自己的函数传递给combn，该函数接受2个文件名，运行matchpt，然后将结果附加到csv文件。请记住，如果您将打开的文件句柄传递给write.table，那么它将附加到文件而不是覆盖其中已有的内容。

Answer 2

在回答我自己的问题时，我似乎找到了解决方案。下面使用for循环来执行公共目录中的每个成对文件组合（这似乎可行，并且会给出文件的两种顺序的组合，即A&B和B&A）：

# NOTE: `dir` must be a character path defined earlier in the script.
# create a list of filenames
filenames <- list.files(
  path = dir, pattern = "csv$", full.names = FALSE, recursive = FALSE
)

# Read every file exactly once and keep only the coordinate columns as a
# matrix, instead of re-reading each file on every pass of the inner loop.
# list.files() returned bare names, so prepend the directory when reading.
coords <- lapply(file.path(dir, filenames), function(path) {
  df <- read.csv(path, header = TRUE, stringsAsFactors = FALSE)
  as.matrix(df[, c("lat_UTM", "long_UTM")])
})

# unique id code of each file (the part of the name before the first underscore)
codes <- vapply(strsplit(filenames, "_"), `[`, character(1), 1)

# Compare every ordered pair of files: both A vs B and B vs A are produced, and
# a file is also compared with itself (add `if (i == j) next` to skip that).
# seq_along() makes the loops a no-op when the directory has no csv files.
for (i in seq_along(filenames)) {
  for (j in seq_along(filenames)) {
    # run the comparison function on the two matrices (in this case matchpt)
    out <- matchpt(coords[[i]], coords[[j]])
    # merge the two id codes to name the output file, then write the result
    outfile <- paste0(codes[i], "_", codes[j], "_out.csv")
    write.csv(out, file = outfile, row.names = FALSE)
  }
}

Answer 3

试试这个例子：

# dummy filenames standing in for a real directory listing
filenames <- paste0("file_", 1:5, ".txt")
n_files <- length(filenames)

# Loop through each unique combination (i < j) exactly once.
# seq_len()/seq() bounds are used instead of 1:(n - 1) and (i + 1):n so that
# nothing runs (rather than counting backwards) with fewer than two files.
for (i in seq_len(max(n_files - 1L, 0L))) {
  for (j in seq(i + 1L, n_files)) {
    print(paste(
      "i=", i, "j=", j, "|",
      "file1=", filenames[i], "file2=", filenames[j]
    ))
    # push the line to the console immediately on buffered GUIs
    flush.console()
  }
}

如何将函数应用于存储在公共目录中的每个可能的成对组合文件

3 个答案: