如何针对多个数据帧运行函数,并输出与R中的输入同名的数据帧

时间:2015-11-04 12:50:53

标签: r

我有几个数据帧,我正在应用函数

该函数有效,但我想将它提供给几个数据帧并根据输入名称输出结果。

以下是其中一个数据帧的示例

structure(list(chr = structure(c(1L, 1L, 1L), .Label = c("chr1", 
"chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", 
"chr17", "chr18", "chr19", "chr2", "chr20", "chr21", "chr22", 
"chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chrX", 
"chrY"), class = "factor"), leftPos = c(100260254L, 100735342L, 
100805662L), strand.x = structure(c(1L, 1L, 2L), .Label = c("-", 
"+"), class = "factor"), X50CellJ_SLX.9395.FSeqJ.fq.gz = c(7L, 
295L, 132L), Cytospongex10_SLX.9395.FSeqK.fq.gz = c(72L, 256L, 
148L), FFPE20X_SLX.9395.fq.gz = c(5L, 74L, 36L), Tumour10_SMACCO_AH_088_SLX.9396.FSeqH.fq.gz = c(13L, 
154L, 65L), Tumour11_SMACCO_SH_020_SLX.9396.FSeqI.fq.gz = c(1L, 
0L, 0L), Tumour12_SMACCO_ED_008_SLX.9396.FSeqJ.fq.gz = c(3L, 
25L, 8L), Tumour13_SMACCO_AH_086_SLX.9396.FSeqK.fq.gz = c(7L, 
120L, 28L), Tumour1_SMACCO_AH_100_SLX.9396.FSeqA.fq.gz = c(0L, 
0L, 0L), Tumour2_SMACCO_AH_058_SLX.9396.FSeqB.fq.gz = c(24L, 
98L, 42L), Tumour3_SMACCO_SH_051_SLX.9396.FSeqC.fq.gz = c(29L, 
92L, 29L), Tumour4_SMACCO_ED_031_SLX.9396.FSeqD.fq.gz = c(18L, 
53L, 14L), Tumour5_SMACCO_RS_027_SLX.9396.FSeqE.fq.gz = c(8L, 
93L, 17L), Tumour7_SMACCO_AH_026_SLX.9396.FSeqF.fq.gz = c(30L, 
205L, 60L), Tumour9_SMACCO_ST_024_SLX.9396.FSeqG.fq.gz = c(15L, 
129L, 17L), strand.y = structure(c(1L, 1L, 2L), .Label = c("-", 
"+"), class = "factor"), Tumour14_SMACCO_AH_094_SLX.9394.FSeqA.fq.gz = c(0L, 
7L, 3L), Tumour15_SMACCO_WG_006_SLX.9394.FSeqB..fq.gz = c(3L, 
19L, 4L), Tumour16_SMACCO_ST_035_SLX.9394.FSeqC.fq.gz = c(1L, 
23L, 8L), Tumour17_SMACCO_ST_034_SLX.9394.fq.gz = c(7L, 26L, 
5L), Control19_SLX.9394.FSeqE.fq.gz = c(51L, 256L, 36L), Control20_SLX.9394.FSeqF.fq.gz = c(23L, 
110L, 34L), Control21_SLX.9394.FSeqG..fq.gz = c(30L, 56L, 
11L), Control22_SLX.9394.FSeqH.fq.gz = c(22L, 72L, 24L), Control23_SLX.9394.FSeqI.fq.gz = c(10L, 
23L, 2L), Control25_SLX.9394.FSeqJ.fq.gz = c(17L, 72L, 8L), 
    Control27_SLX.9394.FSeqK.fq.gz = c(10L, 21L, 9L), Control28_SLX.9395.FSeqA.fq.gz = c(13L, 
    40L, 4L), Control29_SLX.9395.FSeqB.fq.gz = c(14L, 39L, 
    6L), Control30_SLX.9395.FSeqC.fq.gz = c(5L, 32L, 5L), 
    Control31_SLX.9395.FSeqD.fq.gz = c(7L, 11L, 5L), Control32_SLX.9395.FSeqE.fq.gz = c(5L, 
    32L, 4L), Control33_SLX.9395.FSeqF.fq.gz = c(10L, 25L, 
    6L), Control34_SLX.9395.FSeqG.fq.gz = c(3L, 32L, 1L), 
    Control35_SLX.9395.FSeqH.fq.gz = c(10L, 33L, 0L), Controls = c(0L, 
    0L, 0L), Samples = c(0L, 0L, 0L)), .Names = c("chr", "leftPos", 
"strand.x", "X50CellJ_SLX.9395.FSeqJ.fq.gz", "Cytospongex10_SLX.9395.FSeqK.fq.gz", 
"FFPE20X_SLX.9395.fq.gz", "Tumour10_SMACCO_AH_088_SLX.9396.FSeqH.fq.gz", 
"Tumour11_SMACCO_SH_020_SLX.9396.FSeqI.fq.gz", "Tumour12_SMACCO_ED_008_SLX.9396.FSeqJ.fq.gz", 
"Tumour13_SMACCO_AH_086_SLX.9396.FSeqK.fq.gz", "Tumour1_SMACCO_AH_100_SLX.9396.FSeqA.fq.gz", 
"Tumour2_SMACCO_AH_058_SLX.9396.FSeqB.fq.gz", "Tumour3_SMACCO_SH_051_SLX.9396.FSeqC.fq.gz", 
"Tumour4_SMACCO_ED_031_SLX.9396.FSeqD.fq.gz", "Tumour5_SMACCO_RS_027_SLX.9396.FSeqE.fq.gz", 
"Tumour7_SMACCO_AH_026_SLX.9396.FSeqF.fq.gz", "Tumour9_SMACCO_ST_024_SLX.9396.FSeqG.fq.gz", 
"strand.y", "Tumour14_SMACCO_AH_094_SLX.9394.FSeqA.fq.gz", 
"Tumour15_SMACCO_WG_006_SLX.9394.FSeqB..fq.gz", "Tumour16_SMACCO_ST_035_SLX.9394.FSeqC.fq.gz", 
"Tumour17_SMACCO_ST_034_SLX.9394.fq.gz", "Control19_SLX.9394.FSeqE.fq.gz", 
"Control20_SLX.9394.FSeqF.fq.gz", "Control21_SLX.9394.FSeqG..fq.gz", 
"Control22_SLX.9394.FSeqH.fq.gz", "Control23_SLX.9394.FSeqI.fq.gz", 
"Control25_SLX.9394.FSeqJ.fq.gz", "Control27_SLX.9394.FSeqK.fq.gz", 
"Control28_SLX.9395.FSeqA.fq.gz", "Control29_SLX.9395.FSeqB.fq.gz", 
"Control30_SLX.9395.FSeqC.fq.gz", "Control31_SLX.9395.FSeqD.fq.gz", 
"Control32_SLX.9395.FSeqE.fq.gz", "Control33_SLX.9395.FSeqF.fq.gz", 
"Control34_SLX.9395.FSeqG.fq.gz", "Control35_SLX.9395.FSeqH.fq.gz", 
"Controls", "Samples"), row.names = c(NA, 3L), class = "data.frame")

这是我到目前为止所拥有的

# Named list of the inputs to process. The element names (A..I) are what the
# lapply() call further down passes to paste0() to build each output file name.
# NOTE(review): the objects on the right-hand side are defined elsewhere;
# presumably each one is a data frame shaped like the dput() sample above.
mylist <- list(A = OriginalMeta , B = SLX9392 , C = SLX9393, D = SLX9397, E = Gastric, F = Dysplasia, G = GoodDysplasia, H = Cholangio, I = LCM_PS14_1105_1F) 

# Clean a single data frame before export.
#
# Removes the strand/index helper columns ("strand.x", "strand.y", "strand",
# "X.") when they are present, renames the first remaining column to "chr",
# and drops every row whose chromosome label contains "chrX" or "chrY".
#
# Args:
#   df1: A data frame whose first column (after the helper columns have been
#        removed) holds chromosome labels such as "chr1".
#
# Returns:
#   The filtered data frame. The result is always a data frame, even when
#   only a single column remains.
sortIt <- function(df1) {
  # Columns that are absent are simply ignored, as with `df1$col <- NULL`.
  drop_cols <- c("strand.x", "strand.y", "strand", "X.")
  df1 <- df1[, !(names(df1) %in% drop_cols), drop = FALSE]

  names(df1)[1] <- "chr"

  # Get rid of X and Y chromosomes
  is_sex_chr <- grepl("chrX|chrY", df1$chr)

  # drop = FALSE keeps a one-column result as a data frame instead of
  # letting `[` collapse it to a plain vector.
  df1[!is_sex_chr, , drop = FALSE]
}

# NOTE(review): this call is not valid R as written. `sortIt(x)write.csv(...)`
# places two calls side by side with no operator between them, and `x` is
# never defined. lapply() expects a function as its second argument, e.g. an
# anonymous function(x) that runs sortIt() on mylist[[x]] and writes the result.
# Also note that `mylist[x]` is a one-element list; `mylist[[x]]` is the
# data frame itself.
lapply(names(mylist),
       sortIt(x)write.csv(mylist[x],
                            file =paste0(x,'.csv'))) 

问题是我不知道如何把mylist传给这个函数。我是否应该在lapply中用x来代替df1?对于如何把这几部分联系在一起,我有点困惑。

2 个答案:

答案 0 :(得分:1)

我认为您最好将.csv的创建合并到您的函数中,然后使用for循环将该函数依次应用于列表中的每个对象。所以像这样,其中df是你发布的样本数据框:

mylist <- list(A = df, B = df)

sortIt <- function(i) {
  df = mylist[[i]]
  df[,"strand.x"] <- NULL
  df[,"strand.y"] <- NULL
  df[,"strand"] <- NULL
  df[,"X."] <- NULL
  names(df) <- c("chr", names(df)[2:length(names(df))])
  df <- df[!grepl("chrX", df$chr), ]
  df <- df[!grepl("chrY", df$chr), ]
  write.csv(df, file = paste0(names(mylist)[i], ".csv"), row.names=FALSE)
}

for (i in seq(length(mylist))) {sortIt(i)}

如果您尝试在工作区中创建新对象,那么apply函数之一将是更好的选择。但是,当您尝试输出文件时,我认为您需要使用for循环。

答案 1 :(得分:0)

不确定您要实现的目标,但猜测您希望将转换后的数据框保存到文件中,文件名取自列表中的名称。下面的代码可以完成这项工作(它应该可以与您的其余代码一起使用),注意其中的[[1]]:

lapply(names(mylist), function(x) write.csv(sortIt(mylist[x][[1]]), file = paste0(x,'.csv')))

另一种选择是使用mapply,这里我附上一个完整的例子:

# create the data
dframes <- lapply(1:3, function(x) data.frame(x=rnorm(10), y=runif(10)))
names(dframes) <- LETTERS[1:3]

# the transformation function
sortdf <- function(df) df[order(df$x),]

# two variants of apply
lapply(names(dframes), function(name) write.csv(sortdf(dframes[name][[1]]), file=paste0(name, '.csv')))

# mapply does not have the ugly [[1]] syntax bit, I'd prefer it myself
mapply(function(name, df) write.csv(sortdf(df), file=paste0(name, '.csv')), names(dframes), dframes)