我有一个包含143列和147153行的数据框,如下所示:(我的数据子集)
otu1 otu2 1 2 3
1: OTU_1 OTU_10 3.807355 5.403722 3.972693
2: OTU_1 OTU_100 -1.618910 0.000000 0.000000
3: OTU_1 OTU_101 0.000000 0.000000 0.000000
现在,我需要把第1列和第2列分别与其余每一列组合,并各自保存到单独的文件中。期望的输出是:
文件1:
otu1 otu2 1
1: OTU_1 OTU_10 3.807355
2: OTU_1 OTU_100 -1.618910
3: OTU_1 OTU_101 0.000000
4: OTU_1 OTU_1019 0.000000
5: OTU_1 OTU_102 0.000000
文件2等等....
otu1 otu2 2
1: OTU_1 OTU_10 5.403722
2: OTU_1 OTU_100 0.000000
3: OTU_1 OTU_101 0.000000
4: OTU_1 OTU_1019 9.211077
5: OTU_1 OTU_102 0.000000
因此,我尝试了以下代码:
# Attempt to build one table per value column (141 iterations) and write each to its own file.
for ( i in 1:141){
# Take the first two columns plus column i+2 of log_trans2.
# NOTE(review): `patient$i` addresses an element literally named "i"; the loop
# variable i is not substituted into `$`, so every iteration targets the same element.
patient$i=log_trans2[,c(1:2,(i+2))]
# Subset by a logical index built from `!= 0` (presumably meant to drop zero values -- confirm).
patient$i=patient$i[patient$i!=0]
# Join with OTUNames twice: first matching otu2 to OTUId, then otu1 to OTUId.
patient$i=merge(patient$i, OTUNames, by.x="otu2", by.y="OTUId")
patient$i=merge(patient$i, OTUNames, by.x="otu1", by.y="OTUId")
# Write a tab-separated, unquoted file whose name contains the loop index.
# NOTE(review): this reads `patient1`, while the lines above assign to `patient` -- likely a typo.
# NOTE(review): "patient_grahs" may be a misspelling of the directory name -- verify.
write.table(patient1$i, file=sprintf("patient_grahs/patient.%s.tab",i), sep = '\t',quote = FALSE)
}
但是我收到如下错误:
`$<-.data.frame`(`*tmp*`, "i", value = c(1, 2, 4)) 中的错误:替换有 3 行,数据有 0 行
我哪里出错了?
答案 0 :(得分:1)
我们可以将 `lapply` 与 `fwrite` 一起使用。根据显示的数据,它似乎是一个 data.table。
library(data.table)

# Convert to a data.table by reference so the data.table `[` syntax is available.
setDT(log_trans2)

# For every column from the third onwards: pair it with the first two columns,
# join OTUNames on otu2 and then on otu1, and write the result to
# "patients_<column name>.txt".
lapply(names(log_trans2)[seq(3, ncol(log_trans2))], function(value_col) {
  keep <- c(names(log_trans2)[1:2], value_col)
  subset_dt <- log_trans2[, ..keep]
  by_otu2 <- merge(subset_dt, OTUNames, by.x = "otu2", by.y = "OTUId")
  annotated <- merge(by_otu2, OTUNames, by.x = "otu1", by.y = "OTUId")
  fwrite(annotated, file = paste0("patients_", value_col, ".txt"))
})
传给 `fwrite` 的输入应为:
# Build, for each column from the third onwards, a subset holding the first
# two columns plus that column; the resulting list is printed.
lapply(names(log_trans2)[seq(3, ncol(log_trans2))], function(value_col) {
  keep <- c(names(log_trans2)[1:2], value_col)
  log_trans2[, ..keep]
})
#[[1]]
# otu1 otu2 1
#1: OTU_1 OTU_10 3.807355
#2: OTU_1 OTU_100 -1.618910
#3: OTU_1 OTU_101 0.000000
#[[2]]
# otu1 otu2 2
#1: OTU_1 OTU_10 5.403722
#2: OTU_1 OTU_100 0.000000
#3: OTU_1 OTU_101 0.000000
#[[3]]
# otu1 otu2 3
#1: OTU_1 OTU_10 3.972693
#2: OTU_1 OTU_100 0.000000
#3: OTU_1 OTU_101 0.000000
# Small example input: two OTU id columns followed by three numeric columns
# named "1", "2" and "3".
log_trans2 <- data.frame(
  otu1 = c("OTU_1", "OTU_1", "OTU_1"),
  otu2 = c("OTU_10", "OTU_100", "OTU_101"),
  `1` = c(3.807355, -1.61891, 0),
  `2` = c(5.403722, 0, 0),
  `3` = c(3.972693, 0, 0),
  row.names = c("1:", "2:", "3:"),
  check.names = FALSE, # keep the non-syntactic names "1", "2", "3" unchanged
  stringsAsFactors = FALSE
)
答案 1 :(得分:1)
我不确定您要合并的数据框是什么。
如果您的数据框为 `df`,并且您想每次取出一列并单独保存,则可以执行以下操作:
# Write one tab-separated file per value column of `df`. Each file holds the
# first two columns plus one further column; the file name carries that
# column's position in `df` (e.g. "patients_3.txt" for the third column).
#
# seq_len(...)[-(1:2)] yields an empty sequence when `df` has fewer than three
# columns, whereas 3:ncol(df) would count backwards (3, 2).
for (i in seq_len(ncol(df))[-(1:2)]) {
  temp_df <- df[, c(1, 2, i)]
  # The argument is `file`; write.table() has no `filename` parameter, so the
  # original call stopped with an "unused argument" error.
  write.table(
    temp_df,
    file = paste0("patients_", i, ".txt"),
    sep = "\t",
    row.names = FALSE
  )
}