大家好!
我一直在寻找解决这个问题的方法。我原以为它很简单(也许对你们中的一些人来说确实如此),但我至今没有解决。我想要的是按行修改第6列到第10列中的所有0和1:把0替换为同一行第4列(allele1)的值,把1替换为同一行第5列(allele2)的值。
这是一个可重复的例子:
# Build the example data: five descriptive columns plus five random 0/1 columns
chr <- rep(10, times = 10)
id <- paste0("name", seq_len(10))
pos <- seq(from = 1, to = 1000, length.out = 10)
allele1 <- c("T", "T", "G", "G", "C", "T", "C", "C", "G", "C")
allele2 <- c("A", "A", "T", "T", "C", "T", "C", "C", "T", "T")
# No seed is set in this snippet, so the 0/1 draws differ from run to run
col6 <- sample(c(0, 1), size = 10, replace = TRUE)
col7 <- sample(c(0, 1), size = 10, replace = TRUE)
col8 <- sample(c(0, 1), size = 10, replace = TRUE)
col9 <- sample(c(0, 1), size = 10, replace = TRUE)
col10 <- sample(c(0, 1), size = 10, replace = TRUE)
# Column names are taken from the variable names passed to data.frame()
df <- data.frame(chr, id, pos, allele1, allele2, col6, col7, col8, col9, col10)
df
chr id pos allele1 allele2 col6 col7 col8 col9 col10
1 10 name1 1 T A 1 1 1 1 1
2 10 name2 112 T A 0 0 0 1 1
3 10 name3 223 G T 1 0 1 1 0
4 10 name4 334 G T 1 1 0 1 1
5 10 name5 445 C C 0 0 1 0 1
6 10 name6 556 T T 0 1 0 1 1
7 10 name7 667 C C 0 1 0 0 1
8 10 name8 778 C C 0 0 1 1 1
9 10 name9 889 G T 1 1 1 1 0
10 10 name10 1000 C T 0 1 1 0 1
根据这个输出,我希望:
df
chr id pos allele1 allele2 col6 col7 col8 col9 col10
1 10 name1 1 T A A A A A A
2 10 name2 112 T A T T T A A
3 10 name3 223 G T T G T T G
4 10 name4 334 G T T T G T T
5 10 name5 445 C C C C C C C
6 10 name6 556 T T T T T T T
7 10 name7 667 C C C C C C C
8 10 name8 778 C C C C C C C
9 10 name9 889 G T T T T T G
10 10 name10 1000 C T C T T C T
我尝试过在 for 循环中使用 `within` 函数和 `apply` 函数,但似乎我的索引方式不对。我敢打赌这个任务用 Perl 做要容易得多,但我真的很想用 R 来练习。
以下是我尝试过的代码示例:
# Attempt 1 from the question (kept as posted; it does not give the desired result).
# Walks over the rows of df and, for columns 6 through the last column, uses
# ifelse() to pick the row's column-4 value where the cell equals 0 and the
# row's column-5 value otherwise.
# NOTE(review): the result of within() is not assigned to anything, and the
# assignment inside the braces targets `df` rather than the individual columns
# that within() exposes, so the original df is presumably left unchanged --
# confirm by running.
# NOTE(review): if allele1/allele2 are factors (data.frame() default before
# R 4.0), ifelse() presumably yields their integer codes, not letters -- confirm.
within(df, {
for(i in 1:nrow(df)){
df[i,6:length(df)]= ifelse(df[i,6:length(df)] == 0, df[i,4],df[i,5])
}
})
# Attempt 2 from the question (kept as posted; it does not give the desired result).
# apply(..., 2, ifelse, yes, no) runs ifelse() over each column of the logical
# matrix `df[,6:length(df)]==0`, with df[i,4] as the "yes" value and df[i,5]
# as the "no" value.
# On every iteration the left-hand side is ALL rows of columns 6 onward, while
# the replacement values come from row i only, so each pass overwrites the
# whole block using a single row's alleles.
# NOTE(review): after the first pass the columns no longer hold the original
# 0/1 values, so later passes presumably test already-replaced cells -- confirm.
for(i in 1:nrow(df)){
df[,6:length(df)]= apply(df[,6:length(df)]==0,2,ifelse,df[i,4],df[i,5])
}
我将不胜感激任何帮助!
真诚的你
答案 0 :(得分:2)
我们可以使用 dplyr 包中的 `mutate_at`。`df2` 是最终输出。
# Attach dplyr, which supplies %>%, mutate_at(), vars() and contains()
library(dplyr)
# For every column whose name contains "col", replace each 1 with the allele2
# entry at the same position and every other value with the allele1 entry
df2 <- df %>%
  mutate_at(
    .vars = vars(contains("col")),
    .funs = function(flag) ifelse(flag == 1, allele2, allele1)
  )
我们也可以使用 tidyr 和 dplyr 中的函数。`df3` 是最终输出。
library(dplyr)
library(tidyr)
# Reshape the "col" columns to long form, substitute the allele letters, then
# reshape back to wide form and restore df's original column order
df3 <- df %>%
  mutate(
    # Make sure the allele columns are character before ifelse() uses them
    allele1 = as.character(allele1),
    allele2 = as.character(allele2)
  ) %>%
  gather(key = flag_col, value = flag, contains("col")) %>%
  mutate(flag = ifelse(flag == 1, allele2, allele1)) %>%
  spread(key = flag_col, value = flag) %>%
  select(colnames(df))
# Fix the RNG state so the sample() draws below are repeatable
set.seed(123)
# Descriptive columns
chr <- rep(10, times = 10)
id <- paste0("name", seq_len(10))
pos <- seq(from = 1, to = 1000, length.out = 10)
allele1 <- c("T", "T", "G", "G", "C", "T", "C", "C", "G", "C")
allele2 <- c("A", "A", "T", "T", "C", "T", "C", "C", "T", "T")
# Five random 0/1 columns, drawn in this order from the seeded RNG
col6 <- sample(c(0, 1), size = 10, replace = TRUE)
col7 <- sample(c(0, 1), size = 10, replace = TRUE)
col8 <- sample(c(0, 1), size = 10, replace = TRUE)
col9 <- sample(c(0, 1), size = 10, replace = TRUE)
col10 <- sample(c(0, 1), size = 10, replace = TRUE)
# Column names are taken from the variable names passed to data.frame()
df <- data.frame(chr, id, pos, allele1, allele2, col6, col7, col8, col9, col10)
答案 1 :(得分:2)
您可以尝试以下方法:
# Build the example data with character (not factor) allele columns
chr <- rep(10, 10)
id <- paste0("name", 1:10)
pos <- seq(1, 1000, length.out = 10)
allele1 <- c("T", "T", "G", "G", "C", "T", "C", "C", "G", "C")
allele2 <- c("A", "A", "T", "T", "C", "T", "C", "C", "T", "T")
set.seed(1) # for reproducibility
col6 <- sample(c(0, 1), 10, TRUE)
col7 <- sample(c(0, 1), 10, TRUE)
col8 <- sample(c(0, 1), 10, TRUE)
col9 <- sample(c(0, 1), 10, TRUE)
col10 <- sample(c(0, 1), 10, TRUE)
# stringsAsFactors is spelled out as FALSE (the shorthand F is an ordinary
# variable that can be reassigned); it keeps id/allele1/allele2 as character
# so that a later ifelse() returns letters rather than factor codes
df <- data.frame(
  chr, id, pos, allele1, allele2, col6, col7, col8, col9, col10,
  stringsAsFactors = FALSE
)
请注意,正如评论中提到的(感谢 @ycw),这里我设置了 `stringsAsFactors = F`(即 `FALSE`),以避免字符列被转换为因子;否则 `ifelse` 返回的将是因子的整数编码,而不是字符。
> df
chr id pos allele1 allele2 col6 col7 col8 col9 col10
1 10 name1 1 T A 0 0 1 0 1
2 10 name2 112 T A 0 0 0 1 1
3 10 name3 223 G T 1 1 1 0 1
4 10 name4 334 G T 1 0 0 0 1
5 10 name5 445 C C 0 1 0 1 1
6 10 name6 556 T T 1 0 0 1 1
7 10 name7 667 C C 1 1 0 1 0
8 10 name8 778 C C 1 1 0 0 0
9 10 name9 889 G T 1 0 1 1 1
10 10 name10 1000 C T 0 1 0 0 1
# Overwrite columns 6-10 in place: a 0 becomes that row's column-4 value and
# anything else becomes that row's column-5 value
df[6:10] <- lapply(
  df[6:10],
  function(flags) ifelse(flags == 0, df[[4]], df[[5]])
)
> df
chr id pos allele1 allele2 col6 col7 col8 col9 col10
1 10 name1 1 T A T T A T A
2 10 name2 112 T A T T T A A
3 10 name3 223 G T T T T G T
4 10 name4 334 G T T G G G T
5 10 name5 445 C C C C C C C
6 10 name6 556 T T T T T T T
7 10 name7 667 C C C C C C C
8 10 name8 778 C C C C C C C
9 10 name9 889 G T T G T T T
10 10 name10 1000 C T C T C C T