我有一个数据框如下:
# Example genotype table. The first line of `text` holds the column names,
# so it has to be read as a header; otherwise the columns are named V1..V10
# and `df$Ref` / `df$Alt` do not exist.
df <- read.table(
  text = "chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191 T C 1/1 0/1 1/1 0/0 1/1 1/1
Chr1 1463534 G C 0/0 1/1 0/0 0/1 0/0 0/0
Chr1 1463881 T A 0/1 0/0 1/1 0/0 1/1 1/1
Chr1 1464091 G A 0/0 0/0 1/1 0/0 1/1 1/1
Chr1 1464651 T C 1/1 0/0 1/1 0/1 1/1 1/1",
  header = TRUE,
  stringsAsFactors = FALSE
)
预期结果:
chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
替换规则如下:在 df[5:10] 中,“0”应替换为 df$Ref 中的字符,“1”应替换为 df$Alt 中的字符。我查看了“Replace specific characters in a variable in data frame in R”这个问题,但它不适用于我的情况。感谢任何帮助。
答案 0 :(得分:4)
创建数据:
# Build the example data frame: one row per variant, with the reference and
# alternate alleles followed by one genotype column per sample.
# `header` is spelled out in full and TRUE/FALSE are used instead of T/F,
# which are ordinary variables that can be reassigned.
df <- read.table(
  text = "chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191 T C 1/1 0/1 1/1 0/0 1/1 1/1
Chr1 1463534 G C 0/0 1/1 0/0 0/1 0/0 0/0
Chr1 1463881 T A 0/1 0/0 1/1 0/0 1/1 1/1
Chr1 1464091 G A 0/0 0/0 1/1 0/0 1/1 1/1
Chr1 1464651 T C 1/1 0/0 1/1 0/1 1/1 1/1",
  header = TRUE,
  stringsAsFactors = FALSE
)
使用gsub
:
# gsub() takes a single pattern/replacement pair per call; the vectorised
# wrapper walks pattern, replacement and x in parallel and returns a list.
vgsub <- Vectorize(gsub, SIMPLIFY = FALSE)

# Transposing puts each variant (row of df) into its own column, so the
# wrapper pairs variant i with df$Ref[i] and df$Alt[i].
new <- vgsub(
  "1", df$Alt,
  vgsub("0", df$Ref, as.data.frame(t(df[5:10])))
)

# Stack the per-variant vectors back into rows and overwrite the genotypes.
df[5:10] <- do.call("rbind", new)
df
chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
1 Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
2 Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
3 Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
4 Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
5 Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
答案 1 :(得分:4)
使用data.table
# Grouping by (chr, pos) gives one row per group for this data, so `Ref` and
# `Alt` are single values inside each group and can serve as replacements.
setDT(df)[
  ,
  lapply(.SD, function(genotypes) {
    alt_filled <- gsub("1", Alt, genotypes)
    gsub("0", Ref, alt_filled)
  }),
  by = .(chr, pos)
]
# chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1: Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
#2: Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
#3: Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
#4: Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
#5: Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
使用dplyr
library(dplyr)
# mutate_each() and funs() are deprecated; across() is the supported way to
# apply one transformation to a set of columns.
# rowwise() makes `Ref` and `Alt` refer to the current row's single value,
# which is what gsub() needs as its replacement.
df %>%
  rowwise() %>%
  mutate(across(
    matches("^D04."),
    ~ gsub("0", Ref, gsub("1", Alt, .x))
  )) %>%
  ungroup()
# chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
#2 Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
#3 Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
#4 Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
#5 Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
另一个选择
library(dplyr)
library(tidyr)
# gather()/spread() are superseded; pivot_longer()/pivot_wider() are the
# current equivalents.
# Long format: one row per (variant, sample), so each genotype sits next to
# the Ref/Alt alleles it has to be translated with.
df %>%
  pivot_longer(
    -c(chr, pos, Ref, Alt),
    names_to = "key",
    values_to = "value"
  ) %>%
  rowwise() %>%
  mutate(value = gsub("0", Ref, gsub("1", Alt, value))) %>%
  # Drop the row-wise grouping before reshaping back to one column per sample.
  ungroup() %>%
  pivot_wider(names_from = key, values_from = value)
#Source: local data frame [5 x 10]
# chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
#2 Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
#3 Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
#4 Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
#5 Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
使用apply
基本R选项
# apply() on a data frame first coerces it to a character matrix, so `pos`
# would come back as text. Rewriting only the genotype columns keeps the
# other columns' types. Assumes `df` is a plain data.frame.
# local() keeps the helper variables out of the global environment; the
# converted copy is returned (and printed) without modifying `df`.
local({
  genotype_cols <- 5:10
  converted <- df
  converted[genotype_cols] <- lapply(df[genotype_cols], function(genotypes) {
    # gsub() uses a single replacement per call, so pair each genotype with
    # the Ref/Alt alleles of its own row.
    mapply(
      function(gt, ref, alt) gsub("0", ref, gsub("1", alt, gt)),
      genotypes, df$Ref, df$Alt,
      USE.NAMES = FALSE
    )
  })
  converted
})
# chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
#2 Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
#3 Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
#4 Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
#5 Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
答案 2 :(得分:2)
这是一个函数,您可以使用它来填充这种情况下的值,并且能够在将来的情况下进行更改。
#' Replace allele codes in genotype columns with the row's Ref/Alt alleles
#'
#' Every column not listed in `reference_cols` is treated as a genotype
#' column. In each of them "0" is replaced by the row's `Ref` value and then
#' "1" by the row's `Alt` value.
#'
#' @param df A data frame with `Ref` and `Alt` columns plus genotype columns.
#' @return `df` with the genotype columns rewritten as unnamed character
#'   vectors; all other columns are returned unchanged.
convert_val <- function(df) {
  # Columns that describe the variant itself and must not be rewritten.
  reference_cols <- c("chr", "pos", "Ref", "Alt")
  stopifnot(all(c("Ref", "Alt") %in% names(df)))

  # Rewrite one genotype column (`vec` is its index or name): `First` becomes
  # the row's Ref allele, then `Second` becomes the row's Alt allele.
  morph <- function(DF, vec, First = "0", Second = "1") {
    # `[[` always yields a plain vector, for data frames and tibbles alike.
    ref <- as.character(DF[["Ref"]])
    alt <- as.character(DF[["Alt"]])
    genotypes <- as.character(DF[[vec]])
    # gsub() uses a single replacement per call, so substitute row by row.
    # vapply() guarantees an unnamed character vector, including
    # character(0) when DF has no rows.
    vapply(
      seq_along(genotypes),
      function(i) gsub(Second, alt[[i]], gsub(First, ref[[i]], genotypes[[i]])),
      character(1)
    )
  }

  nums <- which(!names(df) %in% reference_cols)
  if (length(nums) > 0L) {
    df[nums] <- lapply(nums, function(x) morph(df, x))
  }
  df
}
convert_val(df)
# chr pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
# 1 Chr1 1462191 T C C/C T/C C/C T/T C/C C/C
# 2 Chr1 1463534 G C G/G C/C G/G G/C G/G G/G
# 3 Chr1 1463881 T A T/A T/T A/A T/T A/A A/A
# 4 Chr1 1464091 G A G/G G/G A/A G/G A/A A/A
# 5 Chr1 1464651 T C C/C T/T C/C T/C C/C C/C
将来,您可以将内部函数 morph 的 First 和 Second 参数更改为要查找的新值(默认值为 "0" 和 "1")。或者,如果列名称发生更改,您可以调整 reference_cols <- c("chr", "pos", "Ref", "Alt") 这一行。