如何在 R 中用其他变量(列)的值替换数据框中的特定字符

时间:2015-08-04 14:03:31

标签: r

我有一个数据框如下:

# Example genotype table: four annotation columns (chr, pos, Ref, Alt)
# followed by six sample columns holding "0/1"-style genotype calls.
# header = TRUE is required so that the first line becomes the column names;
# otherwise it is read as a data row and df$Ref / df$Alt do not exist.
df <- read.table(text = "chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191   T   C     1/1     0/1     1/1     0/0     1/1     1/1
Chr1 1463534   G   C     0/0     1/1     0/0     0/1     0/0     0/0
Chr1 1463881   T   A     0/1     0/0     1/1     0/0     1/1     1/1
Chr1 1464091   G   A     0/0     0/0     1/1     0/0     1/1     1/1
Chr1 1464651   T   C     1/1     0/0     1/1     0/1    1/1     1/1", header = TRUE, stringsAsFactors = FALSE)

预期结果:

  chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
Chr1 1464651   T   C     C/C     T/T     C/C     T/C    C/C     C/C

替换规则如下:在 df[5:10] 中,“0”应替换为同一行 df$Ref 中的字符,“1”应替换为同一行 df$Alt 中的字符。我查看了这个链接中的问题(Replace specific characters in a variable in data frame in R),但它不适用于我的情况。感谢任何帮助。

3 个答案:

答案 0 :(得分:4)

创建数据:

# Build the example data frame. The first line of the text is the header;
# the argument name is spelled out in full and TRUE/FALSE are used instead
# of the reassignable shortcuts T/F.
df <- read.table(text = "chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
                 Chr1 1462191   T   C     1/1     0/1     1/1     0/0     1/1     1/1
                 Chr1 1463534   G   C     0/0     1/1     0/0     0/1     0/0     0/0
                 Chr1 1463881   T   A     0/1     0/0     1/1     0/0     1/1     1/1
                 Chr1 1464091   G   A     0/0     0/0     1/1     0/0     1/1     1/1
                 Chr1 1464651   T   C     1/1     0/0     1/1     0/1    1/1     1/1", header = TRUE, stringsAsFactors = FALSE)

使用gsub

# gsub() is not vectorised over its replacement argument, so wrap it with
# Vectorize(); SIMPLIFY = FALSE keeps the result as a list.
vgsub <- Vectorize(gsub, SIMPLIFY = FALSE)

# Transposing the sample columns yields one column per original row, which
# lets each row's genotype calls be paired with that row's Ref / Alt value.
# Inner call: "0" -> Ref; outer call: "1" -> Alt.
new <- vgsub(
  "1", df$Alt,
  vgsub("0", df$Ref, as.data.frame(t(df[5:10])))
)

# Stack the per-row results back into a matrix and overwrite the sample columns.
df[5:10] <- do.call("rbind", new)
df
  chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

答案 1 :(得分:4)

使用data.table

library(data.table)

# setDT() converts df to a data.table by reference.
# Grouping on every annotation column (not just chr and pos) makes Ref and
# Alt length-one values inside each group, so gsub() always receives a single
# replacement string even when the same position appears on several rows
# with different alleles. It also keeps Ref and Alt out of .SD, so only the
# sample columns are rewritten.
setDT(df)[, lapply(.SD, function(x) gsub("0", Ref, gsub("1", Alt, x))),
          by = .(chr, pos, Ref, Alt)]

#    chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1: Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2: Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3: Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4: Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5: Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

使用dplyr

library(dplyr)

# Process one row at a time so that Ref and Alt are single strings when
# handed to gsub(); across() replaces the deprecated mutate_each()/funs().
# ungroup() drops the row-wise grouping from the returned table.
df %>%
  rowwise() %>%
  mutate(across(
    matches("^D04."),
    ~ gsub("0", Ref, gsub("1", Alt, .x))
  )) %>%
  ungroup()

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

另一个选择

library(dplyr)
library(tidyr)

# Reshape to long form (one row per site/sample pair), substitute the allele
# codes row by row, then reshape back to wide form.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread();
# pivot_wider() also keeps the sample columns in their original order
# instead of sorting them alphabetically.
df %>%
  pivot_longer(
    -c(chr, pos, Ref, Alt),
    names_to = "key",
    values_to = "value"
  ) %>%
  rowwise() %>%
  mutate(value = gsub("0", Ref, gsub("1", Alt, value))) %>%
  ungroup() %>%
  pivot_wider(names_from = key, values_from = value)

#Source: local data frame [5 x 10]

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C
使用apply

基本R选项

# Base R: rewrite only the genotype columns (5:10) and leave the annotation
# columns untouched. Running apply() over the whole data frame would coerce
# every column (including the integer pos) to character; here apply() only
# sees columns 3:10, which are all character already.
# local() keeps the helper variable out of the workspace and returns the
# converted data frame so it is printed at top level.
local({
  out <- as.data.frame(df, stringsAsFactors = FALSE)
  out[5:10] <- t(apply(
    out[3:10], 1,
    # x is one row: Ref, Alt, then the six sample calls.
    function(x) gsub("0", x[["Ref"]], gsub("1", x[["Alt"]], x[-(1:2)]))
  ))
  out
})

#   chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
#1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
#2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
#3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
#4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
#5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

答案 2 :(得分:2)

下面这个函数可以完成本例中的替换,并且便于以后根据需要进行调整。

#' Replace genotype codes with the row's reference / alternate allele
#'
#' In every column that is not listed in `reference_cols`, occurrences of
#' `First` are replaced by the value of `Ref` on the same row, and then
#' occurrences of `Second` are replaced by the value of `Alt` on the same row.
#'
#' @param df A data frame containing at least the columns `Ref` and `Alt`.
#' @param First Pattern (regular expression, as used by `gsub()`) that is
#'   replaced by `Ref`. Defaults to `"0"`.
#' @param Second Pattern that is replaced by `Alt`. Defaults to `"1"`.
#' @param reference_cols Names of the annotation columns that must be left
#'   unchanged.
#' @return `df` with the remaining columns converted to character vectors
#'   holding the substituted values.
convert_val <- function(df, First = "0", Second = "1",
                        reference_cols = c("chr", "pos", "Ref", "Alt")) {
  stopifnot(is.data.frame(df), all(c("Ref", "Alt") %in% names(df)))

  # Convert a single column (`vec`, a column index or name) of DF.
  morph <- function(DF, vec, First = "0", Second = "1") {
    ref <- as.character(DF[["Ref"]])
    alt <- as.character(DF[["Alt"]])
    calls <- as.character(DF[[vec]])
    # gsub() is not vectorised over the replacement, so go element by
    # element; vapply() guarantees a plain character vector, also for
    # zero-row input.
    vapply(
      seq_along(calls),
      function(i) gsub(Second, alt[i], gsub(First, ref[i], calls[i])),
      character(1)
    )
  }

  nums <- which(!names(df) %in% reference_cols)
  for (j in nums) {
    df[[j]] <- morph(df, j, First = First, Second = Second)
  }
  df
}

# Apply the conversion to the example data frame and print the result.
convert_val(df)
#    chr     pos Ref Alt D045313 D045314 D045135 D045136 D045137 D045138
# 1 Chr1 1462191   T   C     C/C     T/C     C/C     T/T     C/C     C/C
# 2 Chr1 1463534   G   C     G/G     C/C     G/G     G/C     G/G     G/G
# 3 Chr1 1463881   T   A     T/A     T/T     A/A     T/T     A/A     A/A
# 4 Chr1 1464091   G   A     G/G     G/G     A/A     G/G     A/A     A/A
# 5 Chr1 1464651   T   C     C/C     T/T     C/C     T/C     C/C     C/C

将来,您可以将内部函数 morph 的 First 和 Second 参数更改为要查找的新值(默认值为 "0" 和 "1")。或者,如果列名称发生更改,您可以调整 reference_cols 的取值(默认为 c("chr", "pos", "Ref", "Alt"))。