在data.frame中拆分每个单元格的字符串并求和

时间:2019-12-19 14:47:54

标签: r data.table

我有一个像这样的data.frame:

G5_01
   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                  0/0                                  0/0
2:                                  0/0                                  1/1
3:                                  0/1                                  0/0

我想把每个单元格中"/"两侧的数字相加(即统计变异等位基因的个数),并将其转换为:

  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                  0                                  0
2:                                  0                                  2
3:                                  1                                  0

data.table似乎能够处理这个问题,我的脚本如下所示:

library(data.table)

# Splits each cell on "/" and adds the two parts. Note that `[[1]]` selects only
# the split of the FIRST element of the column, so each column collapses to a
# single value (the first row's sum) instead of one sum per row.
G5_02<-setDT(G5_01)[,lapply(.SD,function(x) sum(as.numeric(strsplit(x,"/")[[1]][1]),
                                                as.numeric(strsplit(x,"/")[[1]][2])))]

但这只会给我第一行的结果

   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1:                                    0                                    0

关于如何修复它的任何建议?

5 个答案:

答案 0 :(得分:2)

如果您只需要处理1和0,那么一种可行的解决方案是统计每个单元格中1的个数,即:

library(data.table)
# Count the "1" characters in every cell of every column.
setDT(df)[, lapply(.SD, stringr::str_count, pattern = "1")][]

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1:                                    0                                    0
#2:                                    0                                    2
#3:                                    1                                    0

答案 1 :(得分:1)

library(data.table)
# Convert G5_01 to a data.table and overwrite both genotype columns in place:
# each cell becomes the digit before the "/" (character 1) plus the digit
# after it (character 3).
setDT(G5_01)
G5_01[, c("X8803713069_R01C02_8803713069_R01C02",
          "X8803713069_R02C02_8803713069_R02C02") := lapply(.SD, function(genotype) {
  as.numeric(substr(genotype, 1, 1)) + as.numeric(substr(genotype, 3, 3))
}), .SDcols = c("X8803713069_R01C02_8803713069_R01C02",
                "X8803713069_R02C02_8803713069_R02C02")]

G5_01
         X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
    1:                                    0                                    0
    2:                                    0                                    2
    3:                                    1                                    0

数据

# Example input: two genotype columns holding strings such as "0/0" or "0/1".
# `header = TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; `stringsAsFactors = FALSE` keeps the columns character on
# R versions before 4.0 as well.
G5_01 <- read.table(text = 'X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
                                  0/0                                  0/0
                                  0/0                                  1/1
                                 0/1                                  0/0', header = TRUE,
                    stringsAsFactors = FALSE)

答案 2 :(得分:1)

也许您可以尝试下面的代码,其中使用了nchar()和gsub()

以下是两个基于base R的解决方案:

  • 解决方案1:(可能比sapply()、apply()或lapply()方法更快)
# Work on the whole table at once as a character matrix: delete every character
# that is not "1" from each cell, then count the characters that remain.
G5_02 <- data.frame(
  nchar(
    gsub(pattern = "[^1]", replacement = "", x = as.matrix(G5_01))
  )
)
  • 解决方案2
# Count the "1" characters in every cell, one column at a time.
# lapply() is used instead of sapply(): it always returns one list element per
# column, so data.frame() rebuilds the same columns even when G5_01 has a single
# row. sapply() would simplify a one-row input to a plain vector, and
# data.frame() would then turn the columns into rows.
G5_02 <- data.frame(lapply(G5_01, function(x) nchar(gsub("[^1]", "", x))))

结果如下:

> G5_02
  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1                                    0                                    0
2                                    0                                    2
3                                    1                                    0

数据

# Reproducible example input: a three-row data frame whose cells are genotype
# strings of the form "a/b", kept as character (not factor) columns.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

答案 3 :(得分:1)

在base R中,我们可以先用read.table按"/"进行拆分,然后再使用rowSums求和

# For every column, parse its "a/b" strings as a "/"-separated table (one
# column per allele) and replace the column with the per-row sums.
# Assigning into df[] keeps df a data frame with its original column names.
df[] <- lapply(df, function(genotypes) {
  alleles <- read.table(text = genotypes, sep="/", header = FALSE)
  rowSums(alleles)
})
df
#  X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1                                    0                                    0
#2                                    0                                    2
#3                                    1                                    0

数据

# Reproducible example input: a three-row data frame whose cells are genotype
# strings of the form "a/b", kept as character (not factor) columns.
df <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)

答案 4 :(得分:1)

使用data.table::tstrsplit进行转置的字符串拆分(类似于purrr::transpose(strsplit(x, '/'))),然后我们可以将各部分转换为数字并将它们加在一起

library(dplyr)

# For every column: split the "a/b" strings on "/" into one vector per allele
# position, convert each vector to numeric and add them element-wise.
# Base lapply() replaces purrr's map(), which is not available here because only
# dplyr is attached. Reduce() adds any number of allele vectors, whereas
# do.call("+", ...) only works for exactly two. across(everything(), ...) is the
# current replacement for the superseded mutate_all().
df %>%
  mutate(across(everything(), function(genotype) {
    alleles <- data.table::tstrsplit(genotype, "/")
    Reduce(`+`, lapply(alleles, as.numeric))
  }))

#   X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1                                    0                                    0
# 2                                    0                                    2
# 3                                    1                                    0