Question

我正在使用以下四列原始重量测量数据，以及一个几乎可以正常工作的嵌套ifelse语句，该语句生成了'kg'向量。

     Id       G4_R_2_4         G4_R_2_5        G4_R_2_5_option2          kg
219 13237       16.0             NA                  NA                16.0
220 139129      8.50             55.70               47.20             8.50
221 139215      28.9             NA                  NA                28.9
222 139216       NA              46.70               8.50              46.70
223 139264      12.40            NA                  NA                12.40
224 139281      13.60            NA                  NA                13.60
225 139366      16.10            NA                  NA                16.10
226 139376      61.80            NA                  NA                61.80
227 140103      NA               48.60               9.10              48.60

目标是根据以下条件将三个'G4'列合并为kg： 1）如果G4_R_2_4不是NA，则打印其值 2）如果G4_R_2_4为NA，则打印G4_R_2_5和G4_R_2_5_option2中出现的较小值（对于蹩脚的变量名称抱歉）

我一直在使用以下语句（名为'child'的大数据集）：

> child$kg <- ifelse(child$G4_R_2_4 == 'NA' & child$G4_R_2_5 < child$G4_R_2_5_option2,
   child$G4_R_2_5, ifelse(child$G4_R_2_4 == 'NA' & child$G4_R_2_5 > child$G4_R_2_5_option2,
                          child$G4_R_2_5_option2, child$G4_R_2_4))

这导致了我现在拥有的'kg'向量。它似乎满足G4_R_2_4条件（是/不是NA）但始终为NA情况打印G4_R_2_5的值。如何将其纳入大于/小于条件？

Answer 1

从您的示例中看不太清楚，但我认为问题是您错误地处理了NA，和/或data.frame的列使用了错误的类型。尝试重写你的代码：

data.frame

Answer 2

我们可以使用pmin执行此操作。假设你的'G4'列是'character'类，我们将这些列转换为'numeric'类，并在这些列上使用pmin。

 # Positions of every column whose name starts with "G4".
 indx <- grep('^G4', names(child))
 # Convert those columns to numeric in place.
 # NOTE(review): this assumes the columns are character (or already numeric);
 # as.numeric() on a factor would return level codes instead -- confirm.
 child[indx] <- lapply(child[indx], as.numeric)
 # Keep the G4 columns on their own; `d1` is reused by the next snippet.
 d1 <- child[indx]
 # Where the first G4 column is NA, take the row-wise minimum of the remaining
 # G4 columns (ignoring NAs); otherwise keep the first G4 column's value.
 child$kgN <- ifelse(is.na(d1[,1]), do.call(pmin, c(d1[-1], na.rm=TRUE)), d1[,1])
 # Print the result.
 child$kgN
 #[1] 16.0  8.5 28.9  8.5 12.4 13.6 16.1 61.8  9.1

或者不使用ifelse

 # Same selection without ifelse(): build a two-column matrix (column 1 is the
 # first G4 column, column 2 is the row-wise minimum of the other G4 columns)
 # and pick one cell per row with a (row, column) index matrix. The column
 # index is is.na(...) + 1L, i.e. 2 when the first G4 column is NA, else 1.
 # seq_len() is used instead of 1:nrow(d1) so a zero-row `d1` yields an empty
 # result rather than indexing with c(1, 0).
 cbind(d1[,1], do.call(pmin, c(d1[-1], na.rm=TRUE)))[cbind(seq_len(nrow(d1)),
              (is.na(d1[,1]))+1L)]
 #[1] 16.0  8.5 28.9  8.5 12.4 13.6 16.1 61.8  9.1

基准

# Fix the RNG seed so the simulated benchmark data is reproducible.
set.seed(24)
# Simulated benchmark data: 1e6 rows x 3 columns, every cell drawn with
# replacement from NA and the integers 0..50. Unlike `child`, this frame has
# no Id column -- only the three G4 columns.
child1 <- as.data.frame(matrix(sample(c(NA,0:50), 1e6*3, replace=TRUE),
    ncol=3, dimnames=list(NULL, c('G4_R_2_4', 'G4_R_2_5', 
                'G4_R_2_5_option2'))) )
# Nested-ifelse() approach, evaluated on the global `child1`.
#
# Returns, per row: G4_R_2_4 when it is present; otherwise the strictly
# smaller of G4_R_2_5 and G4_R_2_5_option2. When G4_R_2_4 is missing and the
# two alternatives are equal the result is NA, and when the comparison itself
# is NA (one alternative is missing) the result is NA as well.
cyberj0g <- function() {
  primary <- child1[["G4_R_2_4"]]
  first_alt <- child1[["G4_R_2_5"]]
  second_alt <- child1[["G4_R_2_5_option2"]]
  primary_missing <- is.na(primary)

  # Inner choice: second alternative when it is strictly smaller, else primary.
  otherwise <- ifelse(primary_missing & second_alt < first_alt,
                      second_alt, primary)
  ifelse(primary_missing & first_alt < second_alt, first_alt, otherwise)
}

 # Row-wise helper for the benchmark. `x` is one row of `child1`, which has
 # exactly three columns and no Id column: position 1 is G4_R_2_4 and
 # positions 2:3 are the two alternatives. (Reading positions 2:4 here, as the
 # `child` version with an Id column does, would look at the wrong columns.)
 #
 # Returns x[1] when it is not NA; otherwise the smaller non-missing
 # alternative; NA when all three values are missing (instead of the Inf plus
 # warning that min(..., na.rm = TRUE) gives on an all-NA input).
 get_kg <- function(x) {
   primary <- x[1]
   if (!is.na(primary)) {
     return(primary)
   }
   alternatives <- x[2:3]
   if (all(is.na(alternatives))) {
     return(primary)
   }
   min(alternatives, na.rm = TRUE)
 }
# apply() approach: call get_kg() once per row of the global `child1`.
RHertel <- function() apply(child1, 1, get_kg)

# Vectorised approach on the global `child1`, using matrix indexing.
#
# Builds a two-column matrix (column 1: the first column of `child1`;
# column 2: the row-wise minimum of the remaining columns, ignoring NAs) and
# picks one cell per row: column 2 where the first column is NA, else column 1.
# seq_len() replaces 1:nrow(child1) so that a zero-row `child1` returns an
# empty vector instead of a stray NA.
akrun <- function() {
  primary <- child1[, 1]
  fallback <- do.call(pmin, c(child1[-1], na.rm = TRUE))
  candidates <- cbind(primary, fallback, deparse.level = 0)
  # is.na() + 1L maps FALSE -> column 1 and TRUE -> column 2.
  pick <- cbind(seq_len(nrow(child1)), is.na(primary) + 1L)
  candidates[pick]
}

# Single-run timings of each approach on `child1`. The commented figures under
# each call are the output recorded in the original post.
system.time(cyberj0g())
#  user  system elapsed
# 0.451   0.000   0.388

system.time(RHertel())
#   user  system elapsed
# 11.808   0.000  10.928

system.time(akrun())
#   user  system elapsed
#  0.000   0.000   0.084

# Repeated (20 runs) relative comparison of the two vectorised approaches.
library(microbenchmark) 
microbenchmark(cyberj0g(), akrun(), unit='relative', times=20L)
#Unit: relative
#       expr      min       lq     mean   median       uq      max neval cld
# cyberj0g() 3.750391 4.137777 3.538063 4.091793 2.895156 3.197511    20   b
#    akrun() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    20   a

Answer 3

这是一个可能有趣的替代版本，假设值以数字形式存储（否则列条目应转换为数值，如其他答案中所示）：

# Choose the weight for one row of `child`, passed as a vector `x` whose
# position 1 is Id, position 2 is G4_R_2_4 and positions 3:4 are G4_R_2_5 and
# G4_R_2_5_option2.
#
# Returns x[2] when it is not NA; otherwise the smaller non-missing value of
# x[3] and x[4]; NA when all three measurements are missing (instead of the
# Inf plus warning that min(..., na.rm = TRUE) gives on an all-NA input).
get_kg <- function(x) {
  primary <- x[2]
  if (!is.na(primary)) {
    return(primary)
  }
  alternatives <- x[3:4]
  if (all(is.na(alternatives))) {
    return(primary)
  }
  min(alternatives, na.rm = TRUE)
}

# apply() converts `child` to a matrix and passes each row to get_kg() as a
# vector, so get_kg() sees Id at position 1 and the three G4 columns at 2:4.
# This depends on the column order shown in the printed table below.
child$kg <- apply(child,1,get_kg)

#> child
#        Id G4_R_2_4 G4_R_2_5 G4_R_2_5_option2   kg
#219  13237     16.0       NA               NA 16.0
#220 139129      8.5     55.7             47.2  8.5
#221 139215     28.9       NA               NA 28.9
#222 139216       NA     46.7              8.5  8.5
#223 139264     12.4       NA               NA 12.4
#224 139281     13.6       NA               NA 13.6
#225 139366     16.1       NA               NA 16.1
#226 139376     61.8       NA               NA 61.8
#227 140103       NA     48.6              9.1  9.1

Answer 4

我很确定问题是你没有测试这些值是否是NA，你正在测试它们是否等于字符串“NA”，它们从来都不是。这应该有效：

# Combine the three G4 columns into `kg`:
#   - G4_R_2_4 when it is present;
#   - otherwise the smaller of G4_R_2_5 and G4_R_2_5_option2.
# Missingness is tested with is.na(); comparing against the string 'NA' never
# matches a real NA. The first comparison uses `<=` so that a tie between the
# two alternatives returns that shared value instead of falling through to the
# (missing) G4_R_2_4.
# Note: if G4_R_2_4 is NA and exactly one alternative is also NA, the
# comparison is NA and so is the result; use pmin(..., na.rm = TRUE) if the
# remaining value should be returned in that case.
child$kg <- ifelse(is.na(child$G4_R_2_4) &
                   child$G4_R_2_5 <= child$G4_R_2_5_option2,
                   child$G4_R_2_5,
              ifelse(is.na(child$G4_R_2_4) &
                     child$G4_R_2_5 > child$G4_R_2_5_option2,
                     child$G4_R_2_5_option2,
                       child$G4_R_2_4))

R中的嵌套ifelse如此接近工作

4 个答案:

基准