Question

我想通过下面的函数 getAges，根据一组 ID 获取对应的年龄列表。但在完整的代码示例中它的结果不对：返回的年龄顺序与给定的 ID 列表顺序不一致（完整代码见下文）。代码 DF[DF$ID %in% ids,] 的含义是：取整个数据（DF），检查其 ID 列（DF$ID），保留其中位于 ID 列表中的那些行（- - %in% ids），然后返回这些 ID 的年龄（[wantedIds]$Age）。我对 - - %in% ids 这一部分没有把握，因为按我的理解，R 的 %in% 比较会在匹配时返回对应的 id。

# Question's original attempt: return the ages for the IDs held in `ids`.
# The `...` argument is accepted but never used; the body reads the variables
# DF and ids from the enclosing environment instead.
getAges <- function(...)
{
   # %in% yields one TRUE/FALSE per row of DF, so the selected rows come back
   # in DF's own row order, not in the order of `ids`.
   DF[DF$ID %in% ids,]$Age
}

函数 getIDs 的返回结果是正确的。完整的代码示例如下：

library('dplyr')
# Return the ID column of DF after optional filtering and ordering.
#
# ...: filter expressions handed on to dplyr's filter_().
# by:  optional ordering expression handed on to arrange_(); NULL (the
#      default) skips the ordering step.
getIDs <- function(..., by = NULL) {
    rows <- filter_(DF, ...)
    if (!is.null(by)) {
        rows <- arrange_(rows, by)
    }
    rows$ID
}
# Question's original attempt: return the ages for the IDs held in `ids`.
# The `...` argument is accepted but never used; the body reads the global
# variables DF and ids instead.
getAges <- function(...)
{
   # %in% yields one TRUE/FALSE per row of DF, so the selected rows come back
   # in DF's own row order, not in the order of `ids`.
   DF[DF$ID %in% ids,]$Age
}

# Example data: 18 rows with integer ID and Age columns, and Gender stored as
# a factor whose levels are "female" and "male" (in that order).
DF <- data.frame(
  ID = c(
    16265L, 16272L, 16273L, 16420L, 16483L, 16539L,
    16773L, 16786L, 16795L, 17052L, 17453L, 18177L,
    18184L, 19088L, 19090L, 19093L, 19140L, 19830L
  ),
  Age = c(
    32L, 20L, 28L, 38L, 42L, 35L,
    26L, 32L, 20L, 45L, 32L, 26L,
    34L, 41L, 45L, 34L, 38L, 50L
  ),
  Gender = factor(
    c(
      "male", "female", "female", "female", "male", "female",
      "male", "female", "female", "female", "female", "female",
      "female", "female", "male", "male", "female", "female"
    ),
    levels = c("female", "male")
  )
)

# All IDs ordered by descending Age (no filter expressions are passed).
ids <- getIDs(by = "desc(Age)")

# Wanted: the ages in the same order as `ids`.
ages <- getAges(ids) # TODO this fails

# Compare the two vectors; the commented lines below are the printed output.
str(ids)
str(ages)
#  int [1:18] 19830 17052 19090 16483 19088 16420 19140 16539 18184 19093 ...
# int [1:18] 32 20 28 38 42 35 26 32 20 45 ... # TODO why here this order?

原始数据作为列表

#Original
#ID Age Gender
#16265  32  male
#16272  20  female
#16273  28  female
#16420  38  female
#16483  42  male
#16539  35  female
#16773  26  male
#16786  32  female
#16795  20  female
#17052  45  female
#17453  32  female
#18177  26  female
#18184  34  female
#19088  41  female
#19090  45  male
#19093  34  male
#19140  38  female
#19830  50  female

getAges的预期输出：与列表顺序ids对应的年龄列表

R：3.3.2
操作系统：Debian 8.5

Answer 1

如果getAges的唯一目的是查找ids的年龄，请尝试

# Look up the ages for a vector of IDs, keeping the order of the IDs.
#
# ...: the IDs to look up in DF$ID. When called with no argument, the global
#      `ids` vector is used, so the earlier call style keeps working.
# Returns the DF$Age values, one per requested ID and in request order; an ID
# that does not occur in DF$ID yields NA.
getAges <- function(...)
{
   # Honour the IDs passed by the caller; before, the argument was silently
   # ignored and the global `ids` was always used.
   wanted <- if (nargs() > 0L) c(...) else ids
   # match() gives, for each wanted ID, its row position in DF, so the result
   # follows the order of `wanted` (%in% would only filter, in DF's row order).
   DF$Age[match(wanted, DF$ID)]
}

Answer 2

在dplyr

# Note: here getAges is a one-column data frame, not a function.
# Rows containing NA are dropped, the remainder is sorted by descending Age
# (ties broken by ascending ID), and only the Age column is kept.
getAges <- DF %>%
  na.omit() %>%
  arrange(desc(Age), ID) %>%
  select(Age)

getAges
   Age
1   50
2   45
3   45
4   42
5   41
6   38
7   38
8   35
9   34
10  34
11  32
12  32
13  32
14  28
15  26
16  26
17  20
18  20
> as.list(getAges)
$Age
 [1] 50 45 45 42 41 38 38 35 34 34 32 32 32 28 26 26 20 20

不过（这里我只能猜测），如果您把数据一直保留在数据框中，后续步骤也会更轻松。关于这个主题有一篇很好的入门介绍，见 here；如果您更喜欢视频，R 聚会上也有一个很棒的经典视频，见 here。观看时请注意：现在我们改用 tidyr，它的函数比 reshape 更容易完成数据的融化（melt）和重铸（cast）；当然，dplyr 也彻底改变了我们操作数据框的方式：尽量避免使用基础 R 中基于 $col 和 [] 的引用。

Answer 3

根据 alexis_laz 关于改进函数中 ... 用法的评论，提出如下方案：

library(R6)

# R6 wrapper around a data frame that has ID, Age and Gender columns.
# Relies on %>%, filter_() and arrange_() already being attached (dplyr).
mydataframe <- R6Class("mydataframe",
  public = list(
    # The wrapped data frame. This empty default is replaced by initialize();
    # its column names match the ones initialize() requires.
    data = data.frame(
      ID = integer(),
      Age = integer(),
      Gender = character()
    ),

    # Store x after checking that it has all three required columns.
    initialize = function(x) {
      stopifnot(c("ID", "Age", "Gender") %in% names(x))
      self$data <- x
    },

    # IDs of the rows kept by the filter expressions in `...`, optionally
    # ordered by the expression given in `by` (NULL skips the ordering step).
    getIDs = function(..., by = NULL) {
      self$data %>%
        filter_(...) %>%
        { if (!is.null(by)) arrange_(., by) else . } %>%
        .$ID
    },

    # Ages for `ids`, in the order of `ids` (defaults to every ID in the
    # data); match() maps each requested ID to its row position.
    getAges = function(ids = self$data$ID) {
      self$data$Age[match(ids, self$data$ID)]
    }
  )
)

# Use by
# The object can only be created once the class generator above exists.
DF2 <- mydataframe$new(DF)
DF2$getIDs(by = "desc(Age)")
DF2$getAges()
DF2$getAges(DF2$getIDs(by = "desc(Age)"))

为什么这个R dplyr getAges在有序列表上失败了？

3 个答案: