给出df
如下:
# group value
# 1 A 8
# 2 A 1
# 3 A 7
# 4 B 3
# 5 B 2
# 6 B 6
# 7 C 4
# 8 C 5
# Rebuild the example data frame shown above: `group` is a factor with levels
# A/B/C and `value` is an integer column (8 rows in total).
df <- structure(list(group = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L), .Label = c("A", "B", "C"), class = "factor"), value = c(8L,
1L, 7L, 3L, 2L, 6L, 4L, 5L)), .Names = c("group", "value"), class = "data.frame", row.names = c(NA,
-8L))
以及一个索引向量(其中可能包含 NA):
inds <- c(2,1,NA)
我们如何才能获得每个组 value 列的第 n 个元素?最好使用 base R。
例如,根据 inds,我们希望得到 A 组 value 的第二个元素、B 组 value 的第一个元素,而 C 组对应的结果为 NA。结果将是:
#[1] 1 3 NA
答案 0 :(得分:5)
以下是mapply
和split
的解决方案:
# split() groups `value` by `group`; mapply() then walks the groups and `inds`
# in parallel, extracting element inds[i] from the i-th group with `[`.
mapply("[", with(df, split(value, group)), inds)
返回一个命名向量
A B C
1 3 NA
with(df, split(value, group))
按组拆分数据并返回一个向量列表(每个组对应一个 value 向量)。mapply
获取该列表和 "inds",并对每一对参数应用子集函数 "["。
答案 1 :(得分:2)
使用levels
和sapply
即可:
# Example data: `group` is a factor with levels A/B/C, `value` is integer.
DF <- structure(list(group = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L), .Label = c("A", "B", "C"), class = "factor"), value = c(8L,
1L, 7L, 3L, 2L, 6L, 4L, 5L)), .Names = c("group", "value"), class = "data.frame", row.names = c(NA,
-8L))
# Position to extract within each group, in level order (NA = no element).
inds <- c(2,1,NA)
lvls <- levels(DF$group)
# seq_along() instead of 1:length(): with zero levels, 1:0 would iterate over
# c(1, 0) instead of not iterating at all.
groupInds <- sapply(
  seq_along(lvls),
  function(i) DF$value[DF$group == lvls[i]][inds[i]]
)
groupInds
#[1] 1 3 NA
答案 2 :(得分:1)
再次使用 mapply(但不如 Imo 的回答那么优雅):
# For each level of `group`, keep only that group's `value` column and take
# the row at the matching position in `inds`.
mapply(
  function(lvl, n) subset(df, group == lvl, value)[n, ],
  levels(df$group),
  inds
)
答案 3 :(得分:1)
我知道你说过最好使用 base R,但仅作记录,这里给出一种 data.table 的做法:
# setDT() turns `df` into a data.table in place. Within each group, .GRP is the
# running group counter, so inds[.GRP] is that group's position; [, V1] then
# extracts the resulting column as a plain vector.
setDT(df)[, .SD[inds[.GRP], value], by=group][,V1]
#[1] 1 3 NA
答案 4 :(得分:0)
我刚刚提出了另一个解决方案:
# aggregate() applies x[inds] to every group, giving one row per group holding
# length(inds) picked values; diag() then keeps the i-th pick of the i-th group.
# NOTE(review): this relies on the number of groups matching length(inds) --
# confirm before reusing it with other data.
diag(aggregate(value~group, df, function(x) x[inds])[,-1])
#[1] 1 3 NA
<强>基准强>
library(microbenchmark)
library(data.table)
# Benchmark fixture: `group` is a factor with levels A/B/C, `value` is integer.
df <- structure(list(group = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L), .Label = c("A", "B", "C"), class = "factor"), value = c(8L,
1L, 7L, 3L, 2L, 6L, 4L, 5L)), .Names = c("group", "value"), class = "data.frame", row.names = c(NA,
-8L))
# Read as a global by every f_* function below (it is not passed as an argument).
inds <- c(2,1,NA)
# Split `value` by `group`, then pull the inds[i]-th element out of the i-th
# group's values. as.vector() strips the group names from the result.
# `inds` is read from the enclosing (global) environment.
f_Imo <- function(df) {
  values_by_group <- split(df$value, df$group)
  picked <- mapply(function(vals, n) vals[n], values_by_group, inds)
  as.vector(picked)
}
# For the i-th level of `group`, return the inds[i]-th entry of `value` within
# that group (NA when inds[i] is NA or past the end of the group).
#
# df:   data frame with a factor column `group` and a column `value`.
# inds: read from the enclosing (global) environment; one position per level,
#       each expected to be a single positive index or NA.
# Returns an unnamed vector of the same type as `df$value`, one entry per level.
f_Osssan <- function(df) {
  lvls <- levels(df$group)
  vapply(
    # seq_along() yields an empty sequence for zero levels; 1:length(lvls)
    # would iterate over c(1, 0) instead.
    seq_along(lvls),
    # which() drops rows whose group is NA; a bare `==` mask would turn them
    # into NA entries inside every group and shift the positions.
    function(i) df$value[which(df$group == lvls[i])][inds[i]],
    # A typed NA template keeps the return type stable, even for empty input.
    FUN.VALUE = df$value[NA_integer_]
  )
}
# For each level of `group`, subset `df` to that group's `value` column and
# take the row at the matching position in `inds` (read from the enclosing
# environment). The result is named by group level.
f_User2321 <- function(df) {
  pick_nth <- function(lvl, n) {
    group_values <- subset(df, group == lvl, value)
    group_values[n, ]
  }
  unlist(mapply(pick_nth, levels(df$group), inds))
}
# data.table version: within each group take the row at position inds[.GRP]
# (.GRP is the running group counter) and return the picked values as a vector.
# `inds` is read from the enclosing (global) environment.
f_dww <- function(df) {
  # as.data.table() works on a copy. setDT() would convert the caller's `df`
  # to a data.table by reference, silently changing the input that the other
  # benchmarked functions receive afterwards.
  dt <- as.data.table(df)
  dt[, .SD[inds[.GRP], value], by = group][, V1]
}
# aggregate() applies x[inds] to every group, producing one row per group with
# length(inds) picked values; the diagonal holds the i-th pick of the i-th
# group. `inds` is read from the enclosing (global) environment.
f_m0h3n <- function(df) {
  picked <- aggregate(value ~ group, df, function(x) x[inds])
  # Force a (groups x picks) matrix. With one group and one index, `[, -1]`
  # collapses to a scalar k, and diag(k) would build a k x k identity matrix
  # instead of extracting the value.
  cells <- matrix(picked[, -1], nrow = nrow(picked))
  diag(cells)
}
# all.equal() compares only its first two arguments; further positional
# arguments are taken as options (e.g. `tolerance`), not as more values to
# compare. Check every result against the first one instead. unname() drops
# the group names that f_User2321() attaches to its result.
results <- list(f_Imo(df), f_Osssan(df), f_User2321(df), f_dww(df), f_m0h3n(df))
all(vapply(
  results[-1],
  function(res) isTRUE(all.equal(results[[1]], unname(res))),
  logical(1)
))
# [1] TRUE
# Time every implementation on the same `df`; the table below is the recorded output.
microbenchmark(f_Imo(df), f_Osssan(df), f_m0h3n(df), f_User2321(df), f_dww(df))
# Unit: microseconds
# expr min lq mean median uq max neval
# f_Imo(df) 71.004 85.1180 91.52996 91.748 96.8810 121.048 100
# f_Osssan(df) 252.788 276.5265 318.70529 287.648 301.5495 2651.492 100
# f_m0h3n(df) 1422.627 1555.4365 1643.47184 1618.740 1670.7095 4729.827 100
# f_User2321(df) 2889.738 3000.3055 3148.44916 3037.945 3118.7860 6013.442 100
# f_dww(df) 2960.740 3086.2790 3206.02147 3143.381 3250.9545 5976.229 100