我的数据与此类似:
# Reproducible example data: a `rank` column plus six numeric columns named
# `1` to `6`. data.table must be attached before data.table() can be called.
library(data.table)

set.seed(1)
dt <- data.table(
  rank = c(3, 4, 2, 1),
  `1` = rnorm(4),
  `2` = rnorm(4),
  `3` = rnorm(4),
  `4` = rnorm(4),
  `5` = rnorm(4),
  `6` = rnorm(4)
)
rank 1 2 3 4 5 6
1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737
2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630
3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498
4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170
我想添加一个新列 rank_match,该列在每一行中,从名为 1 到 6 的列里找出第 n 大的值(n 取自 rank 列)。例如,第一行将在该行中名为 1 到 6 的列里寻找第 3 大的值,即 0.3295078。
我尝试过类似下面的写法(当然,它们都不起作用):
# Attempt 1 from the question (stated there not to work).
# NOTE(review): presumably `1`:`6` in j is evaluated on the column vectors
# themselves instead of selecting the range of columns — confirm.
dt[,rank_match := (sort(`1`:`6`, decreasing = TRUE)[rank])]
# Attempt 2 from the question (stated there not to work): tries to sort .SD
# as a whole rather than working row by row.
dt[,rank_match := (sort(.SD, decreasing = TRUE)[rank]), .SDcols=`1`:`6`]
输出应类似于以下内容:
rank 1 2 3 4 5 6 rank_match
1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737 0.3295078
2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630 -0.3053884
3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498 1.1249309
4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170 1.5952808
非常感谢。
答案 0 :(得分:4)
一种做法是按行序号分组,通过 .SDcols 指定从第 2 列开始的感兴趣的列,把 .SD(Subset of Data.table,即 data.table 的子集)用 unlist 展开成向量,用 sort 按降序排序,再根据 rank 列的值取出对应位置的元素,并将其赋给 rank_match
# Row-wise lookup: grouping by the row index makes .SD hold a single row.
# That row is flattened, sorted in decreasing order, and the element at
# position `rank` is stored in `rank_match`.
# seq_len(nrow(dt)) is used for the grouping vector so that a zero-row table
# does not produce the sequence c(1, 0) that 1:nrow(dt) would.
dt[, rank_match := sort(unlist(.SD), decreasing = TRUE)[rank],
   by = seq_len(nrow(dt)), .SDcols = 2:ncol(dt)]
dt
# rank 1 2 3 4 5 6 rank_match
#1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737 0.3295078
#2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630 -0.3053884
#3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498 1.1249309
#4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170 1.5952808
另一种做法是先用 melt 转换成长格式,然后从 value 列中取出相应的值
# Add an explicit row id first: melt() is asked to keep `rn` as an id column,
# so the column has to exist before reshaping.
dt[, rn := seq_len(.N)]
# Sort every melted value in decreasing order, then for each row id take the
# value at position `rank` (rank is constant within a row, hence rank[1]).
# The final order(rn) restores the original row order.
out <- melt(dt, id.vars = c("rn", "rank"))[
  order(-value), value[rank[1]], by = rn
][order(rn)]$V1
# Attach the result and drop the helper row id again.
dt[, rank_match := out][, rn := NULL][]
或@IceCreamToucan建议的一种紧凑方法
# Compact variant: reshape to long format and, within each `rank` group, pick
# the value at position `rank` of the decreasing ordering.
# NOTE(review): groups are formed by `rank`, so this presumably relies on
# every row having a distinct rank — confirm before reuse.
dt[, rank_match := melt(.SD, id.vars = "rank")[
  , value[order(-value)[rank]], by = rank
]$V1]
或者使用 pmap(来自 purrr 包)逐行遍历
library(purrr)
# Iterate over the rows with pmap_dbl(): the first element of each row is
# `rank`, the remaining elements are the candidate values.
# The candidates are sorted in decreasing order and indexed by rank, so the
# value is returned with its original sign (sorting the negated values without
# negating the result back would flip the sign).
dt[, rank_match := pmap_dbl(.SD, function(...) {
  row <- c(...)
  sort(row[-1], decreasing = TRUE)[[row[[1]]]]
})]
答案 1 :(得分:3)
// Declares `router` as a private constructor parameter of type Router.
// NOTE(review): this TypeScript fragment appears unrelated to the surrounding
// R answer — confirm whether it belongs here.
constructor(private router: Router) { }
// Asks the router held in `this.router` to navigate to the '/test' URL.
goToTestPage() {
this.router.navigateByUrl('/test');
}
使用 apply 对 .SD 的每一行应用如下所示的函数:
# For every row of .SD: the first entry is `rank`, the rest are the candidate
# values. Sort the candidates from largest to smallest and pick the one at
# position `rank`.
dt[, rank_match := apply(.SD, 1, function(row) {
  k <- row[1]
  candidates <- row[-1]
  sort(candidates, decreasing = TRUE)[k]
})]
答案 2 :(得分:2)
# Group by `rank`; for each row in the group, order the values in columns
# `1` to `6` from largest to smallest and take the element at position `rank`.
dt[, rank_match := apply(
  .SD, 1, function(vals) vals[order(vals, decreasing = TRUE)][rank]
), by = rank, .SDcols = `1`:`6`]
dt
rank 1 2 3 4 5 6 rank_match
1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737 0.3295078
2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630 -0.3053884
3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498 1.1249309
4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170 1.5952808
答案 3 :(得分:2)
DescTools::Large 可以从向量中返回第 n 大的元素,而无需对整个向量进行排序。不确定它与 dt[order(-value)[rank], ...] 相比如何。
library(DescTools)
library(data.table)
# Reshape to long format, then per `rank` group take the first element of
# Large(value, rank) as the rank-th largest value.
# NOTE(review): assumes Large() returns its k largest values in increasing
# order, so element 1 is the k-th largest — confirm against DescTools docs.
dt[, rank_match := melt(dt, id.vars = "rank")[
  , Large(value, rank)[1], by = rank
]$V1]
# rank 1 2 3 4 5 6 rank_match
# 1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737 0.3295078
# 2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630 -0.3053884
# 3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498 1.1249309
# 4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170 1.5952808
注意:如果某些行的 rank 值相同,则必须使用 akrun 答案中基于 rn(行号)的逻辑。
答案 4 :(得分:2)
另一种data.table实现(具有两个变体):
# option 1
# Reshape to long format, keep per `rank` group the value whose descending
# rank equals the group's rank, then join the result back onto dt by `rank`.
# `id.vars` is spelled out (no partial argument matching), the comparison uses
# the scalar `.BY$rank` rather than the `.BY` list, and ties.method = "first"
# gives distinct integer ranks so exactly one value matches even with ties.
dt[
  melt(dt, id.vars = "rank")[
    , value[frank(-value, ties.method = "first") == .BY$rank], by = rank
  ],
  on = .(rank),
  rank_match := V1
]
# option 2
# Same lookup as a direct assignment: reshape to long format and keep per
# `rank` group the value whose descending rank equals the group's rank.
# `id.vars` is spelled out (no partial argument matching), the comparison uses
# the scalar `.BY$rank` rather than the `.BY` list, and ties.method = "first"
# gives distinct integer ranks so exactly one value matches even with ties.
# NOTE(review): the result is assigned positionally, so this presumably
# requires every row to have a distinct rank — confirm before reuse.
dt[, rank_match := melt(dt, id.vars = "rank")[
  , value[frank(-value, ties.method = "first") == .BY$rank], by = rank
]$V1]
这两个都能达到预期效果:
> dt
rank 1 2 3 4 5 6 rank_match
1: 3 -0.6264538 0.3295078 0.5757814 -0.62124058 -0.01619026 0.91897737 0.3295078
2: 4 0.1836433 -0.8204684 -0.3053884 -2.21469989 0.94383621 0.78213630 -0.3053884
3: 2 -0.8356286 0.4874291 1.5117812 1.12493092 0.82122120 0.07456498 1.1249309
4: 1 1.5952808 0.7383247 0.3898432 -0.04493361 0.59390132 -1.98935170 1.5952808