考虑以下样本数据:
{{1}}
我正在努力寻找一种按行找出三个最高值并对其求和的解决方案。
有什么想法吗?
答案 0 :(得分:6)
如果你有一个向量x
,计算三个最大值之和的函数可能是
# Sum of the three largest values in the vector `x`.
# sort() orders ascending, so the last three elements are the largest ones.
fun <- function(x) {
    largest <- tail(sort(x), 3)
    sum(largest)
}
您希望将其应用于对象的每一行m
apply(m, 1, fun)
更快(例如,40%)的实施是
colSums(apply(m, 1, sort, decreasing = TRUE)[1:3, ])
或使用部分排序
# Partial sort: only the last three positions of each row must hold their
# final (largest) values. The positions are computed from ncol(m) rather than
# hard-coded as 9:11, so this works for any matrix with at least 3 columns
# (for an 11-column matrix the indices are still 9, 10, 11).
colSums(apply(m, 1, sort.int, partial = ncol(m) - 2:0)[ncol(m) - 2:0, ])
就性能而言,如果 `m` 的行数很多,应避免 `apply()` 所隐含的逐行迭代。一种可能的实现是
library(matrixStats)
rowSums(m * (rowRanks(m) > ncol(m) - 3))
但是当 `m` 的某一行中存在并列值(ties)时,这种方法会失败;`rowRanks()` 不支持 `ties.method = "first"`。作为替代,我们实现自己的 `rowRanks()`
# Row-wise ranks of `m` in which tied values receive distinct ranks.
#
# Each element is first replaced by its rank over the whole matrix (applying
# sort.list() twice turns an ordering into ranks, and every rank is distinct).
# rowRanks() is then applied to a matrix that no longer contains ties.
.rowRanks <- function(m) {
    overall_order <- sort.list(m)
    overall_rank <- sort.list(overall_order)
    # `m[] <-` overwrites the values while keeping the matrix dimensions.
    m[] <- overall_rank
    rowRanks(m)
}
rowSums(m * (.rowRanks(m) > ncol(m) - 3))
对于 tidyverse 风格的解决方案,似乎应该从整洁(tidy)的数据开始
# Reshape `df` into long form and group it by original row, so that a
# per-row summary can be computed with summarize().
tbl <- df %>%
# Add a `rowid` column identifying each original row.
rowid_to_column() %>%
# Stack every column except the first (rowid) into letter/value pairs.
gather("letter","value",-1) %>%
# One group per original row.
group_by(rowid)
使用top_n()
的解决方案会产生错误的结果,因此最好的选择似乎是
summarize(tbl, fun(value))
(`fun` 也可以在这里直接内联展开,但这似乎并不是一个好主意,因为那样会使单独修改和测试它变得更加困难。)
比较方法
# Candidate implementations of "sum of the three largest values in each row",
# wrapped under short names so they can be compared and benchmarked.

# Row-wise apply() of `fun` (defined earlier).
f0 = function(m) apply(m, 1, fun)

# Sort every row in decreasing order, keep the first three values, sum them.
f0a = function(m) colSums(apply(m, 1, sort, decreasing=TRUE)[1:3, ])

# Partial sort: only the last three positions of each row need their final
# values. The positions are derived from ncol(m) instead of the hard-coded
# 9:11, so the function is not tied to 11-column input; requires ncol(m) >= 3.
f0b = function(m) {
    top <- ncol(m) - 2:0
    colSums(apply(m, 1, sort.int, partial = top)[top, ])
}

# Vectorised: keep only the entries whose tie-free row rank is among the top
# three, then sum each row. Uses `.rowRanks` (defined earlier).
f1 = function(m) rowSums(m * (.rowRanks(m) > ncol(m) - 3))

# Tidy-data variant: `tbl` is expected to be grouped by row; applies `fun` to
# the `value` column of each group.
f2 = function(tbl) summarize(tbl, fun(value))
与
> library(microbenchmark)
> identical(f0(m), f0a(m))
[1] TRUE
> identical(f0(m), f0b(m))
[1] TRUE
> identical(f0(m), f1(m))
[1] TRUE
> identical(f0(m), f2(tbl)$`fun(value)`)
[1] TRUE
> microbenchmark(f0(m), f0a(m), f0b(m), f1(m), f2(tbl), times=10)
Unit: microseconds
expr min lq mean median uq max neval
f0(m) 837.505 860.890 894.9245 905.9880 921.386 948.242 10
f0a(m) 594.895 637.258 650.7217 653.1800 673.599 713.167 10
f0b(m) 274.925 277.734 305.6551 296.2975 330.482 362.765 10
f1(m) 166.416 169.290 192.8086 189.5945 215.491 219.478 10
f2(tbl) 2265.451 2277.599 2425.6083 2327.7015 2359.896 3349.995 10
> m = m[sample(nrow(m), 1000, TRUE),]
> microbenchmark(f0(m), f0a(m), f0b(m), f1(m), times=10)
Unit: milliseconds
expr min lq mean median uq max neval
f0(m) 137.705781 139.793459 141.658415 141.821540 143.653272 144.428092 10
f0a(m) 85.946679 86.663967 88.500392 87.513880 89.634696 94.458554 10
f0b(m) 29.762981 30.890124 32.470553 32.649594 33.116767 36.686603 10
f1(m) 2.034407 2.120689 2.137723 2.144328 2.176306 2.184712 10
答案 1 :(得分:3)
您可以先用 `t()` 转置数据,再用 `map` 对每一列应用 `top_n(3)`,最后用 `sum` 求和:
感觉有点hacky,但完成工作。
更新 Moody_Mudskipper的更好方法:
library(tidyverse)
tdf <- as.data.frame(t(df))
var_names <- names(tdf)
var_names %>%
map_dfc(~tdf %>% select(.x) %>% top_n(3) %>% sum()) %>%
t()
[,1]
V1 66.46
V2 56.44
V3 86.30
V4 63.39
V5 90.62
V6 76.47
数据:
var_names %>%
map_dfc(~tdf %>% select(.x) %>% top_n(3) %>% summarize_all(sum)) %>%
gather
^注意:数据与OP略有不同,行号被删除。
答案 2 :(得分:3)
使用tidyverse
:
# Sum of the three largest values per row, computed on long-format data.
df %>%
# Add a `rowid` column identifying each original row.
rowid_to_column() %>%
# Stack every column except the first (rowid) into letter/value pairs.
gather("letter","value",-1) %>%
group_by(rowid) %>%
# Order by value, largest first, so that the following slice() picks the
# largest entries; the order of these two steps matters.
arrange(desc(value)) %>%
# Keep the first three rows of each rowid group.
slice(1:3) %>%
# Collapse each group to the sum of its remaining values.
summarize(value= sum(value))
# # A tibble: 6 x 2
# rowid value
# <int> <dbl>
# 1 1 66.5
# 2 2 56.4
# 3 3 76.8
# 4 4 63.4
# 5 5 90.6
# 6 6 76.5
替代解决方案,受@ andrew_reece解决方案的启发:
# Transpose `df` so that each original row becomes a column, naming the
# columns 1..nrow(df).
tdf <- setNames(as.data.frame(t(df)),seq(nrow(df)))
tdf %>%
# For every column (i.e. every original row): sort ascending, take the last
# three values, and sum them.
summarize_all(~sum(tail(sort(.),3))) %>%
# Reshape the one-row result into rowid/value pairs.
gather(rowid,value)
# rowid value
# 1 1 66.46
# 2 2 56.44
# 3 3 76.83
# 4 4 63.39
# 5 5 90.62
# 6 6 76.47
请注意:若要用真正的 tidyverse 方式实现 `setNames(as.data.frame(t(df)),seq(nrow(df)))`,写法如下:
df %>%
rowid_to_column %>%
gather("letter","value",-1) %>%
spread("rowid","value")
答案 3 :(得分:2)
/// Dark, rounded blur backdrop that is placed behind `labelOne`.
let backgroundBlurOne: UIVisualEffectView = {
    // `.dark` is inferred from the parameter type, so this line does not depend
    // on how the SDK spells the style enum's name.
    let backdrop = UIVisualEffectView(effect: UIBlurEffect(style: .dark))
    // Sized and positioned with Auto Layout constraints.
    backdrop.translatesAutoresizingMaskIntoConstraints = false
    // Round the corners and clip the content to them.
    backdrop.layer.cornerRadius = 6
    backdrop.layer.masksToBounds = true
    // Purely decorative: never intercept touches.
    backdrop.isUserInteractionEnabled = false
    // Alpha must lie in 0.0...1.0; the previous literal 10 was out of range.
    // 1 (fully opaque) is the in-range maximum.
    // NOTE(review): use a fractional value such as 0.1 if a translucent tint was intended.
    backdrop.backgroundColor = UIColor.black.withAlphaComponent(1)
    return backdrop
}()
/// Dark, rounded blur backdrop that is placed behind `labelTwo`.
let backgroundBlurTwo: UIVisualEffectView = {
    // `.dark` is inferred from the parameter type, so this line does not depend
    // on how the SDK spells the style enum's name.
    let backdrop = UIVisualEffectView(effect: UIBlurEffect(style: .dark))
    // Sized and positioned with Auto Layout constraints.
    backdrop.translatesAutoresizingMaskIntoConstraints = false
    // Round the corners and clip the content to them.
    backdrop.layer.cornerRadius = 6
    backdrop.layer.masksToBounds = true
    // Purely decorative: never intercept touches.
    backdrop.isUserInteractionEnabled = false
    // Alpha must lie in 0.0...1.0; the previous literal 10 was out of range.
    // 1 (fully opaque) is the in-range maximum.
    // NOTE(review): use a fractional value such as 0.1 if a translucent tint was intended.
    backdrop.backgroundColor = UIColor.black.withAlphaComponent(1)
    return backdrop
}()
/// First caption label: centered white 17pt system-font text with no line limit.
let labelOne: UILabel = {
    let caption = UILabel()
    // Positioned with Auto Layout constraints.
    caption.translatesAutoresizingMaskIntoConstraints = false
    caption.textAlignment = .center
    caption.textColor = .white
    caption.font = .systemFont(ofSize: 17)
    caption.sizeToFit()
    // 0 removes the limit on the number of lines.
    caption.numberOfLines = 0
    return caption
}()
/// Second caption label: centered white 17pt system-font text with no line limit.
let labelTwo: UILabel = {
    let caption = UILabel()
    // Positioned with Auto Layout constraints.
    caption.translatesAutoresizingMaskIntoConstraints = false
    caption.textAlignment = .center
    caption.textColor = .white
    caption.font = .systemFont(ofSize: 17)
    caption.sizeToFit()
    // 0 removes the limit on the number of lines.
    caption.numberOfLines = 0
    return caption
}()
/// Installs the first label and its blur backdrop, centered in the view.
override func viewDidLoad() {
    super.viewDidLoad()

    // The text has to be assigned first: putBlurEffectBehindLabel sizes the
    // blur from the label's current text.
    labelOne.text = "Hello"
    putBlurEffectBehindLabel(
        backgroundBlurOne,
        labelOne,
        yDistance: 0
    )
}
/// Installs the second label and its blur backdrop below the first one.
///
/// The setup lives here because the vertical offset is read from
/// `backgroundBlurOne.frame`.
override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()

    // This callback can run on every layout pass. Without this guard each pass
    // would re-add the views and stack another full set of constraints.
    // putBlurEffectBehindLabel adds the blur to `view`, so a non-nil superview
    // means the setup has already been done.
    guard backgroundBlurTwo.superview == nil else { return }

    labelTwo.text = "Heloooooooooooooooooooooooo"
    putBlurEffectBehindLabel(backgroundBlurTwo, labelTwo, yDistance: backgroundBlurOne.frame.height + 16)
}
/// Adds `blur` to the view with `label` centered inside it, and sizes the blur
/// to the label's current text plus padding.
///
/// - Parameters:
///   - blur: Backdrop view; added as a subview of `view` and centered
///     horizontally in it.
///   - label: Label placed in the blur's content view. Its `text` should be
///     set before this call, because the blur is sized from it.
///   - yDistance: Vertical offset of the blur's center from the view's center.
func putBlurEffectBehindLabel(_ blur: UIVisualEffectView, _ label: UILabel, yDistance: CGFloat) {
    view.addSubview(blur)
    blur.contentView.addSubview(label)

    // Measure with the label's own font so the backdrop still fits if the font
    // is changed, and treat missing text as empty instead of force-unwrapping.
    let measuringFont: UIFont = label.font ?? UIFont.systemFont(ofSize: 17)
    let displayedText = (label.text ?? "") as NSString
    let textSize = displayedText.size(withAttributes: [.font: measuringFont])

    NSLayoutConstraint.activate([
        blur.centerXAnchor.constraint(equalTo: view.centerXAnchor),
        blur.centerYAnchor.constraint(equalTo: view.centerYAnchor, constant: yDistance),
        label.centerXAnchor.constraint(equalTo: blur.centerXAnchor),
        label.centerYAnchor.constraint(equalTo: blur.centerYAnchor),
        // Padding around the measured text: 15pt horizontally, 10pt vertically in total.
        blur.widthAnchor.constraint(equalToConstant: textSize.width + 15),
        blur.heightAnchor.constraint(equalToConstant: textSize.height + 10)
    ])
}
根据@Soheil的建议,R Base解决方案:
df <- read.table('"A" "B" "C" "D" "E" "F" "G" "H" "I" "L" "K"
"1" 2.59 23.08 4.61 10.32 5.61 0.05 0.34 24.04 19.34 5.76 4.27
"2" 23.31 1.84 15.34 1.23 0 0 6.13 12.88 6.13 17.18 15.95
"3" 9.47 0 13.68 9.47 0 0 0 2.11 5.26 6.32 53.68
"4" 2.16 29.99 4.58 7.92 0.92 0 0 8.97 16.57 12.05 16.83
"5" 5.4 76.35 2.06 8.87 0 0 0.26 0.39 0.9 2.7 3.08
"6" 8.24 30 7.65 0 1.18 0 0 4.71 22.94 23.53 1.76')