第一,第二和第三最高值的行总和

时间:2018-06-03 00:39:45

标签: r

考虑以下样本数据:

{{1}}

我正在努力寻找一种按行找出三个最高值并对其求和的解决方案。

有什么想法吗?

4 个答案:

答案 0 :(得分:6)

如果你有一个向量x,计算三个最大值之和的函数可能是

# Sum of the n largest values of a vector.
#
# x: numeric vector. sort() drops NA values by default, so NAs are ignored.
# n: how many of the largest values to add up (default 3, the original
#    behaviour). If x has fewer than n values, all of them are summed.
#
# Returns a single number. sort() is ascending, so tail() picks the largest
# n values.
fun = function(x, n = 3)
    sum(tail(sort(x), n))

您希望将其应用于对象m的每一行

apply(m, 1, fun)

更快(例如,40%)的实施是

colSums(apply(m, 1, sort, decreasing = TRUE)[1:3, ])

或使用部分排序

colSums(apply(m, 1, sort.int, partial = 9:11)[9:11, ])

对于性能,如果m中有很多行,请避免apply()所隐含的逐行迭代。一种实现可能是

library(matrixStats)
rowSums(m * (rowRanks(m) > ncol(m) - 3))

但是当m的某一行中存在并列值(ties)时,这会失败;rowRanks()不支持ties.method = "first"。作为替代,我们实现自己的.rowRanks()

# Row-wise ranks of matrix m in which no two entries of a row share a rank.
#
# Every cell is first replaced by its position in the overall ordering of m
# (the order of the order), which makes all values distinct; rowRanks() from
# matrixStats is then applied to those distinct values. Intended as a
# stand-in for rowRanks() with ties.method = "first".
.rowRanks <- function(m) {
    tieFree <- m
    overallOrder <- sort.list(m)
    # `[]<-` overwrites the values but keeps the matrix dimensions
    tieFree[] <- sort.list(overallOrder)
    rowRanks(tieFree)
}
rowSums(m * (.rowRanks(m) > ncol(m) - 3))

对于tidyverse解决方案,似乎应该先从整洁(tidy)的数据开始

tbl <- df %>%
  rowid_to_column() %>%
  gather("letter","value",-1) %>%
  group_by(rowid)

使用top_n()的解决方案会产生错误的结果,因此最好的选择似乎是

summarize(tbl, fun(value))

(fun可以在这里内联展开,但这似乎并不是一个好主意,因为它会使单独修改和测试变得更加困难。)

比较方法

# Candidate implementations compared in the benchmark below. Each computes,
# for every row, the sum of the three largest values.

# f0: baseline - call fun() on each row of m.
f0 = function(m) apply(m, 1, fun)
# f0a: fully sort each row in decreasing order, keep the first three entries
# of each sorted row (rows of m become columns of the apply() result), and
# sum them column-wise.
f0a = function(m) colSums(apply(m, 1, sort, decreasing=TRUE)[1:3, ])
# f0b: partial sort that only places positions 9:11 correctly, then sums them.
# NOTE(review): 9:11 hard-codes the last three positions of an 11-column
# matrix - confirm before using with a different number of columns.
f0b = function(m) colSums(apply(m, 1, sort.int, partial = 9:11)[9:11, ])
# f1: no per-row iteration - mask the cells whose row rank is in the top
# three and sum the masked matrix by row.
f1 = function(m) rowSums(m * (.rowRanks(m) > ncol(m) - 3))
# f2: summarise a grouped long-format table with fun().
f2 = function(tbl) summarize(tbl, fun(value))

> library(microbenchmark)
> identical(f0(m), f0a(m))
[1] TRUE
> identical(f0(m), f0b(m))
[1] TRUE
> identical(f0(m), f1(m))
[1] TRUE
> identical(f0(m), f2(tbl)$`fun(value)`)
[1] TRUE
> microbenchmark(f0(m), f0a(m), f0b(m), f1(m), f2(tbl), times=10)
Unit: microseconds
    expr      min       lq      mean    median       uq      max neval
   f0(m)  837.505  860.890  894.9245  905.9880  921.386  948.242    10
  f0a(m)  594.895  637.258  650.7217  653.1800  673.599  713.167    10
  f0b(m)  274.925  277.734  305.6551  296.2975  330.482  362.765    10
   f1(m)  166.416  169.290  192.8086  189.5945  215.491  219.478    10
 f2(tbl) 2265.451 2277.599 2425.6083 2327.7015 2359.896 3349.995    10

> m = m[sample(nrow(m), 1000, TRUE),]
> microbenchmark(f0(m), f0a(m), f0b(m), f1(m), times=10)
Unit: milliseconds
   expr        min         lq       mean     median         uq        max neval
  f0(m) 137.705781 139.793459 141.658415 141.821540 143.653272 144.428092    10
 f0a(m)  85.946679  86.663967  88.500392  87.513880  89.634696  94.458554    10
 f0b(m)  29.762981  30.890124  32.470553  32.649594  33.116767  36.686603    10
  f1(m)   2.034407   2.120689   2.137723   2.144328   2.176306   2.184712    10

答案 1 :(得分:3)

您可以先转置,然后使用map对每一列执行top_n(3)并求和(sum):

感觉有点hacky,但完成工作。

更新 Moody_Mudskipper的更好方法:

library(tidyverse)

tdf <- as.data.frame(t(df)) 
var_names <- names(tdf)

var_names %>%
  map_dfc(~tdf %>% select(.x) %>% top_n(3) %>% sum()) %>% 
  t()

    [,1]
V1 66.46
V2 56.44
V3 86.30
V4 63.39
V5 90.62
V6 76.47

数据:

var_names %>% 
  map_dfc(~tdf %>% select(.x) %>% top_n(3) %>% summarize_all(sum)) %>% 
  gather

^注意:数据与OP略有不同,行号被删除。

答案 2 :(得分:3)

使用tidyverse

df %>%
  rowid_to_column() %>%
  gather("letter","value",-1) %>%
  group_by(rowid) %>%
  arrange(desc(value)) %>%
  slice(1:3) %>%
  summarize(value= sum(value))

# # A tibble: 6 x 2
#   rowid value
#   <int> <dbl>
# 1     1  66.5
# 2     2  56.4
# 3     3  76.8
# 4     4  63.4
# 5     5  90.6
# 6     6  76.5

替代解决方案,受@ andrew_reece解决方案的启发:

tdf <- setNames(as.data.frame(t(df)),seq(nrow(df)))
tdf %>%
  summarize_all(~sum(tail(sort(.),3))) %>%
  gather(rowid,value)

#   rowid value
# 1     1 66.46
# 2     2 56.44
# 3     3 76.83
# 4     4 63.39
# 5     5 90.62
# 6     6 76.47    

请注意:

实现setNames(as.data.frame(t(df)),seq(nrow(df)))的真正tidyverse方式如下:

df %>%
  rowid_to_column %>%
  gather("letter","value",-1) %>%
  spread("rowid","value")

答案 3 :(得分:2)

/// Dark blur backdrop for the first label.
///
/// Configured for Auto Layout, with rounded and clipped corners, and with
/// user interaction disabled.
let backgroundBlurOne: UIVisualEffectView = {
    let effectView = UIVisualEffectView(effect: UIBlurEffect(style: .dark))

    // Positioned by constraints, not by the autoresizing mask.
    effectView.translatesAutoresizingMaskIntoConstraints = false
    effectView.isUserInteractionEnabled = false

    // NOTE(review): alpha 10 lies outside the usual 0.0...1.0 range - confirm the intended opacity.
    effectView.backgroundColor = UIColor.black.withAlphaComponent(10)

    // Round the corners and clip the content to them.
    effectView.layer.masksToBounds = true
    effectView.layer.cornerRadius = 6

    return effectView
}()

/// Dark blur backdrop for the second label.
///
/// Configured for Auto Layout, with rounded and clipped corners, and with
/// user interaction disabled.
let backgroundBlurTwo: UIVisualEffectView = {
    let effectView = UIVisualEffectView(effect: UIBlurEffect(style: .dark))

    // Positioned by constraints, not by the autoresizing mask.
    effectView.translatesAutoresizingMaskIntoConstraints = false
    effectView.isUserInteractionEnabled = false

    // NOTE(review): alpha 10 lies outside the usual 0.0...1.0 range - confirm the intended opacity.
    effectView.backgroundColor = UIColor.black.withAlphaComponent(10)

    // Round the corners and clip the content to them.
    effectView.layer.masksToBounds = true
    effectView.layer.cornerRadius = 6

    return effectView
}()

/// First caption label: white, centered, 17 pt system font, no line limit.
/// Its text is assigned later, before it is placed on a blur backdrop.
let labelOne: UILabel = {
    let caption = UILabel()

    // Positioned by constraints, not by the autoresizing mask.
    caption.translatesAutoresizingMaskIntoConstraints = false

    caption.textAlignment = .center
    caption.textColor = .white
    caption.font = .systemFont(ofSize: 17)

    // NOTE(review): no text has been set at this point, so this call likely has no visible effect - confirm.
    caption.sizeToFit()
    // 0 removes the limit on the number of lines.
    caption.numberOfLines = 0

    return caption
}()

/// Second caption label: white, centered, 17 pt system font, no line limit.
/// Its text is assigned later, before it is placed on a blur backdrop.
let labelTwo: UILabel = {
    let caption = UILabel()

    // Positioned by constraints, not by the autoresizing mask.
    caption.translatesAutoresizingMaskIntoConstraints = false

    caption.textAlignment = .center
    caption.textColor = .white
    caption.font = .systemFont(ofSize: 17)

    // NOTE(review): no text has been set at this point, so this call likely has no visible effect - confirm.
    caption.sizeToFit()
    // 0 removes the limit on the number of lines.
    caption.numberOfLines = 0

    return caption
}()

/// Sets the first label's text and installs it, on its blur backdrop,
/// centered in the controller's view.
override func viewDidLoad() {
    super.viewDidLoad()

    // The text must be assigned first: the helper reads `label.text` to size the blur.
    labelOne.text = "Hello"
    putBlurEffectBehindLabel(backgroundBlurOne, labelOne, yDistance: 0)
}

/// Installs the second label and its blur backdrop below the first one.
///
/// The placement happens here rather than in `viewDidLoad` because the
/// vertical offset is derived from `backgroundBlurOne.frame.height`, which is
/// only meaningful once a layout pass has run.
override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()

    // This method is called on every layout pass. Without this guard each
    // pass would re-add the views and stack another set of constraints.
    // `putBlurEffectBehindLabel` adds the blur to `view`, so a non-nil
    // superview means the installation has already happened.
    guard backgroundBlurTwo.superview == nil else { return }

    // The text must be assigned first: the helper reads `label.text` to size the blur.
    labelTwo.text = "Heloooooooooooooooooooooooo"

    // Offset from the view's vertical center: the first blur's height plus 16 pt.
    // NOTE(review): assumes the first blur already has its final height on the first pass - confirm.
    let verticalOffset = backgroundBlurOne.frame.height + 16
    putBlurEffectBehindLabel(backgroundBlurTwo, labelTwo, yDistance: verticalOffset)
}

/// Places `label` on top of `blur` and installs both in the controller's view.
///
/// The blur is centered horizontally in `view` and offset vertically from the
/// view's center by `yDistance`. Its size is the measured single-line size of
/// the label's current text plus padding (15 pt horizontally, 10 pt
/// vertically). The label is centered inside the blur.
///
/// - Parameters:
///   - blur: The effect view used as the backdrop; added as a subview of `view`.
///   - label: The label to show; added to `blur.contentView`. Set its text
///     before calling, because the text is measured here. A `nil` text is
///     measured as an empty string.
///   - yDistance: Vertical offset of the blur's center from the view's center.
func putBlurEffectBehindLabel(_ blur: UIVisualEffectView, _ label: UILabel, yDistance: CGFloat) {
    view.addSubview(blur)
    blur.contentView.addSubview(label)

    // Measure with the label's own font so the backdrop stays in sync with
    // the label; fall back to the 17 pt system font used previously.
    let measuringFont: UIFont = label.font ?? UIFont.systemFont(ofSize: 17)
    let measuredText = (label.text ?? "") as NSString
    let textSize = measuredText.size(withAttributes: [NSAttributedStringKey.font: measuringFont])

    // The measured size is fractional; round it up before sizing a view with it.
    let blurWidth = textSize.width.rounded(.up) + 15
    let blurHeight = textSize.height.rounded(.up) + 10

    NSLayoutConstraint.activate([
        blur.centerXAnchor.constraint(equalTo: view.centerXAnchor),
        blur.centerYAnchor.constraint(equalTo: view.centerYAnchor, constant: yDistance),
        label.centerXAnchor.constraint(equalTo: blur.centerXAnchor),
        label.centerYAnchor.constraint(equalTo: blur.centerYAnchor),
        blur.widthAnchor.constraint(equalToConstant: blurWidth),
        blur.heightAnchor.constraint(equalToConstant: blurHeight)
    ])
}

根据@Soheil的建议,R Base解决方案:

# Sample data: 6 rows by 11 numeric columns (A-I, L, K).
# The literal must be passed as `text =`. The first positional argument of
# read.table() is `file`, so an unnamed string is treated as a file name.
# The header line has one field fewer than the data lines, so read.table()
# takes it as the header and uses the first column as row names.
df <- read.table(text = '"A" "B" "C" "D" "E" "F" "G" "H" "I" "L" "K"
"1" 2.59 23.08 4.61 10.32 5.61 0.05 0.34 24.04 19.34 5.76 4.27
"2" 23.31 1.84 15.34 1.23 0 0 6.13 12.88 6.13 17.18 15.95
"3" 9.47 0 13.68 9.47 0 0 0 2.11 5.26 6.32 53.68
"4" 2.16 29.99 4.58 7.92 0.92 0 0 8.97 16.57 12.05 16.83
"5" 5.4 76.35 2.06 8.87 0 0 0.26 0.39 0.9 2.7 3.08
"6" 8.24 30 7.65 0 1.18 0 0 4.71 22.94 23.53 1.76')