Question

我正在寻找一种解决方案，用于求多列中每一行的最小（或最大）值。例如：

import cv2
import numpy as np
from math import sqrt

def calc_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points, rounded.

    Args:
        p1: First point as an ``(x, y)`` pair.
        p2: Second point as an ``(x, y)`` pair.

    Returns:
        The straight-line distance between ``p1`` and ``p2`` after being
        passed through ``round()``.
    """
    ax, ay = p1
    bx, by = p2
    dx = ax - bx
    dy = ay - by
    return round(sqrt(dx ** 2 + dy ** 2))

# param contains the center and the color of the circle 
def draw_red_circle(event, x, y, flags, param):
    """Mouse callback: draw a circle through the double-clicked point.

    Only reacts to ``cv2.EVENT_LBUTTONDBLCLK``; every other event is ignored.
    ``param[0]`` is the circle centre and ``param[1]`` is the colour handed to
    ``cv2.circle``. The radius is the rounded distance from the centre to the
    clicked point ``(x, y)``. The circle is drawn onto the module-level
    ``img`` with a line thickness of 2.
    """
    if event != cv2.EVENT_LBUTTONDBLCLK:
        return
    centre = param[0]
    cv2.circle(img, centre, calc_distance((x, y), centre), param[1], 2)


def draw_blue_circle(event, x, y, flags, param):
    """Mouse callback: draw a circle centred at (100, 100) through the click.

    Only reacts to ``cv2.EVENT_LBUTTONDBLCLK``; every other event is ignored.
    The centre is fixed at ``(100, 100)`` and the colour at ``(255, 0, 0)``;
    ``param`` is not used. The radius is the rounded distance from the centre
    to the clicked point ``(x, y)``. The circle is drawn onto the module-level
    ``img`` with a line thickness of 2.
    """
    if event != cv2.EVENT_LBUTTONDBLCLK:
        return
    centre = (100, 100)
    cv2.circle(img, centre, calc_distance((x, y), centre), (255, 0, 0), 2)

# Shared canvas: a 512x512, 3-channel uint8 image initialised to zeros.
# Both mouse callbacks draw onto this one array.
img = np.zeros((512, 512, 3), np.uint8)

# Create the two windows.
cv2.namedWindow("img_red")
cv2.namedWindow("img_blue")

# Each window gets its own double-click handler. The centre and colour used
# by draw_red_circle are delivered through the callback's param argument.
param = [(200, 200), (0, 0, 255)]
cv2.setMouseCallback("img_red", draw_red_circle, param)
cv2.setMouseCallback("img_blue", draw_blue_circle)  # param = None

# Keep refreshing both windows (they display the same img) until "q" is hit.
running = True
while running:
    cv2.imshow("img_red", img)
    cv2.imshow("img_blue", img)
    running = (cv2.waitKey(1) & 0xFF) != ord("q")
cv2.destroyAllWindows()

我尝试了：

# my data.frame is df:
library(tibble)
df <- tribble(
  ~name, ~type_1, ~type_2, ~type_3,
  "a", 1, 5, 2,
  "b", 2, 2, 6,
  "c", 3, 8, 2
)
# and output should be result_df:
result_df <- tribble(
  ~name, ~type_1, ~type_2, ~type_3, ~min_val, ~min_col,
  "a", 1, 5, 2, 1, "type_1",
  "b", 8, 2, 6, 2, "type_2",
  "c", 3, 8, 0, 0, "type_3"
)

和 rowwise 函数，但没有成功。我可以使用 gather 加分组的方式，但我想知道是否有直接按行/按列计算的解决方案。

这样的方法对均值、中位数等函数也同样有用。

感谢您的帮助。

Answer 1

一种相当通用的方法是暂时把数据重塑为长格式，这样计算就更容易——只需一次普通的分组 mutate。

library(tidyr)
library(dplyr)

# Example data: one row per name, one numeric column per type.
df <- tribble(
  ~name, ~type_1, ~type_2, ~type_3,
  "a",   1,       5,       2,
  "b",   8,       2,       6,
  "c",   3,       8,       2
)

# Reshape to long form, compute each name's minimum and the column that holds
# it, then reshape back to wide form.
# pivot_longer()/pivot_wider() are tidyr's current replacements for the
# superseded gather()/spread().
df %>%
  pivot_longer(
    contains("type"),
    names_to = "type",
    values_to = "type_val"
  ) %>%
  group_by(name) %>%
  mutate(
    min_val = min(type_val),
    # which.min() returns a single position (the first minimum), so a row
    # with tied minima still yields exactly one column name.
    min_col = type[which.min(type_val)]
  ) %>%
  # Drop the grouping so later verbs do not silently operate per name.
  ungroup() %>%
  pivot_wider(names_from = type, values_from = type_val)
#> # A tibble: 3 x 6
#>   name  min_val min_col type_1 type_2 type_3
#>   <chr>   <dbl> <chr>    <dbl>  <dbl>  <dbl>
#> 1 a           1 type_1       1      5      2
#> 2 b           2 type_2       8      2      6
#> 3 c           2 type_3       3      8      2

实际上，更好的做法可能是去掉最后一步转回宽格式的调用（spread），让数据保持长格式。

注意事项：

如果一行中有多个值同时等于最小值（或最大值、中位数等），那么用 type_val == min_val 这样的比较来定位列时会得到多个 TRUE，因此必须进一步处理，把结果缩减为单个值，例如像 which.min 那样只返回第一个最小值的位置。
在数据量较大时，重塑的开销可能很大，因此更复杂但经过优化的方法（例如利用 max.col）可能更可取。

Answer 2

你能提供一些关于 result_df 背后逻辑的细节吗？或者可以分享一下你使用 gather 和分组的代码吗？

得出以下中间结果：

# The value columns sit at positions 2 to 4 of df.
type_cols <- df[2:4]
# Smallest value in each row.
df$min_val <- apply(type_cols, 1, min)
# Name of the column holding that value; which.min() picks the first on ties.
df$min_col <- names(type_cols)[apply(type_cols, 1, which.min)]

Answer 3

基础R方法有问题吗？

# find the columns in question
# find the columns in question
mask <- colnames(df)[startsWith(colnames(df), "type_")]

# Work on a plain matrix of the selected columns so rows can be indexed
# directly.
type_mat <- as.matrix(df[mask])

# Position of the first minimum in every row (which.min() takes the first
# match when a row has ties).
min_idx <- apply(type_mat, 1, which.min)

# Assign the two results separately: combining the value and the column name
# with c() would coerce the minimum to character, whereas this keeps min_val
# in the same type as the selected columns.
df$min_val <- type_mat[cbind(seq_len(nrow(type_mat)), min_idx)]
df$min_col <- mask[min_idx]

这会产生

# A tibble: 3 x 6
  name  type_1 type_2 type_3 min_val min_col
  <chr>  <dbl>  <dbl>  <dbl> <chr>   <chr>  
1 a         1.     5.     2. 1       type_1 
2 b         2.     2.     6. 2       type_1 
3 c         3.     8.     2. 2       type_3

请注意，which.min()采用第一个找到的匹配项（第二行中有两个2）。

Answer 4

我可能错过了一些东西;你可能想要一个纯粹的dplyr类型的回复......但这是一种方法：

我重新创建了数据，因为我不确定为什么result_df和df有不同的值

# Random example data: 15 rows named a..o and three columns type_1..type_3,
# each holding 15 draws (with replacement) from 1:10.
# setNames() labels the list before it becomes a data frame, so no pipe is
# needed, and TRUE is spelled out because T can be reassigned.
df <- data.frame(
  name = letters[1:15],
  as.data.frame(setNames(
    lapply(1:3, function(i) sample(1:10, 15, replace = TRUE)),
    sprintf("type_%s", 1:3)
  ))
)

然后可以说是按行循环/apply，再把各行的结果重新绑定起来：

# Columns taking part in the row-wise minimum: every column whose name
# contains "type" (case-insensitively, mirroring dplyr's matches("type")).
type_cols <- grep("type", names(df), ignore.case = TRUE, value = TRUE)
type_mat <- as.matrix(df[type_cols])

# Position of the first minimum in each row. seq_len() keeps a zero-row df
# from being iterated as c(1, 0), and vapply() guarantees one integer per row.
min_idx <- vapply(
  seq_len(nrow(type_mat)),
  function(i) unname(which.min(type_mat[i, ])),
  integer(1)
)

# Build the result in one step instead of creating a data frame per row and
# binding them afterwards; this needs base R only.
result_df <- data.frame(
  df,
  min_val = as.numeric(type_mat[cbind(seq_len(nrow(type_mat)), min_idx)]),
  min_col = type_cols[min_idx]
)


> df
   name type_1 type_2 type_3
1     a      9      9      8
2     b      9      7      6
3     c      4      5      5
4     d      7      4      4
5     e      6      5      9
6     f      2      9      7
7     g      9     10      4
8     h      3      5      1
9     i      9      5      5
10    j      1      1      9
11    k      9      5      2
12    l      2      3      4
13    m      4      2      3
14    n      1      3      7
15    o      2      7      6

> result_df
   name type_1 type_2 type_3 min_val min_col
1     a      9      9      8       8  type_3
2     b      9      7      6       6  type_3
3     c      4      5      5       4  type_1
4     d      7      4      4       4  type_2
5     e      6      5      9       5  type_2
6     f      2      9      7       2  type_1
7     g      9     10      4       4  type_3
8     h      3      5      1       1  type_3
9     i      9      5      5       5  type_2
10    j      1      1      9       1  type_1
11    k      9      5      2       2  type_3
12    l      2      3      4       2  type_1
13    m      4      2      3       2  type_2
14    n      1      3      7       1  type_1
15    o      2      7      6       2  type_1

多列中每行的最小（或最大）值

4 个答案: