Question

我想按特定的行断点（row breaks）对数据框的各列取平均值。例如：

set.seed(0)
dt = data.frame(cbind(rnorm(10, 0, 1), rnorm(10, 0, 2), rnorm(10, 0, 3)))
breaks = c(0,1,2,4,8,Inf)

我能想到的唯一解决方案是手动插入行索引，然后按该索引分组求均值（例如用group_by或aggregate），这很痛苦，因为我的breaks规则更长。我的预期结果如下：

break

任何建议（或直接给重复的答案）都非常感谢！

Answer 1

我们可以使用rep创建分组，用split将数据集拆分为由data.frame组成的list，再通过sapply遍历该list，得到每组的colMeans。

如果各组的行互不重叠：

# Label the 10 rows with group ids 1..5 (group sizes 1, 1, 2, 3, 3), split the
# data frame by that label, take the column means of each piece, and transpose
# so that each group becomes one row.
re1 <- t(
  sapply(
    X = split(x = dt, f = rep(x = 1:5, times = c(1, 1, 2, 3, 3))),
    FUN = colMeans
  )
)

如果行有重叠（这里第8行同时属于第4组和第5组），可以先复制该行再分组，并与OP的输出核对：

library(tidyverse)

# Row 8 is needed in two groups, so it is emitted twice (uncount) before the
# 11 resulting rows are labelled with group sizes 1, 1, 2, 4, 3.
dt %>%
  mutate(n = if_else(row_number() == 8, 2, 1)) %>%
  uncount(n) %>%
  group_by(grp = rep(1:5, times = c(1, 1, 2, 4, 3))) %>%
  summarise_all(mean) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 5 x 3
#      X1     X2     X3
#   <dbl>  <dbl>  <dbl>
#1  1.26   1.53  -0.673
#2 -0.326 -1.60   1.13 
#3  1.30  -1.44   1.41 
#4 -0.587 -0.675  0.631
#5  0.701 -1.13  -1.93

Answer 2

我们可以使用dplyr中的group_by和summarize轻松地做到这一点。对于分组变量，我们可以使用Base R函数cut，它接受一个断点向量（正好适合您的情况），并对row_number()（dplyr的辅助函数）返回的行号进行分箱：

library(dplyr)

# Bin each row position into the interval of `breaks` it falls in, average
# every column within each bin, then drop the helper grouping column.
dt %>%
  mutate(grp = cut(row_number(), breaks = breaks)) %>%
  group_by(grp) %>%
  summarize_all(.funs = mean) %>%
  select(-grp)

输出：

# A tibble: 5 x 3
      X1     X2     X3
   <dbl>  <dbl>  <dbl>
1  1.26   1.53  -0.673
2 -0.326 -1.60   1.13 
3  1.30  -1.44   1.41 
4 -0.587 -0.675  0.631
5  1.20  -0.802 -1.86

或者使用Base R：

# Tag each row with the interval of `breaks` that its position falls in.
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(dt) would yield c(1, 0) and make the assignment length mismatch.
dt$grp <- cut(seq_len(nrow(dt)), breaks)
# Average X1, X2 and X3 within each interval.
aggregate(cbind(X1, X2, X3) ~ grp, data = dt, FUN = mean)

输出：

      grp         X1         X2         X3
1   (0,1]  1.2629543  1.5271869 -0.6728037
2   (1,2] -0.3262334 -1.5980185  1.1321869
3   (2,4]  1.3011143 -1.4371186  1.4062888
4   (4,8] -0.5871490 -0.6752118  0.6309875
5 (8,Inf]  1.1994431 -0.8018551 -1.8568098

请注意，输出似乎将数字四舍五入，但这仅用于显示。基础数据保持原始精度。

通过自定义的行中断汇总

2 个答案: