Question

Option Explicit

' Computes summary statistics for columns 3 to 50 of "Sheet1" and writes them
' into the same column of the "Statistics" sheet:
'   row 4  = sample standard deviation
'   row 5  = mean + 2 * sd
'   row 6  = mean + sd
'   row 7  = mean
'   row 8  = mean - sd
'   row 9  = mean - 2 * sd
'   row 10 = minimum
'   row 11 = maximum
' Side effect: cells in the source range that hold an error value are blanked.
Sub run_stats()

    Dim i As Long
    Dim rng As Range
    Dim cell As Range
    Dim sd As Double
    Dim avg As Double

    For i = 3 To 50
        ' Source data starts in row 11 and extends down as far as End(xlDown) goes.
        ' NOTE(review): End(xlDown) stops at the first gap, so a column with a
        ' blank cell in the middle is only partially covered - confirm this is intended.
        With Worksheets("Sheet1")
            Set rng = .Range(.Cells(11, i), .Cells(11, i).End(xlDown))
        End With

        For Each cell In rng
            Debug.Print cell.Value
            ' Blank out error values before the statistics are computed
            If Application.WorksheetFunction.IsError(cell) Then
                cell.Value = ""
            End If
        Next cell

        ' Compute each statistic once instead of once per output row
        sd = Application.WorksheetFunction.StDev_S(rng)
        avg = Application.WorksheetFunction.Average(rng)

        With Worksheets("Statistics")
            .Cells(4, i).Value = sd
            .Cells(5, i).Value = avg + (2 * sd)
            .Cells(6, i).Value = avg + sd
            .Cells(7, i).Value = avg
            .Cells(8, i).Value = avg - sd
            .Cells(9, i).Value = avg - (2 * sd)
            .Cells(10, i).Value = Application.WorksheetFunction.Min(rng)
            .Cells(11, i).Value = Application.WorksheetFunction.Max(rng)
        End With
    Next i

End Sub

我想计算2010年4月至2011年7月的平均值，然后再计算2011年4月至2012年7月的平均值，该怎么做？

我已经尝试过此代码，但是它仅在第一部分起作用，所以有人可以在第二部分帮助我吗？

Month Year Rainfall
4     2010
5     2010
6     2010
7     2010
8     2010
9     2010
10    2010
11    2010
12    2010
1     2011
2     2011
3      2011
4     2011
5     2011
6     2011
7     2011

Answer 1

类似的事情会起作用：

一行用于创建分组，其余为标准R内容

# Subtract 1 before dividing so rows 1-12 fall in group 0 (12 %/% 12 is 1).
df$gp <- (seq_len(nrow(df)) - 1) %/% 12

完整代码如下：

library(dplyr)

# Example data: one row per month, April 2010 through July 2011.
df <- structure(
  list(
    Month = c(4:12, 1:7),
    Year = rep(c(2010L, 2011L), times = c(9L, 7L)),
    Rainfall = c(3L, 4L, 5L, 3L, 4L, 5L, 6L, 7L, 8L, 4L, 3L, 4L, 5L, 6L, 5L, 4L)
  ),
  row.names = c(NA, -16L),
  class = c("data.table", "data.frame")
)

df
#>    Month Year Rainfall
#> 1      4 2010        3
#> 2      5 2010        4
#> 3      6 2010        5
#> 4      7 2010        3
#> 5      8 2010        4
#> 6      9 2010        5
#> 7     10 2010        6
#> 8     11 2010        7
#> 9     12 2010        8
#> 10     1 2011        4
#> 11     2 2011        3
#> 12     3 2011        4
#> 13     4 2011        5
#> 14     5 2011        6
#> 15     6 2011        5
#> 16     7 2011        4

# Label consecutive 12-row blocks: rows 1-12 -> 0, rows 13-24 -> 1, ...
# Subtracting 1 first keeps row 12 in the first block (12 %/% 12 would be 1).
# Assumes the rows are already in chronological order - TODO confirm.
df$gp <- (seq_len(nrow(df)) - 1) %/% 12

df
#>    Month Year Rainfall gp
#> 1      4 2010        3  0
#> 2      5 2010        4  0
#> 3      6 2010        5  0
#> 4      7 2010        3  0
#> 5      8 2010        4  0
#> 6      9 2010        5  0
#> 7     10 2010        6  0
#> 8     11 2010        7  0
#> 9     12 2010        8  0
#> 10     1 2011        4  0
#> 11     2 2011        3  0
#> 12     3 2011        4  0
#> 13     4 2011        5  1
#> 14     5 2011        6  1
#> 15     6 2011        5  1
#> 16     7 2011        4  1

# Mean rainfall per 12-row block
df %>% group_by(gp) %>% summarise(mean(Rainfall))
#> # A tibble: 2 x 2
#>      gp `mean(Rainfall)`
#>   <dbl>            <dbl>
#> 1     0             4.67
#> 2     1             5

使用lubridate包或转换为ts对象，可以找到更好的方法来解决此窗口问题。

Answer 2

使用我自己的伪造数据（如下），这是一个解决方案：

# For each starting year, average Rainfall over the window that runs from
# April of that year through July of the following year.
# Iterate over every year except the last one, which has no following year in x.
vapply(seq(min(x$Year), max(x$Year) - 1), function(yr) {
  mean(subset(x, (Year == yr & Month >= 4) | (Year == yr + 1 & Month <= 7))$Rainfall)
}, numeric(1))
# [1] 0.5421714 0.4412616 0.4867803

（分别用于2010、2011和2012）。

这里并没有严格检查每一年的数据是否包含全部月份（包括4月和7月）；那是另一个话题。

说明：

seq(min(x$Year), max(x$Year)-1)：从第一年到倒数第二年（假设是连续的年份）逐年迭代；
(Year == yr & Month >= 4)：包括今年以及第4个月或之后的所有数据，或者...
| (Year == yr+1 & Month <= 7)：明年和7月之前。
之后只需对筛选出的数据取平均值：mean( subset(...)$Rainfall )

中间步骤如下（包含我的数据）：

# Intermediate step: return the rows of each April-to-July window as a list
# of data frames, one element per starting year.
lapply(seq(min(x$Year), max(x$Year) - 1), function(yr) {
  subset(x, (Year == yr & Month >= 4) | (Year == yr + 1 & Month <= 7))
})
# [[1]]
#    Month Year  Rainfall
# 4      4 2010 0.1680519
# 5      5 2010 0.9438393
# 6      6 2010 0.9434750
# 7      7 2010 0.1291590
# 8      8 2010 0.8334488
# 9      9 2010 0.4680185
# 10    10 2010 0.5499837
# 11    11 2010 0.5526741
# 12    12 2010 0.2388948
# 13     1 2011 0.7605133
# 14     2 2011 0.1808201
# 15     3 2011 0.4052822
# 16     4 2011 0.8535485
# 17     5 2011 0.9763985
# 18     6 2011 0.2258255
# 19     7 2011 0.4448092
# [[2]]
#    Month Year   Rainfall
# 16     4 2011 0.85354845
# 17     5 2011 0.97639849
# 18     6 2011 0.22582546
# 19     7 2011 0.44480923
# 20     8 2011 0.07497942
# 21     9 2011 0.66189876
# 22    10 2011 0.38754954
# 23    11 2011 0.83688918
# 24    12 2011 0.15050144
# 25     1 2012 0.34727225
# 26     2 2012 0.48877323
# 27     3 2012 0.14924686
# 28     4 2012 0.35706259
# 29     5 2012 0.96264405
# 30     6 2012 0.13237200
# 31     7 2012 0.01041453
# [[3]]
#    Month Year   Rainfall
# 28     4 2012 0.35706259
# 29     5 2012 0.96264405
# 30     6 2012 0.13237200
# 31     7 2012 0.01041453
# 32     8 2012 0.16464224
# 33     9 2012 0.81019214
# 34    10 2012 0.86886104
# 35    11 2012 0.51428176
# 36    12 2012 0.62719629
# 37     1 2013 0.84442900
# 38     2 2013 0.28487057
# 39     3 2013 0.66722565
# 40     4 2013 0.15046975
# 41     5 2013 0.98172786
# 42     6 2013 0.29701074
# 43     7 2013 0.11508408

数据：

# Reproducible fake data: one row per month for `years` consecutive years
# starting in 2010, with uniform random rainfall values.
set.seed(2)
years <- 4
x <- data.frame(
  Month = rep_len(1:12, 12 * years),
  Year = 2009 + rep(seq_len(years), each = 12),
  Rainfall = runif(years * 12)
)
head(x, 6L)
#   Month Year  Rainfall
# 1     1 2010 0.1848823
# 2     2 2010 0.7023740
# 3     3 2010 0.5733263
# 4     4 2010 0.1680519
# 5     5 2010 0.9438393
# 6     6 2010 0.9434750

在R中如何计算从某年4月到次年7月的平均值？

2 个答案: