通过汇总第二个df的3年平均值来合并表格

时间:2014-12-29 15:53:29

标签: r

我正在模拟各种变量对特定运输中鱼类生长的影响。我想根据其地理网格位置计算每个运输的三年平均捕获率。我需要一个公式来采用' Stat_sq' (地理网格方块)和“年份”。钓鱼之旅(包含在第一个数据框中),并通过统计平均捕获率来生成三个活跃年份的平均捕获率(Stat_sq'和'年'来自第二个数据框架。

年度范围为93,94,95,96,97,98,99,00,01,02,03,04,05,06,07,08,09,10,11,12,13,14

如果存在NA /缺失值,我还希望它只返回两年平均值或仅返回一年值。

Dataframe1
Trip_Id Stat_sq Year
0113A_1   48E8  13
0113A_10  49E8  13
0113A_11  49E8  12
0113A_12  49E8  13
0113A_13  49E8  12
0113A_15  49E8  11
0113A_16  49E8  10
0113A_18  50E8  13
0113A_19  50E8  12

Dataframe2
Stat_sq Year    Catch_Rate
48E8    13      12.353719
48E8    12      16.508482
48E8    11      2.841493  
42E8    10      12.721584
50E8    13      12.419484
50E9    12      22.461538
50E8    11      28.141433
44E7    00      29.681110
44E8    00      5.427430   
45E6    00      13.259770
45E7    00      18.250903
45E8    00      62.222222  

对于运输0113A_1,我希望平方48E8的平均捕获率为13,12,11(3年包括当年)= 10.567898

任何帮助表示赞赏。

2 个答案:

答案 0 :(得分:2)

可能有帮助

 df1$Yr <- with(df1, as.numeric(ifelse(as.numeric(Year)>=93,
                              paste0(19,Year), paste0(20,Year))))

 df2$Yr <-  with(df2, as.numeric(ifelse(as.numeric(Year)>=93,
                              paste0(19,Year), paste0(20,Year))))


 res <-   unsplit(lapply(split(df1, df1$Stat_sq),
            function(x) {
           x1 <- df2[df2$Stat_sq %in% unique(x$Stat_sq),]
             x$Avg <- sapply(seq_len(nrow(x)), function(i) {
                   x2 <- x[i,]
                   indx <- x1$Yr %in% seq(x2$Yr-2, x2$Yr)
                   if(length(indx)>0) mean(x1$Catch_Rate[indx], na.rm=TRUE)
                    else NA})
              x}),
            df1$Stat_sq)

 head(res[,-4],2)
 #   Trip_Id Stat_sq Year     Avg
 #1  0113A_1    48E8   13 10.5679
 #2 0113A_10    49E8   13      NA

数据

 df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11", 
 "0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
 ), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", 
 "49E8", "50E8", "50E8"), Year = c("13", "13", "12", "13", "12", 
 "11", "10", "13", "12")), .Names = c("Trip_Id", "Stat_sq", "Year"
 ), class = "data.frame", row.names = c(NA, -9L))

 df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", 
 "50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c("13", 
 "12", "11", "10", "13", "12", "11", "00", "00", "00", "00", "00"
 ), Catch_Rate = c(12.353719, 16.508482, 2.841493, 12.721584, 
 12.419484, 22.461538, 28.141433, 29.68111, 5.42743, 13.25977, 
 18.250903, 62.222222)), .Names = c("Stat_sq", "Year", "Catch_Rate"
 ), class = "data.frame", row.names = c(NA, -12L))

答案 1 :(得分:0)

在这里,这应该会给你你想要的结果:

df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11", 
                                  "0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", 
               "49E8", "50E8", "50E8"), Year = c(13, 13, 12, 13, 12, 11, 10, 
                                                 13, 12)), .Names = c("Trip_Id", "Stat_sq", "Year"), class = "data.frame", 
row.names = c(NA, -9L))

df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", 
                                  "50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c(13, 
                                                                                                    12, 11, 10, 13, 12, 11, 0, 0, 0, 0, 0), Catch_Rate = c(12.353719, 
                                                                                                                                                           16.508482, 2.841493, 12.721584, 12.419484, 22.461538, 28.141433, 
                                                                                                                                                           29.68111, 5.42743, 13.25977, 18.250903, 62.222222)), .Names = c("Stat_sq", 
                                                                                                                                                                                                                           "Year", "Catch_Rate"), class = "data.frame", row.names = c(NA, -12L))

combined <- merge(df1,df2,all.x=TRUE)

findRate <- function(ggs,year){

  # ggs - geographic grid square (stat_sq)
  # year - desired year

  filter1 <- combined[combined$Stat_sq==ggs,]
  last3years <- c(year:(year-2))
  filter2 <- filter1[is.element(filter1$Year,last3years),]
  output <- aggregate(Catch_Rate~Year,data=filter2,mean)
  print(output)
}

findRate("50E8",14)