我正在模拟各种变量对特定航次(trip)中鱼类生长的影响。我想根据每个航次所在的地理网格位置,计算该航次的三年平均捕获率。我需要一个公式:取第一个数据框中每个航次的 'Stat_sq'(地理网格方块)和 'Year'(年份),再按 'Stat_sq' 和 'Year' 在第二个数据框中查找对应的捕获率,计算出包含当年在内的最近三年的平均捕获率。
年度范围为93,94,95,96,97,98,99,00,01,02,03,04,05,06,07,08,09,10,11,12,13,14
如果三年中存在 NA/缺失值,我希望它改为返回其余两年的平均值,或者仅返回剩下那一年的值。
Dataframe1
Trip_Id Stat_sq Year
0113A_1 48E8 13
0113A_10 49E8 13
0113A_11 49E8 12
0113A_12 49E8 13
0113A_13 49E8 12
0113A_15 49E8 11
0113A_16 49E8 10
0113A_18 50E8 13
0113A_19 50E8 12
Dataframe2
Stat_sq Year Catch_Rate
48E8 13 12.353719
48E8 12 16.508482
48E8 11 2.841493
42E8 10 12.721584
50E8 13 12.419484
50E9 12 22.461538
50E8 11 28.141433
44E7 00 29.681110
44E8 00 5.427430
45E6 00 13.259770
45E7 00 18.250903
45E8 00 62.222222
对于航次 0113A_1,我希望得到网格方块 48E8 在 13、12、11 这三年(包含当年)的平均捕获率,即 10.567898。
任何帮助表示赞赏。
答案 0 :(得分:2)
下面的代码也许有帮助:
# ---- Example data ----
# The data is defined first so the snippet runs top to bottom.
# df1: one row per trip, with its grid square and two-digit year (as text).
df1 <- data.frame(
  Trip_Id = c("0113A_1", "0113A_10", "0113A_11", "0113A_12", "0113A_13",
              "0113A_15", "0113A_16", "0113A_18", "0113A_19"),
  Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", "49E8",
              "50E8", "50E8"),
  Year = c("13", "13", "12", "13", "12", "11", "10", "13", "12"),
  stringsAsFactors = FALSE
)
# df2: one catch rate per grid square and two-digit year (as text).
df2 <- data.frame(
  Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", "50E9", "50E8",
              "44E7", "44E8", "45E6", "45E7", "45E8"),
  Year = c("13", "12", "11", "10", "13", "12", "11", "00", "00", "00",
           "00", "00"),
  Catch_Rate = c(12.353719, 16.508482, 2.841493, 12.721584, 12.419484,
                 22.461538, 28.141433, 29.68111, 5.42743, 13.25977,
                 18.250903, 62.222222),
  stringsAsFactors = FALSE
)

# Expand a two-digit year into a four-digit year so that windows spanning the
# century change (e.g. 99, 00, 01) are consecutive numbers.
# Values >= 93 map to 19xx, everything else to 20xx. Arithmetic is used rather
# than pasting digits so that numeric input such as 0 still yields 2000.
to_full_year <- function(yy) {
  yy_num <- as.numeric(yy)
  ifelse(yy_num >= 93, 1900 + yy_num, 2000 + yy_num)
}

# Add an `Avg` column to `trips`: for each trip, the mean of the non-missing
# `Catch_Rate` values in `rates` for the same `Stat_sq` within the `n_years`
# years ending at (and including) the trip's `Yr`.
#   trips   data frame with columns Stat_sq and Yr (four-digit year)
#   rates   data frame with columns Stat_sq, Yr and Catch_Rate
#   n_years window length in years (default 3)
# Returns `trips` with the extra numeric column `Avg`. `Avg` is NA when no
# non-missing catch rate falls inside the window; if only one or two years are
# available, their mean is returned.
add_window_avg <- function(trips, rates, n_years = 3L) {
  trips$Avg <- vapply(seq_len(nrow(trips)), function(i) {
    yr <- trips$Yr[i]
    if (is.na(yr)) {
      return(NA_real_)
    }
    in_window <- !is.na(rates$Stat_sq) &
      rates$Stat_sq == trips$Stat_sq[i] &
      rates$Yr %in% seq(yr - (n_years - 1), yr) &
      !is.na(rates$Catch_Rate)
    # Test whether anything matched: the mean of an empty selection is NaN.
    if (any(in_window)) mean(rates$Catch_Rate[in_window]) else NA_real_
  }, numeric(1))
  trips
}

df1$Yr <- to_full_year(df1$Year)
df2$Yr <- to_full_year(df2$Year)
res <- add_window_avg(df1, df2)

# Show the first two trips without the helper column `Yr`.
head(res[, c("Trip_Id", "Stat_sq", "Year", "Avg")], 2)
#   Trip_Id Stat_sq Year     Avg
#1  0113A_1    48E8   13 10.5679
#2 0113A_10    49E8   13      NA
答案 1 :(得分:0)
下面的代码应该能给出你想要的结果:
# df1: one row per trip, with its grid square and two-digit year (numeric).
df1 <- data.frame(
  Trip_Id = c("0113A_1", "0113A_10", "0113A_11", "0113A_12", "0113A_13",
              "0113A_15", "0113A_16", "0113A_18", "0113A_19"),
  Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", "49E8",
              "50E8", "50E8"),
  Year = c(13, 13, 12, 13, 12, 11, 10, 13, 12),
  stringsAsFactors = FALSE
)
# df2: one catch rate per grid square and two-digit year (numeric).
df2 <- data.frame(
  Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", "50E9", "50E8",
              "44E7", "44E8", "45E6", "45E7", "45E8"),
  Year = c(13, 12, 11, 10, 13, 12, 11, 0, 0, 0, 0, 0),
  Catch_Rate = c(12.353719, 16.508482, 2.841493, 12.721584, 12.419484,
                 22.461538, 28.141433, 29.68111, 5.42743, 13.25977,
                 18.250903, 62.222222),
  stringsAsFactors = FALSE
)
# Join trips and catch rates on grid square and year.
# `all = TRUE` keeps catch-rate rows for years in which `df1` has no trip
# (their Trip_Id is NA) as well as trips without a catch rate (Catch_Rate is
# NA). A left join from `df1` would drop the former, so a look-back over
# earlier years would silently miss them.
combined <- merge(df1, df2, by = c("Stat_sq", "Year"), all = TRUE)
# Print and return the per-year mean catch rate of one grid square over a
# trailing window of years.
#   ggs     geographic grid square (value of the Stat_sq column)
#   year    last year of the window, in the same encoding as data$Year
#   data    data frame with columns Stat_sq, Year and Catch_Rate; defaults to
#           the `combined` data frame, looked up when the function is called
#   n_years window length in years (default 3: year, year - 1, year - 2)
# Returns (invisibly, via print) a data frame with columns Year and
# Catch_Rate, one row per year that has at least one non-missing catch rate.
# The data frame has zero rows when nothing matches.
findRate <- function(ggs, year, data = combined, n_years = 3L) {
  window_years <- seq(year, by = -1, length.out = n_years)
  # Rows with a missing Catch_Rate are excluded up front; the formula
  # interface of aggregate() would drop them anyway.
  keep <- !is.na(data$Stat_sq) & data$Stat_sq == ggs &
    data$Year %in% window_years & !is.na(data$Catch_Rate)
  in_window <- data[keep, , drop = FALSE]
  output <- if (nrow(in_window) > 0) {
    aggregate(Catch_Rate ~ Year, data = in_window, FUN = mean)
  } else {
    # aggregate() stops with "no rows to aggregate" on empty input, so an
    # empty result with the same columns is returned instead.
    in_window[, c("Year", "Catch_Rate"), drop = FALSE]
  }
  print(output)
}
# Example: per-year mean catch rates for square 50E8 in the three years
# ending with year 14.
findRate(ggs = "50E8", year = 14)