我之前发布了a loop question,我正在尝试另一个循环但没有成功。帮助尝试解决这个问题将非常感激。截至目前,为了完成我的工作,我将按年对数据进行子集化并按原样运行我的原始函数,但我正在使用的数据集之一是一个很长的时间序列。我的原始函数计算给定年份数据集的年龄鱼数。这个功能很好。我想要做的是添加一个for循环,允许函数循环所有年份并提供相同的信息。
数据:
x <- structure(list(Year = c(2007, 2012, 2012, 2007, 2012, 2007, 2012,
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007,
2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012,
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007,
2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012,
2007, 2012, 2012, 2007, 2012, 2007, 2012, 2012, 2007, 2012, 2012,
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2012, 2007, 2012,
2012, 2012, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012
), Season = c("Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall",
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall"), Length = c(6,
9, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25,
26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33,
34, 34, 35, 35, 36, 37, 37, 38, 38, 39, 40, 40, 41, 42, 42, 43,
43, 44, 44, 45, 45, 46, 47, 47, 48, 49, 50, 51, 51, 52, 52, 53,
54, 55, 58), Exp_number = c(2, 1, 3, 2, 2, 6, 4, 11, 6, 24, 13,
38, 41.208, 26, 77.096, 37, 227.704, 41, 276.064, 20, 276.536,
23, 277.008, 23, 72.832, 11, 66.096, 8, 43.888, 12, 13.472, 14,
2, 14, 4, 8, 4, 10, 5, 12, 2, 13, 5, 9, 2, 7, 1, 4, 3, 2, 2,
8, 2, 3, 2, 1, 3, 2, 5, 1, 8, 2, 2, 2, 1, 6, 1, 2, 1, 1, 4, 1,
3, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1)), .Names = c("Year", "Season",
"Length", "Exp_number"), row.names = c(8L, 43L, 55L, 64L, 68L,
75L, 78L, 86L, 91L, 98L, 103L, 110L, 115L, 120L, 125L, 131L,
136L, 143L, 148L, 157L, 162L, 169L, 174L, 181L, 186L, 193L, 197L,
206L, 211L, 220L, 225L, 234L, 238L, 247L, 252L, 260L, 265L, 274L,
279L, 288L, 293L, 302L, 307L, 316L, 320L, 329L, 334L, 343L, 346L,
355L, 360L, 368L, 371L, 383L, 392L, 395L, 404L, 409L, 422L, 430L,
435L, 447L, 456L, 461L, 468L, 472L, 480L, 483L, 491L, 495L, 505L,
512L, 516L, 527L, 537L, 545L, 550L, 553L, 558L, 562L, 565L, 568L,
571L, 583L), class = "data.frame")
y <- structure(c(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 4, 5, 30, 29, 28,
17, 8, 8, 6, 16, 26, 59, 46, 77, 89, 78, 64, 51, 34, 31, 27,
19, 30, 21, 26, 13, 26, 18, 12, 8, 9, 12, 9, 6, 13, 12, 20, 10,
14, 14, 11, 8, 10, 13, 7, 6, 4, 4, 8, 2, 2, 4, 3, 0, 2, 0, 1,
1, 1, 1, 1, 1, 0.941176470588235, 0.875, 0.625, 0.666666666666667,
0.375, 0.423076923076923, 0.423728813559322, 0.391304347826087,
0.246753246753247, 0.235955056179775, 0.153846153846154, 0.203125,
0.0980392156862745, 0.0882352941176471, 0, 0.037037037037037,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0588235294117647,
0.125, 0.375, 0.333333333333333, 0.5625, 0.5, 0.457627118644068,
0.478260869565217, 0.545454545454545, 0.561797752808989, 0.564102564102564,
0.59375, 0.647058823529412, 0.411764705882353, 0.483870967741935,
0.222222222222222, 0.157894736842105, 0.0666666666666667, 0.0476190476190476,
0.0384615384615385, 0.0769230769230769, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0625, 0.0769230769230769,
0.11864406779661, 0.130434782608696, 0.207792207792208, 0.191011235955056,
0.269230769230769, 0.171875, 0.176470588235294, 0.5, 0.483870967741935,
0.481481481481481, 0.736842105263158, 0.8, 0.619047619047619,
0.576923076923077, 0.615384615384615, 0.423076923076923, 0.277777777777778,
0.333333333333333, 0.25, 0.111111111111111, 0.166666666666667,
0.111111111111111, 0, 0, 0, 0, 0, 0.142857142857143, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.0112359550561798, 0.0128205128205128,
0.03125, 0.0784313725490196, 0, 0.032258064516129, 0.185185185185185,
0.105263157894737, 0.1, 0.285714285714286, 0.269230769230769,
0.307692307692308, 0.5, 0.5, 0.583333333333333, 0.625, 0.555555555555556,
0.666666666666667, 0.444444444444444, 0.5, 0.538461538461538,
0.333333333333333, 0.25, 0.1, 0, 0.214285714285714, 0, 0.125,
0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.037037037037037,
0, 0.0333333333333333, 0.0476190476190476, 0.115384615384615,
0, 0.0769230769230769, 0.166666666666667, 0.0833333333333333,
0.125, 0.222222222222222, 0.166666666666667, 0.444444444444444,
0.333333333333333, 0.230769230769231, 0.333333333333333, 0.4,
0.9, 0.571428571428571, 0.357142857142857, 0.545454545454545,
0.5, 0.4, 0.230769230769231, 0.142857142857143, 0.333333333333333,
0.25, 0, 0.375, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.037037037037037, 0,
0, 0, 0, 0, 0, 0.0555555555555556, 0, 0, 0.111111111111111, 0,
0, 0.166666666666667, 0.230769230769231, 0.25, 0.25, 0, 0.214285714285714,
0.214285714285714, 0.272727272727273, 0, 0.4, 0.461538461538462,
0.285714285714286, 0, 0.75, 0.5, 0.25, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0833333333333333,
0.1, 0, 0.0714285714285714, 0.142857142857143, 0.0909090909090909,
0.125, 0.1, 0.307692307692308, 0.285714285714286, 0.333333333333333,
0, 0.25, 0.25, 0, 1, 0.5, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0714285714285714, 0.0909090909090909,
0, 0, 0, 0.285714285714286, 0.333333333333333, 0, 0, 0, 0, 0,
0.25, 0.333333333333333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0, 0, 0, 0, 0.25,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.125, 0.5, 0, 0.25,
0.666666666666667, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0,
0, 0, 0, 0.5, 0, 1), .Dim = c(57L, 13L), .Dimnames = list(NULL,
c("len", "nl", "A0", "A1", "A2", "A3", "A4", "A5", "A6",
"A7", "A8", "A9", "A10")))
z <- structure(list(Length = c(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)), .Names = "Length", row.names = c(NA,
-57L), class = "data.frame")
原始功能 - 子集x运行一年。
subx <- subset(x,Year==2007)
myfunction <- function (x,y,z) {
#x - expanded length data by year and season
#y - alk as a percent by season
#z - length bins from alk percent data
#merge z and y to make sure both data sets have the same number of length bins
comb<-merge(z,x,by="Length",all=T)
#replace NA by column
comb$Year[is.na(comb$Year)]<-unique(x$Year)
comb$Exp_number[is.na(comb$Exp_number)] <- 0
#catch at age
catchatage<-comb[,4]*y[,3:13]
columntotal<-colSums(catchatage[1:dim(catchatage)[1],1:dim(catchatage)[2]])
#assign and combine age, number of fish at age and year
Number=as.data.frame(columntotal)
Age=rownames(Number);Age<-as.numeric(gsub("A","",Age)) #get rid of the letter A in age
Year=rep(unique(x$Year),length(columntotal))
age<-as.data.frame(cbind(Year,Age,Number=Number[,1]))
#reorder age
age<-age[order(age[,2]),]
return(age)
}
myfunction(subx,y,z)
我的函数带有唯一年份值的循环 - 使用整个x数据集。
myfunction_2 <- function (x,y,z) {
#x - expanded length data by year and season
#y - alk as a percent by season
#z - length bins from alk percent data
#loop through years in survey
#get unique year values from combined year season dataset
y_levels<-unique(x$Year)
for (i in length(y_levels)){
#subset the data
subset_data<-x$Year==y_levels[i]
#merge z and y to make sure both data sets have the same number of length bins
comb<-merge(z,subset_data,by="Length",all=T)
#replace NA by column
comb$Year[is.na(comb$Year)]<-unique(x$Year[i])
comb$Exp_number[is.na(comb$Exp_number)] <- 0
#catch at age
catchatage<-comb[,4]*y[,3:13]
columntotal<-colSums(catchatage[1:dim(catchatage)[1],1:dim(catchatage)[2]])
#assign and combine age, number of fish at age and year
Number=as.data.frame(columntotal)
Age=rownames(Number);Age<-as.numeric(gsub("A","",Age)) #get rid of the letter A in age
Year=rep(unique(subset_data$Year[i]),length(columntotal))
age<-as.data.frame(cbind(Year,Age,Number=Number[,1]))
#reorder age
age<-age[order(age[,2]),]
return(age)
}
}
myfunction_2(x,y,z)
我收到的错误消息是:
fix.by(by.y,y)出错:'by'必须指定唯一有效的列
所以我认为我的循环不能按唯一年份对数据进行子集化。
谢谢。
答案 0 :(得分:3)
为了使这段代码能够工作,有几点需要改变:
subset_data<-x$Year==y_levels[i]
实际上并未定义子集,而应该是subset_data <- subset(x, Year==y_levels[i])
for (i in length(y_levels))
应为for (i in 1:length(y_levels))
,否则循环仅适用于2012年return(age)
在循环内部时应该在在纠正第1点之后,更容易纠正其余部分。
另一个建议:你的第二个功能应该使用第一个,代码将更容易阅读。
最后,在没有“for”语句的情况下循环的另一种方法是使用lapply
(或更确切地说do.call
,请参阅@konvas'评论):
do.call(rbind, lapply(unique(x$Year), function(yy) myfunction(subset(x,Year==yy),y,z) ))
答案 1 :(得分:1)
虽然您的代码应该可行(在修复@VincentGuillemot建议的错误之后),但如果您愿意,可以使myfunction
更具可读性。我认为值得从头开始修改您的起始数据框,以包含Length
和Year
的每个组合(而不是在myfunction
中进行,为期一年一段时间)。
所以,既然你所追求的是所有长年组合,那么
之类的东西呢# create a data frame consisting of all length-year combinations
data <- expand.grid(Length = z$Length, Year = unique(x$Year))
data <- merge(data, x, all = TRUE) # merge with x
data <- merge(data, as.data.frame(y), by.x = "Length", by.y = "len") # merge with y
data$Exp_number[is.na(data$Exp_number)] <- 0 # set missing Exp_number values to 0
在此阶段,您的数据位于一个数据框(而不是3个)中,并且已经处理了缺失值(除了Season
列,您似乎并不关心) 。我发现在d
(而不是x
,y
,z
)上执行分析更容易,您可以专注于实际计算而不是合并数据框和替换NA
秒。
现在,您的功能看起来像
myfunction <- function(d) {
# d is a subset of data for a given year
catchatage <- d$Exp_number * d[grep("^A[0-9]*$", names(d))]
Number <- colSums(catchatage)
Age <- as.numeric(gsub("A", "", names(Number)))
result <- data.frame(Year = d$Year[1],
Age = Age, Number = Number)
result[order(result$Age), ]
}
我更喜欢使用列名和grep
而不是列索引,因为如果数据的顺序/结构发生变化(另一方面,变量的名称),使用索引会导致未检测到的错误很少改变)。
依据myfunction
将data
应用于Year
的子集,并将结果合并。这可以通过多种方式完成,使用lapply
作为@VincentGuillemot在帖子中建议,或by
(或其他几种非基础方法,如plyr
,dplyr
,{ {1}}如果你有兴趣研究它们)
data.table