循环遍历唯一值R.

时间:2014-07-16 14:48:28

标签: r loops unique

我之前发布了a loop question,我正在尝试另一个循环但没有成功。帮助尝试解决这个问题将非常感激。截至目前,为了完成我的工作,我将按年对数据进行子集化并按原样运行我的原始函数,但我正在使用的数据集之一是一个很长的时间序列。我的原始函数计算给定年份数据集的年龄鱼数。这个功能很好。我想要做的是添加一个for循环,允许函数循环所有年份并提供相同的信息。

数据:

x <- structure(list(Year = c(2007, 2012, 2012, 2007, 2012, 2007, 2012, 
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 
2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 
2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 
2007, 2012, 2012, 2007, 2012, 2007, 2012, 2012, 2007, 2012, 2012, 
2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2012, 2007, 2012, 
2012, 2012, 2012, 2007, 2012, 2007, 2012, 2007, 2012, 2007, 2012
), Season = c("Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", "Fall", 
"Fall", "Fall", "Fall", "Fall", "Fall", "Fall"), Length = c(6, 
9, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 
18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 
26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 
34, 34, 35, 35, 36, 37, 37, 38, 38, 39, 40, 40, 41, 42, 42, 43, 
43, 44, 44, 45, 45, 46, 47, 47, 48, 49, 50, 51, 51, 52, 52, 53, 
54, 55, 58), Exp_number = c(2, 1, 3, 2, 2, 6, 4, 11, 6, 24, 13, 
38, 41.208, 26, 77.096, 37, 227.704, 41, 276.064, 20, 276.536, 
23, 277.008, 23, 72.832, 11, 66.096, 8, 43.888, 12, 13.472, 14, 
2, 14, 4, 8, 4, 10, 5, 12, 2, 13, 5, 9, 2, 7, 1, 4, 3, 2, 2, 
8, 2, 3, 2, 1, 3, 2, 5, 1, 8, 2, 2, 2, 1, 6, 1, 2, 1, 1, 4, 1, 
3, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1)), .Names = c("Year", "Season", 
"Length", "Exp_number"), row.names = c(8L, 43L, 55L, 64L, 68L, 
75L, 78L, 86L, 91L, 98L, 103L, 110L, 115L, 120L, 125L, 131L, 
136L, 143L, 148L, 157L, 162L, 169L, 174L, 181L, 186L, 193L, 197L, 
206L, 211L, 220L, 225L, 234L, 238L, 247L, 252L, 260L, 265L, 274L, 
279L, 288L, 293L, 302L, 307L, 316L, 320L, 329L, 334L, 343L, 346L, 
355L, 360L, 368L, 371L, 383L, 392L, 395L, 404L, 409L, 422L, 430L, 
435L, 447L, 456L, 461L, 468L, 472L, 480L, 483L, 491L, 495L, 505L, 
512L, 516L, 527L, 537L, 545L, 550L, 553L, 558L, 562L, 565L, 568L, 
571L, 583L), class = "data.frame")


y <- structure(c(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 4, 5, 30, 29, 28, 
17, 8, 8, 6, 16, 26, 59, 46, 77, 89, 78, 64, 51, 34, 31, 27, 
19, 30, 21, 26, 13, 26, 18, 12, 8, 9, 12, 9, 6, 13, 12, 20, 10, 
14, 14, 11, 8, 10, 13, 7, 6, 4, 4, 8, 2, 2, 4, 3, 0, 2, 0, 1, 
1, 1, 1, 1, 1, 0.941176470588235, 0.875, 0.625, 0.666666666666667, 
0.375, 0.423076923076923, 0.423728813559322, 0.391304347826087, 
0.246753246753247, 0.235955056179775, 0.153846153846154, 0.203125, 
0.0980392156862745, 0.0882352941176471, 0, 0.037037037037037, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0588235294117647, 
0.125, 0.375, 0.333333333333333, 0.5625, 0.5, 0.457627118644068, 
0.478260869565217, 0.545454545454545, 0.561797752808989, 0.564102564102564, 
0.59375, 0.647058823529412, 0.411764705882353, 0.483870967741935, 
0.222222222222222, 0.157894736842105, 0.0666666666666667, 0.0476190476190476, 
0.0384615384615385, 0.0769230769230769, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0625, 0.0769230769230769, 
0.11864406779661, 0.130434782608696, 0.207792207792208, 0.191011235955056, 
0.269230769230769, 0.171875, 0.176470588235294, 0.5, 0.483870967741935, 
0.481481481481481, 0.736842105263158, 0.8, 0.619047619047619, 
0.576923076923077, 0.615384615384615, 0.423076923076923, 0.277777777777778, 
0.333333333333333, 0.25, 0.111111111111111, 0.166666666666667, 
0.111111111111111, 0, 0, 0, 0, 0, 0.142857142857143, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0.0112359550561798, 0.0128205128205128, 
0.03125, 0.0784313725490196, 0, 0.032258064516129, 0.185185185185185, 
0.105263157894737, 0.1, 0.285714285714286, 0.269230769230769, 
0.307692307692308, 0.5, 0.5, 0.583333333333333, 0.625, 0.555555555555556, 
0.666666666666667, 0.444444444444444, 0.5, 0.538461538461538, 
0.333333333333333, 0.25, 0.1, 0, 0.214285714285714, 0, 0.125, 
0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.037037037037037, 
0, 0.0333333333333333, 0.0476190476190476, 0.115384615384615, 
0, 0.0769230769230769, 0.166666666666667, 0.0833333333333333, 
0.125, 0.222222222222222, 0.166666666666667, 0.444444444444444, 
0.333333333333333, 0.230769230769231, 0.333333333333333, 0.4, 
0.9, 0.571428571428571, 0.357142857142857, 0.545454545454545, 
0.5, 0.4, 0.230769230769231, 0.142857142857143, 0.333333333333333, 
0.25, 0, 0.375, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.037037037037037, 0, 
0, 0, 0, 0, 0, 0.0555555555555556, 0, 0, 0.111111111111111, 0, 
0, 0.166666666666667, 0.230769230769231, 0.25, 0.25, 0, 0.214285714285714, 
0.214285714285714, 0.272727272727273, 0, 0.4, 0.461538461538462, 
0.285714285714286, 0, 0.75, 0.5, 0.25, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0833333333333333, 
0.1, 0, 0.0714285714285714, 0.142857142857143, 0.0909090909090909, 
0.125, 0.1, 0.307692307692308, 0.285714285714286, 0.333333333333333, 
0, 0.25, 0.25, 0, 1, 0.5, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0714285714285714, 0.0909090909090909, 
0, 0, 0, 0.285714285714286, 0.333333333333333, 0, 0, 0, 0, 0, 
0.25, 0.333333333333333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0, 0, 0, 0, 0.25, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.125, 0.5, 0, 0.25, 
0.666666666666667, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0, 
0, 0, 0, 0.5, 0, 1), .Dim = c(57L, 13L), .Dimnames = list(NULL, 
    c("len", "nl", "A0", "A1", "A2", "A3", "A4", "A5", "A6", 
    "A7", "A8", "A9", "A10")))

z <- structure(list(Length = c(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)), .Names = "Length", row.names = c(NA, 
-57L), class = "data.frame")

原始功能 - 子集x运行一年。

subx <- subset(x,Year==2007)

myfunction <- function (x,y,z) {
    #x - expanded length data by year and season
    #y - alk as a percent by season
    #z - length bins from alk percent data 

    #merge z and y to make sure both data sets have the same number of length bins
    comb<-merge(z,x,by="Length",all=T)

    #replace NA by column
    comb$Year[is.na(comb$Year)]<-unique(x$Year)
    comb$Exp_number[is.na(comb$Exp_number)] <- 0

    #catch at age   
    catchatage<-comb[,4]*y[,3:13]
    columntotal<-colSums(catchatage[1:dim(catchatage)[1],1:dim(catchatage)[2]])

    #assign and combine age, number of fish at age and year 
    Number=as.data.frame(columntotal)
    Age=rownames(Number);Age<-as.numeric(gsub("A","",Age))  #get rid of the letter A in age
    Year=rep(unique(x$Year),length(columntotal))

    age<-as.data.frame(cbind(Year,Age,Number=Number[,1]))

    #reorder age
    age<-age[order(age[,2]),]
    return(age)
}
myfunction(subx,y,z)

我的函数带有唯一年份值的循环 - 使用整个x数据集。

myfunction_2 <- function (x,y,z) {
    #x - expanded length data by year and season
    #y - alk as a percent by season
    #z - length bins from alk percent data

    #loop through years in survey
    #get unique year values from combined year season dataset
    y_levels<-unique(x$Year)

    for (i in length(y_levels)){

    #subset the data 
    subset_data<-x$Year==y_levels[i]

    #merge z and y to make sure both data sets have the same number of length bins
    comb<-merge(z,subset_data,by="Length",all=T)

    #replace NA by column
    comb$Year[is.na(comb$Year)]<-unique(x$Year[i])
    comb$Exp_number[is.na(comb$Exp_number)] <- 0

    #catch at age   
    catchatage<-comb[,4]*y[,3:13]
    columntotal<-colSums(catchatage[1:dim(catchatage)[1],1:dim(catchatage)[2]])

    #assign and combine age, number of fish at age and year 
    Number=as.data.frame(columntotal)
    Age=rownames(Number);Age<-as.numeric(gsub("A","",Age))  #get rid of the letter A in age
    Year=rep(unique(subset_data$Year[i]),length(columntotal))

    age<-as.data.frame(cbind(Year,Age,Number=Number[,1]))

    #reorder age
    age<-age[order(age[,2]),]

    return(age)

    }
}

myfunction_2(x,y,z)

我收到的错误消息是:

  

fix.by(by.y,y)出错:'by'必须指定唯一有效的列

所以我认为我的循环不能按唯一年份对数据进行子集化。

谢谢。

2 个答案:

答案 0 :(得分:3)

为了使这段代码能够工作,有几点需要改变:

  1. subset_data<-x$Year==y_levels[i]实际上并未定义子集,而应该是subset_data <- subset(x, Year==y_levels[i])
  2. 在循环中,for (i in length(y_levels))应为for (i in 1:length(y_levels)),否则循环仅适用于2012年
  3. return(age)在循环内部时应该在
  4. 之外
  5. 每次迭代的结果都没有合并
  6. 在纠正第1点之后,更容易纠正其余部分。

    另一个建议:你的第二个功能应该使用第一个,代码将更容易阅读。

    最后,在没有“for”语句的情况下循环的另一种方法是使用lapply(或更确切地说do.call,请参阅@konvas'评论):

     do.call(rbind, lapply(unique(x$Year), function(yy) myfunction(subset(x,Year==yy),y,z) ))
    

答案 1 :(得分:1)

虽然您的代码应该可行(在修复@VincentGuillemot建议的错误之后),但如果您愿意,可以使myfunction更具可读性。我认为值得从头开始修改您的起始数据框,以包含LengthYear的每个组合(而不是在myfunction中进行,为期一年一段时间)。

所以,既然你所追求的是所有长年组合,那么

之类的东西呢
# create a data frame consisting of all length-year combinations
data <- expand.grid(Length = z$Length, Year = unique(x$Year)) 
data <- merge(data, x, all = TRUE) # merge with x
data <- merge(data, as.data.frame(y), by.x = "Length", by.y = "len") # merge with y
data$Exp_number[is.na(data$Exp_number)] <- 0 # set missing Exp_number values to 0

在此阶段,您的数据位于一个数据框(而不是3个)中,并且已经处理了缺失值(除了Season列,您似乎并不关心) 。我发现在d(而不是xyz)上执行分析更容易,您可以专注于实际计算而不是合并数据框和替换NA秒。

现在,您的功能看起来像

myfunction <- function(d) {
    # d is a subset of data for a given year
    catchatage <- d$Exp_number * d[grep("^A[0-9]*$", names(d))]
    Number <- colSums(catchatage)
    Age <- as.numeric(gsub("A", "", names(Number)))
    result <- data.frame(Year = d$Year[1],
        Age = Age, Number = Number)
    result[order(result$Age), ]
}

我更喜欢使用列名和grep而不是列索引,因为如果数据的顺序/结构发生变化(另一方面,变量的名称),使用索引会导致未检测到的错误很少改变)。

依据myfunctiondata应用于Year的子集,并将结果合并。这可以通过多种方式完成,使用lapply作为@VincentGuillemot在帖子中建议,或by(或其他几种非基础方法,如plyrdplyr,{ {1}}如果你有兴趣研究它们)

data.table