Question

这是我在这里发的第一个帖子，不过我一直在这里阅读很多内容，谢谢大家！

我有一个包含许多列的庞大数据框，但这里只涉及其中4列：

dates/classes/names/grades.

对于每个日期，我有几个班级（每个班级都有学生），每个班级有几个人（姓名——各班级的成员始终不变），每个人在每个日期都有一个成绩。

在第一个日期，我使用 max() 找出每个班级中成绩最好的学生。
但是，对于下一个日期，我想要执行以下操作：

如果以前最好的学生仍然是班上的前三名，那么我们认为他仍然是最好的。
否则，我们认为新的第一名学生是最好的。

因此，每个日期都取决于前一个日期。

没有循环可以做到这一点吗？
我不知道如何，因为每次迭代都取决于前一次迭代。

我的代码如下。如果代码不够优化，还请见谅！

非常感谢：）

# For every date in `horizon` (except the last one, as in the original loop),
# pick one "best" student per class:
#   * on the first date, the student with the highest grade in the class;
#   * on later dates, the previously selected student is kept as long as he is
#     still among the top 3 of his class, otherwise the new top student is taken.
# After each date the selection is stored in `lastdate`, so when the loop ends
# `selecdate` / `lastdate` hold the selection for the last processed date.
#
# Column layout of `dataaf` used below: 1 = date, 3 = name, 13 = grade, 14 = class.
# NOTE(review): class ids are assumed to be the integers 1..n_classes - confirm.
# NOTE(review): the last element of `horizon` is never processed - confirm intended.
n_classes <- 15

for (i in seq_len(length(horizon) - 1)) {  # seq_len() is safe when horizon has < 2 dates
  # Rows of dataaf belonging to the date being processed
  uni3 <- dataaf[dataaf[, 1] == as.numeric(horizon[i]), ]

  # One slot per class; filled below and bound together once (no rbind in the loop)
  picks <- vector("list", n_classes)

  for (z in seq_len(n_classes)) {
    # Complete rows of class z on this date
    selecsec <- na.omit(uni3[uni3[, 14] == z, ])
    if (nrow(selecsec) == 0) {
      next  # nobody in this class on this date: nothing to select
    }

    # Rank within the class, best grade first. The best row is taken from the
    # class subset itself, so an equal grade in another class cannot be picked.
    ranked <- selecsec[order(selecsec[, 13], decreasing = TRUE), ]
    best <- ranked[1, , drop = FALSE]

    if (i == 1) {
      picks[[z]] <- best
    } else {
      lastsec <- na.omit(lastdate[lastdate[, 14] == z, ])  # previous selection for class z
      top3 <- head(ranked, 3)

      if (any(top3[, 3] %in% lastsec[, 3])) {
        # Previous best is still in the top 3: keep him.
        # NOTE(review): as in the original, this stores the row from the earlier
        # date (old date and grade), not his row for the current date.
        picks[[z]] <- lastsec
      } else {
        picks[[z]] <- best
      }
    }
  }

  picks <- Filter(Negate(is.null), picks)
  # Keep the column layout even when nothing was selected, so that the
  # lastdate[, 14] lookup on the next date still works.
  selecdate <- if (length(picks) > 0) do.call(rbind, picks) else uni3[0, , drop = FALSE]

  lastdate <- selecdate  # recording the last results
}

编辑：这是一个例子。

在第1天，John 和 Audrey 分别在班级1和班级2中被选中。
在第2天，John 仍然位列前三名，所以他仍然被选中；而 Audrey 只排第4名，所以 Jim（第2天排名第1）取代了她。
在第3天，John 仍然是前三名之一，所以他仍然被选中（我实际处理的数据不存在并列的问题）。Jim 现在排名第4，所以 Sandra 取代了他的位置。

structure(list(Dates = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("12/02", "13/02", "14/02"), class = "factor"), Classes = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2), Names = structure(c(6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L, 6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L, 6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L), .Label = c("Ashley", "Audrey", "Bob", "Denis", "Jim", "John", "Kim", "Sandra", "Terry", "Tim"), class = "factor"), Grades = c(10, 5, 3, 2, 1, 3, 4, 5, 6, 7, 8, 2, 10, 9, 1, 7, 5, 1, 8, 2, 5, 1, 4, 8, 8, 7, 6, 5, 4, 3)), .Names = c("Dates", "Classes", "Names", "Grades"), row.names = c(NA, -30L), class = "data.frame")

Answer 1

已根据评论中澄清的需求进行了编辑。

###---------- CREATING THE DATA (may be different from what you had in mind)
# Classes and Students: every class/student combination (3 x 7 = 21 rows)
Classes <- c("U.S. History", "English", "NonLinear Optimization")
Students <- c("James", "Jamie", "John", "Jim", "Jane", "Jordan", "Jose")
df.1 <- expand.grid(Classes = Classes, Students = Students, stringsAsFactors = TRUE)
# Generate Dates: the two inputs share no column name, so merge() returns
# their Cartesian product, i.e. one row per (date, class, student)
Dates.seq <- seq(as.Date("2017/2/10"), as.Date("2017/3/27"), "days")
df.2 <- merge(Dates.seq, df.1)
# Generate Grades: one random grade per row, higher grades being drawn with
# higher probability ("smart students"). No seed is set, so values differ per run.
grading <- c(4.0, 3.7, 3.3, 3.0, 2.7, 2.3, 2.0, 1.7)
Grades <- sample(grading, size = nrow(df.2), replace = TRUE, prob = grading / sum(grading))
df <- data.frame(df.2, Grades)
colnames(df) <- c("Dates", "Classes", "Students", "Grades")

# Works assuming your df has the following labeled and formatted columns
# (the Grades values shown are one random draw)
str(df)
#'data.frame':  966 obs. of  4 variables:
#  $ Dates   : Date, format: "2017-02-10" "2017-02-11" "2017-02-12" ...
#  $ Classes : Factor w/ 3 levels "U.S. History",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ Students: Factor w/ 7 levels "James","Jamie",..: 1 1 1 1 1 1 1 1 1 1 ...
#  $ Grades  : num  2.3 3.3 2.3 3.3 2.7 4 4 1.7 2.3 4 ...

# No aggregation, just splitting by classes
df.split1 <- split(df, df[, "Classes"])
# Then splitting each of those data frames by Dates, giving a nested list
# indexed as df.split2[[class]][[date]]
df.split2 <- lapply(df.split1, function(x) split(x, x[, "Dates"]))
# Double lapply because we now have lists within lists: for every class and
# date keep the student(s) with the highest grade(s). order(-Grades) ranks the
# rows from best to worst.
top1 <- lapply(df.split2, function(i) {
  lapply(i, function(j) j[order(-j[, "Grades"])[1], "Students"])
})
top3 <- lapply(df.split2, function(i) {
  lapply(i, function(j) j[order(-j[, "Grades"])[1:3], "Students"])
})

# Easier to read
AllClasses <- levels(df[, "Classes"])
# Sorted so that row k of the matrix below is the k-th date chronologically and
# lines up with the per-date lists above even when df itself is not sorted by date
AllDates <- sort(unique(df[, "Dates"]))

# Initialize a matrix (dates x classes) to keep track of the selected student;
# cells hold the integer factor code of the student, NA until filled
superstar <- matrix(NA_integer_, nrow = length(AllDates), ncol = length(AllClasses),
                    dimnames = list(as.character(AllDates), AllClasses))

# Looping over dates (rows of superstar) and classes (columns of superstar).
# Each cell receives the integer factor code of the selected student:
#   * first date: the top scorer of the class;
#   * later dates: the previous pick if he is still in the class's top 3 on
#     that date, otherwise the top scorer of that date.
# The row labels of superstar are the date strings, which are also the names
# of the per-date lists in top1 / top3, so lookups are done by name.
date_keys <- rownames(superstar)
for (d in seq_along(date_keys)) {
  key <- date_keys[d]
  for (cls in AllClasses) {
    best_now <- top1[[cls]][[key]]
    if (is.null(best_now)) {
      next  # no rows for this class on this date: leave the cell as NA
    }
    best_now <- as.integer(best_now)  # factor -> student code, made explicit

    if (d == 1) {
      # First pick = first Top1
      superstar[d, cls] <- best_now
    } else {
      previous <- superstar[d - 1, cls]
      top3_now <- as.integer(top3[[cls]][[key]])
      # An NA previous pick (no earlier data) never counts as "still in the top 3"
      if (!is.na(previous) && previous %in% top3_now) {
        # still superstar
        superstar[d, cls] <- previous
      } else {
        # new superstar is the highest scorer of the current date
        superstar[d, cls] <- best_now
      }
    }
  }
}
# Translate the student codes back to names: indexing the factor levels by the
# matrix yields a character vector, which is reshaped to the same layout
superstar.char <- as.data.frame(matrix(levels(df[, "Students"])[superstar], ncol = length(AllClasses)))
dimnames(superstar.char) <- dimnames(superstar)
superstar.char # superstar with Students as characters

如果您有任何困难，请告诉我！

Answer 2

任何可以用循环解决的问题，都可以用递归函数（一个调用自身的函数）来解决。由于您要根据 i 改变函数的行为，因此需要将 i 作为参数传递给函数。您还需要让该函数能够判断何时完成，并返回结果集。

R - 试图在这里避免循环

2 个答案: