对于每个日期,我有若干个班级,每个班级有若干名学生(以姓名标识,同一个班级里的学生始终不变),并且每个班级的每名学生在每个日期都有一个成绩。
下面是我的代码。如果写得不够优化,还请见谅!
# For every date, select one row per class:
#   * on the first date, the student with the best grade;
#   * afterwards, the previously selected student is kept while he or she is
#     still among the class's top 3 of the current date, otherwise the
#     current best student is taken.
# Expects `horizon` (the vector of dates) and `dataaf` (all the data) to exist.
# Column layout used here: 1 = date, 3 = student identifier (presumably the
# name -- confirm), 13 = grade, 14 = class number.
for (i in seq_len(length(horizon) - 1)) { # horizon is the vector of dates
  # dataaf contains all the data; keep only the rows of the considered date i
  uni3 <- dataaf[dataaf[, 1] == as.numeric(horizon[i]), ]
  # One selected row per class; collected in a list and bound once at the end
  picks <- vector("list", 15)
  for (z in 1:15) { # 15 classes
    selecsec <- na.omit(uni3[uni3[, 14] == z, ]) # classes are column 14
    # Best student of class z (grades are column 13). The lookup is done
    # inside the class so that an equal grade in another class cannot be
    # picked by mistake.
    best <- selecsec[which.max(selecsec[, 13]), ]
    if (i == 1) {
      # First date: simply take the best student of the class
      picks[[z]] <- best
    } else {
      lastsec <- na.omit(lastdate[lastdate[, 14] == z, ]) # last results
      # Top 3 of this date; order() keeps tied grades as distinct rows
      top3 <- selecsec[head(order(selecsec[, 13], decreasing = TRUE), 3), ]
      if (any(lastsec[, 3] %in% top3[, 3])) {
        # The previous pick is still in the top 3: keep it. Note that this
        # keeps the row of the previous date, as the original code did.
        picks[[z]] <- lastsec
      } else {
        picks[[z]] <- best
      }
    }
  }
  # selecdate is the data frame containing the selected people for this date
  selecdate <- do.call(rbind, picks)
  lastdate <- selecdate # recording the last results
}
# Sample data as produced by dput(): 3 dates x 2 classes x 5 students each.
# `.Label` and `.Names` are the spellings structure() accepts for factor
# levels and column names.
structure(list(
  Dates = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
                      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
                      3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
                    .Label = c("12/02", "13/02", "14/02"), class = "factor"),
  # Same class membership on every date (5 students in class 1, 5 in class 2)
  Classes = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
              1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
              1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
  Names = structure(c(6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L,
                      6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L,
                      6L, 3L, 9L, 7L, 1L, 8L, 4L, 10L, 5L, 2L),
                    .Label = c("Ashley", "Audrey", "Bob", "Denis", "Jim",
                               "John", "Kim", "Sandra", "Terry", "Tim"),
                    class = "factor"),
  Grades = c(10, 5, 3, 2, 1, 3, 4, 5, 6, 7,
             8, 2, 10, 9, 1, 7, 5, 1, 8, 2,
             5, 1, 4, 8, 8, 7, 6, 5, 4, 3)),
  .Names = c("Dates", "Classes", "Names", "Grades"),
  row.names = c(NA, -30L), class = "data.frame")
答案 0 :(得分:0)
###---------- CREATING THE DATA (may be different from what you had in mind)
# Builds `df`: one row per (date, class, student) combination with a randomly
# drawn grade. Columns: Dates, Classes, Students, Grades.

# Classes and Students
Classes <- c("U.S. History", "English", "NonLinear Optimization")
Students <- c("James", "Jamie", "John", "Jim", "Jane", "Jordan", "Jose")
# Every student attends every class; both columns are created as factors
df.1 <- expand.grid(Classes = Classes, Students = Students,
                    stringsAsFactors = TRUE)

# Generate Dates
Dates.seq <- seq(as.Date("2017/2/10"), as.Date("2017/3/27"), by = "days")
# The two inputs share no column name, so merge() returns their cross product
df.2 <- merge(Dates.seq, df.1)

# Generate Grades
grading <- c(4.0, 3.7, 3.3, 3.0, 2.7, 2.3, 2.0, 1.7)
# Sampling weights are proportional to the grade itself, so high grades are
# drawn more often ("smart students")
Grades <- sample(grading, size = nrow(df.2), replace = TRUE,
                 prob = grading / sum(grading))

df <- data.frame(df.2, Grades)
colnames(df) <- c("Dates", "Classes", "Students", "Grades")
# Works assuming your df has the following labeled and formatted columns
#'data.frame': 966 obs. of 4 variables:
# $ Dates : Date, format: "2017-02-10" "2017-02-11" "2017-02-12" ...
# $ Classes : Factor w/ 3 levels "U.S. History",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Students: Factor w/ 7 levels "James","Jamie",..: 1 1 1 1 1 1 1 1 1 1 ...
# $ Grades : num 2.3 3.3 2.3 3.3 2.7 4 4 1.7 2.3 4 ...

# No aggregation, just splitting by classes
df.split1 <- split(df, df[, "Classes"])
# Then splitting each of those lists by Dates
df.split2 <- lapply(df.split1, function(x) split(x, x[, "Dates"]))

# Double lapply because now we have lists within lists:
# top1[[class]][[date]] is the best student of that class on that date,
# top3[[class]][[date]] the (up to) three best students, best first.
top1 <- lapply(df.split2, function(i) {
  lapply(i, function(j) j[order(-j[, "Grades"])[1], "Students"])
})
# head() instead of [1:3] so that a class with fewer than 3 students does not
# produce NA entries
top3 <- lapply(df.split2, function(i) {
  lapply(i, function(j) j[head(order(-j[, "Grades"]), 3), "Students"])
})

# Easier to read
AllClasses <- levels(df[, "Classes"])
# Sorted so that position k here is the k-th element of the per-date splits
# (split() orders its groups by sorted date, not by order of appearance)
AllDates <- sort(unique(df[, "Dates"]))

# Initialize a matrix to keep track of changes in the Top1 and Top3:
# one row per date, one column per class
superstar <- matrix(NA_integer_,
                    nrow = length(AllDates), ncol = length(AllClasses),
                    dimnames = list(as.character(AllDates), AllClasses))
# Looping over dates (by position) and classes (by name).
# superstar[d, cls] receives the integer factor code of the selected student:
# the top scorer on the first date, afterwards the previous selection as long
# as that student is still in the current top 3, otherwise the new top scorer.
for (d in seq_along(AllDates)) {
  for (cls in AllClasses) {
    if (d == 1) {
      # First NewTop1 = First Top1
      superstar[d, cls] <- as.integer(top1[[cls]][[d]])
    } else {
      # If superstar in date-1 is in the Top3 of date now,
      if (superstar[d - 1, cls] %in% as.integer(top3[[cls]][[d]])) {
        # still superstar
        superstar[d, cls] <- superstar[d - 1, cls]
      } else {
        # new superstar is highest scorer of date now
        superstar[d, cls] <- as.integer(top1[[cls]][[d]])
      }
    }
  }
}

# Convert the stored factor codes back to student names by indexing the
# levels of the Students column; the matrix is filled column by column, so
# rebuilding it with the same number of columns preserves the layout.
superstar.char <- as.data.frame(
  matrix(levels(df[, "Students"])[superstar], ncol = length(AllClasses))
)
dimnames(superstar.char) <- dimnames(superstar)
superstar.char # superstar with Students as characters
答案 1 :(得分:0)