随机将教师分配给施加限制的教室

时间:2015-06-30 12:10:41

标签: r dplyr

这个问题与a question I asked before非常相似。更复杂的是我有N所学校的G级和C级教室。另外,我想将每个T老师分配到一所学校和年级的2个教室。

我可以使用以下代码生成一些虚假数据:

library(randomNames)
set.seed(6232015)
n.schools <-20

gen.names <- function(n, which.names = "both", name.order = "last.first"){
  names <- unique(randomNames(n=n, which.names = which.names, name.order = name.order))
  need <- n - length(names)
  while(need>0){ 
    names <- unique(c(randomNames(n=need, which.names = which.names, name.order = name.order), names))
    need <- n - length(names)
  }
  return(names)
}

#Generates the classrooms data frame

grade <- c(3,4,5)
classroom <- c(LETTERS[1:4])

classroom <- expand.grid(grade=c(3,4,5), 
                         classroom=c(LETTERS[1:4]), 
                         School.ID=paste0(gen.names(n = n.schools, which.names = "last"), ' School'))

#Generates teachers data frame
n.teachers=nrow(classroom)/2
gen.teachers <- function(n.teachers){
  Teacher.ID <- gen.names(n = n.teachers, name.order = "last.first")
  Teacher.exp <- runif(n = n.teachers, min = 1, max = 30)
  Teacher.Other <- sample(c(0,1), replace = T, prob = c(0.5, 0.5), size = n.teachers)
  Teacher.RE <- rnorm(n = n.teachers, mean = 0, sd = 1)
  Teachers <- data.frame(Teacher.ID, Teacher.exp, Teacher.Other, Teacher.RE)
  return(Teachers)
}
Teachers <- gen.teachers(n.teachers = n.teachers)

我想要创建的数据框有240行,包含7个变量。由于我想要施加的限制,使用像我之前的问题的答案中的样本将无效(我认为)。我考虑过使用group_by(),但我认为不会那样做......

谢谢!

2 个答案:

答案 0 :(得分:0)

这很有效,但我正在努力学习更优雅的解决方案

library(randomNames)
library(dplyr)
set.seed(6232015)
n.schools <-20
n.grades <- 3
n.classrooms <- 4
total.classrooms <- n.classrooms*n.grades*n.schools

gen.names <- function(n, which.names = "both", name.order = "last.first"){
  names <- unique(randomNames(n=n, which.names = which.names, name.order = name.order))
  need <- n - length(names)
  while(need>0){ 
    names <- unique(c(randomNames(n=need, which.names = which.names, name.order = name.order), names))
    need <- n - length(names)
  }
  return(names)
}

#Generates teachers data frame
n.teachers=total.classrooms/2
gen.teachers <- function(n.teachers){
  Teacher.ID <- gen.names(n = n.teachers, name.order = "last.first")
  Teacher.exp <- runif(n = n.teachers, min = 1, max = 30)
  Teacher.Other <- sample(c(0,1), replace = T, prob = c(0.5, 0.5), size = n.teachers)
  Teacher.RE <- rnorm(n = n.teachers, mean = 0, sd = 1)
  Teachers <- data.frame(Teacher.ID, Teacher.exp, Teacher.Other, Teacher.RE) %>% mutate(Teacher.ID=as.character(Teacher.ID))
  return(Teachers)
}
Teachers <- gen.teachers(n.teachers = n.teachers)
str(Teachers$Teacher.ID)

#Make a ‘schoolGrade’ object and then reshape

schoolGrade <- expand.grid(grade = c(3,4,5), 
                           School.ID = paste0(gen.names(n = n.schools, which.names = "last"), 
                                              ' School'))

# assign each of T teachers to 2 classrooms within a single school and grade
cuttoff1<-n.teachers/2
schoolGrade$A <- Teachers$Teacher.ID[1:cuttoff1]
schoolGrade$B <- Teachers$Teacher.ID[1:cuttoff1]
schoolGrade$C <- Teachers$Teacher.ID[(cuttoff1+1):n.teachers]
schoolGrade$D <- Teachers$Teacher.ID[(cuttoff1+1):n.teachers]

library(tidyr)
schoolGrade <- gather(schoolGrade, Classroom, Teacher.ID, A:D) %>% full_join(Teachers, by="Teacher.ID")

主要问题是如果我想将n.classrooms从4增加到20.在这种情况下,不是有4条线从A到D,我会有20,加上额外的截止。这很复杂......

答案 1 :(得分:0)

这个答案让我可以轻松地将n.classrooms设置为任何值,例如20。

问题是这段代码非常缓慢。建议改进它是非常受欢迎的!

library(dplyr)
library(randomNames)
library(geosphere)
set.seed(7142015)
# Define Parameters
n.Schools <- 20
first.grade<-3
last.grade<-5
n.Grades <-last.grade-first.grade+1
n.Classrooms <- 20 # THIS IS WHAT I WANTED TO BE ABLE TO CHANGE
n.Teachers <- (n.Schools*n.Grades*n.Classrooms)/2 #Two classrooms per teacher

# Define Random names function:
gen.names <- function(n, which.names = "both", name.order = "last.first"){
  names <- unique(randomNames(n=n, which.names = which.names, name.order = name.order))
  need <- n - length(names)
  while(need>0){ 
    names <- unique(c(randomNames(n=need, which.names = which.names, name.order = name.order), names))
    need <- n - length(names)
  }
  return(names)
}

# Generate n.Schools names
gen.schools <- function(n.schools) {
  School.ID <-
    paste0(gen.names(n = n.schools, which.names = "last"), ' School')  
  School.long <- rnorm(n = n.schools, mean = 21.7672, sd = 0.025)
  School.lat <- rnorm(n = n.schools, mean = 58.8471, sd = 0.025)
  School.RE <- rnorm(n = n.schools, mean = 0, sd = 1)
  Schools <-
    data.frame(School.ID, School.lat, School.long, School.RE) %>% 
    mutate(School.ID = as.character(School.ID)) %>% 
    rowwise() %>%  mutate (School.distance = distHaversine(
      p1 = c(School.long, School.lat),
      p2 = c(21.7672, 58.8471), r = 3961
    ))
  return(Schools)
}

Schools <- gen.schools(n.schools = n.Schools)  

# Generate Grades
Grades <- c(first.grade:last.grade)

# Generate n.Classrooms

Classrooms <- LETTERS[1:n.Classrooms]

# Group schools and grades

SchGr <- outer(paste0(Schools$School.ID, '-'), paste0(Grades, '-'), FUN="paste")
#head(SchGr)

# Group SchGr and Classrooms

SchGrClss <- outer(SchGr, paste0(Classrooms, '-'), FUN="paste")
#head(SchGrClss)

# These are the combination of  School-Grades-Classroom
SchGrClssTmp <- as.matrix(SchGrClss, ncol=1, nrow=length(SchGrClss) )
SchGrClssEnd <- as.data.frame(SchGrClssTmp)

# Assign n.Teachers (2 classroom in a given school-grade)
Allpairs <- as.data.frame(t(combn(SchGrClssTmp, 2)))
AllpairsTmp <- paste(Allpairs$V1, Allpairs$V2, sep=" ")

library(stringr)
xm <- do.call(rbind, str_split(string = AllpairsTmp, pattern = "-"))
separoPairs <- as.data.frame((xm), stringsAsFactors = FALSE)
separoPairs <- separoPairs %>% select(-V7)  %>%  #Drops empty column
  mutate(V1=as.character(V1), V4=as.character(V4), V2=as.numeric(V2), V5=as.numeric(V5)) %>% mutate(V4 = trimws(V4, which = "both"))

#Only the rows with V1=V4 and V2=V5 are valid
validPairs <- separoPairs %>% filter(V1==V4 & V2==V5) %>% select(V1, V2, V3, V6)

# Generate n.Teachers

gen.teachers <- function(n.teachers){
  Teacher.ID <- gen.names(n = n.teachers, name.order = "last.first")
  Teacher.exp <- runif(n = n.teachers, min = 1, max = 30)
  Teacher.Other <- sample(c(0,1), replace = T, prob = c(0.5, 0.5), size = n.teachers)
  Teacher.RE <- rnorm(n = n.teachers, mean = 0, sd = 1)
  Teachers <- data.frame(Teacher.ID, Teacher.exp, Teacher.Other, Teacher.RE)
  return(Teachers)
}
Teachers <- gen.teachers(n.teachers = n.Teachers) %>% 
  mutate(Teacher.ID = as.character(Teacher.ID))

# Randomly assign n.Teachers teachers to the "ValidPairs"
TmpAssignments <- validPairs[sample(1:nrow(validPairs), n.Teachers), ]
Assignments <- cbind.data.frame(Teachers$Teacher.ID, TmpAssignments) 
names(Assignments) <- c("Teacher.ID", "School.ID", "Grade", "Class_1", "Class_2")

# Tidy Data
library(tidyr)
TeacherClassroom <- Assignments %>% 
  gather(x, Classroom, Class_1,Class_2) %>% 
  select(-x) %>% 
  mutate(Teacher.ID = as.character(Teacher.ID))

# Merge
DF_Classrooms <- TeacherClassroom %>% full_join(Teachers, by="Teacher.ID") %>% full_join(Schools, by="School.ID")
rm(list=setdiff(ls(), "DF_Classrooms")) # Clean the work space!

谢谢!