所以,我一直在寻找解决方案。但我仍然没有成功。也许你可以帮助我。
# Minimal example: 13 rows, 9 for school "A" followed by 4 for school "B".
A <- rep("A", times = 9)
B <- rep("B", times = 4)
schoolid <- c(A, B)

# Pupil IDs per test; "" marks a test whose result sits on another row.
dc <- c("a", "b", "c", rep("", 6), "a", "b", "i", "j")
mc <- c(rep("", 3), "a", "b", "c", rep("", 3), "a", "b", "i", "j")
ec <- c(rep("", 6), "a", "b", "c", "a", "b", "i", "j")

# Points per test. Blanks are kept as "", so each points vector is a
# character vector (the same result as mixing numbers and "" inside c()).
dpoints <- c("20", "15", "17", rep("", 6), "14", "13", "13", "12")
mpoints <- c(rep("", 3), "18", "12", "20", rep("", 3), "15", "11", "14", "9")
epoints <- c(rep("", 6), "13", "14", "15", "16", "21", "17", "7")

data <- data.frame(
  schoolid = schoolid,
  dc = dc,
  mc = mc,
  ec = ec,
  dpoints = dpoints,
  mpoints = mpoints,
  epoints = epoints
)
这是我拥有的数据集,其中包含:
# dc ... pupil ID as recorded for the German ("Deutsch") test
# mc ... pupil ID as recorded for the math test
# ec ... pupil ID as recorded for the English test
# dpoints, mpoints, epoints ... points achieved in the respective test
# School A: the three pupils (a, b, c) took part in all tests, but each
# pupil's information is spread over three rows (one row per test).
# School B: everything is all right (one complete row per pupil). Note that
# two of its four pupils (a and b) have the same codes as pupils in school A.
View(data)
###############################################################################################
首先,我想为 pupilsID 设一个变量“code”,它结合了以下三列的信息:
# the three columns dc, mc and ec,
# combined in the following way:
# Hand-built target for the combined ID column: a, b, c repeated once for
# each of the three row blocks of school A, then the four IDs of school B.
x <- rep(c("a", "b", "c"), times = 3)
data1 <- data
data1[["code"]] <- c(x, c("a", "b", "i", "j"))
View(data1)
# Second, for the upper part of the data frame I would like to merge(?) the
# rows, like this. `result` is my desired dataset: one row per pupil.
schoolid1 <- rep(c("A", "B"), times = c(3, 4))
code <- c(letters[1:3], "a", "b", "i", "j")
dpoints1 <- c(20, 15, 17, 14, 13, 13, 12)
mpoints1 <- c(18, 12, 20, 15, 11, 14, 9)
epoints1 <- c(13, 14, 15, 16, 21, 17, 7)
result <- data.frame(
  schoolid1 = schoolid1,
  code = code,
  dpoints1 = dpoints1,
  mpoints1 = mpoints1,
  epoints1 = epoints1
)
# Open the desired dataset in the data viewer for inspection.
View(result)
############################################################################################
所以1.)我尝试了以下(虽然不起作用)
# Problem 1: build the combined pupil ID column `code` from dc, mc and ec,
# one row at a time (i is the index of the current row of `data`).
#
# Corrections relative to the first attempt:
#   * `a == b == c` is a syntax error in R (comparison operators do not
#     chain), so the rule is expressed without chained comparisons.
#   * Missing IDs in `data` are empty strings, not NA, so is.na() never
#     detected them; nzchar() is used instead (NA still counts as missing).
#   * `data$code <- value` replaced the whole column on every iteration;
#     only element i is written now.
#   * `malsehen` is not defined anywhere; the target is `data`.
#   * for() advances i by itself, so `i <- 1` and `i <- i + 1` are dropped,
#     and the hard-coded 1:13 becomes seq_len(nrow(data)).
id_cols <- c("dc", "mc", "ec")
data$code <- NA_character_
for (i in seq_len(nrow(data))) {
  # as.character() makes this work for both factor and character columns.
  ids <- vapply(data[i, id_cols], as.character, character(1))
  ids <- unique(unname(ids[!is.na(ids) & nzchar(ids)]))
  # Exactly one distinct non-blank ID: either the filled-in columns all agree
  # or only one column is filled in. Rows with no ID at all, or with
  # conflicting IDs, keep NA so they can be inspected afterwards.
  if (length(ids) == 1L) {
    data$code[i] <- ids
  }
}
至于第二个问题,我不知道该如何下手。
答案 0 :(得分:1)
尝试此操作:
# Split by school; only school A needs its rows collapsed.
sdata <- split(data, data$schoolid)
# The six ID/points columns (2:7) are paired with 0:2, recycled, so dc and
# dpoints keep rows 1-3, mc and mpoints rows 4-6, ec and epoints rows 7-9.
# The three-row pieces are then bound next to the first three school IDs.
Apart <- cbind(
  sdata[["A"]][1:3, "schoolid", drop = FALSE],
  as.data.frame(
    mapply(
      function(x, y) x[(seq_along(x) - 1) %/% 3 == y],
      sdata[["A"]][, 2:7],
      0:2,
      SIMPLIFY = FALSE
    )
  )
)
# School B already has one complete row per pupil and is appended unchanged.
rbind(Apart, sdata[["B"]])
# schoolid dc mc ec dpoints mpoints epoints
#1 A a a a 20 18 13
#2 A b b b 15 12 14
#3 A c c c 17 20 15
#10 B a a a 14 15 16
#11 B b b b 13 11 21
#12 B i i i 13 14 17
#13 B j j j 12 9 7
答案 1 :(得分:1)
以下是使用 data.table 的解决方案:
library(data.table) # 1.9.5+
ints <- paste0(c("d", "m", "e"), "points")
# ID columns; needed for Problem 1 and dropped again at the end
# (`chars` was used but never defined before).
chars <- c("dc", "mc", "ec")

# Convert the points columns to integer by reference. Going through
# as.character() works whether the columns are factors (data.frame() in
# R < 4.0) or plain character vectors (R >= 4.0); blanks ("") become NA.
setDT(data)
data[, (ints) := lapply(.SD, function(x) as.integer(as.character(x))),
     .SDcols = ints]

# Problem 1
# "" sorts before every non-empty string, so the parallel maximum of the
# three ID columns is the one ID that is filled in. Comparing the strings
# directly avoids relying on dc, mc and ec sharing identical factor levels.
data[, code := pmax(as.character(dc), as.character(mc), as.character(ec),
                    na.rm = TRUE)]

# Problem 2
# Within each pupil (schoolid + code) spread the single non-missing score of
# each test over all of that pupil's rows.
data[, (ints) := lapply(.SD, max, na.rm = TRUE),
     by = .(schoolid, code), .SDcols = ints]

# Remove excess information
# Since data.table 1.9.8 unique() compares all columns by default, which
# would keep every row of school A (dc/mc/ec still differ), so deduplicate
# on the key explicitly before dropping the ID columns.
setkey(data, schoolid, code)
data <- unique(data, by = key(data))[, (chars) := NULL]
> data
schoolid dpoints mpoints epoints code
1: A 20 18 13 a
2: A 15 12 14 b
3: A 17 20 15 c
4: B 14 15 16 a
5: B 13 11 21 b
6: B 13 14 17 i
7: B 12 9 7 j
注意:基于因子水平下标来定义 code 的做法,要求 dc、mc、ec 三列的因子水平完全一致。
请先确认 identical(levels(data$dc), levels(data$mc)) 和 identical(levels(data$mc), levels(data$ec)) 都返回 TRUE,
因为 code 的定义依赖于此。