我想在函数内为每个类别numbers
创建变量name
的bin。但是我在使用函数中作为参数提供的类别名称时遇到了困难。也许data.table方法会更好。
set.seed(10)
b<-(rnorm(10, sd=1,mean=10))
y<-runif(3)
pr<-y/sum(y)
names<-unlist(lapply(mapply(rep, LETTERS[1:3], 1:3), function (x) paste0(x, collapse = "") ) )
x <- sample(names, 10, replace=TRUE, prob=pr)
df<-data.frame(name=x,numbers=b)
df
#working without bin limits per category (not desired)
#and using "numbers" in cut (not desired)
binfunction1 <- function(df, colgroup1, varcount,binsize) {
new<-df %>%
group_by_(colgroup1) %>%
mutate(bin = cut(numbers, breaks <- c(seq(7, 15, by = binsize)), # limits by colgroup not implemented
labels = 1:(length(breaks)-1) ) )
return(new)
}
binfunction1(df,"name","numbers",0.5)
name numbers bin
<fctr> <dbl> <fctr>
1 BB 10.018746 7
2 A 9.815747 6
3 CCC 8.628669 4
4 CCC 9.400832 5
5 BB 10.294545 7
6 CCC 10.389794 7
7 A 8.791924 4
8 A 9.636324 6
9 A 8.373327 3
10 A 9.743522 6
答案 0 :(得分:1)
不是最优雅的解决方案,但结果是你追求的结果? (我不太明白你的问题)
binfunction3 <- function(x, colgroup1, varcount, binsize) {
tmp <- split(x, x[[colgroup1]], drop = TRUE)
tp <- lapply(tmp, function(k) {
breaks <- c(seq(min(k[[varcount]])*0.9, max(k[[varcount]])*1.1, by = binsize))
cbind(k, data.frame(bin = cut(k[[varcount]], breaks, labels = 1:(length(breaks)-1))))
})
tp <- do.call(rbind, tp)
rownames(tp) <- gsub("[[:alpha:]]*\\.", "", rownames(tp))
return(tp[rownames(x),])
}
binfunction3(df,"name","numbers",0.5)
# name numbers bin
# 1 A 10.018746 5
# 2 CCC 9.815747 5
# 3 CCC 8.628669 2
# 4 BB 9.400832 2
# 5 A 10.294545 6
# 6 BB 10.389794 4
# 7 A 8.791924 3
# 8 CCC 9.636324 4
# 9 A 8.373327 2
# 10 A 9.743522 5
答案 1 :(得分:0)
我的答案基于Mikko,但允许更好地控制休息时间和箱子大小的最小和最大限制。
binfunctionnew <- function(x, colgroup, varcount, binexp) {
tmp <- split(x, x[colgroup], drop = TRUE)
tp <- lapply(tmp, function(k) {
bin<-cut(k[,varcount],
breaks=c(seq(min(k[,varcount])*(1-10^(-(binexp+1))),
max(k[,varcount])*(1+10^(-(binexp-2))),
by = 10^(-(binexp))) ) , labels=F)
cbind (k, data.frame(bin = bin))
} )
tp <- do.call(rbind, tp)
return(tp)
}
#example or
binfunctionnew(df,"name","numbers",1) binfunctionnew(df,"name","numbers",0)
# name numbers bin name numbers bin
# A.1 A 10.018746 18 A.1 A 10.018746 3
# A.5 A 10.294545 21 A.5 A 10.294545 3
# A.7 A 8.791924 6 A.7 A 8.791924 2
# A.9 A 8.373327 1 A.9 A 8.373327 1
# A.10 A 9.743522 15 A.10 A 9.743522 3
# BB.4 BB 9.400832 1 BB.4 BB 9.400832 1
# BB.6 BB 10.389794 11 BB.6 BB 10.389794 2
# CCC.2 CCC 9.815747 13 CCC.2 CCC 9.815747 3
# CCC.3 CCC 8.628669 1 CCC.3 CCC 8.628669 1
# CCC.8 CCC 9.636324 11 CCC.8 CCC 9.636324 2