我有两个变量,它们的分布相似,但并不完全相同。
# Two sample integer vectors with similar, but not identical, ranges
var1 <- seq_len(20L)
var2 <- seq.int(5L, 25L)
然后我对 var1 进行分箱(bin),
并创建一个新变量:
# Bin var1 into two equal-width intervals
bin.var1 <- cut(var1, breaks = 2)
现在我想对 var2 使用与 var1 相同的断点(breaks)进行分箱。
有没有办法不用手动指定断点就能做到这一点?
答案 0 :(得分:2)
您可以从 cut.default 的源代码中提取相关部分:
# Reproduce the break points that cut(var1, 2) computes internally, so the
# same breaks can be reused to bin a second variable (var2).
var1 <- 1:20
var2 <- 5:25
# Requested number of intervals. `breaks` is overwritten further down with the
# actual break points, following the variable naming used in cut.default.
breaks <- 2
nb <- breaks + 1  # n intervals need n + 1 break points
# na.rm = TRUE (as cut.default does): without it a single missing value in
# var1 would turn every break point into NA.
rx <- range(var1, na.rm = TRUE)
dx <- diff(rx)
breaks <- seq.int(rx[1L], rx[2L], length.out = nb)
# Push the two outermost breaks outwards by 0.1% of the range so that the
# minimum and maximum of var1 fall inside the (lower, upper] intervals.
breaks[c(1L, nb)] <- c(rx[1L] - dx / 1000, rx[2L] + dx / 1000)
breaks
# [1] 0.981 10.500 20.019
cut(var1, 2)
# [1] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5]
# [7] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (10.5,20] (10.5,20]
# [13] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20]
# [19] (10.5,20] (10.5,20]
# Levels: (0.981,10.5] (10.5,20]
# The hand-computed breaks give exactly the same factor as cut(var1, 2)
identical(cut(var1, 2), cut(var1, breaks))
# [1] TRUE
# Reuse var1's breaks for var2; values of var2 above the top break (21:25)
# fall outside every interval and therefore come back as NA.
cut(var2, breaks)
# [1] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5]
# [7] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20]
# [13] (10.5,20] (10.5,20] (10.5,20] (10.5,20] <NA> <NA>
# [19] <NA> <NA> <NA>
# Levels: (0.981,10.5] (10.5,20]
或者,正如 @Henrik 提到的那样,可以参照 ?cut 帮助页中的最后一个示例,
从因子水平的标签中提取断点:
## Recover the break points by parsing the interval labels of cut(var1, 2)
labs <- levels(cut(var1, 2))
# Text between the opening "(" and the comma is the lower bound
lower <- as.numeric(sub("\\((.+),.*", "\\1", labs))
# Text between the comma and the closing "]" is the upper bound
upper <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs))
br <- cbind(lower = lower, upper = upper)
br
# lower upper
# [1,] 0.981 10.5
# [2,] 10.500 20.0
# NOTE: the bounds come from the printed labels, so they are rounded
# (the top bound is 20 here, not the slightly widened value cut() used).
# Flatten the matrix and drop the duplicated inner bound before reusing it.
cut(var2, unique(c(br)))
# [1] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5] (0.981,10.5]
# [7] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20] (10.5,20]
# [13] (10.5,20] (10.5,20] (10.5,20] (10.5,20] <NA> <NA>
# [19] <NA> <NA> <NA>
# Levels: (0.981,10.5] (10.5,20]