R中的相关分位数

时间:2019-10-24 21:29:42

标签: r quantile

我正在尝试按A的十分位数对ID进行分箱,并在A的每个十分位组内,再按B的四分位数对这些值进行分箱。因此,B的四分位数将取决于A的十分位数。以下是我的尝试:

# Load dependencies with library() so a missing package fails immediately
# (require() would only return FALSE and let the script fail later).
library(dplyr)
library(OneR)

# Toy data: 100 IDs with two independently drawn uniform(50, 200) columns.
foo <- data.frame(
  ID = 1:100,
  A = runif(100, 50, 200),
  B = runif(100, 50, 200),
  stringsAsFactors = FALSE
)

# Bin A into 10 labelled bins, then bin B into 4 labelled bins inside each
# A bin. No `method` is passed, so bin() uses its default binning method
# (NOTE(review): OneR's default is equal-width "length", not equal-frequency
# "content" -- confirm against ?OneR::bin if true quantiles are wanted).
foo1 <- foo %>%
  mutate(Aquantile = bin(A, nbins = 10, labels = 1:10)) %>%
  group_by(Aquantile) %>%
  mutate(Bquantile = bin(B, nbins = 4, labels = 1:4))

# There is no ungroup() above, so foo1 is still grouped by Aquantile here and
# this "check" column is computed per group, exactly like Bquantile.
foo1 <- foo1 %>%
  mutate(checkB = bin(B, nbins = 4, labels = 1:4))

但是,从检查变量checkB可以看出,Bquantile中的结果与Aquantile无关。请帮忙。

1 个答案:

答案 0 :(得分:0)

mutate()不会取消分组。因此,在第二步(计算checkB时)数据仍然处于分组状态,您会得到与Bquantile相同的结果。如果不提供特定标签,就可以看出分组与未分组结果之间的差异:

# Two-level binning with OneR::bin(method = "content"):
#   1. bin A into 10 labelled bins and store the result as character;
#   2. group by that A bin and bin B into 4 labelled bins within each group,
#      again stored as character.
# There is no ungroup() at the end, so foo1 stays grouped by Aquantile.
foo1 <- foo %>%
  mutate(
    Aquantile = as.character(
      bin(A, nbins = 10, method = "content", labels = seq_len(10))
    )
  ) %>%
  group_by(Aquantile) %>%
  mutate(
    Bquantile = as.character(
      bin(B, nbins = 4, method = "content", labels = seq_len(4))
    )
  )

# Confirm that foo1 still carries its grouping after the mutate() calls.
is_grouped_df(foo1)
# [1] TRUE

# Bin B into 4 bins labelled 1..4 twice: once while foo1 is still grouped
# (bins are computed per group) and once after ungroup() (bins are computed
# over all rows). The result is printed, not assigned.
foo1 %>%
  mutate(
    checkB_grouped = bin(B, nbins = 4, labels = seq_len(4))
  ) %>%
  ungroup() %>%
  mutate(
    checkB_not_grouped = bin(B, nbins = 4, labels = seq_len(4))
  )

# A tibble: 100 x 7
      ID     A     B Aquantile Bquantile   checkB_grouped checkB_not_grouped
   <int> <dbl> <dbl> <fct>     <chr>       <fct>          <fct>             
 1     1 147.   78.9 7         (64.6,97.6] 1              1                 
 2     2  78.6 176.  2         (147,179]   4              4                 
 3     3 161.   96.9 8         (96.8,120]  1              2                 
 4     4 197.  149.  10        (126,161]   3              3                 
 5     5 164.  163.  8         (143,166]   3              4                 
 6     6 179.   65.5 9         (51.4,88.4] 1              1                 
 7     7  70.7 123.  2         (115,147]   3              2                 
 8     8  77.0  87.8 2         (82.6,115]  2              2                 
 9     9  91.9 100.  3         (86.6,108]  2              2                 
10    10  81.2 149.  3         (128,149]   4              3     

# Same grouped vs. ungrouped comparison, but without custom labels, so each
# bin is shown by its interval and the per-group cut points become visible.
# The result is printed, not assigned.
foo1 %>%
  mutate(
    checkB_grouped = bin(B, nbins = 4)
  ) %>%
  ungroup() %>%
  mutate(
    checkB_not_grouped = bin(B, nbins = 4)
  )

# A tibble: 100 x 7
      ID     A     B Aquantile Bquantile   checkB_grouped checkB_not_grouped
   <int> <dbl> <dbl> <fct>     <chr>       <chr>          <fct>             
 1     1 147.   78.9 7         (64.6,97.6] (64.6,97.6]    (50.4,87.6]       
 2     2  78.6 176.  2         (147,179]   (147,179]      (162,199]         
 3     3 161.   96.9 8         (96.8,120]  (96.8,120]     (87.6,125]        
 4     4 197.  149.  10        (126,161]   (126,161]      (125,162]         
 5     5 164.  163.  8         (143,166]   (143,166]      (162,199]         
 6     6 179.   65.5 9         (51.4,88.4] (51.4,88.4]    (50.4,87.6]       
 7     7  70.7 123.  2         (115,147]   (115,147]      (87.6,125]        
 8     8  77.0  87.8 2         (82.6,115]  (82.6,115]     (87.6,125]        
 9     9  91.9 100.  3         (86.6,108]  (86.6,108]     (87.6,125]        
10    10  81.2 149.  3         (128,149]   (128,149]      (125,162]