如何根据条件对 tibble 中的值进行运算

时间:2020-09-22 17:17:18

标签: r tibble tidy

希望我能把问题解释清楚。我有以下示例:

    # Counts of men, one value per age from 27 to 42 (16 values).
    men <- c(
      204887, 218530, 161706, 275769,
      159381, 184331, 166283, 171636,
      196118, 189572, 157465, 171137,
      120363, 301591, 126385, 157317
    )
    # Counts of women for the same ages, in the same order.
    women <- c(
      205113, 225793, 152857, 320207,
      139366, 181088, 157010, 169101,
      206268, 188374, 151218, 168920,
      113733, 331912, 110329, 149641
    )
    # Ages 27, 28, ..., 42 as a double vector.
    age <- seq(from = 27, to = 42, by = 1)

我用这些数据创建了一个 tibble:

# Combine the three vectors into one tibble with columns M, W and A.
base1 <- tibble(
  M = men,
  W = women,
  A = age
)


   # A tibble: 16 x 3
        M      W     A
    <dbl>  <dbl> <dbl>
 1 204887 205113    27
 2 218530 225793    28
 3 161706 152857    29
 4 275769 320207    30.....

我希望能用一种简单的方式完成下面的运算:

IR= (men_value at age 30)*5 / (men_val at age 28 + men_val at age 29 + ... + men_val at age 32)

我想知道哪些是最简单的方法,而不需要进行大量的循环或条件操作,希望您能为我提供帮助,我也很想使用tibble来完成此操作,因为我将使用数据库,在此先感谢! :)

2 个答案:

答案 0 :(得分:2)

解决方案可能如下所示。

library(dplyr)
library(magrittr)

# Example data: counts of men (M) and women (W) for each age (A) from 27 to 42.
base1 <- structure(list(M = c(204887, 218530, 161706, 275769, 159381, 
184331, 166283, 171636, 196118, 189572, 157465, 171137, 120363, 
301591, 126385, 157317), W = c(205113, 225793, 152857, 320207, 
139366, 181088, 157010, 169101, 206268, 188374, 151218, 168920, 
113733, 331912, 110329, 149641), A = c(27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42)), row.names = c(NA, -16L
), class = c("tbl_df", "tbl", "data.frame"))

# Numerator: five times the men count at age 30.
# The age column of base1 is named `A`. Filtering on `age` would only work if
# a global vector called `age` of matching length happened to exist, and would
# fail with "object 'age' not found" otherwise.
num <- base1 %>%
  filter(A == 30) %>%
  pull(M) %>%
  multiply_by(5)

# Denominator: total men count over the five ages 28 to 32.
den <- base1 %>%
  filter(A %in% 28:32) %>%
  pull(M) %>%
  sum()

# Ratio IR for men at age 30.
num / den

# [1] 1.379235

答案 1 :(得分:1)

另一种解决思路是对所有行都执行这一运算,如下所示。它得到的结果与上述答案相同。

library(tidyverse)

# Counts of men, one value per age from 27 to 42.
men <- c(
  204887, 218530, 161706, 275769,
  159381, 184331, 166283, 171636,
  196118, 189572, 157465, 171137,
  120363, 301591, 126385, 157317
)
# Counts of women for the same ages, in the same order.
women <- c(
  205113, 225793, 152857, 320207,
  139366, 181088, 157010, 169101,
  206268, 188374, 151218, 168920,
  113733, 331912, 110329, 149641
)
# Ages 27, 28, ..., 42 as a double vector.
age <- seq(from = 27, to = 42, by = 1)

base1 <- tibble(M = men, W = women, A = age)

# For every row, relate five times the row's own count to the total count over
# the ages A - 2 .. A + 2 that are present in base1 (fewer than five ages at
# the edges of the table). Grouping by (M, A) puts each row of this data in
# its own group, so `A` is a single value inside each group; the result stays
# grouped by M and A.
test <- base1 %>%
  group_by(M, A) %>%
  mutate(
    M_IR = (M * 5) / sum(base1$M[base1$A %in% seq(A - 2, A + 2, by = 1)]),
    W_IR = (W * 5) / sum(base1$W[base1$A %in% seq(A - 2, A + 2, by = 1)]),
    # Numerators and denominators are kept as separate columns for inspection.
    M_IR_Denom = sum(base1$M[base1$A %in% seq(A - 2, A + 2, by = 1)]),
    W_IR_Denom = sum(base1$W[base1$A %in% seq(A - 2, A + 2, by = 1)]),
    M_IR_Num = M * 5,
    W_IR_Num = W * 5
  )

输出如下:

> test
# A tibble: 16 x 9
# Groups:   M, A [16]
      M      W     A  M_IR  W_IR M_IR_Denom W_IR_Denom M_IR_Num W_IR_Num
   <dbl>  <dbl> <dbl> <dbl> <dbl>      <dbl>      <dbl>    <dbl>    <dbl>
 1 204887 205113    27 1.75  1.76      585123     583763  1024435  1025565
 2 218530 225793    28 1.27  1.25      860892     903970  1092650  1128965
 3 161706 152857    29 0.792 0.733    1020273    1043336   808530   764285
 4 275769 320207    30 1.38  1.57      999717    1019311  1378845  1601035
 5 159381 139366    31 0.841 0.733     947470     950528   796905   696830
 6 184331 181088    32 0.963 0.937     957400     966772   921655   905440
 7 166283 157010    33 0.947 0.921     877749     852833   831415   785050
 8 171636 169101    34 0.945 0.938     907940     901841   858180   845505
 9 196118 206268    35 1.11  1.18      881074     871971   980590  1031340
10 189572 188374    36 1.07  1.07      885928     883881   947860   941870
11 157465 151218    37 0.943 0.913     834655     828513   787325   756090
12 171137 168920    38 0.910 0.885     940128     954157   855685   844600
13 120363 113733    39 0.686 0.649     876941     876112   601815   568665
14 301591 331912    40 1.72  1.90      876793     874535  1507955  1659560
15 126385 110329    41 0.896 0.782     705656     705615   631925   551645
16 157317 149641    42 1.34  1.26      585293     591882   786585   748205