如何用 spread 将数据展开为宽表,并计算两个变量之间的百分比变化?

时间:2019-01-10 17:29:00

标签: r tidyr spread

我正在使用以下数据框:

# Example data: 3 ids x 4 years x 5 categories, one random value per combination.
# The seed is fixed so the random values are reproducible.
set.seed(1)
my.df <- data.frame(
  id      = gl(3, 20, 60, labels = c("a", "b", "c")),
  year    = gl(4, 5, 60, labels = paste0("year", 1:4)),
  factor1 = gl(5, 1, 60, labels = paste0("category", 1:5)),
  value   = runif(60)
)

通过把 year 和 factor1 的值合并成一列(unite),再按每个 id 展开成宽表(spread),我已经比较接近想要的表了。

# Combine year and factor1 into a single key column, then widen the data so
# that each id occupies one row with one column per year/category pair.
my.df %>%
  unite(col = year_factor1, year, factor1) %>%
  spread(key = year_factor1, value = value)

但是,我不知道该如何在每个年份的各个类别列之后,再添加一列对应的百分比变化(即下面所需输出中的 *_pct.change 列)。

所需的输出:

# Mock-up of the desired wide layout. The numbers are random placeholders;
# only the column names and their order matter. year1 has no pct.change
# columns, while every later year/category column is followed by one.
data.frame(
  id = gl(3, 1, labels = c("a", "b", "c")),
  # year1
  year1_category1 = runif(3),
  year1_category2 = runif(3),
  year1_category3 = runif(3),
  year1_category4 = runif(3),
  year1_category5 = runif(3),
  # year2
  year2_category1 = runif(3),
  year2_category1_pct.change = runif(3),
  year2_category2 = runif(3),
  year2_category2_pct.change = runif(3),
  year2_category3 = runif(3),
  year2_category3_pct.change = runif(3),
  year2_category4 = runif(3),
  year2_category4_pct.change = runif(3),
  year2_category5 = runif(3),
  year2_category5_pct.change = runif(3),
  # year3
  year3_category1 = runif(3),
  year3_category1_pct.change = runif(3),
  year3_category2 = runif(3),
  year3_category2_pct.change = runif(3),
  year3_category3 = runif(3),
  year3_category3_pct.change = runif(3),
  year3_category4 = runif(3),
  year3_category4_pct.change = runif(3),
  year3_category5 = runif(3),
  year3_category5_pct.change = runif(3),
  # year4
  year4_category1 = runif(3),
  year4_category1_pct.change = runif(3),
  year4_category2 = runif(3),
  year4_category2_pct.change = runif(3),
  year4_category3 = runif(3),
  year4_category3_pct.change = runif(3),
  year4_category4 = runif(3),
  year4_category4_pct.change = runif(3),
  year4_category5 = runif(3),
  year4_category5_pct.change = runif(3)
)
#>   id year1_category1 year1_category2 year1_category3 year1_category4
#> 1  a      0.05403628       0.6017416       0.2748790       0.8498938
#> 2  b      0.40174450       0.1308556       0.3466478       0.5111918
#> 3  c      0.78077210       0.8037151       0.3978118       0.8246243
#>   year1_category5 year2_category1 year2_category1_pct.change
#> 1       0.9848534       0.2471940                  0.9831659
#> 2       0.5840961       0.3779873                  0.6495037
#> 3       0.1205900       0.8666536                  0.5573478
#>   year2_category2 year2_category2_pct.change year2_category3
#> 1       0.4415218                  0.8226931       0.3774495
#> 2       0.1097148                  0.3729142       0.5352926
#> 3       0.6563287                  0.5661968       0.6088608
#>   year2_category3_pct.change year2_category4 year2_category4_pct.change
#> 1                  0.2558094       0.5297330                 0.08667323
#> 2                  0.3121636       0.6468038                 0.18475789
#> 3                  0.1568249       0.3442455                 0.80310710
#>   year2_category5 year2_category5_pct.change year3_category1
#> 1       0.3304696                  0.5679554       0.6907089
#> 2       0.1408567                  0.3947066       0.3544264
#> 3       0.4415897                  0.1584157       0.5595521
#>   year3_category1_pct.change year3_category2 year3_category2_pct.change
#> 1                  0.4037807       0.5081890                0.002351471
#> 2                  0.2053016       0.6088907                0.902341202
#> 3                  0.7316155       0.3540799                0.293481401
#>   year3_category3 year3_category3_pct.change year3_category4
#> 1      0.30519403                  0.5563667       0.9992379
#> 2      0.07312194                  0.1358821       0.4616496
#> 3      0.66276744                  0.3022609       0.3671218
#>   year3_category4_pct.change year3_category5 year3_category5_pct.change
#> 1                  0.1286520       0.7287696                  0.3621064
#> 2                  0.6164204       0.0334039                  0.3797226
#> 3                  0.7807685       0.1480333                  0.9079468
#>   year4_category1 year4_category1_pct.change year4_category2
#> 1       0.8600580                  0.3603230       0.3549054
#> 2       0.3055958                  0.8403466       0.3775594
#> 3       0.6731182                  0.3089408       0.6180033
#>   year4_category2_pct.change year4_category3 year4_category3_pct.change
#> 1                  0.8338310       0.7365086                 0.71533124
#> 2                  0.9338141       0.8408221                 0.17404513
#> 3                  0.1028760       0.6926105                 0.09373974
#>   year4_category4 year4_category4_pct.change year4_category5
#> 1     0.009570635                 0.40378796      0.32387997
#> 2     0.273314996                 0.47437386      0.02022038
#> 3     0.789791202                 0.00637341      0.46654020
#>   year4_category5_pct.change
#> 1                 0.03983659
#> 2                 0.35921332
#> 3                 0.35210547

由 reprex 包(v0.2.1)于 2019-01-10 创建

目前我只会在仅含 id 和 year 的简单数据框中计算百分比变化,做法如下:

library(tidyr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Minimal example: one random value per id and year (3 ids x 4 years).
# The seed is fixed so the random values are reproducible.
set.seed(1)
simple <- data.frame(
  id    = gl(3, 4, 12, labels = c("a", "b", "c")),
  year  = gl(4, 1, 12, labels = paste0("year", 1:4)),
  value = runif(12)
)

# Within each id, take the preceding row's value (NA for the first row of a
# group) and express the difference from it as a percentage of that value.
# The second expression refers to the `lag` column created just above it.
simple %>%
  group_by(id) %>%
  mutate(
    lag = lag(value),
    pct.change = (value - lag) / lag * 100
  )
#> # A tibble: 12 x 5
#> # Groups:   id [3]
#>    id    year   value     lag pct.change
#>    <fct> <fct>  <dbl>   <dbl>      <dbl>
#>  1 a     year1 0.266  NA           NA   
#>  2 a     year2 0.372   0.266       40.2 
#>  3 a     year3 0.573   0.372       53.9 
#>  4 a     year4 0.908   0.573       58.5 
#>  5 b     year1 0.202  NA           NA   
#>  6 b     year2 0.898   0.202      345.  
#>  7 b     year3 0.945   0.898        5.15
#>  8 b     year4 0.661   0.945      -30.1 
#>  9 c     year1 0.629  NA           NA   
#> 10 c     year2 0.0618  0.629      -90.2 
#> 11 c     year3 0.206   0.0618     233.  
#> 12 c     year4 0.177   0.206      -14.3

由 reprex 包(v0.2.1)于 2019-01-11 创建

0 个答案:

没有答案