我正在使用以下数据框:
# Reproducible example data: 60 rows, one value per id / year / category.
set.seed(1)
my.df <- data.frame(
  # 3 ids, each filling a run of 20 consecutive rows.
  id = gl(3, 20, length = 60, labels = c("a", "b", "c")),
  # 4 years in runs of 5 rows, recycled to 60 rows.
  year = gl(4, 5, length = 60, labels = paste0("year", 1:4)),
  # 5 categories alternating row by row, recycled to 60 rows.
  factor1 = gl(5, 1, length = 60, labels = paste0("category", 1:5)),
  value = runif(60)
)
通过把 year 和 factor1 合并成一列,再按 id 将其展开成宽格式,我已经接近想要的表了:
# Combine year and factor1 into a single key column, then spread that key so
# each year/category combination becomes its own column holding `value`.
my.df %>%
  unite(col = year_factor1, year, factor1) %>%
  spread(key = year_factor1, value = value)
但是,我不知道如何在每个"年份_类别"列之后再添加一列,用来表示该类别相对于上一年的百分比变化。
所需的输出(其中的数值只是随机占位,仅用于展示列的结构):
# Mock-up of the desired wide layout. Every numeric column is filled with
# runif() placeholders; only the column names and their order matter here.
# year1 has no pct.change columns; from year2 on, each category column is
# followed by its *_pct.change column.
data.frame(
  id = gl(3, 1, labels = c("a", "b", "c")),
  year1_category1 = runif(3),
  year1_category2 = runif(3),
  year1_category3 = runif(3),
  year1_category4 = runif(3),
  year1_category5 = runif(3),
  year2_category1 = runif(3),
  year2_category1_pct.change = runif(3),
  year2_category2 = runif(3),
  year2_category2_pct.change = runif(3),
  year2_category3 = runif(3),
  year2_category3_pct.change = runif(3),
  year2_category4 = runif(3),
  year2_category4_pct.change = runif(3),
  year2_category5 = runif(3),
  year2_category5_pct.change = runif(3),
  year3_category1 = runif(3),
  year3_category1_pct.change = runif(3),
  year3_category2 = runif(3),
  year3_category2_pct.change = runif(3),
  year3_category3 = runif(3),
  year3_category3_pct.change = runif(3),
  year3_category4 = runif(3),
  year3_category4_pct.change = runif(3),
  year3_category5 = runif(3),
  year3_category5_pct.change = runif(3),
  year4_category1 = runif(3),
  year4_category1_pct.change = runif(3),
  year4_category2 = runif(3),
  year4_category2_pct.change = runif(3),
  year4_category3 = runif(3),
  year4_category3_pct.change = runif(3),
  year4_category4 = runif(3),
  year4_category4_pct.change = runif(3),
  year4_category5 = runif(3),
  year4_category5_pct.change = runif(3)
)
#> id year1_category1 year1_category2 year1_category3 year1_category4
#> 1 a 0.05403628 0.6017416 0.2748790 0.8498938
#> 2 b 0.40174450 0.1308556 0.3466478 0.5111918
#> 3 c 0.78077210 0.8037151 0.3978118 0.8246243
#> year1_category5 year2_category1 year2_category1_pct.change
#> 1 0.9848534 0.2471940 0.9831659
#> 2 0.5840961 0.3779873 0.6495037
#> 3 0.1205900 0.8666536 0.5573478
#> year2_category2 year2_category2_pct.change year2_category3
#> 1 0.4415218 0.8226931 0.3774495
#> 2 0.1097148 0.3729142 0.5352926
#> 3 0.6563287 0.5661968 0.6088608
#> year2_category3_pct.change year2_category4 year2_category4_pct.change
#> 1 0.2558094 0.5297330 0.08667323
#> 2 0.3121636 0.6468038 0.18475789
#> 3 0.1568249 0.3442455 0.80310710
#> year2_category5 year2_category5_pct.change year3_category1
#> 1 0.3304696 0.5679554 0.6907089
#> 2 0.1408567 0.3947066 0.3544264
#> 3 0.4415897 0.1584157 0.5595521
#> year3_category1_pct.change year3_category2 year3_category2_pct.change
#> 1 0.4037807 0.5081890 0.002351471
#> 2 0.2053016 0.6088907 0.902341202
#> 3 0.7316155 0.3540799 0.293481401
#> year3_category3 year3_category3_pct.change year3_category4
#> 1 0.30519403 0.5563667 0.9992379
#> 2 0.07312194 0.1358821 0.4616496
#> 3 0.66276744 0.3022609 0.3671218
#> year3_category4_pct.change year3_category5 year3_category5_pct.change
#> 1 0.1286520 0.7287696 0.3621064
#> 2 0.6164204 0.0334039 0.3797226
#> 3 0.7807685 0.1480333 0.9079468
#> year4_category1 year4_category1_pct.change year4_category2
#> 1 0.8600580 0.3603230 0.3549054
#> 2 0.3055958 0.8403466 0.3775594
#> 3 0.6731182 0.3089408 0.6180033
#> year4_category2_pct.change year4_category3 year4_category3_pct.change
#> 1 0.8338310 0.7365086 0.71533124
#> 2 0.9338141 0.8408221 0.17404513
#> 3 0.1028760 0.6926105 0.09373974
#> year4_category4 year4_category4_pct.change year4_category5
#> 1 0.009570635 0.40378796 0.32387997
#> 2 0.273314996 0.47437386 0.02022038
#> 3 0.789791202 0.00637341 0.46654020
#> year4_category5_pct.change
#> 1 0.03983659
#> 2 0.35921332
#> 3 0.35210547
由reprex package(v0.2.1)于2019-01-10创建
目前我只会在只含 id 和 year 的简单数据框中计算百分比变化,做法如下:
library(tidyr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Smaller example: 3 ids x 4 years, one random value per row (12 rows).
set.seed(1)
simple <- data.frame(
  # Each id fills a run of 4 consecutive rows.
  id = gl(3, 4, length = 12, labels = c("a", "b", "c")),
  # Years alternate row by row, so each id gets year1..year4 in order.
  year = gl(4, 1, length = 12, labels = paste0("year", 1:4)),
  value = runif(12)
)
# Within each id, take the previous row's value (NA on the first row of the
# group) and express the change from it as a percentage of that previous value.
simple %>%
  group_by(id) %>%
  mutate(
    lag = lag(value),
    # `lag` here is the column created on the line above.
    pct.change = (value - lag) / lag * 100
  )
#> # A tibble: 12 x 5
#> # Groups: id [3]
#> id year value lag pct.change
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 a year1 0.266 NA NA
#> 2 a year2 0.372 0.266 40.2
#> 3 a year3 0.573 0.372 53.9
#> 4 a year4 0.908 0.573 58.5
#> 5 b year1 0.202 NA NA
#> 6 b year2 0.898 0.202 345.
#> 7 b year3 0.945 0.898 5.15
#> 8 b year4 0.661 0.945 -30.1
#> 9 c year1 0.629 NA NA
#> 10 c year2 0.0618 0.629 -90.2
#> 11 c year3 0.206 0.0618 233.
#> 12 c year4 0.177 0.206 -14.3
由reprex package(v0.2.1)于2019-01-11创建