对嵌套的 tibble 进行 group_by 并汇总

时间:2019-05-31 16:33:05

标签: r dplyr tidyr

我有一些数据,并且我根据一些变量的分数计算了分位数。我使用以下代码创建嵌套的 tibble:

代码:

# Build one row per `period`. The rows of each period are stored in the
# `data` list-column, and two derived list-columns are added:
#   m_quintile: score1, ID, date2 plus QM = ntile(score1, 2)
#   r_quintile: score2, ID, date2 plus QR = ntile(score2, 2)
# NOTE: ntile(x, 2) produces two buckets (1 and 2), not five, despite the
# "quintile" column names.
nested_results <- df %>%
  as_tibble() %>%
  # date2 is stored as character ("YYYY-MM-DD") in df; convert it to Date.
  mutate(date2 = as.Date(date2)) %>%
  group_by(period) %>%
  # nest() already names the list-column "data"; the `.key` argument is
  # deprecated in tidyr >= 1.0.0, so it is omitted here.
  nest() %>%
  mutate(
    # Select and bucket in a single pass over each nested table.
    m_quintile = map(
      data,
      ~ .x %>%
        select(score1, ID, date2) %>%
        mutate(QM = ntile(score1, 2))
    ),
    r_quintile = map(
      data,
      ~ .x %>%
        select(score2, ID, date2) %>%
        mutate(QR = ntile(score2, 2))
    )
  )

现在,我想按 QM 进行 group_by,然后取平均值。对于单个列表元素,我可以用以下方法展开来做:

# Take the 4th per-period table out of the m_quintile list-column.
x <- nested_results[["m_quintile"]][[4]]

# Mean of score1 within each QM bucket of that single table.
summarise(group_by(x, QM), ave = mean(score1))

结果如下:

# A tibble: 2 x 2
     QM   ave
  <int> <dbl>
1     1 0.644
2     2 0.956

所以我想执行此操作,但是要对数据中的所有列表执行该操作。

数据:

df <- structure(list(ID = c("38480210", "95810210", "62937750", "02376R10", 
"75884910", "36357610", "31561610", "03076C10", "63707110", "42193310", 
"22822V10", "69370C10", "03349M10", "69365610", "72348410", "31190010", 
"57174810", "25674610", "02503Y10", "02376R10", "92343E10", "63707110", 
"30219G10", "02376R10", "75121210", "61945C10", "11012210", "01741R10", 
"62937750", "93114210", "G491BT10", "82981J10", "05361110", "87612E10", 
"69370C10", "62949110", "74251V10", "20588710", "30161N10", "98310W10", 
"17290810", "44919P50", "78440X10", "56357140", "91704710", "95810210", 
"02079K30", "86764P10", "48666K10", "G6543110", "12503M10", "02313510", 
"74432010", "44107P10", "88731730", "74331510", "24906P10", "01858110", 
"25667710", "60819010", "65249B10", "23355L10", "03349M10", "02376R10", 
"44919P50", "88355610", "35461310", "15189T10", "21683110", "17177930", 
"60871R20", "89417E10", "03349M10", "06738310", "45337C10", "12738710", 
"73763010", "69074210", "H8498910", "69076840", "H2717810", "88579Y10", 
"06405810", "88431510", "82655210", "36955010", "23355L10", "30219G10", 
"89364110", "31421110", "10113710", "72378710", "77669610", "60871R20", 
"12503M10", "83269640", "43258910", "43644010", "87150310", "02376R10"
), date1 = structure(c(14425, 16282, 16555, 14517, 14060, 16009, 
14852, 16311, 17347, 14943, 14456, 17896, 16282, 17256, 16860, 
17500, 13909, 16708, 14547, 14578, 17682, 16009, 17835, 17317, 
14729, 15005, 16982, 16769, 14609, 15399, 17651, 16220, 16402, 
16282, 17225, 14609, 14120, 15705, 17044, 16343, 14757, 16860, 
15338, 16678, 16038, 14152, 17105, 15552, 17105, 17317, 15856, 
16891, 13999, 16220, 17197, 15736, 17651, 17409, 15978, 14609, 
16982, 15217, 16678, 16738, 15644, 16493, 17044, 16101, 15884, 
16678, 15155, 16038, 15308, 16038, 15705, 16402, 17562, 15125, 
15338, 14638, 16070, 16800, 16555, 13969, 16190, 14211, 16070, 
14244, 16070, 16101, 16920, 16038, 14334, 15947, 17743, 17347, 
14790, 15611, 17378, 17225), class = "Date"), X1 = c(0.0386907942593098, 
0.0822307392954826, 0.00774113647639751, 0.456043899059296, -0.110843725502491, 
0.0870561078190804, -0.0045542698353529, 0.0563545450568199, 
0.00979493372142315, 0.112359501421452, 0.19650287926197, -0.0415076464414597, 
0.0489177219569683, 0.129585191607475, 0.0380033850669861, 0.115392759442329, 
0.0483566299080849, -0.125885099172592, -0.170278623700142, 0.120593711733818, 
0.110884040594101, 0.0393035411834717, 0.0206293966621161, 0.138198047876358, 
0.0571495816111565, 0.0612886361777782, 0.0311018992215395, -0.138095214962959, 
-0.0137844579294324, -0.0371577367186546, -0.0949702933430672, 
0.00815372075885534, 0.0567769445478916, 0.0283002480864525, 
0.0251093730330467, 0.0126581704244018, 0.0771583765745163, -0.0120562827214599, 
-0.0794527605175972, 0.00395354209467769, -0.0462215766310692, 
-0.144782453775406, 0.015947800129652, -0.0333899334073067, 0.0300950203090906, 
-0.217901706695557, 0.00726317288354039, 0.0145262870937586, 
-0.0980149358510971, -0.15625, 0.0735411867499352, 0.0744226351380348, 
-0.0324600748717785, 0.028904378414154, 0.00331502850167453, 
0.079360157251358, 0.00059627863811329, -0.0638279989361763, 
0.0461367033421993, 0.158714607357979, -0.0510033145546913, -0.13095235824585, 
-0.0496198311448097, 0.190316736698151, -0.0712639763951302, 
0.0382557362318039, 0.0085659408941865, 0.00949091091752052, 
0.283668667078018, -0.121414542198181, -0.0340835973620415, 0.0514483749866486, 
-0.0790285691618919, 0.0195272620767355, -0.056249987334013, 
0.0512534826993942, 0.0601202398538589, 0.00951375812292099, 
-0.0283822026103735, -0.171889260411263, 0.0880646035075188, 
-0.0379358939826488, 0.0564114898443222, -0.0941470116376877, 
0.0303062964230776, -0.143401876091957, 0.0657545104622841, -0.0439923293888569, 
0.0287503190338612, -0.0663194432854652, 0.16533762216568, -0.131995305418968, 
0.0266022328287363, -0.0189772360026836, -0.0666859075427055, 
-0.0744622349739075, -0.00493999244645238, 0.0300560463219881, 
0.0969911441206932, 0.0499435178935528), score1 = c(0.889709759542928, 
0.903469924523747, 0.936274855415902, 0.840172917716411, 0.938323955852229, 
0.907836986338914, 0.945157831453149, 0.979324203073073, 0.920940353945943, 
0.894110161051458, 0.908847794881057, 0.97475724651739, 0.937118479686145, 
0.980120977458591, 0.623303306607424, 0.758760289975453, 0.834341187444698, 
0.797212924462504, 0.825647909001716, 0.840172917716411, 0.768103754483029, 
0.941304519974329, 0.814586219311369, 0.988945234241073, 0.895889262055934, 
0.953934207350125, 0.707089430822071, 0.951597633344339, 0.961769494932988, 
0.990656146898528, 0.913191248231833, 0.836173815309104, 0.645645826100038, 
0.61526193432501, 0.926878672415338, 0.798406831947735, 0.715894830193448, 
0.609914447070846, 0.982037009964077, 0.958260690099507, 0.911602782396998, 
0.994929081364825, 0.942357992461572, 0.915166869000075, 0.959259622259605, 
0.471048178589481, 0.965151413031517, 0.962644520224149, 0.728328414533729, 
0.844767123989088, 0.880744018935896, 0.960509971835405, 0.901561159073948, 
0.933375301286572, 0.941222955392298, 0.788692675853074, 0.975021500943698, 
0.854282162746391, 0.975017263736014, 0.865895403465366, 0.933743132976269, 
0.00716484468589074, 0.660694331374853, 0.975355558211521, 0.989302607548638, 
0.752631808056627, 0.975284876200517, 0.983936335902952, 0.911492029660943, 
0.839999214552006, 0.93598101241571, 0.991730259396069, 0.961880715763936, 
0.911068572578018, 0.894289026496564, 0.953439004423798, 0.973516036989061, 
0.727264493223711, 0.863272896776261, 0.916593316222779, 0.968803082797289, 
0.983502534473476, 0.767304310790636, 0.98164412008614, 0.790234444674146, 
0.917347388087877, 0.932513138540991, 0.922235640070869, 0.928466316777634, 
0.947699254256095, 0.968973518894286, 0.915476983039877, 0.965341200374593, 
0.964086556353561, 0.941095916152213, 0.993534425279995, 0.973964970619073, 
0.849059697982275, 0.956578344330122, 0.988945234241073), score2 = c(0.760647198307699, 
0.935547201829489, 0.99188287953172, 0.704417398608422, 0.902493763886597, 
0.957521348656098, 0.991560272871509, 0.92735166103028, 0.767863082641417, 
0.902764742861846, 0.899520282689611, 0.924089953437449, 0.968664643612792, 
0.866970855643533, 0.901476962756387, 0.94755176837048, 0.902095296676105, 
0.925442465985343, 0.867683186989228, 0.704417398608422, 0.689251219536623, 
0.895182904019693, 0.925420151447255, 0.973787880226641, 0.971649355774121, 
0.836836284054653, 0.874290288656945, 0.642108765156155, 0.9479805690219, 
0.986001188878182, 0.900173154373532, 0.931572233567178, 0.204373496689057, 
0.748226591771599, 0.911326831765278, 0.745529165807053, 0.966520301934851, 
0.8984684314108, 0.996410303573447, 0.944913839452991, 0.820108952795389, 
0.993871872926776, 0.977233364779766, 0.958314403117101, 0.954658754872084, 
0.408524852242646, 0.926462982377825, 0.901657771925462, 0.244783410018176, 
0.976437829472092, 0.905634213631591, 0.927502557897019, 0.891598180696963, 
0.987697157506651, 0.938150919267463, 0.926776348475605, 0.985851315815003, 
0.982498338035899, 0.985805489731244, 0.908490790573957, 0.679883185186743, 
0.00993418025804333, 0.843573469386974, 0.981867470724874, 0.958967224214118, 
0.983500414398743, 0.574198183132523, 0.965866545328729, 0.966640899395171, 
0.589766758440325, 0.945728847589739, 0.995058520927149, 0.978123949182993, 
0.987534808983012, 0.89453102403482, 0.982809219062493, 0.730292876688027, 
0.962628956020742, 0.8586064303671, 0.930644808280356, 0.983088940653883, 
0.968752883224036, 0.887189900705482, 0.983184857078597, 0.779431248014471, 
0.955492602558785, 0.987272896987165, 0.976334384370231, 0.917912611981472, 
0.894706435230544, 0.999499942620125, 0.948601238374727, 0.995016927685647, 
0.972298506885195, 0.673023784571769, 0.999064451129489, 0.998527621099781, 
0.981617931351593, 0.900129909322293, 0.973787880226641), period = c("July_2008_June_2009", 
"July_2014_June_2015", "July_2014_June_2015", "July_2009_June_2010", 
"July_2007_June_2008", "July_2013_June_2014", "July_2010_June_2011", 
"July_2014_June_2015", "July_2016_June_2017", "July_2010_June_2011", 
"July_2009_June_2010", "July_2018_June_2019", "July_2014_June_2015", 
"July_2016_June_2017", "July_2015_June_2016", "July_2017_June_2018", 
"July_2007_June_2008", "July_2015_June_2016", "July_2009_June_2010", 
"July_2009_June_2010", "July_2017_June_2018", "July_2013_June_2014", 
"July_2018_June_2019", "July_2016_June_2017", "July_2009_June_2010", 
"July_2010_June_2011", "July_2015_June_2016", "July_2015_June_2016", 
"July_2009_June_2010", "July_2011_June_2012", "July_2017_June_2018", 
"July_2013_June_2014", "July_2014_June_2015", "July_2014_June_2015", 
"July_2016_June_2017", "July_2009_June_2010", "July_2008_June_2009", 
"July_2012_June_2013", "July_2016_June_2017", "July_2014_June_2015", 
"July_2009_June_2010", "July_2015_June_2016", "July_2011_June_2012", 
"July_2015_June_2016", "July_2013_June_2014", "July_2008_June_2009", 
"July_2016_June_2017", "July_2012_June_2013", "July_2016_June_2017", 
"July_2016_June_2017", "July_2012_June_2013", "July_2015_June_2016", 
"July_2007_June_2008", "July_2013_June_2014", "July_2016_June_2017", 
"July_2012_June_2013", "July_2017_June_2018", "July_2017_June_2018", 
"July_2013_June_2014", "July_2009_June_2010", "July_2015_June_2016", 
"July_2011_June_2012", "July_2015_June_2016", "July_2015_June_2016", 
"July_2012_June_2013", "July_2014_June_2015", "July_2016_June_2017", 
"July_2013_June_2014", "July_2012_June_2013", "July_2015_June_2016", 
"July_2010_June_2011", "July_2013_June_2014", "July_2011_June_2012", 
"July_2013_June_2014", "July_2012_June_2013", "July_2014_June_2015", 
"July_2017_June_2018", "July_2010_June_2011", "July_2011_June_2012", 
"July_2009_June_2010", "July_2013_June_2014", "July_2015_June_2016", 
"July_2014_June_2015", "July_2007_June_2008", "July_2013_June_2014", 
"July_2008_June_2009", "July_2013_June_2014", "July_2008_June_2009", 
"July_2013_June_2014", "July_2013_June_2014", "July_2015_June_2016", 
"July_2013_June_2014", "July_2008_June_2009", "July_2013_June_2014", 
"July_2018_June_2019", "July_2016_June_2017", "July_2009_June_2010", 
"July_2012_June_2013", "July_2017_June_2018", "July_2016_June_2017"
), date2 = c("2008-02-22", "2014-03-03", "2014-04-01", "2009-03-02", 
"2007-03-06", "2012-11-16", "2010-02-22", "2014-01-28", "2016-03-16", 
"2010-03-18", "2009-03-04", "2018-01-22", "2014-02-27", "2016-02-11", 
"2014-12-10", "2017-03-06", "2007-02-28", "2015-02-17", "2009-03-02", 
"2009-03-02", "2016-08-11", "2013-03-20", "2018-02-01", "2016-02-22", 
"2008-11-24", "2009-11-18", "2015-02-13", "2015-02-23", "2009-04-01", 
"2011-02-17", "2017-02-23", "2013-04-02", "2013-09-10", "2014-02-14", 
"2016-01-19", "2008-11-26", "2008-03-07", "2012-03-23", "2016-02-10", 
"2014-03-03", "2009-02-27", "2015-02-20", "2011-02-25", "2015-02-24", 
"2013-03-01", "2008-02-29", "2016-02-22", "2012-02-24", "2016-02-29", 
"2015-12-21", "2012-02-29", "2015-03-02", "2007-02-28", "2013-03-01", 
"2016-05-05", "2012-02-09", "2017-02-24", "2017-02-03", "2013-02-27", 
"2009-03-02", "2015-02-27", "2010-08-30", "2015-02-27", "2015-02-23", 
"2012-02-24", "2013-10-28", "2016-02-18", "2013-04-02", "2012-04-11", 
"2015-02-25", "2009-12-02", "2013-02-12", "2011-03-01", "2013-04-23", 
"2012-02-28", "2014-02-21", "2017-02-15", "2010-02-17", "2011-02-28", 
"2009-02-24", "2013-03-01", "2015-02-12", "2014-02-21", "2007-03-01", 
"2012-10-29", "2008-02-22", "2013-02-28", "2008-02-14", "2013-02-27", 
"2013-02-28", "2014-12-17", "2013-02-22", "2008-02-28", "2012-11-21", 
"2018-02-27", "2016-02-29", "2009-02-26", "2012-02-21", "2017-02-24", 
"2016-02-29")), row.names = c(NA, -100L), class = "data.frame")

1 个答案:

答案 0 :(得分:2)

假设我们需要对 'm_quintile' 列表列执行此操作:先用 pull 提取该列,再用 map 遍历列表列中的每个元素,按 'QM' 分组,计算 'score1' 的 mean

# For every per-period table in the m_quintile list-column, compute the
# mean of score1 within each QM bucket. Returns a list with one summary
# tibble per element of the list-column.
map(
  pull(nested_results, m_quintile),
  function(tbl) {
    summarise(group_by(tbl, QM), Ave = mean(score1))
  }
)

或者,在用 pull 提取该列后,用 bind_rows 将其合并为带有标识符列(.id)的单个数据集,再按 'grp'、'QM' 分组,得到 'score1' 的 mean

# Stack all per-period tables from the m_quintile list-column into one
# tibble, then compute the mean of score1 for each (grp, QM) pair.
nested_results %>%
  pull(m_quintile) %>%
  # `.id = "grp"` adds a column identifying which list element each row
  # came from. The stray empty argument in `bind_rows(, .id = ...)` is
  # removed.
  bind_rows(.id = "grp") %>%
  group_by(grp, QM) %>%
  summarise(Ave = mean(score1)) %>%
  # summarise() drops only the last grouping level; ungroup so the result
  # is a plain tibble rather than one still grouped by grp.
  ungroup()