我有一个嵌套的 tibble,如下所示:
# A tibble: 2 x 3
SCORE score1_rank score2_rank
<chr> <list> <list>
1 scr_rnk_1 <tibble [54 x 5]> <tibble [54 x 5]>
2 scr_rnk_2 <tibble [46 x 5]> <tibble [46 x 5]>
我想对这 4 个 tibble 中的每一个分别拟合回归模型。我可以像下面这样把数据逐个取出,然后单独运行回归:
# Extract the two nested tibbles held in the score1_rank list-column
sub_data1 <- nested_df[["score1_rank"]][[1]]
sub_data2 <- nested_df[["score1_rank"]][[2]]

# Regression 1: discard rows with a missing response, then fit the model
sub_data1 <- sub_data1[complete.cases(sub_data1[["Y"]]), ]
lm(formula = Y ~ X1 + X2, data = sub_data1)

# Regression 2: same steps for the second tibble
sub_data2 <- sub_data2[complete.cases(sub_data2[["Y"]]), ]
lm(formula = Y ~ X1 + X2, data = sub_data2)
不过,我想尝试对整个嵌套 tibble 执行此操作。
即我想尝试用 map
对每个 tibble 进行回归。
数据:
nested_df <- structure(list(SCORE = c("scr_rnk_1", "scr_rnk_2"), score1_rank = list(
structure(list(time = c("July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2016_June_2017", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2018_June_2019",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2008_June_2009", "July_2008_June_2009", "July_2008_June_2009",
"July_2017_June_2018", "July_2017_June_2018", "July_2017_June_2018",
"July_2009_June_2010", "July_2009_June_2010", "July_2009_June_2010",
"July_2019_June_2020"), score1 = c(0.878385627705134, 0.829149886628575,
0.873633400824437, 0.873191548477804, 0.833360020840671,
0.821514348879447, 0.93893179382238, 0.902566094498171, 0.832521540654393,
0.904546026086165, 0.944312545893212, 0.90721438246816, 0.925563285777056,
0.837735581176652, 0.898314100598163, 0.881156591451732,
0.927432166201199, 0.810462622843289, 0.924966424794594,
0.54982486102469, 0.632637353015548, 0.93598101241571, 0.748712668464033,
0.887355002120062, 0.00606213355201044, 0.66570681669867,
0.809662797719473, 0.80883896141453, 0.410059100270974, 0.45097086832185,
0.855118540355703, 0.73792861592456, 0.582170697766921, 0.910913548399676,
0.909192361557635, 0.61000565934628, 0.541242004262667, 0.847840909074889,
0.838844407944549, 0.638014235742945, 0.948686837455938,
0.569343264654849, 0.942357992461572, 0.956483422999484,
0.716630105733463, 0.757677906984471, 0.840660131450953,
0.944095864840561, 0.74291963665858, 0.944596570938035, 0.916460742106468,
0.90890022256817, 0.895889262055934, 0.886515265060623),
Y = c(-0.0392143242061138, 0.00517332553863525, 0.0475661605596542,
-0.0140374358743429, -0.0235463473945856, 0.0460794232785702,
0.0647838711738586, -0.0257589742541313, 0.0539961569011211,
-0.170428335666656, 0.0925306528806686, 0.11557175219059,
0.0496749319136143, -0.11405622959137, 0.0666666403412819,
-0.0189777128398418, -0.00572755141183734, 0.0277173686772585,
-0.0241545476019383, 0.0328245237469673, 0.223529428243637,
0.0253662765026092, 0.0394621938467026, 0.0815821811556816,
0.0597507022321224, -0.0132956989109516, 0.0609685145318508,
0.0393742695450783, -0.00168346334248781, -0.000859459512867033,
0.0345749147236347, NA, 0.0327170714735985, 0.144188165664673,
0.0415891073644161, 0.0028026478830725, -0.0840985849499702,
0.00914959330111742, 0.0197730101644993, -0.0929021015763283,
0.0382972247898579, NA, 0.015947800129652, 0.0136986169964075,
-0.139593943953514, 0.113736107945442, 0.0216289088129997,
-0.209788918495178, 0.00545153254643083, 0.126438871026039,
0.0538020096719265, 0.0774460881948471, 0.0651820451021194,
NA), X1 = c(0.14, 5.52, 0.14, -3.29, 1.82, -1.17, 1.93,
2.7, -1.44, -1.74, 5.91, -2.05, 2.72, 1.86, 2.28, 1.39,
3.49, 4.47, -1.52, 4.47, 9.85, -0.68, -2.52, 5.46, -0.43,
-0.43, 2.3, 0.56, -8.19, 0.87, 2.53, NA, 7.32, 6.92,
6.92, -6.18, -3.91, -6.32, 0.45, -8.88, -0.44, NA, -0.44,
-1.11, -8.54, 7.28, -6.53, 1.93, 1.93, 1.93, 6.24, 8.62,
6.24, NA), X2 = c(-0.5, 2.22, -0.5, 2.93, -0.17, 1.42,
-0.53, 0.78, 1.67, -0.05, -0.39, -1.08, 0.46, 0.37, -0.62,
0.17, 0.18, -0.69, -0.42, -0.69, 1.48, 1.32, 0.21, 0.17,
-0.76, -0.76, 1.19, -0.66, -2.51, -0.38, -2.56, NA, -2.36,
1.33, 1.33, 1.16, -0.25, -2.16, 0.04, -0.53, -0.46, NA,
-0.46, 0.23, 2.23, -1.27, -0.57, -0.61, -0.61, -0.61,
-0.19, -1.37, -0.19, NA)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -54L)), structure(list(time = c("July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2008_June_2009", "July_2008_June_2009",
"July_2008_June_2009", "July_2017_June_2018", "July_2017_June_2018",
"July_2017_June_2018", "July_2009_June_2010", "July_2009_June_2010"
), score1 = c(0.910630243821458, 0.887211746784698, 0.920092482844549,
0.94450683954903, 0.886972163304589, 0.991052738161695, 0.981619567238222,
0.977490375052585, 0.961036277360393, 0.985523653404714,
0.948091565971217, 0.959812930740014, 0.936269500157121,
0.948541666157695, 0.939675946745415, 0.995146212267317,
0.944554298851532, 0.982930629437269, 0.963858517802992,
0.92872841572452, 0.968099127001545, 0.945198156814004, 0.892947157198215,
0.906930889247629, 0.957790348580216, 0.928122479697648,
0.953267485671018, 0.963714595673124, 0.976914001156382,
0.973623547932495, 0.962870831719229, 0.978333062077069,
0.958765402277667, 0.959032891808224, 0.972965648015492,
0.982760065777063, 0.957170836537733, 0.961880715763936,
0.975885654717621, 0.924673632533321, 0.925318007280836,
0.987246011368269, 0.98249943727474, 0.980272445641619, 0.978206000922261,
0.929807352926533), Y = c(0.0737265646457672, 0.0278251487761736,
0.201131358742714, 0.125700861215591, 0.0777644738554955,
-0.0130416098982096, -0.0990565568208694, 0.0333333089947701,
-0.031569954007864, 0.0422280319035053, -0.0111790159717202,
-0.278726726770401, -0.139534845948219, -0.0800638571381569,
0.23757965862751, -0.0746169164776802, 0.0465963147580624,
0.0337920561432838, -0.0111621227115393, -0.0133928591385484,
0.0778210312128067, -0.0821536555886269, 0.00643268134444952,
NA, 0.152694001793861, 0.0409262739121914, 0.0360006913542747,
-0.0233012177050114, -0.211209982633591, -0.11425743252039,
-0.169167995452881, 0.0282719731330872, 0.161968618631363,
-0.0525752492249012, 0.0127659253776074, -0.0466842725872993,
-0.115001328289509, -0.00946897640824318, 0.114568591117859,
0.2675521671772, -0.0196253582835197, 0.123595483601093,
NA, 0.12380950897932, -0.0350765138864517, -0.16666667163372
), X1 = c(2.01, 0.14, 5.06, 5.52, 1.82, 2.7, -3.09, 1.65,
0.5, 1.93, -1.17, 2.25, 1.86, -1.88, 9.85, -3.9, 3.94, 7.6,
4.47, -2.52, 1.32, 2.78, 0.09, NA, 0.88, 2.53, 2.53, 7.32,
1.13, -6.18, -6.32, -0.3, 7.32, -6.18, 4.93, -1.11, -9.2,
-7.52, 11.42, 9.96, -0.26, 1.93, NA, 0.49, 8.62, 0.49), X2 = c(2.18,
-0.5, -1.03, 2.22, -0.17, 0.78, -2.72, -2.19, 1.22, -0.53,
1.42, -0.51, 0.37, -1.55, 1.48, -0.22, -0.02, 2.08, -0.69,
0.21, -1.2, -0.32, 0.35, NA, -0.57, -2.56, -2.56, -2.36,
-3.09, 1.16, -2.16, 1.75, -2.36, 1.16, -0.77, 0.23, -1.33,
-0.63, 1.64, 1.63, 2.85, -0.61, NA, 1.88, -1.37, 3.81)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -46L))), score2_rank = list(
structure(list(time = c("July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2016_June_2017", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2018_June_2019",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2008_June_2009", "July_2008_June_2009", "July_2008_June_2009",
"July_2017_June_2018", "July_2017_June_2018", "July_2017_June_2018",
"July_2009_June_2010", "July_2009_June_2010", "July_2009_June_2010",
"July_2019_June_2020"), score2 = c(0.573384803196917, 0.95560973004494,
0.936151601862601, 0.940067094946625, 0.790149367637373,
0.885023225824309, 0.956490411723667, 0.918534374861312,
0.9660240615445, 0.961407533200788, 0.794743982673356, 0.926614681101157,
0.924390324452674, 0.838697174839086, 0.548480558835933,
0.928419789574611, 0.942229561212187, 0.808215644539813,
0.89946853678008, 0.931010276978734, 0.780385177969094, 0.945728847589739,
0.958939314931932, 0.101395325662518, 0.0547541695358364,
0.757995973046388, 0.815555744982054, 0.947726570770333,
0.589921893700343, 0.924114006154793, 0.164071857964122,
0.946752193254218, 0.801515206601873, 0.709037475517904,
0.730962189352849, 0.872901083488831, 0.958819700206169,
0.951829945538551, 0.924000702901887, 0.963439907199707,
0.94482417669742, 0.817381450384857, 0.977233364779766, 0.881676744287434,
0.820839678297149, 0.449214983785051, 0.536396658733052,
0.756705578897905, 0.904306523171427, 0.947974271863387,
0.947487349720247, 0.95821125132286, 0.890792036806817, 0.983129670844182
), Y = c(-0.0392143242061138, 0.0475661605596542, 0.0278251487761736,
-0.0235463473945856, 0.0777644738554955, 0.0333333089947701,
0.0460794232785702, 0.0647838711738586, -0.0257589742541313,
-0.170428335666656, 0.0925306528806686, 0.11557175219059,
-0.278726726770401, -0.139534845948219, -0.11405622959137,
0.0666666403412819, -0.00572755141183734, 0.0277173686772585,
0.23757965862751, -0.0241545476019383, 0.0465963147580624,
0.0253662765026092, 0.0394621938467026, 0.00643268134444952,
0.0597507022321224, -0.0132956989109516, 0.0609685145318508,
0.0393742695450783, -0.00168346334248781, 0.0345749147236347,
NA, 0.0360006913542747, 0.0327170714735985, -0.0233012177050114,
0.0028026478830725, -0.0840985849499702, 0.161968618631363,
0.00914959330111742, 0.0197730101644993, -0.0466842725872993,
-0.0929021015763283, 0.0382972247898579, 0.015947800129652,
0.0136986169964075, -0.139593943953514, 0.113736107945442,
0.0216289088129997, -0.209788918495178, 0.00545153254643083,
0.12380950897932, 0.0538020096719265, 0.0774460881948471,
-0.16666667163372, NA), X1 = c(0.14, 0.14, 0.14, 1.82, 1.82,
1.65, -1.17, 1.93, 2.7, -1.74, 5.91, -2.05, 2.25, 1.86, 1.86,
2.28, 3.49, 4.47, 9.85, -1.52, 3.94, -0.68, -2.52, 0.09,
-0.43, -0.43, 2.3, 0.56, -8.19, 2.53, NA, 2.53, 7.32, 7.32,
-6.18, -3.91, 7.32, -6.32, 0.45, -1.11, -8.88, -0.44, -0.44,
-1.11, -8.54, 7.28, -6.53, 1.93, 1.93, 0.49, 6.24, 8.62,
0.49, NA), X2 = c(-0.5, -0.5, -0.5, -0.17, -0.17, -2.19,
1.42, -0.53, 0.78, -0.05, -0.39, -1.08, -0.51, 0.37, 0.37,
-0.62, 0.18, -0.69, 1.48, -0.42, -0.02, 1.32, 0.21, 0.35,
-0.76, -0.76, 1.19, -0.66, -2.51, -2.56, NA, -2.56, -2.36,
-2.36, 1.16, -0.25, -2.36, -2.16, 0.04, 0.23, -0.53, -0.46,
-0.46, 0.23, 2.23, -1.27, -0.57, -0.61, -0.61, 1.88, -0.19,
-1.37, 3.81, NA)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -54L)), structure(list(time = c("July_2013_June_2014",
"July_2013_June_2014", "July_2013_June_2014", "July_2013_June_2014",
"July_2013_June_2014", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2014_June_2015", "July_2014_June_2015",
"July_2014_June_2015", "July_2016_June_2017", "July_2016_June_2017",
"July_2016_June_2017", "July_2010_June_2011", "July_2010_June_2011",
"July_2010_June_2011", "July_2010_June_2011", "July_2010_June_2011",
"July_2012_June_2013", "July_2012_June_2013", "July_2012_June_2013",
"July_2012_June_2013", "July_2012_June_2013", "July_2018_June_2019",
"July_2018_June_2019", "July_2018_June_2019", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2015_June_2016", "July_2015_June_2016", "July_2015_June_2016",
"July_2011_June_2012", "July_2011_June_2012", "July_2011_June_2012",
"July_2011_June_2012", "July_2008_June_2009", "July_2008_June_2009",
"July_2008_June_2009", "July_2017_June_2018", "July_2017_June_2018",
"July_2017_June_2018", "July_2009_June_2010", "July_2009_June_2010"
), score2 = c(0.977777238266838, 0.994161535248162, 0.973746623206586,
0.959737686390477, 0.960771840809366, 0.973573416279972,
0.971473417619078, 0.994362749200424, 0.998832204612857,
0.969953961861552, 0.974595202023975, 0.990460167618893,
0.977938934839813, 0.933720130788891, 0.997555980989323,
0.983534940461115, 0.961638641355128, 0.98302503175898, 0.955924205281728,
0.960588460795172, 0.980272014323638, 0.99319344527155, 0.990396166187007,
0.96928405964874, 0.958824291095735, 0.94735915935544, 0.956799713877734,
0.974313477760366, 0.959422857050319, 0.970981339110875,
0.986720965210939, 0.988119219123952, 0.987757971968369,
0.998331238333002, 0.985606980938901, 0.996309951852897,
0.978123949182993, 0.980322946112709, 0.870995840583191,
0.99620925825849, 0.952471805464684, 0.967521340577839, 0.997358168481063,
0.954089152398106, 0.99961257213601, 0.971649355774121),
Y = c(0.00517332553863525, 0.0737265646457672, 0.201131358742714,
-0.0140374358743429, 0.125700861215591, -0.0130416098982096,
-0.0990565568208694, 0.0539961569011211, -0.031569954007864,
0.0422280319035053, -0.0111790159717202, 0.0496749319136143,
-0.0189777128398418, -0.0800638571381569, -0.0746169164776802,
0.0328245237469673, 0.223529428243637, 0.0337920561432838,
-0.0111621227115393, -0.0133928591385484, 0.0815821811556816,
0.0778210312128067, -0.0821536555886269, NA, -0.000859459512867033,
0.152694001793861, 0.0409262739121914, -0.211209982633591,
0.144188165664673, 0.0415891073644161, -0.11425743252039,
-0.169167995452881, 0.0282719731330872, -0.0525752492249012,
0.0127659253776074, -0.115001328289509, -0.00946897640824318,
NA, 0.114568591117859, 0.2675521671772, -0.0196253582835197,
0.123595483601093, NA, 0.126438871026039, -0.0350765138864517,
0.0651820451021194), X1 = c(5.52, 2.01, 5.06, -3.29,
5.52, 2.7, -3.09, -1.44, 0.5, 1.93, -1.17, 2.72, 1.39,
-1.88, -3.9, 4.47, 9.85, 7.6, 4.47, -2.52, 5.46, 1.32,
2.78, NA, 0.87, 0.88, 2.53, 1.13, 6.92, 6.92, -6.18,
-6.32, -0.3, -6.18, 4.93, -9.2, -7.52, NA, 11.42, 9.96,
-0.26, 1.93, NA, 1.93, 8.62, 6.24), X2 = c(2.22, 2.18,
-1.03, 2.93, 2.22, 0.78, -2.72, 1.67, 1.22, -0.53, 1.42,
0.46, 0.17, -1.55, -0.22, -0.69, 1.48, 2.08, -0.69, 0.21,
0.17, -1.2, -0.32, NA, -0.38, -0.57, -2.56, -3.09, 1.33,
1.33, 1.16, -2.16, 1.75, 1.16, -0.77, -1.33, -0.63, NA,
1.64, 1.63, 2.85, -0.61, NA, -0.61, -1.37, -0.19)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -46L)))), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:1)
我会用两层嵌套的 lapply
来遍历整个嵌套 tibble:
# Fit Y ~ X1 + X2 on every tibble stored in the list-columns of nested_df.
# The result is a named list with one element per list-column (score1_rank,
# score2_rank), each holding one lm fit per row of nested_df.
# nested_df[-1] drops the first column (SCORE) so only list-columns remain.
# Note: the data object is nested_df; a bare `df` would resolve to the
# stats::df function and fail with "closure is not subsettable".
lapply(nested_df[-1], function(x) {
  # x is a single list-column; iterate over the tibbles it contains
  lapply(x, function(y) {
    # Keep only rows with an observed response before fitting
    sub_data1 <- y[!is.na(y$Y), ]
    lm(Y ~ X1 + X2, data = sub_data1)
  })
})
输出(4个回归):
# $score1_rank
# $score1_rank[[1]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# 0.010491 0.008486 -0.002082
#
#
# $score1_rank[[2]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.013118 0.013098 0.008622
#
#
#
# $score2_rank
# $score2_rank[[1]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.003704 0.007486 -0.009675
#
#
# $score2_rank[[2]]
#
# Call:
# lm(formula = Y ~ X1 + X2, data = sub_data1)
#
# Coefficients:
# (Intercept) X1 X2
# -0.002017 0.012093 0.014742
答案 1 :(得分:1)
另一种选择是使用tidyverse
和broom
的整洁模型方法。
library(tidyverse)
library(broom)
# Reshape to one row per (SCORE, list-column) pair, fit Y ~ X1 + X2 on each
# nested tibble, and return the tidied coefficient tables stacked together.
nested_df %>%
  # pivot_longer() supersedes gather(); the nested tibbles land in `data`
  pivot_longer(cols = -SCORE, names_to = "key", values_to = "data") %>%
  # pivot_longer() emits rows in row-major order; sort by key so that all
  # score1_rank fits come before the score2_rank fits
  arrange(key) %>%
  mutate(tidymod = map(data, ~ tidy(lm(Y ~ X1 + X2, data = .x)))) %>%
  # tidyr >= 1.0 keeps other list-columns when unnesting, so drop the raw
  # data explicitly to get only the coefficient table
  select(-data) %>%
  unnest(cols = tidymod)
# A tibble: 12 x 7
SCORE key term estimate std.error statistic p.value
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 scr_rnk_1 score1_rank (Intercept) 0.0105 0.00962 1.09 0.281
2 scr_rnk_1 score1_rank X1 0.00849 0.00212 4.00 0.000219
3 scr_rnk_1 score1_rank X2 -0.00208 0.00808 -0.258 0.798
4 scr_rnk_2 score1_rank (Intercept) -0.0131 0.0155 -0.848 0.402
5 scr_rnk_2 score1_rank X1 0.0131 0.00320 4.10 0.000192
6 scr_rnk_2 score1_rank X2 0.00862 0.00894 0.965 0.340
7 scr_rnk_1 score2_rank (Intercept) -0.00370 0.0125 -0.296 0.769
8 scr_rnk_1 score2_rank X1 0.00749 0.00291 2.57 0.0132
9 scr_rnk_1 score2_rank X2 -0.00968 0.00961 -1.01 0.319
10 scr_rnk_2 score2_rank (Intercept) -0.00202 0.0121 -0.166 0.869
11 scr_rnk_2 score2_rank X1 0.0121 0.00242 4.99 0.0000121
12 scr_rnk_2 score2_rank X2 0.0147 0.00774 1.91 0.0640