我从某个目录下的一批csv文件读入数据,得到一个数据结构:其中每个表按行列出各县,按列给出就业,住户等指标的年度时间序列。虽然我可以按dat
对象的长度循环,把每个表读入一个数据框后依次处理,但我更想使用lapply
函数族,以避免循环,并使代码更简单,更容易维护。
假设我想对结构中所有表都有的某一列执行操作(例如用grep
把所有列重命名为名称中嵌入的年份部分,或者把county_id.i8
变量转换为具有适当因子水平的factor
)。我不知道如何正确地索引到结构的第二层和第三层,并对这些列应用lapply
调用。例如,我可以通过下面的方式取到第一个表的第一列:
> head(dat[1][[1]][1])
county_id.i8
1 1
2 7
3 17
4 21
5 23
6 24
但这只能靠写入具体的索引来实现。换句话说,怎样才能让类似下面的代码正常工作?(这里把字母指定为因子水平,只是用lapply处理结构中各表共有列的一个一般性示例。)
lapply(dat[][[]][1],factor, labels=letters[1:9]) #indices ommitted here; looking for general notation for access to the first column in all tables in structure
能否指点一下:如何正确地逐层向下索引多层嵌套的结构,以及最终如何对结构中各个表的列调用lapply函数?(数据如下。)
> dat<-lapply(fileList,read.csv,header=T,sep = "\t")
> dput(dat)
list(structure(list(county_id.i8 = c(1L, 7L, 17L, 21L, 23L, 24L,
28L, 35L, 38L, 39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L), county_employment_pda_2010.f8 = c(313325,
102645, 0, 19302, 0, 0, 9340, 0, 483207, 0, 110998, 444452, 0,
22426, 74123, 0, 0), county_employment_pda_2011.f8 = c(313216,
102576, 0, 19281, 0, 0, 9338, 0, 483043, 0, 110974, 445445, 0,
22417, 74017, 0, 0), county_employment_pda_2012.f8 = c(313238,
102504, 0, 19252, 0, 0, 9333, 0, 482906, 0, 110947, 446132, 0,
22416, 73971, 0, 0), county_employment_pda_2013.f8 = c(313137,
102418, 0, 19262, 0, 0, 9352, 0, 482697, 0, 110867, 447037, 0,
22373, 73946, 0, 0), county_employment_pda_2014.f8 = c(313114,
102399, 0, 19255, 0, 0, 9357, 0, 482469, 0, 110784, 447622, 0,
22359, 73874, 0, 0), county_employment_pda_2015.f8 = c(312961,
102281, 0, 19249, 0, 0, 9357, 0, 482181, 0, 110733, 448479, 0,
22358, 73877, 0, 0), county_employment_pda_2016.f8 = c(312943,
102273, 0, 19239, 0, 0, 9329, 0, 481983, 0, 110719, 449118, 0,
22367, 73848, 0, 0), county_employment_pda_2017.f8 = c(312839,
102216, 0, 19231, 0, 0, 9329, 0, 481889, 0, 110653, 450126, 0,
22360, 73789, 0, 0), county_employment_pda_2018.f8 = c(312760,
102188, 0, 19219, 0, 0, 9331, 0, 481795, 0, 110620, 451350, 0,
22341, 73748, 0, 0), county_employment_pda_2019.f8 = c(312723,
102139, 0, 19214, 0, 0, 9316, 0, 481816, 0, 110484, 452171, 0,
22311, 73725, 0, 0), county_employment_pda_2020.f8 = c(312531,
102094, 0, 19208, 0, 0, 9316, 0, 481812, 0, 110444, 453251, 0,
22294, 73681, 0, 0)), .Names = c("county_id.i8", "county_employment_pda_2010.f8",
"county_employment_pda_2011.f8", "county_employment_pda_2012.f8",
"county_employment_pda_2013.f8", "county_employment_pda_2014.f8",
"county_employment_pda_2015.f8", "county_employment_pda_2016.f8",
"county_employment_pda_2017.f8", "county_employment_pda_2018.f8",
"county_employment_pda_2019.f8", "county_employment_pda_2020.f8"
), class = "data.frame", row.names = c(NA, -17L)), structure(list(
county_id.i8 = c(1L, 7L, 17L, 21L, 23L, 24L, 28L, 35L, 38L,
39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L), county_employment_tpp_2010.f8 = c(450548,
164024, 0, 76243, 0, 0, 9280, 0, 578345, 0, 201026, 709110,
0, 39157, 79052, 0, 0), county_employment_tpp_2011.f8 = c(450309,
163934, 0, 76189, 0, 0, 9280, 0, 578065, 0, 200971, 710321,
0, 39154, 78941, 0, 0), county_employment_tpp_2012.f8 = c(450223,
163828, 0, 76125, 0, 0, 9278, 0, 577849, 0, 200922, 710952,
0, 39137, 78894, 0, 0), county_employment_tpp_2013.f8 = c(450078,
163780, 0, 76119, 0, 0, 9297, 0, 577584, 0, 200821, 712099,
0, 39123, 78858, 0, 0), county_employment_tpp_2014.f8 = c(449954,
163635, 0, 76071, 0, 0, 9297, 0, 577275, 0, 200757, 713066,
0, 39093, 78754, 0, 0), county_employment_tpp_2015.f8 = c(449743,
163455, 0, 76039, 0, 0, 9298, 0, 576946, 0, 200697, 713844,
0, 39095, 78671, 0, 0), county_employment_tpp_2016.f8 = c(449702,
163416, 0, 76012, 0, 0, 9270, 0, 576679, 0, 200621, 714573,
0, 39095, 78608, 0, 0), county_employment_tpp_2017.f8 = c(449493,
163366, 0, 75941, 0, 0, 9264, 0, 576523, 0, 200540, 715484,
0, 39093, 78554, 0, 0), county_employment_tpp_2018.f8 = c(449325,
163290, 0, 75815, 0, 0, 9266, 0, 576412, 0, 200353, 716977,
0, 39052, 78510, 0, 0), county_employment_tpp_2019.f8 = c(449077,
163186, 0, 75750, 0, 0, 9251, 0, 576354, 0, 200169, 717829,
0, 39018, 78487, 0, 0), county_employment_tpp_2020.f8 = c(448740,
163053, 0, 75704, 0, 0, 9250, 0, 576269, 0, 200122, 718708,
0, 39008, 78352, 0, 0)), .Names = c("county_id.i8", "county_employment_tpp_2010.f8",
"county_employment_tpp_2011.f8", "county_employment_tpp_2012.f8",
"county_employment_tpp_2013.f8", "county_employment_tpp_2014.f8",
"county_employment_tpp_2015.f8", "county_employment_tpp_2016.f8",
"county_employment_tpp_2017.f8", "county_employment_tpp_2018.f8",
"county_employment_tpp_2019.f8", "county_employment_tpp_2020.f8"
), class = "data.frame", row.names = c(NA, -17L)), structure(list(
county_id.i8 = c(1L, 7L, 17L, 21L, 23L, 24L, 28L, 35L, 38L,
39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L), county_empres_pda_2010.f8 = c(201923,
43963, 0, 8772, 0, 0, 815, 0, 186251, 0, 78408, 160782, 0,
7238, 38390, 0, 0), county_empres_pda_2011.f8 = c(201783,
43849, 0, 8732, 0, 0, 795, 0, 213456, 0, 82832, 167626, 0,
7284, 37663, 0, 0), county_empres_pda_2012.f8 = c(202059,
44012, 0, 8742, 0, 0, 795, 0, 225552, 0, 87327, 167766, 0,
7498, 37518, 0, 0), county_empres_pda_2013.f8 = c(201918,
43878, 0, 8715, 0, 0, 789, 0, 232941, 0, 93303, 170896, 0,
7502, 38012, 0, 0), county_empres_pda_2014.f8 = c(209007,
43640, 0, 8648, 0, 0, 787, 0, 235599, 0, 96654, 174762, 0,
7530, 37910, 0, 0), county_empres_pda_2015.f8 = c(212050,
43789, 0, 8572, 0, 0, 776, 0, 234853, 0, 100111, 179057,
0, 7551, 37825, 0, 0), county_empres_pda_2016.f8 = c(214927,
43883, 0, 8531, 0, 0, 764, 0, 239730, 0, 102522, 182816,
0, 7518, 37677, 0, 0), county_empres_pda_2017.f8 = c(218551,
44331, 0, 8474, 0, 0, 764, 0, 240854, 0, 105426, 186818,
0, 7531, 37545, 0, 0), county_empres_pda_2018.f8 = c(220972,
45006, 0, 8432, 0, 0, 789, 0, 241628, 0, 107229, 190735,
0, 7546, 37596, 0, 0), county_empres_pda_2019.f8 = c(223044,
45761, 0, 8379, 0, 0, 818, 0, 244283, 0, 108506, 194185,
0, 7521, 37502, 0, 0), county_empres_pda_2020.f8 = c(224509,
46506, 0, 8394, 0, 0, 821, 0, 247482, 0, 109911, 197017,
0, 7504, 37591, 0, 0)), .Names = c("county_id.i8", "county_empres_pda_2010.f8",
"county_empres_pda_2011.f8", "county_empres_pda_2012.f8", "county_empres_pda_2013.f8",
"county_empres_pda_2014.f8", "county_empres_pda_2015.f8", "county_empres_pda_2016.f8",
"county_empres_pda_2017.f8", "county_empres_pda_2018.f8", "county_empres_pda_2019.f8",
"county_empres_pda_2020.f8"), class = "data.frame", row.names = c(NA,
-17L)), structure(list(county_id.i8 = c(1L, 7L, 17L, 21L, 23L,
24L, 28L, 35L, 38L, 39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L),
county_empres_tpp_2010.f8 = c(443752, 155338, 0, 65907, 0,
0, 2828, 0, 456214, 0, 206571, 617212, 0, 37273, 48617, 0,
0), county_empres_tpp_2011.f8 = c(445080, 154940, 0, 65404,
0, 0, 2763, 0, 487189, 0, 212888, 630774, 0, 37297, 47577,
0, 0), county_empres_tpp_2012.f8 = c(445075, 155693, 0, 65455,
0, 0, 2774, 0, 499931, 0, 219766, 628413, 0, 37542, 47263,
0, 0), county_empres_tpp_2013.f8 = c(444322, 155348, 0, 65132,
0, 0, 2741, 0, 507989, 0, 228214, 632356, 0, 37431, 47767,
0, 0), county_empres_tpp_2014.f8 = c(451863, 154962, 0, 64733,
0, 0, 2723, 0, 509617, 0, 234769, 637378, 0, 37330, 47588,
0, 0), county_empres_tpp_2015.f8 = c(454943, 155713, 0, 64439,
0, 0, 2703, 0, 506948, 0, 241141, 643996, 0, 37412, 47541,
0, 0), county_empres_tpp_2016.f8 = c(458014, 156027, 0, 64177,
0, 0, 2673, 0, 512542, 0, 245412, 649009, 0, 37347, 47498,
0, 0), county_empres_tpp_2017.f8 = c(462516, 156941, 0, 63885,
0, 0, 2662, 0, 512233, 0, 249690, 654784, 0, 37323, 47328,
0, 0), county_empres_tpp_2018.f8 = c(465893, 158289, 0, 63755,
0, 0, 2713, 0, 511619, 0, 252055, 660675, 0, 37380, 47365,
0, 0), county_empres_tpp_2019.f8 = c(468709, 159607, 0, 63518,
0, 0, 2776, 0, 513150, 0, 253851, 666054, 0, 37360, 47326,
0, 0), county_empres_tpp_2020.f8 = c(471780, 160499, 0, 63788,
0, 0, 2781, 0, 515528, 0, 255491, 669768, 0, 37373, 47399,
0, 0)), .Names = c("county_id.i8", "county_empres_tpp_2010.f8",
"county_empres_tpp_2011.f8", "county_empres_tpp_2012.f8", "county_empres_tpp_2013.f8",
"county_empres_tpp_2014.f8", "county_empres_tpp_2015.f8", "county_empres_tpp_2016.f8",
"county_empres_tpp_2017.f8", "county_empres_tpp_2018.f8", "county_empres_tpp_2019.f8",
"county_empres_tpp_2020.f8"), class = "data.frame", row.names = c(NA,
-17L)), structure(list(county_id.i8 = c(1L, 7L, 17L, 21L, 23L,
24L, 28L, 35L, 38L, 39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L),
county_households_pda_2010.f8 = c(170187, 37225, 0, 7006,
0, 0, 651, 0, 156702, 0, 53789, 111880, 0, 6479, 29142, 0,
0), county_households_pda_2011.f8 = c(169149, 37004, 0, 6943,
0, 0, 638, 0, 174237, 0, 56758, 115577, 0, 6497, 28608, 0,
0), county_households_pda_2012.f8 = c(169278, 37095, 0, 6955,
0, 0, 637, 0, 183566, 0, 60653, 115752, 0, 6634, 28529, 0,
0), county_households_pda_2013.f8 = c(169023, 36924, 0, 6933,
0, 0, 633, 0, 189987, 0, 65709, 117615, 0, 6626, 29128, 0,
0), county_households_pda_2014.f8 = c(174100, 36699, 0, 6889,
0, 0, 629, 0, 192437, 0, 68543, 120561, 0, 6643, 29129, 0,
0), county_households_pda_2015.f8 = c(176860, 36749, 0, 6839,
0, 0, 624, 0, 191746, 0, 71450, 123785, 0, 6646, 29103, 0,
0), county_households_pda_2016.f8 = c(179434, 36761, 0, 6812,
0, 0, 618, 0, 196075, 0, 73490, 126676, 0, 6616, 28999, 0,
0), county_households_pda_2017.f8 = c(182536, 37124, 0, 6772,
0, 0, 619, 0, 197431, 0, 75885, 129800, 0, 6618, 28961, 0,
0), county_households_pda_2018.f8 = c(184556, 37722, 0, 6736,
0, 0, 638, 0, 198440, 0, 77483, 132850, 0, 6620, 29037, 0,
0), county_households_pda_2019.f8 = c(186021, 38369, 0, 6698,
0, 0, 663, 0, 201268, 0, 78655, 135419, 0, 6591, 29009, 0,
0), county_households_pda_2020.f8 = c(187210, 38907, 0, 6717,
0, 0, 665, 0, 204334, 0, 79840, 137468, 0, 6569, 29120, 0,
0)), .Names = c("county_id.i8", "county_households_pda_2010.f8",
"county_households_pda_2011.f8", "county_households_pda_2012.f8",
"county_households_pda_2013.f8", "county_households_pda_2014.f8",
"county_households_pda_2015.f8", "county_households_pda_2016.f8",
"county_households_pda_2017.f8", "county_households_pda_2018.f8",
"county_households_pda_2019.f8", "county_households_pda_2020.f8"
), class = "data.frame", row.names = c(NA, -17L)), structure(list(
county_id.i8 = c(1L, 7L, 17L, 21L, 23L, 24L, 28L, 35L, 38L,
39L, 41L, 43L, 44L, 48L, 49L, 50L, 57L), county_households_tpp_2010.f8 = c(355208,
123536, 0, 50467, 0, 0, 2424, 0, 345393, 0, 138895, 406129,
0, 31138, 37070, 0, 0), county_households_tpp_2011.f8 = c(354126,
122641, 0, 49973, 0, 0, 2362, 0, 365214, 0, 143132, 413115,
0, 30966, 36340, 0, 0), county_households_tpp_2012.f8 = c(354044,
123118, 0, 50018, 0, 0, 2363, 0, 375399, 0, 149277, 411874,
0, 31089, 36140, 0, 0), county_households_tpp_2013.f8 = c(353169,
122709, 0, 49794, 0, 0, 2345, 0, 382912, 0, 156550, 414253,
0, 30988, 36782, 0, 0), county_households_tpp_2014.f8 = c(358483,
122366, 0, 49502, 0, 0, 2326, 0, 385046, 0, 162182, 418427,
0, 30902, 36727, 0, 0), county_households_tpp_2015.f8 = c(361320,
122856, 0, 49265, 0, 0, 2309, 0, 383106, 0, 167712, 423715,
0, 30946, 36751, 0, 0), county_households_tpp_2016.f8 = c(364010,
123010, 0, 49051, 0, 0, 2286, 0, 388265, 0, 171458, 427874,
0, 30863, 36738, 0, 0), county_households_tpp_2017.f8 = c(367824,
123698, 0, 48857, 0, 0, 2281, 0, 388946, 0, 175097, 432592,
0, 30811, 36694, 0, 0), county_households_tpp_2018.f8 = c(370694,
124838, 0, 48781, 0, 0, 2329, 0, 389217, 0, 177329, 437403,
0, 30821, 36766, 0, 0), county_households_tpp_2019.f8 = c(372758,
125835, 0, 48614, 0, 0, 2382, 0, 391713, 0, 179043, 441576,
0, 30800, 36802, 0, 0), county_households_tpp_2020.f8 = c(375254,
126557, 0, 48859, 0, 0, 2386, 0, 394477, 0, 180661, 444561,
0, 30801, 36884, 0, 0)), .Names = c("county_id.i8", "county_households_tpp_2010.f8",
"county_households_tpp_2011.f8", "county_households_tpp_2012.f8",
"county_households_tpp_2013.f8", "county_households_tpp_2014.f8",
"county_households_tpp_2015.f8", "county_households_tpp_2016.f8",
"county_households_tpp_2017.f8", "county_households_tpp_2018.f8",
"county_households_tpp_2019.f8", "county_households_tpp_2020.f8"
), class = "data.frame", row.names = c(NA, -17L)))
答案 0 :(得分:1)
# I read your data into my_list
library(plyr)
# llply() from plyr behaves like base lapply() here: both apply a function to
# each list element and keep the list's names. llply() additionally offers
# progress bars and parallel execution; see ?llply for more information.
new_list <- llply(my_list, function(foo) {
  # foo holds the contents of one list item, i.e. one data frame, so its
  # columns can be accessed by name (foo$something) rather than by index.
  id_values <- foo[[1]]
  measures <- foo[-1]

  # Year embedded in each measure column name, e.g.
  # "county_empres_pda_2010.f8" -> "2010". A pattern is used instead of fixed
  # character positions because the prefix length differs between tables
  # ("county_employment_" vs "county_empres_").
  years <- sub("^.*_([0-9]{4})\\..*$", "\\1", names(measures))

  # Reshape wide -> long with base R only (melt() is not provided by plyr or
  # base R): one row per id/year pair, columns stacked in their original order.
  # NOTE(review): "country_id" looks like a typo for "county_id"; the name is
  # kept so code that already uses the result keeps working.
  data.frame(
    country_id = rep(id_values, times = length(measures)),
    year = rep(years, each = nrow(foo)),
    value = unlist(measures, use.names = FALSE),
    stringsAsFactors = FALSE
  )
})