在 R 中生成这个表,是否有更快、更简洁的方法?

时间:2013-09-11 07:01:32

标签: r tabular

我有一个看起来像这样的表

head(reduced_nunn)
  ln_maddison_pcgdp2000 ln_export_area ln_export_pop abs_latitude longitude
1              6.670766       7.967494     14.399250            8  17.54142
2              6.354370       1.140843      4.451658           16  29.88722
3              7.187657       8.304137     13.308970            6   2.34264
4              6.748760       6.413822     11.724290           12  -1.74292
5              8.377471      -2.302585      3.912023           24  23.82042
6              6.472346       1.171314      8.052058            4  20.48058
  rain_min humid_max low_temp ln_coastline_area island_dum islam legor_fr legor_uk
1        0        78       14         0.2468601          0     0        1        0
2        5        82       17        -4.6051700          0     1        1        0
3       13        78       18         0.0684028          0    13        1        0
4        0        67        9        -4.6051700          0    42        1        0
5        0        74       -4        -4.6051700          0     0        0        1
6        5        72       14        -4.6051700          0     8        1        0
  ln_avg_gold_pop ln_avg_oil_pop ln_avg_all_diamonds_pop atlantic_distance_minimum
1       -6.614335       0.643126               -1.701396                  5.668760
2       -2.633923      -9.210340               -6.907755                 10.626210
3      -13.815510      -3.531555               -6.907755                  5.120652
4       -2.164328      -9.210340               -6.907755                  4.774938
5       -3.705090      -9.210340                2.186849                  5.686335
6       -3.316046      -9.210340               -1.849576                  5.642056
  indian_distance_minimum saharan_distance_minimum red_sea_distance_minimum
1                6.980571                 4.925892                 3.872354
2                2.570375                 3.718742                 2.215324
3                9.233961                 2.834785                 3.901736
4                9.299419                 2.763519                 4.239375
5                5.764575                 5.856533                 4.299600
6                8.772295                 2.840084                 2.293167

我想生成这个表:

enter image description here

我可以用下面的代码生成它,并不太麻烦:

# Summary table with one row per column of reduced_nunn: the column name
# followed by its mean, standard deviation, minimum, maximum and length.
# as.vector() strips the names that sapply() attaches to each result.
summarystats <- data.frame(
  Names = names(reduced_nunn),
  Mean = as.vector(sapply(reduced_nunn, mean)),
  `Standard Deviation` = as.vector(sapply(reduced_nunn, sd)),
  Minimum = as.vector(sapply(reduced_nunn, min)),
  Maximum = as.vector(sapply(reduced_nunn, max)),
  `Number of Observations` = as.vector(sapply(reduced_nunn, length))
)

但是,我想知道是否有办法使用 apply 系列函数或其他方式,避免多次重复书写 as.vector(sapply(reduced_nunn, fun))。我想不出一个好办法。谁能想到什么?

编辑

数据如下:

> dput(reduced_nunn)
structure(list(ln_maddison_pcgdp2000 = c(6.670766, 6.35437, 7.187657, 
6.74876, 8.377471, 6.472346, 7.189922, 7.01661, 7.702556, 6.364751, 
7.482682, 7.005789, 7.934514, 7.979339, 6.436151, 8.265393, 7.154615, 
6.349139, 6.796824, 6.523562, 8.981682, 6.927558, 6.741701, 7.750184, 
7.405496, 7.885329, 6.559615, 6.73578, 7.266828, 6.924613, 9.273503, 
6.520621, 8.24144, 6.22059, 7.052721, 6.721426, 6.898715, 7.267525, 
5.937536, 6.760415, 7.111512, 7.865572, 8.75684, 6.049734, 6.35437, 
8.420241, 6.261492, 6.669498, 8.32821, 5.384495, 6.50129, 7.154615
), ln_export_area = c(7.967494, 1.140843, 8.304137, 6.413822, 
-2.302585, 1.171314, 5.096793, 4.944928, 5.623267, -2.302585, 
-2.302585, -1.661718, 3.257355, 0.3999169, 7.078711, 4.62739, 
8.818254, 7.26078, 7.561687, 8.518584, -0.9844123, 4.99911, 4.113622, 
1.61487, -2.302585, -2.302585, 5.363239, 6.520308, 6.659775, 
5.072949, -2.302585, 6.968824, -1.465302, 2.752292, 7.690816, 
-2.302585, 5.841245, 7.561687, 6.878126, 3.923764, -2.302585, 
-2.302585, -2.302585, 6.023867, 8.536835, -2.302585, 6.338511, 
2.959842, 0.5095113, 5.787438, 3.614361, 1.023552), ln_export_pop = c(14.39925, 
4.451658, 13.30897, 11.72429, 3.912023, 8.052058, 10.8437, 10.33106, 
12.39107, 3.912023, 3.912023, 4.703024, 9.961392, 5.477251, 12.99278, 
11.69496, 13.69867, 12.8232, 12.20487, 13.78166, 4.560218, 10.44663, 
10.01004, 9.503454, 3.912023, 3.912023, 11.35552, 13.05647, 12.70125, 
12.87464, 3.912023, 12.33135, 7.080479, 9.695232, 12.08837, 3.912023, 
11.95632, 12.91682, 11.47905, 10.37327, 3.912023, 3.912023, 3.912023, 
12.87262, 13.28513, 3.912023, 12.10194, 7.543646, 7.011269, 11.76894, 
10.893, 7.925018), abs_latitude = c(8, 16, 6, 12, 24, 4, 6, 4, 
4, 11, 15, 11, 36, 28, 12, 0.2, 7, 10, 13, 12, 3, 2, 6, 32, 29, 
33, 19, 14, 23, 18, 20, 15, 24, 13, 9, 2, 12, 14, 8, 5, 0.2, 
26, 4, 12, 6, 36, 4, 0.2, 30, 7, 20, 19), longitude = c(17.54142, 
29.88722, 2.34264, -1.74292, 23.82042, 20.48058, -5.55555, 12.74132, 
15.2263, 43.49777, -24.04431, 42.57752, 2.63691, 29.87953, 39.61983, 
11.79747, -1.20736, -10.93922, -15.38402, -14.96533, 10.34204, 
37.85755, -9.3071, 18.04934, 28.2439, -6.35425, 46.72618, -3.54519, 
35.58901, -10.34285, 57.79387, 34.30362, 17.21072, 9.3731, 8.10141, 
29.91774, 30.04159, -14.46586, -11.79189, 45.93132, 6.68778, 
31.49749, 55.37541, 18.66029, 0.97634, 9.56295, 34.81658, 32.38633, 
25.14831, 23.65134, 27.81663, 29.87154), rain_min = c(0L, 5L, 
13L, 0L, 0L, 5L, 41L, 23L, 0L, 69L, 0L, 0L, 0L, 0L, 5L, 3L, 15L, 
3L, 0L, 0L, 5L, 15L, 31L, 0L, 8L, 0L, 8L, 0L, 13L, 0L, 0L, 0L, 
0L, 0L, 25L, 7L, 0L, 0L, 3L, 0L, 0L, 20L, 69L, 0L, 15L, 3L, 0L, 
46L, 8L, 3L, 0L, 0L), humid_max = c(78L, 82L, 78L, 67L, 74L, 
72L, 82L, 75L, 71L, 78L, 73L, 74L, 66L, 41L, 73L, 79L, 77L, 87L, 
78L, 74L, 95L, 62L, 95L, 72L, 42L, 72L, 71L, 73L, 67L, 69L, 74L, 
66L, 35L, 68L, 80L, 83L, 41L, 74L, 82L, 80L, 79L, 81L, 78L, 72L, 
77L, 64L, 56L, 72L, 67L, 73L, 71L, 57L), low_temp = c(14L, 17L, 
18L, 9L, -4L, 14L, 15L, 14L, 12L, 19L, 13L, 17L, 0L, 1L, 0L, 
17L, 15L, 17L, 7L, 13L, 17L, 5L, 13L, 1L, -9L, 0L, 1L, 8L, 7L, 
7L, 10L, -1L, -4L, 8L, 16L, 12L, 5L, 12L, 19L, 15L, 13L, -5L, 
19L, 8L, 15L, -1L, 8L, 12L, -2L, 14L, 4L, 0L), ln_coastline_area = c(0.2468601, 
-4.60517, 0.0684028, -4.60517, -4.60517, -4.60517, 0.4696153, 
-0.1668627, -0.7049121, 5.054218, 5.478362, 2.65835, -0.7015861, 
0.8960881, 4.529333, 1.194601, 0.813252, 0.3411072, 1.957224, 
2.026835, 2.35459, -0.084053, 1.651772, 0.0056658, -4.60517, 
1.414962, 2.107577, -4.60517, 1.124865, -0.3119217, 4.555573, 
-4.60517, 0.5923609, -4.60517, -0.0799525, -4.60517, -1.079278, 
0.9966474, 1.723961, 1.55798, 5.383156, -4.60517, 6.983902, -4.60517, 
-0.0141846, 1.947651, 0.4072272, -4.60517, 0.8589395, -4.151253, 
-4.60517, -4.60517), island_dum = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), islam = c(0, 
1, 13, 42, 0, 8, 20, 22, 1, 99, 0, 94, 99, 88, 32, 0.8, 16, 80, 
94, 34, 0.5, 6, 14, 97, 0, 99, 3, 89, 13, 99, 13, 16, 0, 90, 
45, 9, 73, 91, 39, 100, 0, 0, 0, 43, 12, 99, 33, 7, 1, 1, 0.3, 
0), legor_fr = c(1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L), legor_uk = c(0L, 0L, 0L, 
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 
0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 
1L), ln_avg_gold_pop = c(-6.614335, -2.633923, -13.81551, -2.164328, 
-3.70509, -3.316046, -2.972711, -4.296489, -3.967561, -13.81551, 
-13.81551, -13.81551, -13.81551, -13.81551, -3.805913, -1.654193, 
0.6532509, -0.8973768, -13.81551, -13.81551, -13.81551, -5.279897, 
-1.717547, -13.81551, -13.81551, -5.472521, -7.501954, -0.8741745, 
-5.648631, -1.444776, -13.81551, -13.81551, -0.6370343, -5.203685, 
-11.85755, -3.503525, -3.181087, -13.81551, -3.553395, -13.81551, 
-13.81551, -13.81551, -13.81551, -13.81551, -13.81551, -13.81551, 
-3.161444, -2.876623, 3.084304, -2.295242, -3.235008, 0.6928481
), ln_avg_oil_pop = c(0.643126, -9.21034, -3.531555, -9.21034, 
-9.21034, -9.21034, -3.270892, -0.8711616, 1.000878, -9.21034, 
-9.21034, -9.21034, 0.9135318, -0.3610424, -9.21034, 2.650107, 
-5.899707, -9.21034, -9.21034, -9.21034, 0.3627662, -9.21034, 
-9.21034, 3.235896, -9.21034, -7.77914, -9.21034, -9.21034, -9.21034, 
-9.21034, -9.21034, -9.21034, -9.21034, -9.21034, 0.1340378, 
-9.21034, -4.488462, -9.21034, -9.21034, -9.21034, -9.21034, 
-9.21034, -9.21034, -9.21034, -9.21034, -0.3781253, -9.21034, 
-9.21034, -5.725029, -3.441503, -9.21034, -9.21034), ln_avg_all_diamonds_pop = c(-1.701396, 
-6.907755, -6.907755, -6.907755, 2.186849, -1.849576, -4.228216, 
-6.907755, -6.907755, -6.907755, -6.907755, -6.907755, -6.907755, 
-6.907755, -6.907755, -2.165953, -2.239469, -3.673854, -6.907755, 
-6.907755, -6.907755, -6.907755, -2.123542, -6.907755, -3.637529, 
-6.907755, -6.907755, -6.907755, -6.907755, -6.907755, -6.907755, 
-6.907755, 0.2363898, -6.907755, -6.907755, -6.907755, -6.907755, 
-6.907755, -1.536141, -6.907755, -6.907755, -4.457984, -6.907755, 
-6.907755, -6.907755, -6.907755, -4.186928, -6.907755, -1.201608, 
-0.68398, -6.907755, -5.543311), atlantic_distance_minimum = c(5.66876, 
10.62621, 5.120652, 4.774938, 5.686335, 5.642056, 4.185696, 5.642056, 
5.527229, 10.13065, 3.646842, 14.40755, 6.559232, 16.39266, 12.58899, 
5.531399, 4.772588, 3.719985, 3.888797, 3.795674, 5.577306, 11.08334, 
3.776146, 8.422357, 7.202152, 5.793966, 9.686486, 3.897489, 9.264256, 
4.42371, 10.3101, 9.266991, 5.682842, 5.158515, 5.224331, 10.7538, 
15.25287, 3.897721, 3.705474, 12.05779, 5.196697, 8.290959, 11.45741, 
5.581032, 4.92623, 7.479859, 10.59497, 10.99569, 6.765942, 5.712497, 
9.027167, 9.027167), indian_distance_minimum = c(6.980571, 2.570375, 
9.233961, 9.299419, 5.764575, 8.772295, 9.457085, 8.772295, 7.923528, 
1.754229, 11.59978, 2.682206, 14.91231, 4.667312, 2.705884, 8.366795, 
9.299526, 10.26924, 10.79257, 10.63111, 8.556146, 2.704583, 9.777017, 
16.77543, 3.035, 13.67561, 0.9039161, 10.79005, 2.185373, 11.9143, 
0.0319096, 2.183153, 5.792154, 9.223114, 9.150605, 2.622741, 
3.527528, 10.79068, 10.18761, 2.358296, 8.474005, 2.622083, 1.742192, 
8.875547, 9.258235, 15.83294, 2.558215, 2.699154, 3.457205, 7.643048, 
2.388914, 2.388914), saharan_distance_minimum = c(4.925892, 3.718742, 
2.834785, 2.763519, 5.856533, 2.840084, 3.353074, 3.002548, 3.697363, 
4.845693, 3.481602, 2.350743, 0.9850905, 0.4303847, 2.543248, 
3.70284, 3.174178, 3.245414, 3.171976, 3.284617, 3.462215, 3.358859, 
3.594752, 0.6098508, 6.637325, 1.022596, 5.731615, 2.262917, 
5.267768, 2.255257, 6.273852, 4.820801, 5.980785, 1.768215, 2.641684, 
3.567813, 1.827123, 3.034838, 3.473508, 3.090304, 3.6702, 6.294675, 
4.635344, 1.879364, 3.009106, 0.3097339, 4.05628, 3.203552, 6.583775, 
3.747742, 4.848526, 5.453967), red_sea_distance_minimum = c(3.872354, 
2.215324, 3.901736, 4.239375, 4.2996, 2.293167, 4.793966, 3.051031, 
3.227007, 2.609506, 6.465437, 0.0643895, 3.654165, 1.112658, 
0.5100758, 3.528861, 4.332308, 5.258811, 5.637868, 5.633392, 
3.515037, 1.36133, 5.2275, 2.151154, 4.845831, 4.570611, 3.453547, 
4.310751, 3.298301, 4.973302, 3.883714, 2.922141, 4.685066, 2.953876, 
3.314152, 2.101732, 0.983083, 5.518319, 5.409636, 0.6954757, 
3.932184, 4.422592, 2.252856, 2.026491, 4.084906, 3.20461, 2.18672, 
1.649949, 4.89507, 2.686999, 3.253377, 3.695537)), .Names = c("ln_maddison_pcgdp2000", 
"ln_export_area", "ln_export_pop", "abs_latitude", "longitude", 
"rain_min", "humid_max", "low_temp", "ln_coastline_area", "island_dum", 
"islam", "legor_fr", "legor_uk", "ln_avg_gold_pop", "ln_avg_oil_pop", 
"ln_avg_all_diamonds_pop", "atlantic_distance_minimum", "indian_distance_minimum", 
"saharan_distance_minimum", "red_sea_distance_minimum"), class = "data.frame", row.names = c(NA, 
-52L))

1 个答案:

答案 0 :(得分:0)

你问的是能否为此写一个函数。

查看您的代码,重复出现的是 as.vector(sapply(DATA, FUNC)),其中 DATA = reduced_nunn,而 FUNC 因列而异。

您可以为此编写一个函数:

# Apply a summary function to every column of a data frame.
#
# f:    function (or function name) handed to sapply() for each column.
# data: object whose columns are summarised; falls back to reduced_nunn
#       when the argument is omitted.
#
# Returns the sapply() result passed through as.vector(), which drops the
# column names from an atomic result.
myCol <- function(f, data = reduced_nunn) {
  per_column <- sapply(data, f)
  as.vector(per_column)
}

请注意,data 参数带有默认值,可以通过传入第二个参数来覆盖它。

然后你会像这样使用它:

# Same summary table as in the question, with each statistic column built by
# the myCol() helper (which defaults to summarising reduced_nunn).
summarystats <- data.frame(
  "Names" = as.vector(names(reduced_nunn)),
  "Mean" = myCol(mean),
  "Standard Deviation" = myCol(sd),
  "Minimum" = myCol(min),
  "Maximum" = myCol(max),
  # R identifiers are case-sensitive: the helper is myCol, not mycol.
  "Number of Observations" = myCol(length)
)

然后你可能会想,这也是重复的......然后写

# Build a per-column summary table for a data frame.
#
# data: data frame whose columns are summarised (defaults to reduced_nunn).
# cols: named list of summary functions. Each name becomes a column label in
#       the result and each function is applied to every column of `data`
#       through myCol(). A list is required here: c("Mean", mean) would be
#       flattened by c() and lose the label/function pairing.
#
# Returns a data frame with a "Names" column holding names(data), followed by
# one column per entry of `cols`, with one row per column of `data`.
mySummary <- function(data = reduced_nunn,
                      cols = list(
                        "Mean" = mean,
                        "Standard Deviation" = sd,
                        "Minimum" = min,
                        "Maximum" = max,
                        "Number of Observations" = length
                      )) {
  stopifnot(
    is.list(cols),
    !is.null(names(cols)),
    all(nzchar(names(cols)))
  )
  DF <- data.frame("Names" = as.vector(names(data)))
  # A plain loop assigns into the local DF directly, so the `<<-`
  # super-assignment is not needed. R indexes from 1, and `[[` extracts the
  # function itself rather than a one-element list.
  for (i in seq_along(cols)) {
    DF[[names(cols)[i]]] <- myCol(cols[[i]], data)
  }
  DF
}

然后你就可以写

# Call mySummary() with no arguments so that both `data` and `cols` take
# their default values.
summarystats <- mySummary()

如果您想更改配置,请将cols提供给mySummary()函数或更改默认值。

但是……在我看来,后一种方法实际上更长、更复杂;而更简单的、重复调用 myCol 的写法同样易读。

我没有在您的数据上测试此代码。如果它适合你,请告诉我。

注意:<<-修改父作用域中的变量。