我有两个数据结构:一个是包含各项预测的数据框,另一个是包含预测权重的命名向量。我想把每个预测乘以其对应的权重,然后把结果合并起来。
看起来像一个简单的任务,但我想先测试一下。
# Toy data frame of forecasts: four named columns, six rows.
df <- data.frame(
  one = c(100, 300, 400, 600, 300, 400),
  two = rep(500, 6),
  three = c(50, 30, 40, 50, 60, 70),
  four = c(2, 5, 3, 4, 3, 3)
)
df
# Named weight vector with one entry per column name of df.
v <- c(one = 0.20, two = 0.20, three = 0.30, four = 0.40)
# NOTE(review): with a plain vector on the right-hand side, the vector is
# recycled down the rows of each column (column-major), so this does not
# multiply each column by its own weight.
df * v
这很简单。但是,当我尝试把同样的做法应用于我的真实数据时,却出现了如下错误。
> point_forecasts * best_forecast_weights[, grep("forc_", colnames(best_forecast_weights))]
Error in Ops.data.frame(point_forecasts, best_forecast_weights[, grep("forc_", :
‘*’ only defined for equally-sized data frames
关于如何把这种做法推广到我的真实数据,有什么建议吗?
这两个对象的维度不同,但我前面的示例也是如此。
> dim(point_forecasts)
[1] 12 12
> dim(best_forecast_weights[, grep("forc_", colnames(best_forecast_weights))])
[1] 1 12
这是数据。
> dput(best_forecast_weights)
structure(list(forc_ses = 0.117817985571929, forc_ets = 0.0161288086012641,
forc_aa = 0.117805724951262, forc_aaf = 0.118799383174985,
forc_hw = 0.0451599619363367, forc_nn = 0.111375507054973,
forc_lm = 0.0662063818276547, forc_bsts = 0.0818938829947608,
forc_mavg = 0.0172445767409558, forc_hybrid = 0.136259607040553,
forc_cr = 0.0534757012899833, forc_struc = 0.117832478815342,
scheme = "variance based", dim_names = "lmosconsolidatedcustomerid ",
dim_values = "USA_1760"), class = "data.frame", row.names = c(NA,
-1L), .Names = c("forc_ses", "forc_ets", "forc_aa", "forc_aaf",
"forc_hw", "forc_nn", "forc_lm", "forc_bsts", "forc_mavg", "forc_hybrid",
"forc_cr", "forc_struc", "scheme", "dim_names", "dim_values"))
> dput(point_forecasts)
structure(list(forc_ses = c(4408.4307495589, 4408.4307495589,
4408.4307495589, 4408.4307495589, 4408.4307495589, 4408.4307495589,
4408.4307495589, 4408.4307495589, 4408.4307495589, 4408.4307495589,
4408.4307495589, 4408.4307495589), forc_ets = c(3014.28888364505,
2463.93355789192, 1913.57823213878, 1363.22290638565, 812.867580632516,
262.512254879383, -287.84307087375, -838.198396626883, -1388.55372238002,
-1938.90904813315, -2489.26437388628, -3039.61969963941), forc_aa = c(4408.00532266543,
4408.00532266543, 4408.00532266543, 4408.00532266543, 4408.00532266543,
4408.00532266543, 4408.00532266543, 4408.00532266543, 4408.00532266543,
4408.00532266543, 4408.00532266543, 4408.00532266543), forc_aaf = c(4361.78033632219,
3847.04682861951, 3467.29642894454, 3283.58737770275, 3315.39477841926,
3559.8292879854, 3987.97423641516, 4521.56378153502, 5016.78121920777,
5297.45842132112, 5246.63539901233, 4886.37690151687), forc_hw = c(1996.86969056628,
6438.80091882798, 730.499044737788, -695.596151211418, -3247.24176869611,
-11.1251796965471, -339.345816659107, 2520.85448419972, 5704.86448715835,
1795.80340575535, 467.497302543252, -2666.36819973286), forc_nn = c(7570.40440833295,
7817.51208686512, 7774.78545565402, 6600.84174499494, 6427.19186182921,
7697.04750040604, 7659.83514954764, 6556.86171233771, 7656.46367244252,
6335.78196983503, 7641.79649846112, 6529.91387555727), forc_lm = c(13049.1884247555,
14145.1312730948, 9803.74999022683, 1457.80533427488, 6772.01950249792,
9981.31995200218, 9275.1827480621, 9561.780834832, 8276.59378982876,
6950.99104996345, 9168.51040241743, 5946.64813505662), forc_bsts = c(6489.80885158037,
5805.89878415257, 4021.73099992211, 1231.97020036671, 2741.04687738945,
3477.56186323454, 3426.2302905591, 3618.57762773849, 3028.2472483372,
2539.42006616604, 3523.78089091349, 2085.06547360143), forc_mavg = c(2993.75545382493,
2475.93427620301, 1958.11309858108, 1440.29192095916, 922.470743337232,
404.649565715306, -113.171611906619, -630.992789528545, -1148.81396715047,
-1666.6351447724, -2184.45632239432, -2702.27750001625), forc_hybrid = c(5681.15230426712,
6068.70892040052, 4381.18430142845, 4489.14083401097, 3910.57834895585,
4981.67550749813, 4589.50962898391, 6251.06924846359, 5220.83840646985,
5044.82310488863, 4702.63185462144, 4518.96804448667), forc_cr = c(8395.46737804888,
8395.46737804888, 8395.46737804888, 8395.46737804888, 8395.46737804888,
8395.46737804888, 8395.46737804888, 8395.46737804888, 8395.46737804888,
8395.46737804888, 8395.46737804888, 8395.46737804888), forc_struc = c(4408.93379287327,
4408.93379287327, 4408.93379287327, 4408.93379287327, 4408.93379287327,
4408.93379287327, 4408.93379287327, 4408.93379287327, 4408.93379287327,
4408.93379287327, 4408.93379287327, 4408.93379287327)), .Names = c("forc_ses",
"forc_ets", "forc_aa", "forc_aaf", "forc_hw", "forc_nn", "forc_lm",
"forc_bsts", "forc_mavg", "forc_hybrid", "forc_cr", "forc_struc"
), row.names = c(NA, -12L), class = "data.frame")
回到我最初的例子,目标是得到一个集成(ensemble)预测,如下所示:
> (df[1,1]*v[1] + df[1,2]*v[2] + df[1,3]*v[3] + df[1,4]*v[4])/4
one
33.95
答案 0 :(得分:0)
请查看 ?sweep 的文档,它在这种情况下会有所帮助。应先用 unlist 将权重数据框强制转换为向量:
# Scale every forecast column by its weight. The weight columns are picked by
# the forecast column names rather than by hard-coded positions, so the weights
# stay aligned with the forecasts even if extra metadata columns are added or
# the column order changes. unlist() flattens the one-row data frame into a
# named numeric vector, which sweep() applies across columns (MARGIN = 2).
sweep(
  point_forecasts,
  2,
  unlist(best_forecast_weights[names(point_forecasts)]),
  "*"
)
# forc_ses forc_ets forc_aa forc_aaf forc_hw forc_nn forc_lm forc_bsts forc_mavg forc_hybrid
# 1 519.3924 48.616888 519.2883 518.1768 90.1785592 843.1576 863.93955 531.4756 51.626046 774.1116
# 2 519.3924 39.740313 519.2883 457.0268 290.7760044 870.6794 936.49796 475.4676 42.696439 826.9199
# 3 519.3924 30.863737 519.2883 411.9127 32.9893091 865.9207 649.07082 329.3552 33.766832 596.9785
# 4 519.3924 21.987161 519.2883 390.0882 -31.4130957 735.1721 96.51602 100.8908 24.837225 611.6886
# 5 519.3924 13.110586 519.2883 393.8669 -146.6453147 715.8318 448.35091 224.4750 15.907618 532.8539
# 6 519.3924 4.234010 519.2883 422.9055 -0.5024127 857.2626 660.82708 284.7910 6.978010 678.8011