我正在重写 `scale` 函数,希望对数据集 `trees` 得到与 `scale(trees)` 相同的结果。
但是当我运行代码时,得到了一个非常奇怪的结果。
# Standardize a numeric vector: subtract its mean, then divide by its
# sample standard deviation (as computed by sd()).
z_function <- function(x) {
  centered <- x - mean(x)
  centered / sd(x)
}
# Column-wise standardization of `x` using z_function, recording the
# per-column mean and standard deviation as the "scaled:center" and
# "scaled:scale" attributes.
# Note: the `attributes<-` assignment below replaces the complete attribute
# set of the apply() result, so the returned object carries only these two
# attributes.
scale_function <- function(x) {
  col_means <- apply(x, 2, mean)
  col_sds <- apply(x, 2, sd)
  standardized <- apply(x, 2, z_function)
  attributes(standardized) <- list(
    "scaled:center" = col_means,
    "scaled:scale" = col_sds
  )
  standardized
}
# Run the custom scaler on the `trees` dataset and print the result.
scale_function(trees)
预期结果:
# Girth Height Volume
# [1,] -1.57685421 -0.9416472 -1.20885469
# [2,] -1.48125614 -1.7263533 -1.20885469
# [3,] -1.41752409 -2.0402357 -1.21493821
# [4,] -0.87580169 -0.6277648 -0.83775985
# [5,] -0.81206964 0.7847060 -0.69175532
# [6,] -0.78020362 1.0985884 -0.63700362
# [7,] -0.71647157 -1.5694121 -0.88642802
# [8,] -0.71647157 -0.1569412 -0.72825645
# [9,] -0.68460554 0.6277648 -0.46058149
# [10,] -0.65273952 -0.1569412 -0.62483658
# [11,] -0.62087350 0.4708236 -0.36324513
# [12,] -0.58900747 0.0000000 -0.55791784
# [13,] -0.58900747 0.0000000 -0.53358375
# [14,] -0.49340940 -1.0985884 -0.53966727
# [15,] -0.39781133 -0.1569412 -0.67350476
# [16,] -0.11101712 -0.3138824 -0.48491557
# [17,] -0.11101712 1.4124708 0.22077297
# [18,] 0.01644698 1.5694121 -0.16857243
# [19,] 0.14391108 -0.7847060 -0.27199230
# [20,] 0.17577710 -1.8832945 -0.32066048
# [21,] 0.23950915 0.3138824 0.26335763
# [22,] 0.30324119 0.6277648 0.09301901
# [23,] 0.39883927 -0.3138824 0.37286102
# [24,] 0.87682962 -0.6277648 0.49453146
# [25,] 0.97242770 0.1569412 0.75612291
# [26,] 1.29108793 0.7847060 1.53481372
# [27,] 1.35481998 0.9416472 1.55306429
# [28,] 1.48228408 0.6277648 1.71123586
# [29,] 1.51415010 0.6277648 1.29755636
# [30,] 1.51415010 0.6277648 1.26713875
# [31,] 2.34266672 1.7263533 2.84885447
我实际得到的(出乎意料的)错误结果:
# [1] -1.57685421 -1.48125614 -1.41752409 -0.87580169 -0.81206964 -0.78020362
# [7] -0.71647157 -0.71647157 -0.68460554 -0.65273952 -0.62087350 -0.58900747
# [13] -0.58900747 -0.49340940 -0.39781133 -0.11101712 -0.11101712 0.01644698
# [19] 0.14391108 0.17577710 0.23950915 0.30324119 0.39883927 0.87682962
# [25] 0.97242770 1.29108793 1.35481998 1.48228408 1.51415010 1.51415010
# [31] 2.34266672 -0.94164723 -1.72635326 -2.04023567 -0.62776482 0.78470603
# [37] 1.09858844 -1.56941205 -0.15694121 0.62776482 -0.15694121 0.47082362
# [43] 0.00000000 0.00000000 -1.09858844 -0.15694121 -0.31388241 1.41247085
# [49] 1.56941205 -0.78470603 -1.88329446 0.31388241 0.62776482 -0.31388241
# [55] -0.62776482 0.15694121 0.78470603 0.94164723 0.62776482 0.62776482
# [61] 0.62776482 1.72635326 -1.20885469 -1.20885469 -1.21493821 -0.83775985
# [67] -0.69175532 -0.63700362 -0.88642802 -0.72825645 -0.46058149 -0.62483658
# [73] -0.36324513 -0.55791784 -0.53358375 -0.53966727 -0.67350476 -0.48491557
# [79] 0.22077297 -0.16857243 -0.27199230 -0.32066048 0.26335763 0.09301901
# [85] 0.37286102 0.49453146 0.75612291 1.53481372 1.55306429 1.71123586
# [91] 1.29755636 1.26713875 2.84885447
# attr(,"scaled:center")
# Girth Height Volume
# 13.24839 76.00000 30.17097
# attr(,"scaled:scale")
# Girth Height Volume
# 3.138139 6.371813 16.437846
为什么函数中的 `attributes(result) = list(...)` 这条赋值语句会导致错误的结果?
有人可以解释吗?
答案 0 :(得分:5)
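`attributes(result) = list(...)` 会覆盖 `result` 的全部现有属性,其中包括 `apply` 返回的矩阵所带的 `dim` 和 `dimnames`,所以结果退化成了一个普通向量。因此我们需要把新属性追加到现有属性之后,而不是替换它们,请参见: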
# Standardize a numeric vector: subtract its mean, then divide by its
# sample standard deviation (as computed by sd()).
z_function <- function(x) {
  centered <- x - mean(x)
  centered / sd(x)
}
# Column-wise standardization of `x` using z_function. The per-column mean
# and standard deviation are stored as the "scaled:center" and
# "scaled:scale" attributes of the result.
scale_function <- function(x) {
  col_means <- apply(x, 2, mean)
  col_sds <- apply(x, 2, sd)
  standardized <- apply(x, 2, z_function)
  # Append the new attributes to the ones apply() already set, instead of
  # replacing the whole attribute list.
  scaling_info <- list(
    "scaled:center" = col_means,
    "scaled:scale" = col_sds
  )
  attributes(standardized) <- c(attributes(standardized), scaling_info)
  standardized
}
# Check that the output is the same as that of scale(); identical() only
# returns TRUE when the two objects match exactly.
identical(scale(trees), scale_function(trees))
# [1] TRUE
或者按照 @shadow 在评论中的建议,我们可以使用 `attr<-`,它只设置指定的单个属性,而不会覆盖其他已有属性:
# Set each attribute on its own: `attr<-` touches only the named attribute,
# so any existing attributes on `result` (such as dim/dimnames) stay in place.
attr(result, "scaled:center") <- att_mean
attr(result, "scaled:scale") <- att_sd