矩阵运算中关于对象长度的错误消息

时间:2013-12-05 20:44:57

标签: r matrix syntax-error

我正在编写一段代码,其中包含一个42行6列的原始数据矩阵(矩阵A)。我试图从该矩阵中随机选择12行(矩阵B),并利用其中的两组两列数据拟合得到两组回归系数。然后,我想使用新计算出的回归系数,以及矩阵中未被随机样本选中的其余30行数据(矩阵C),来计算预测值。

尝试运行下面的代码时,我一直收到错误消息。这是我收到的错误消息:

警告讯息:

Warning messages:
1: 'newdata' had 30 rows but variable(s) found have 12 rows 
2: 'newdata' had 30 rows but variable(s) found have 12 rows 

我很确定我的EDGww回归出于某种原因只有12行,而不是30行,这就是我收到此错误的原因。我不知道如何解决这个问题。此外,“EDGww <- predict”这一行代码似乎也有错误。请注意该命令的“newdata”部分是如何工作的。

# Stack the six measurement vectors end to end and fold them into a
# 42-row matrix (filled column by column, one column per vector).
A <- matrix(
  c(Box.Z, Box.DC.gm, Box.CR, Box.DC.ww, Box.DC.gd, Box.DC.w),
  nrow = 42
)

# Estimate hold-out RMSE for two simple linear regressions on a random split.
#
# Twelve rows of `A` are drawn at random as the training set (B); all
# remaining rows form the hold-out set (C). Column 2 is regressed on
# column 1 and column 4 on column 3 using B only, and both models are then
# used to predict the hold-out rows.
#
# Args:
#   A: numeric matrix with at least 6 columns and more than 12 rows.
#
# Returns:
#   An unnamed list of four RMSE values computed on the hold-out rows, in
#   the order: column-2 prediction, derived difference (vs. column 5),
#   column-4 prediction, derived ratio (vs. column 6).
randco <- function(A) {
  # Keep the sampled indices so the hold-out set is their exact complement.
  # Matching pasted row strings removes the wrong number of rows whenever
  # the matrix contains duplicated rows.
  train_rows <- sample(nrow(A), 12)
  B <- A[train_rows, , drop = FALSE]
  C <- A[-train_rows, , drop = FALSE]

  Boxgm <- C[, 2]
  Boxww <- C[, 4]
  Boxgd <- C[, 5]
  Boxw <- C[, 6]

  # predict() looks up predictors in `newdata` by variable name. A formula
  # such as B[, 2] ~ B[, 1] has no variable that newdata can supply, so
  # predict() falls back to the 12 training rows and warns. Fitting on a
  # data frame with named columns makes the hold-out prediction work.
  fit_gm <- lm(y ~ x, data = data.frame(x = B[, 1], y = B[, 2]))
  fit_ww <- lm(y ~ x, data = data.frame(x = B[, 3], y = B[, 4]))

  EDGgm <- predict(fit_gm, newdata = data.frame(x = C[, 1]))
  EDGww <- predict(fit_ww, newdata = data.frame(x = C[, 3]))

  EDGgd <- EDGgm - EDGww
  EDGw <- EDGww * 100 / EDGgd

  rmse.gm <- sqrt(mean((EDGgm - Boxgm)^2, na.rm = TRUE))
  rmse.gd <- sqrt(mean((EDGgd - Boxgd)^2, na.rm = TRUE))
  rmse.ww <- sqrt(mean((EDGww - Boxww)^2, na.rm = TRUE))
  rmse.w <- sqrt(mean((EDGw - Boxw)^2, na.rm = TRUE))

  list(rmse.gm, rmse.gd, rmse.ww, rmse.w)
}

# Run the random split twice and transpose so that each run is one row of Z.
Z <- t(replicate(n = 2, expr = randco(A)))

这是我的矩阵A:

structure(c(972.7298, 934.814227158535, 996.933797909408, 791.217703626463, 
784.234752589183, 783.659378596087, 947.066666666667, 918.351477449456, 
884.944612286002, 561.061151079137, 543.853469531525, 557.416696524543, 
545.154969718561, 549.458419120938, 572.653512815785, 508.803114948366, 
489.664296345295, 546.234253551327, 601.674754971929, 564.621528408059, 
657.593123209169, 605.194301773428, 662.548898497015, 658.731787399959, 
653.712059064807, 645.51177904143, 638.927926119253, 672.18137254902, 
662.839142363581, 679.747359701802, 696.70399323896, 673.530320189437, 
700.21186440678, 807.331830584886, 786.227683746812, 779.653232288437, 
668.38576585869, 644.662921348315, 647.371589085072, 572.055997804008, 
568.525605977766, 573.693858845096, 19.3350173135753, 19.2528485621378, 
17.97264, 19.8067268125686, 19.6961540482885, 19.2498052750475, 
18.1278467677645, 18.5052143669591, 17.8549653586694, 21.3537310834607, 
20.9012957360391, 20.7400015202543, 20.8698484361062, 20.2784363115619, 
20.4579902498884, 19.5835524259481, 20.8039105491502, 20.3545184888189, 
19.2183579751146, 19.2741515717697, 18.8531635242811, 20.4671201111593, 
19.9477324477516, 20.0816370797239, 19.16865095264, 19.3289307393948, 
19.0611214754502, 19.9355592993905, 19.8016546674182, 19.1990838235428, 
19.8209288189899, 20.0623629281521, 20.3352443372472, 19.0073567368552, 
19.0215587432765, 19.4547199391263, 18.9404044208691, 19.5176145389921, 
18.9251879854177, 20.2094551375156, 20.6700059171779, 20.6466740494858, 
0.0184126042027551, 0.0211500857551233, 0.0175062024694362, 0.0337401180409656, 
0.0346889237620637, 0.0347098429625015, 0.0197595945566906, 0.0208987649013026, 
0.0233611391296427, 0.079218078642384, 0.0854783227450312, 0.0809066783513659, 
0.0876586886055242, 0.0862439899396154, 0.0788434985488822, 0.100477074583164, 
0.109512990787781, 0.0874625785566515, 0.0697002042276112, 0.0795835405948175, 
0.0590959175682916, 0.0691508307033452, 0.0566322479499254, 0.0581804263930345, 
0.0598276441702881, 0.0615442739013907, 0.0627343073093989, 0.0496103325156931, 
0.0537150387879776, 0.0506587540839388, 0.0472721193305599, 0.0516289789530612, 
0.0470104694823737, 0.0298054466074238, 0.0322343007905799, 0.0327009202323074, 
0.0509075880344454, 0.0561708857467634, 0.055379202376338, 0.0761713062471437, 
0.0773846368854214, 0.075659018034763, 1.24742047184357, 1.25188518062349, 
1.17616931947406, 1.4929693577313, 1.44061448275529, 1.40750828224021, 
1.28417359593431, 1.35503554694099, 1.28696331263137, 2.4154456205358, 
2.34834547373797, 2.3203592861058, 2.45940290188245, 2.39036941912483, 
2.42395662840317, 2.64559953691598, 2.85551593111206, 2.78375293672456, 
2.02594247326842, 1.99500782475067, 1.96872999244972, 1.96773821995353, 
1.91969552725095, 1.93016191885846, 2.0243918375924, 2.02855863447954, 
2.0272144929465, 2.19754956724573, 2.16243045886116, 2.05659819121302, 
1.80255436993863, 1.82806964661878, 1.86868466766234, 1.59199447237071, 
1.60223256800273, 1.65077123393453, 2.34177469771875, 2.32564214914119, 
2.25709470774863, 2.31314883935834, 2.38916504448898, 2.44775420826948, 
18.0875968417318, 18.0009633815143, 16.7964698066862, 18.3137574548373, 
18.2555395655332, 17.8422969928073, 16.8436731718302, 17.1501788200181, 
16.5680020460381, 18.9382854629249, 18.5529502623012, 18.4196422341485, 
18.4104455342238, 17.8880668924371, 18.0340336214853, 16.9379528890322, 
17.9483946180381, 17.5707655520944, 17.1924155018462, 17.2791437470191, 
16.8844335318313, 18.4993818912057, 18.0280369205006, 18.1514751608654, 
17.1442591150476, 17.3003721049152, 17.0339069825037, 17.7380097321447, 
17.639224208557, 17.1424856323298, 18.0183744490513, 18.2342932815333, 
18.4665596695848, 17.4153622644845, 17.4193261752738, 17.8039487051918, 
16.5986297231503, 17.1919723898509, 16.6680932776691, 17.8963062981573, 
18.2808408726889, 18.1989198412163, 6.8965517241379, 6.95454545454546, 
7.00247928886737, 8.15217391304348, 7.89138265447493, 7.88860471724921, 
7.62407096619517, 7.90099952403618, 7.76776408558641, 12.7542993544187, 
12.6575312310825, 12.597200622084, 13.3587364700685, 13.3629275510785, 
13.441012029141, 15.6193582202551, 15.9095896423085, 15.8430941922889, 
11.783931542667, 11.5457562825984, 11.6600298656048, 10.636778199, 
10.64838914917, 10.6336366700371, 11.8079867085979, 11.7255202499558, 
11.9010541447053, 12.3889297639934, 12.2592152199763, 11.9970827762246, 
10.0039788552265, 10.0254483044328, 10.1192896841534, 9.14132274823418, 
9.19801691455233, 9.27193883373274, 14.1082410824108, 13.5274888558692, 
13.5414091470952, 12.9252863737391, 13.0692294798011, 13.4499971955802
), .Dim = c(42L, 6L), .Dimnames = list(NULL, c("col1", "col2", 
"col3", "col4", "col5", "col6")))

1 个答案:

答案 0 :(得分:3)

如果您将A转换为data.frame,就可以避免很多麻烦,因为这样您就能在调用predict时为newdata=参数使用带列名的数据。我想这就是您想要的结果,但很难确定:

# Convert the matrix to a data frame and label its six columns so that
# model formulas and `newdata` can refer to the variables by name.
A <- data.frame(A)
names(A) <- c(
  "Box.Z", "Box.DC.gm", "Box.CR",
  "Box.DC.ww", "Box.DC.gd", "Box.DC.w"
)


# Estimate hold-out RMSE for two simple linear regressions on a random split.
#
# `n_train` rows of `A` are sampled as the training set; the remaining rows
# are the hold-out set. Box.DC.gm is regressed on Box.Z and Box.DC.ww on
# Box.CR using the training rows, and both fits are evaluated on the
# hold-out rows.
#
# Args:
#   A: data frame containing the columns Box.Z, Box.DC.gm, Box.CR,
#      Box.DC.ww, Box.DC.gd and Box.DC.w.
#   n_train: number of rows used for fitting (default 12). Must be at least
#      2 and smaller than nrow(A) so that a hold-out set remains.
#
# Returns:
#   A named list of hold-out RMSE values: `gm`, `gd`, `ww`, `w`.
randco <- function(A, n_train = 12L) {
  required <- c(
    "Box.Z", "Box.DC.gm", "Box.CR",
    "Box.DC.ww", "Box.DC.gd", "Box.DC.w"
  )
  stopifnot(
    is.data.frame(A),
    all(required %in% names(A)),
    n_train >= 2,
    n_train < nrow(A)
  )

  # Sample from the actual row count instead of a hard-coded 42, so inputs
  # of a different size are split correctly.
  rowsel <- sample(nrow(A), n_train)
  B <- A[rowsel, ]
  C <- A[-rowsel, ]

  Boxgm <- C$Box.DC.gm
  Boxww <- C$Box.DC.ww
  Boxgd <- C$Box.DC.gd
  Boxw <- C$Box.DC.w

  # drop = FALSE keeps a one-column data frame, so predict() can match the
  # predictor by name.
  fit_gm <- lm(Box.DC.gm ~ Box.Z, data = B)
  fit_ww <- lm(Box.DC.ww ~ Box.CR, data = B)
  EDGgm <- predict(fit_gm, newdata = C[, "Box.Z", drop = FALSE])
  EDGww <- predict(fit_ww, newdata = C[, "Box.CR", drop = FALSE])

  EDGgd <- EDGgm - EDGww
  EDGw <- EDGww * 100 / EDGgd

  rmse.gm <- sqrt(mean((EDGgm - Boxgm)^2, na.rm = TRUE))
  rmse.gd <- sqrt(mean((EDGgd - Boxgd)^2, na.rm = TRUE))
  rmse.ww <- sqrt(mean((EDGww - Boxww)^2, na.rm = TRUE))
  rmse.w <- sqrt(mean((EDGw - Boxw)^2, na.rm = TRUE))

  list(gm = rmse.gm, gd = rmse.gd, ww = rmse.ww, w = rmse.w)
}

结果:

# Run the random split twice and transpose so that each run is one row of Z.
Z <- t(replicate(n = 2, expr = randco(A)))

#     gm        gd        ww        w       
#[1,] 0.7078684 0.7588057 0.1827982 1.310923
#[2,] 0.6728875 0.7858108 0.2072046 1.522664