library(matrixTests)
> col_t_welch(dfA, dfB)
obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr df statistic pvalue conf.low conf.high alternative mean.null conf.level
df1.var1A 10 10 20 1.5436119 0.7488449 0.79476695 0.2993602 0.5481971 0.2911284 16.57158 2.7299537 0.01449227 0.1793279 1.4102060 two.sided 0 0.95
df1.var2A 10 10 20 2.2205661 2.2320260 -0.01145988 0.4832561 0.5249799 0.3175273 17.96923 -0.0360910 0.97160771 -0.6786419 0.6557222 two.sided 0 0.95
df1.var3A 10 10 20 3.0457651 2.7835908 0.26217424 1.2998193 1.9933106 0.5738580 17.23565 0.4568626 0.65347516 -0.9473005 1.4716490 two.sided 0 0.95
df2.var1A 10 10 20 1.7233471 1.2761199 0.44722715 0.9328694 1.3631385 0.4791668 17.38932 0.9333434 0.36342238 -0.5620050 1.4564593 two.sided 0 0.95
df2.var2A 10 10 20 1.9278754 2.6368740 -0.70899858 1.0966493 0.6907785 0.4227798 17.11741 -1.6769925 0.11170922 -1.6005202 0.1825230 two.sided 0 0.95
df2.var3A 10 10 20 3.1245106 2.9569952 0.16751542 1.0357228 0.8209887 0.4308958 17.76242 0.3887609 0.70207375 -0.7386317 1.0736625 two.sided 0 0.95
df3.var1A 10 0 10 0.6804275 NaN NaN 0.6015624 0.0000000 NaN NaN NA NA NA NA two.sided 0 0.95
df3.var2A 10 10 20 2.0143381 1.9223843 0.09195379 0.7837613 0.7611496 0.3930535 17.99614 0.2339472 0.81766669 -0.7338338 0.9177413 two.sided 0 0.95
df3.var3A 10 10 20 3.0156624 3.2768350 -0.26117263 1.5437758 1.2608029 0.5295827 17.81860 -0.4931668 0.62791751 -1.3745971 0.8522518 two.sided 0 0.95
我想执行线性判别分析。 (lda函数来自MASS库)
这里是我的代码
shell~$ adb pull /system/etc/security/cacerts.bks
shell~$ keytool -keystore cacerts.bks -storetype BKS -provider org.bouncycastle.jce.provider.BouncyCastleProvider -storepass changeit -v -list
但在执行调用 lda 的那一行代码之后
# Example data set (dput() output): a 56-row data.frame with four columns.
#   spent             - numeric; takes only the values 73.5, 29.74 and 73.71
#   realpurchase_cash - numeric; 501..528 for the first 28 rows, then the
#                       same three values as `spent` for the remaining rows
#   id                - integer; 123 for the first 28 rows, 124 for the rest
#   flag              - integer 0/1; 1 for the first 28 rows, 0 for the rest
#                       (used as the response in the lda() formula elsewhere
#                       in this file)
mydat=structure(list(spent = c(73.5, 73.5, 73.5, 73.5, 73.5, 73.5,
73.5, 73.5, 73.5, 73.5, 73.5, 29.74, 29.74, 29.74, 29.74, 29.74,
29.74, 29.74, 29.74, 29.74, 29.74, 29.74, 73.71, 73.71, 73.71,
73.71, 73.71, 73.71, 73.5, 73.5, 73.5, 73.5, 73.5, 73.5, 73.5,
73.5, 73.5, 73.5, 73.5, 29.74, 29.74, 29.74, 29.74, 29.74, 29.74,
29.74, 29.74, 29.74, 29.74, 29.74, 73.71, 73.71, 73.71, 73.71,
73.71, 73.71), realpurchase_cash = c(501, 502, 503, 504, 505,
506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518,
519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 73.5, 73.5,
73.5, 73.5, 73.5, 73.5, 73.5, 73.5, 73.5, 73.5, 73.5, 29.74,
29.74, 29.74, 29.74, 29.74, 29.74, 29.74, 29.74, 29.74, 29.74,
29.74, 73.71, 73.71, 73.71, 73.71, 73.71, 73.71), id = c(123L,
123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L,
123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L, 123L,
123L, 123L, 123L, 123L, 123L, 124L, 124L, 124L, 124L, 124L, 124L,
124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L,
124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L
), flag = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("spent",
"realpurchase_cash", "id", "flag"), class = "data.frame", row.names = c(NA,
-56L))
我得到了错误
# Fit a linear discriminant model for `flag` on a random 70% sample of
# `mydat`, then pick a probability cutoff and build a confusion matrix on the
# remaining 30% of the rows.
library("MASS")
library("caret")
library(InformationValue)

str(mydat)

# Random 70/30 split. `index` holds the row numbers of the training sample;
# seq_len() is used instead of 1:nrow() so an empty data frame cannot yield
# the sequence c(1, 0).
index <- sample(seq_len(nrow(mydat)), round(0.70 * nrow(mydat)))
train <- mydat[index, ]
test <- mydat[-index, ]

# `subset` must be an index vector selecting rows of `data`, not a data
# frame. Passing `train` here was the cause of the reported error. Fitting
# with `data = train` and no `subset` is equivalent.
fit <- lda(flag ~ spent + realpurchase_cash, data = mydat, subset = index)

# predict() on an lda fit has no `type` argument and returns a list with
# components `class`, `posterior` and `x`. The cutoff search and the
# confusion matrix need numeric scores, so use the posterior probability of
# class "1".
predicted <- predict(fit, test)
predicted_score <- predicted$posterior[, "1"]

# Both caret and InformationValue export confusionMatrix(); qualify the calls
# so the InformationValue versions (actuals, scores, threshold) are used
# regardless of attach order.
optCutOff <- InformationValue::optimalCutoff(test$flag, predicted_score)[1]
InformationValue::confusionMatrix(test$flag, predicted_score,
                                  threshold = optCutOff)
如何解决此错误?问题出在哪里?
答案 0 :(得分:2)
=SUM(INDEX(A:A, AGGREGATE(14, 7, ROW($1:1)/(A$1:A1=0), 1)):
INDEX(A:A, AGGREGATE(15, 7, ROW(A1:INDEX(A:A, MATCH(1E+99, A:A)+1))/(A1:INDEX(A:A,MATCH(1E+99, A:A)+1)=0), 1)))
`subset` 参数需要的是一个向量,而您传给它的是一个数据框(即一个列表)。
请改用行索引向量:
lda(flag ~ spent + realpurchase_cash, mydat, subset=index)
或者,由于 `train` 已经是您想要的子集,因此也可以直接把 `train` 作为数据传给 lda,而不再使用 `subset` 参数。