我在r中运行非线性PCA,使用homals包。以下是我使用的一大块代码:
res1 <- homals(data = mydata, rank = 1, ndim = 9, level = "nominal")
res1 <- rescale(res1)
我想在此分析中生成1000个特征值的bootstrap估计值(替换),但我无法弄清楚代码。有没有人有任何建议?
示例数据:
dput(head(mydata, 30))
structure(list(`W age` = c(45L, 43L, 42L, 36L, 19L, 38L, 21L,
27L, 45L, 38L, 42L, 44L, 42L, 38L, 26L, 48L, 39L, 37L, 39L, 26L,
24L, 46L, 39L, 48L, 40L, 38L, 29L, 24L, 43L, 31L), `W education` = c(1L,
2L, 3L, 3L, 4L, 2L, 3L, 2L, 1L, 1L, 1L, 4L, 2L, 3L, 2L, 1L, 2L,
2L, 2L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 3L), `H education` = c(3L,
3L, 2L, 3L, 4L, 3L, 3L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 2L,
2L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L), `N children` = c(10L,
7L, 9L, 8L, 0L, 6L, 1L, 3L, 8L, 2L, 4L, 1L, 1L, 2L, 0L, 7L, 6L,
8L, 5L, 1L, 0L, 1L, 1L, 5L, 8L, 1L, 0L, 0L, 8L, 2L), `W religion` = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), `W employment` = c(1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L), `H occupation` = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 2L, 4L, 2L, 2L,
2L, 2L, 4L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L), `Standard of living` =
c(4L,
4L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 3L, 3L, 1L, 4L, 4L,
3L, 1L, 1L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L), Media = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Contraceptive = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("W age",
"W education", "H education", "N children", "W religion", "W employment",
"H occupation", "Standard of living", "Media", "Contraceptive"
), row.names = c(NA, 30L), class = "data.frame")
>
我获得了resmals功能,可以与homals包一起使用,以实现最佳缩放。这是功能:
rescale <- function(res) {
# Rescale homals results to proper scaling
n <- nrow(res$objscores)
m <- length(res$catscores)
res$objscores <- (n * m)^0.5 * res$objscores
res$scoremat <- (n * m)^0.5 * res$scoremat
res$catscores <- lapply(res$catscores, FUN = function(x) (n * m)^0.5 * x)
res$cat.centroids <- lapply(res$cat.centroids, FUN = function(x) (n * m)^0.5 * x)
res$low.rank <- lapply(res$low.rank, FUN = function(x) n^0.5 * x)
res$loadings <- lapply(res$loadings, FUN = function(x) m^0.5 * x)
res$discrim <- lapply(res$discrim, FUN = function(x) (n * m)^0.5 * x)
res$eigenvalues <- n * res$eigenvalues
return(res)
}
答案 0 :(得分:0)
在R中引导的标准方法是使用基础包boot
我对后面的代码并不是很满意,因为它会抛出很多警告。但也许这是由于我测试过的数据集。我在help("homals")
中使用了数据集和第3个示例。
我只运行了10次bootstrap重复。
library(homals)
library(boot)
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(d, active = c(rep(TRUE, 4), FALSE), sets = list(c(1,3,4),2,5))
res$eigenvalues
}
data(galo)
set.seed(7578) # Make the results reproducible
eig <- boot(galo, boot_eigen, R = 10)
eig
#
#ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
#Call:
#boot(data = galo, statistic = boot_eigen, R = 10)
#
#
#Bootstrap Statistics :
# original bias std. error
#t1* 0.1874958 0.03547116 0.005511776
#t2* 0.2210821 -0.02478596 0.005741331
colMeans(eig$t)
#[1] 0.2229669 0.1962961
如果在您的情况下也无法正常运行,请说明,我将删除答案。
编辑。
为了回答评论中的讨论,我更改了函数boot_eigen
,现在对homals
的调用遵循问题代码,并在返回之前调用rescale
。 / p>
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(data = d, rank = 1, ndim = 9, level = "nominal")
res <- rescale(res)
res$eigenvalues
}
set.seed(7578) # Make the results reproducible
eig <- boot(mydata, boot_eigen, R = 10)