I came across this question and found it interesting, so I copied its code.
The problem is that this part is slow, and I would like to optimize it.
library(philentropy)

# Rescale a numeric vector to the [0, 1] range.
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}

knn <- function(train, test, k, method) {
  n.test <- nrow(test)
  n.train <- nrow(train)
  if (n.train + n.test <= k) stop("k can not be more than n-1")
  neigh <- matrix(0, nrow = n.test, ncol = k)
  ddist <- NULL
  # For every test row, compute its distance to every training row,
  # then keep the k smallest distances (skipping the very smallest one).
  for (i in 1:n.test) {
    for (j in 1:n.train) {
      xmat <- rbind(test[i, ], train[j, ])
      ddist[j] <- distance(as.data.frame(xmat), method, k)
    }
    neigh[i, ] <- sort(ddist)[2:(k + 1)]
  }
  return(neigh)
}
wbcd <- read.csv("https://resources.oreilly.com/examples/9781784393908/raw/ac9fe41596dd42fc3877cfa8ed410dd346c43548/Machine%20Learning%20with%20R,%20Second%20Edition_Code/Chapter%2003/wisc_bc_data.csv")
rownames(wbcd) <- wbcd$id
wbcd$id <- NULL

# Normalize the numeric feature columns.
wbcd_n <- as.data.frame(lapply(wbcd[2:31], normalize))
wbcd_train <- wbcd_n[1:469, ]
wbcd_test  <- wbcd_n[470:549, ]

wbcd_nn <- knn(wbcd_train, wbcd_test, 3, method = "euclidean")
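To see where the time goes, here is a rough timing check I added myself (machine-dependent, and only on a small slice of the test set):

# Time the function on the first 10 test rows (rough check; timings vary by machine).
system.time(
  knn(wbcd_train, wbcd_test[1:10, ], 3, method = "euclidean")
)
# Every test row triggers nrow(train) separate distance() calls, so the
# full 80-row test set means 80 * 469 = 37,520 calls.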
Is there a way to do this with the apply family or a map function?
I tried to come up with something more efficient but did not find anything. The slow part is just this:
for (i in 1:n.test) {
  for (j in 1:n.train) {
    xmat <- rbind(test[i, ], train[j, ])
    ddist[j] <- distance(as.data.frame(xmat), method, k)
  }
  neigh[i, ] <- sort(ddist)[2:(k + 1)]
}
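A sketch of what I imagine an apply-based version could look like (hypothetical: knn_apply is just a name I made up, and I have not verified that it is correct or faster, since it still calls distance() once per train/test pair):

knn_apply <- function(train, test, k, method) {
  # Outer apply walks the test rows; the inner apply computes the
  # distance from that test row to every training row.
  t(apply(test, 1, function(test_row) {
    ddist <- apply(train, 1, function(train_row) {
      distance(rbind(test_row, train_row), method, k)
    })
    # Same selection rule as the loop version above.
    sort(ddist)[2:(k + 1)]
  }))
}
wbcd_nn2 <- knn_apply(wbcd_train, wbcd_test, 3, method = "euclidean")

This still does one distance() call per train/test pair, so I suspect it would not change much, which is why I am asking.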