What is wrong with my implementation of AdaBoost?

Date: 2018-11-11 11:17:12

Tags: r adaboost

I am trying to implement Freund and Schapire's AdaBoost algorithm as closely as possible to the original (see p. 2 of http://rob.schapire.net/papers/explaining-adaboost.pdf):

library(rpart)
library(OneR)

maxdepth <- 1
T <- 100 # number of rounds

# Given: (x_1, y_1),...,(x_m, y_m) where x_i element of X, y_i element of {-1, +1}
myocarde <- read.table("http://freakonometrics.free.fr/myocarde.csv", head = TRUE, sep = ";")
#myocarde <- read.table("data/myocarde.csv", header = TRUE, sep = ";")
y <- (myocarde[ , "PRONO"] == "SURVIE") * 2 - 1
x <- myocarde[ , 1:7]
m <- nrow(x)
data <- data.frame(x, y)

# Initialize: D_1(i) = 1/m for i = 1,...,m
D <- rep(1/m, m)

H <- replicate(T, list())
a <- vector(mode = "numeric", T)
set.seed(123)

# For t = 1,...,T
for(t in 1:T) {
  # Train weak learner using distribution D_t
  # Get weak hypothesis h_t: X -> {-1, +1}
  data_D_t <- data[sample(m, 10*m, replace = TRUE, prob = D), ]
  H[[t]] <- rpart(y ~., data = data_D_t, maxdepth = maxdepth, method = "class")
  # Aim: select h_t with low weighted error: e_t = Pr_i~D_t[h_t(x_i) != y_i]
  h <- predict(H[[t]], x, type = "class")
  e <- sum(h != y) / m
  # Choose a_t = 0.5 * log((1-e) / e)
  a[t] <- 0.5 * log((1-e) / e)
  # Update for i = 1,...,m: D_t+1(i) = (D_t(i) * exp(-a_t * y_i * h_t(x_i))) / Z_t
  # where Z_t is a normalization factor (chosen so that Dt+1 will be a distribution) 
  D <- D * exp(-a[t] * y * as.numeric(h))
  D <- D / sum(D)
}
# Output the final hypothesis: H(x) = sign(sum of a_t * h_t(x) for t=1 to T)
newdata <- x
H_x <- sapply(H, function(x) as.numeric(as.character(predict(x, newdata = newdata, type = "class"))))
H_x <- t(a * t(H_x))
pred <- sign(rowSums(H_x))

#H
#a
eval_model(pred, y)
## 
## Confusion matrix (absolute):
##           Actual
## Prediction -1  1 Sum
##        -1   0  1   1
##        1   29 41  70
##        Sum 29 42  71
## 
## Confusion matrix (relative):
##           Actual
## Prediction   -1    1  Sum
##        -1  0.00 0.01 0.01
##        1   0.41 0.58 0.99
##        Sum 0.41 0.59 1.00
## 
## Accuracy:
## 0.5775 (41/71)
## 
## Error rate:
## 0.4225 (30/71)
## 
## Error rate reduction (vs. base rate):
## -0.0345 (p-value = 0.6436)

Compared with other AdaBoost implementations, this model's accuracy is shockingly bad, e.g.:

library(JOUSBoost)
## JOUSBoost 2.1.0
boost <- adaboost(as.matrix(x), y, tree_depth = maxdepth, n_rounds = T)
pred <- predict(boost, x)
eval_model(pred, y)
## 
## Confusion matrix (absolute):
##           Actual
## Prediction -1  1 Sum
##        -1  29  0  29
##        1    0 42  42
##        Sum 29 42  71
## 
## Confusion matrix (relative):
##           Actual
## Prediction   -1    1  Sum
##        -1  0.41 0.00 0.41
##        1   0.00 0.59 0.59
##        Sum 0.41 0.59 1.00
## 
## Accuracy:
## 1 (71/71)
## 
## Error rate:
## 0 (0/71)
## 
## Error rate reduction (vs. base rate):
## 1 (p-value < 2.2e-16)

My question
Could you give me a hint about what is going wrong in my implementation? Thank you!

1 Answer:

Answer 0 (score: 1)

There are several factors behind why your implementation does not work.

  1. You were not using rpart correctly. The AdaBoost algorithm says nothing about upsampling with the weights, and rpart itself can accept weights directly. The example below shows how rpart is used for this (see also the sketch after this list).

  2. The weighted error was calculated incorrectly. You were computing the plain error proportion (the number of misclassified samples divided by the number of samples). AdaBoost uses the sum of the weights of the incorrectly predicted samples, sum(D[y != yhat]); the sketch after this list makes the difference concrete.

  3. The final predictions also did not seem right; in the end I just did a simple loop. Note that as.numeric() applied directly to a factor (as in your weight update) returns the level codes 1/2, not the labels -1/+1, which is why the as.character() round trip in my code below matters.
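To make points 1 and 2 concrete, here is a toy sketch (the labels, predictions, and weights are made up purely for illustration) contrasting the plain error proportion with the weighted error AdaBoost requires:

# made-up toy values: three samples, sample 2 misclassified
y    <- c(-1,  1,  1)    # true labels
yhat <- c(-1, -1,  1)    # weak-learner predictions
D    <- c(0.2, 0.5, 0.3) # current AdaBoost distribution (sums to 1)

sum(yhat != y) / length(y) # error proportion: 0.33 (what the question computes)
sum(D[yhat != y])          # weighted error:   0.50 (what AdaBoost requires)

# rpart can consume the distribution directly, no resampling needed,
# e.g. rpart(y ~ ., data = dataset, weights = D, method = "class")
# as in the full example further below.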

Next time, I would suggest comparing your source code with that of the other implementations you are benchmarking against.

https://github.com/cran/JOUSBoost/blob/master/R/adaboost.R uses almost the same code as my example below, so it could have served as guidance.

Also, using T as a variable name can interfere with the logical TRUE, of which T is the shorthand, so I avoid it.
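A quick throwaway snippet illustrating the pitfall (unrelated to the algorithm itself):

isTRUE(T) # TRUE: T is the built-in shorthand for TRUE
T <- 100  # rounds, as in the question; this masks the shorthand
isTRUE(T) # FALSE: code that relied on T meaning TRUE is now broken
rm(T)     # removing the variable restores the built-in meaning
isTRUE(T) # TRUE again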

### packages ###
library(rpart)
library(OneR)

### parameters ###
maxdepth <- 1
rounds <- 100
set.seed(123)

### data ###
myocarde <- read.table("http://freakonometrics.free.fr/myocarde.csv", head = TRUE, sep = ";")
y <- (myocarde[ , "PRONO"] == "SURVIE") * 2 - 1
x <- myocarde[ , 1:7]
m <- nrow(x)
dataset <- data.frame(x, y)

### initialisation ###
D <- rep(1/m, m)
H <- list()
a <- vector(mode = "numeric", length = rounds)

for (i in seq.int(rounds)) {
  # train weak learner
  H[[i]] <- rpart(y ~ ., data = dataset, weights = D, maxdepth = maxdepth, method = "class")
  # predictions
  yhat <- predict(H[[i]], x, type = "class")
  yhat <- as.numeric(as.character(yhat))
  # weighted error
  e <- sum(D[yhat != y])
  # alpha coefficient
  a[i] <- 0.5 * log((1 - e) / e)
  # updating weights (D)
  D <- D * exp(-a[i] * y * yhat)
  D <- D / sum(D)
}

# predict with each weak learner on dataset
y_hat_final <- vector(mode = "numeric", length = m)
for (i in seq(rounds)) {
  pred <- predict(H[[i]], dataset, type = "class")
  pred <- as.numeric(as.character(pred))
  y_hat_final <- y_hat_final + (a[i] * pred)
}
pred <- sign(y_hat_final)

eval_model(pred, y)

> eval_model(pred, y)

Confusion matrix (absolute):
          Actual
Prediction -1  1 Sum
       -1  29  0  29
       1    0 42  42
       Sum 29 42  71

Confusion matrix (relative):
          Actual
Prediction   -1    1  Sum
       -1  0.41 0.00 0.41
       1   0.00 0.59 0.59
       Sum 0.41 0.59 1.00

Accuracy:
1 (71/71)

Error rate:
0 (0/71)

Error rate reduction (vs. base rate):
1 (p-value < 2.2e-16)