嵌套for循环xgb模型,将输出保存到data.frame中

时间:2018-03-03 12:38:13

标签: r

尝试将XGB模型的输出存储到新列中。

我目前有以下内容:

score <- data.table()
result <- data.table()
output <- data.table()
output$auc <- 0

for(i in 1:length(m)){
  country.ID <- m[i]    
  for(j in 1:length(k)){
    sector.ID <- k[j]   
    S1 <- which(xgbdata$Region.in.country.id == country.ID)
    S2 <- which(xgbdata$Major.sectors.id == sector.ID)
    rows.2.consider <- intersect(S1, S2)



    for(h in 1:n_folds) {
      if(submission) {
        x_train <- train %>%
          select(-BvD.ID.number, -Major.sectors, -Region.in.country, -Major.sectors.id, -Region.in.country.id, -status)
        x_test <- test %>%
          select(-BvD.ID.number, -Major.sectors, -Region.in.country, -Major.sectors.id, -Region.in.country.id, -status)
        y_train <- train$status
        y_test <- test$status
      } else {
        train.ids <- ids[-seq(h, length(ids), by = n_folds)]
        test.ids <- ids[seq(h, length(ids), by = n_folds)]

        x_train <- train %>%
          select(-BvD.ID.number, -Major.sectors, -Region.in.country, -Major.sectors.id, -Region.in.country.id, -status)
        x_train <- x_train[train.ids,]

        x_test <- data_train %>%
          select(-BvD.ID.number, -Major.sectors, -Region.in.country, -Major.sectors.id, -Region.in.country.id, -status)
        x_test <- x_test[test.ids,]
      }
      x_train <- apply(x_train, 2, as.numeric)
      x_test <- apply(x_test, 2, as.numeric)

      if(submission) {
        nrounds <- 12
        early_stopping_round <- NULL
        dtrain <- xgb.DMatrix(data = as.matrix(x_train), label = y_train)
        dtest <- xgb.DMatrix(data = as.matrix(x_test))
        watchlist <- list(train = dtrain)
      } else {
        nrounds <- 3000
        early_stopping_round <- 100
        dtrain <- xgb.DMatrix(data = as.matrix(x_train), label = y_train)
        dtest <- xgb.DMatrix(data = as.matrix(x_test), label = y_test)
        watchlist <- list(train = dtrain, test = dtest)
      }

      params <- list("eta" = 0.01,
                     "max_deptch" = 8,
                     "colsample_bytree" = 0.3528,
                     "min_child_weight" = 1,
                     "subsample" = 1,
                     "objective" = "reg:logistic",
                     "eval_metric" = "auc")

      model_xgb <- xgb.train(params = params,
                             data = dtrain,
                             maximize = TRUE,
                             nrounds = nrounds,
                             watchlist = watchlist,
                             early_stopping_rounds = early_stopping_round,
                             print_every_n = 1)

      pred <- predict(model_xgb, dtest)

      if(submission) {
        result <- cbind(test %>%
                          select(BvD.ID.number), status = round(pred, 0))
      } else {
        score <- rbind(score,
                       data.frame(accuracy = Accuracy(round(pred, 0), y_test),
                                  best_iteration = model_xgb$best_iteration))
        temp <- cbind(train[test.ids,], pred = pred)
        result <- rbind(result, temp)
      }
    }


output$auc[h] <- model_xgb$evaluation_log



    #rm(sector.ID, S1, S2, rows.2.consider, j)
  }
  #rm(country.ID, i)
}

在循环的底部,我想将xgboost模型中的所有evaluation_log保存到名为data.frame的{​​{1}}中,但对所有output执行此操作所有regions。以下不起作用...一旦我删除下面的代码,模型运行没有错误。

industries

编辑:`output$auc[h] <- model_xgb$evaluation_log` 的清洁版

str

0 个答案:

没有答案