h2o 网格搜索(h2o.grid)中神经网络隐藏层的大小

时间:2018-08-31 17:28:49

标签: r deep-learning h2o

我用一组不同的超参数(hyperparameters)运行 h2o.grid。相关代码如下:

       library(caret)
        library(ROCR)
        library(h2o)
        h2o.init()

    #creditcard <- read.csv("creditcard.csv") #https://www.kaggle.com/mlg-ulb/creditcardfraud
        # Convert a data.frame into an H2OFrame, turning every character
        # column into a factor first.
        #
        # df: a data.frame.
        # Returns: the value of as.h2o() applied to the converted data.frame.
        as_h2o <- function(df) {
          # is.character() always yields a single TRUE/FALSE per column.
          # Comparing class() with == yields one value per class, which makes
          # `if` fail for multi-class columns such as POSIXct or ordered factors.
          is_chr <- vapply(df, is.character, logical(1))
          if (any(is_chr)) {
            df[is_chr] <- lapply(df[is_chr], as.factor)
          }
          as.h2o(df)
        }

        # Centre and scale every column except column 31
        # (presumably the Class label; confirm against the data).
        creditcard[, -31] <- scale(creditcard[, -31])

        # Hold out 30% of the rows as the test set. Column 1 is dropped from
        # every split.
        index <- createDataPartition(creditcard$Class, p = 0.3, list = FALSE)
        test.set <- creditcard[index, -1]
        train.full <- creditcard[-index, -1]

        # Take the validation set from the remaining (non-test) rows only.
        # Partitioning the full data a second time, as before, overwrote
        # train.set and let test rows leak into the training and validation
        # sets.
        index.valid <- createDataPartition(train.full$Class, p = 0.2, list = FALSE)
        train.set <- train.full[-index.valid, ]
        valid.set <- train.full[index.valid, ]

        # Response column, and every other remaining column as predictor.
        # Selecting by name avoids relying on Class being at position 30.
        Y <- "Class"
        X <- setdiff(colnames(train.set), Y)

     # Candidate values for each deep learning hyper-parameter; the grid
     # search draws combinations from these lists.
     # NOTE(review): per the H2O docs, rho/epsilon are used only with
     # adaptive_rate = TRUE, and rate/rate_annealing/momentum_* only with
     # adaptive_rate = FALSE. Confirm which set is meant to be searched.
     hyper_params <- list(
       activation = c(
         "Rectifier", "Maxout", "Tanh",
         "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"
       ),
       # Each entry is one network layout: the sizes of its hidden layers.
       hidden = list(
         c(17, 16, 15),
         c(19, 15, 11),
         c(16, 14, 12),
         c(20, 15, 10),
         c(25, 17, 10),
         c(15, 10, 5)
       ),
       epochs = c(50, 100, 200),
       l1 = c(0, 1e-3, 1e-5, 1e-4),
       l2 = c(0, 1e-3, 1e-5, 1e-4),
       rate = c(0, 0.1, 0.005, 0.001),
       rate_annealing = c(1e-8, 1e-7, 1e-6),
       rho = c(0.9, 0.95, 0.99, 0.999),
       epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
       momentum_start = c(0, 0.5),
       momentum_stable = c(0.99, 0.5, 0),
       input_dropout_ratio = c(0, 0.1, 0.2),
       max_w2 = c(10, 100, 1000, 3.4028235e+38)
     )

     # Random search settings: sample combinations from hyper_params, with
     # limits on model count and runtime plus early-stopping settings.
     search_criteria <- list(
       strategy = "RandomDiscrete",
       max_models = 100,
       max_runtime_secs = 900,
       stopping_tolerance = 0.001,
       stopping_rounds = 15
     )

     # Use a fresh grid ID for every run. H2O adds new models to an existing
     # grid when the same grid_id is reused (see the H2O grid search docs),
     # so models from earlier runs, trained with other `hidden` values, would
     # otherwise appear in this grid's summary.
     dl_grid_id <- paste0("dl_grid_", format(Sys.time(), "%Y%m%d_%H%M%S"))

     # Random grid search over hyper_params for a deep learning model, using
     # 25-fold stratified cross-validation plus a separate validation frame.
     dl_grid <- h2o.grid(algorithm = "deeplearning",
                         x = X,
                         y = Y,
                         grid_id = dl_grid_id,
                         training_frame = as_h2o(train.set),
                         validation_frame = as_h2o(valid.set),
                         nfolds = 25,
                         fold_assignment = "Stratified",
                         hyper_params = hyper_params,
                         search_criteria = search_criteria
     )

但是结果中出现了一些并未在 hyper_params 中指定的神经网络,它们的隐藏层大小出乎意料,例如:[10,10,10,10]、[50,50,50]。

整个结果:

> dl_grid
H2O Grid Details
================

Grid ID: dl_grid 
Used hyper parameters: 
  -  activation 
  -  epochs 
  -  epsilon 
  -  hidden 
  -  input_dropout_ratio 
  -  l1 
  -  l2 
  -  max_w2 
  -  momentum_stable 
  -  momentum_start 
  -  rate 
  -  rate_annealing 
  -  rho 
Number of models: 13 
Number of failed models: 1 

Hyper-Parameter Search Summary: ordered by increasing logloss
             activation             epochs epsilon           hidden
1             Rectifier 24.666234282086002  1.0E-6     [19, 15, 11]
2             Rectifier  27.58637697029444  1.0E-6 [10, 10, 10, 10]
3             Rectifier  20.26209344328687  1.0E-6     [15, 16, 17]
4             Rectifier  18.57634281485049  1.0E-6     [17, 16, 15]
5             Rectifier 50.032621172309156  1.0E-6     [17, 16, 15]
6             Rectifier 50.032621172309156  1.0E-6     [17, 16, 15]
7                Maxout   8.38177768101728  1.0E-4     [20, 15, 10]
8     MaxoutWithDropout 1.6076279182111595  1.0E-8     [17, 16, 15]
9  RectifierWithDropout 0.5012088413637236 1.0E-10     [15, 15, 15]
10 RectifierWithDropout 0.5012088413637236 1.0E-10     [15, 15, 15]
11    MaxoutWithDropout 28.578195951798776  1.0E-4     [12, 13, 12]
12    MaxoutWithDropout 10.073383841883308  1.0E-4     [15, 16, 17]
13 RectifierWithDropout 0.5012088413637236 1.0E-10     [50, 50, 50]

谁能解释为什么会这样?

2 个答案:

答案 0 :(得分:0)

劳伦,谢谢你的帖子。这是信用卡欺诈检测建模的示例

 # Candidate values for each deep learning hyper-parameter; the grid search
 # draws combinations from these lists.
 hyper_params <- list(
   activation = c("Rectifier", "Maxout", "Tanh",
                  "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"),
   # Each entry is one network layout: the sizes of its hidden layers.
   hidden = list(c(17, 16, 15), c(19, 15, 11), c(16, 14, 12),
                 c(20, 15, 10), c(25, 17, 10), c(15, 10, 5)),
   epochs = c(50, 100, 200),
   l1 = c(0, 0.001, 0.00001, 0.0001),
   l2 = c(0, 0.001, 0.00001, 0.0001),
   rate = c(0, 0.1, 0.005, 0.001),
   rate_annealing = c(1e-8, 1e-7, 1e-6),
   rho = c(0.9, 0.95, 0.99, 0.999),
   epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
   momentum_start = c(0, 0.5),
   momentum_stable = c(0.99, 0.5, 0),
   input_dropout_ratio = c(0, 0.1, 0.2),
   max_w2 = c(10, 100, 1000, 3.4028235e+38)
 )

 # Random search settings: limits on model count and runtime plus
 # early-stopping settings.
 search_criteria <- list(strategy = "RandomDiscrete",
                         max_models = 100,
                         max_runtime_secs = 900,
                         stopping_tolerance = 0.001,
                         stopping_rounds = 15)

 # Use a fresh grid ID for every run. H2O adds new models to an existing
 # grid when the same grid_id is reused (see the H2O grid search docs), so
 # models from earlier runs, trained with other `hidden` values, would
 # otherwise appear in this grid's summary.
 dl_grid_id <- paste0("dl_grid_", format(Sys.time(), "%Y%m%d_%H%M%S"))

 dl_grid <- h2o.grid(algorithm = "deeplearning",
                     x = X,
                     y = Y,
                     grid_id = dl_grid_id,
                     training_frame = as_h2o(train.set),
                     validation_frame = as_h2o(valid.set),
                     nfolds = 25,
                     fold_assignment = "Stratified",
                     hyper_params = hyper_params,
                     search_criteria = search_criteria
 )

答案 1 :(得分:0)

运行 H2O 网格搜索时,结果中应该只出现隐藏层配置与您提供给网格搜索的 hidden 取值完全一致的模型。下面是一个代码示例,您可以运行并试用它,看看能否重现该问题。

#############################################################
library(h2o)
h2o.init()

# Import the MNIST training data
train <- h2o.importFile("https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz")

# Specify the response and predictor columns
y <- "C785"
x <- setdiff(names(train), y)

# Encode the response column as categorical for multinomial classification.
# Only `train` is converted: this example never loads a `test` frame, so the
# former `test[,y] <- as.factor(test[,y])` line failed because `test` was
# undefined.
train[, y] <- as.factor(train[, y])

# Random grid search: candidate hidden-layer layouts and L1 penalties
hidden_opt <- list(
  c(32, 32),
  c(32, 16, 8),
  c(100, 100)
)
l1_opt <- c(1e-4, 1e-3)
hyper_params <- list(hidden = hidden_opt, l1 = l1_opt)

# Sample combinations at random, capped at 10 models, with a fixed seed
search_criteria <- list(
  strategy = "RandomDiscrete",
  max_models = 10,
  seed = 123456
)



# Grid search: train deep learning models over hyper_params, scored with
# 3-fold cross-validation and early stopping on misclassification.
model_grid <- h2o.grid(
  algorithm = "deeplearning",
  grid_id = "mygrid_1",
  x = x,
  y = y,
  training_frame = train,
  distribution = "multinomial",
  nfolds = 3,
  hyper_params = hyper_params,
  search_criteria = search_criteria,
  score_interval = 2,
  stopping_metric = "misclassification",
  stopping_rounds = 3,
  stopping_tolerance = 0.05
)
model_grid

# Output
# H2O Grid Details
# ================
#   
#   Grid ID: mygrid_1 
# Used hyper parameters: 
#   -  hidden 
# -  l1 
# Number of models: 6 
# Number of failed models: 0 
# 
# Hyper-Parameter Search Summary: ordered by increasing logloss
# hidden     l1        model_ids             logloss
# 1  [100, 100] 1.0E-4 mygrid_1_model_0 0.11350390885225858
# 2  [100, 100]  0.001 mygrid_1_model_4 0.13184550642109982
# 3    [32, 32]  0.001 mygrid_1_model_3 0.13869444872607956
# 4 [32, 16, 8]  0.001 mygrid_1_model_5 0.16575514373784073
# 5    [32, 32] 1.0E-4 mygrid_1_model_2 0.17190959951587054
# 6 [32, 16, 8] 1.0E-4 mygrid_1_model_1 0.20832913000853842

注意,在输出中,您只会看到隐藏层等于设置的原始值的模型:c(32,32),c(32,16,8),c(100,100)