我用一组不同的超参数(hyperparameters)运行 h2o.grid。下面是相关代码:
library(caret)
library(ROCR)
library(h2o)
# Initialise the H2O session used by the h2o.* calls further down.
h2o.init()
# NOTE: `creditcard` is referenced by the preprocessing below, but the only
# line that loads it is commented out here. Uncomment it (or load the data
# some other way) before running the rest of the script.
#creditcard <- read.csv("creditcard.csv") #https://www.kaggle.com/mlg-ulb/creditcardfraud
#' Convert a data frame to an H2O frame, turning text columns into factors
#'
#' Every character column of `df` is converted with `as.factor()` and the
#' result is passed to `as.h2o()`.
#'
#' @param df A data frame.
#' @return The value returned by `as.h2o()` for the converted data frame.
as_h2o <- function(df) {
  for (colname in colnames(df)) {
    # is.character() always yields a single TRUE/FALSE. Comparing class()
    # with "character" yields one value per class, which breaks `if` for
    # columns carrying several classes (e.g. POSIXct date-times).
    if (is.character(df[[colname]])) {
      df[[colname]] <- as.factor(df[[colname]])
    }
  }
  as.h2o(df)
}
# Standardise every column except column 31 (the column that the predictor
# selection at the end of this block leaves out as the response).
creditcard[, -31] <- scale(creditcard[, -31])

# Hold out 30% of the rows as the test set. Column 1 is dropped from every
# split.
index <- createDataPartition(creditcard$Class, p = 0.3, list = FALSE)
test.set <- creditcard[index, -1]
train.pool <- creditcard[-index, -1]

# Take the validation rows from the non-test rows only. Partitioning the full
# data set a second time would put test rows back into the training and
# validation sets, so the test set would no longer be unseen data.
# `index.valid` therefore holds row positions within `train.pool`, and the
# validation set is 20% of the non-test rows.
index.valid <- createDataPartition(train.pool$Class, p = 0.2, list = FALSE)
valid.set <- train.pool[index.valid, ]
train.set <- train.pool[-index.valid, ]

# Response column and predictors (every remaining column except the response).
Y <- "Class"
X <- setdiff(colnames(test.set), Y)
# Search space for the grid: each entry maps an argument name to the discrete
# set of values the search may pick from.
# NOTE(review): rate / rate_annealing / momentum_* and rho / epsilon look like
# two alternative learning-rate schemes; confirm against the H2O deep
# learning documentation which of them take effect together.
hyper_params <- list(
  activation = c(
    "Rectifier", "Maxout", "Tanh",
    "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"
  ),
  # Six candidate layouts, each a vector of three sizes.
  hidden = list(
    c(17, 16, 15),
    c(19, 15, 11),
    c(16, 14, 12),
    c(20, 15, 10),
    c(25, 17, 10),
    c(15, 10, 5)
  ),
  epochs = c(50, 100, 200),
  l1 = c(0, 0.001, 0.00001, 0.0001),
  l2 = c(0, 0.001, 0.00001, 0.0001),
  rate = c(0, 0.1, 0.005, 0.001),
  rate_annealing = c(1e-8, 1e-7, 1e-6),
  rho = c(0.9, 0.95, 0.99, 0.999),
  epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
  momentum_start = c(0, 0.5),
  momentum_stable = c(0.99, 0.5, 0),
  input_dropout_ratio = c(0, 0.1, 0.2),
  max_w2 = c(10, 100, 1000, 3.4028235e+38)
)
# Settings for the random search, grouped by purpose and joined into one list.
search_criteria <- c(
  list(strategy = "RandomDiscrete"),
  # Budget: model count and wall-clock limit.
  list(max_models = 100, max_runtime_secs = 900),
  # Early-stopping settings.
  list(stopping_tolerance = 0.001, stopping_rounds = 15)
)
# Run the deep-learning grid search over `hyper_params`.
#
# Each run gets its own grid id. H2O stores grids in the cluster by id, and
# calling h2o.grid() again with an id that already exists adds the new models
# to that grid. With a fixed id, the summary therefore also lists models left
# over from earlier runs, including `hidden` layouts that are no longer in
# `hyper_params`. The id actually used is available as dl_grid@grid_id.
dl_grid <- h2o.grid(
  algorithm = "deeplearning",
  x = X,
  y = Y,
  grid_id = paste0("dl_grid_", format(Sys.time(), "%Y%m%d%H%M%S")),
  training_frame = as_h2o(train.set),
  validation_frame = as_h2o(valid.set),
  nfolds = 25,
  fold_assignment = "Stratified",
  hyper_params = hyper_params,
  search_criteria = search_criteria
)
但是在结果中,我收到了一些在hyper_params中未提及的神经网络,它们的大小意外,例如:[10,10,10,10],[50,50,50]。
整个结果:
> dl_grid
H2O Grid Details
================
Grid ID: dl_grid
Used hyper parameters:
- activation
- epochs
- epsilon
- hidden
- input_dropout_ratio
- l1
- l2
- max_w2
- momentum_stable
- momentum_start
- rate
- rate_annealing
- rho
Number of models: 13
Number of failed models: 1
Hyper-Parameter Search Summary: ordered by increasing logloss
activation epochs epsilon hidden
1 Rectifier 24.666234282086002 1.0E-6 [19, 15, 11]
2 Rectifier 27.58637697029444 1.0E-6 [10, 10, 10, 10]
3 Rectifier 20.26209344328687 1.0E-6 [15, 16, 17]
4 Rectifier 18.57634281485049 1.0E-6 [17, 16, 15]
5 Rectifier 50.032621172309156 1.0E-6 [17, 16, 15]
6 Rectifier 50.032621172309156 1.0E-6 [17, 16, 15]
7 Maxout 8.38177768101728 1.0E-4 [20, 15, 10]
8 MaxoutWithDropout 1.6076279182111595 1.0E-8 [17, 16, 15]
9 RectifierWithDropout 0.5012088413637236 1.0E-10 [15, 15, 15]
10 RectifierWithDropout 0.5012088413637236 1.0E-10 [15, 15, 15]
11 MaxoutWithDropout 28.578195951798776 1.0E-4 [12, 13, 12]
12 MaxoutWithDropout 10.073383841883308 1.0E-4 [15, 16, 17]
13 RectifierWithDropout 0.5012088413637236 1.0E-10 [50, 50, 50]
谁能解释为什么会这样?
答案 0 :(得分:0)
劳伦,谢谢你的帖子。这是信用卡欺诈检测建模的示例
# Search space for the grid: each entry maps an argument name to the discrete
# set of values the search may pick from.
# NOTE(review): rate / rate_annealing / momentum_* and rho / epsilon look like
# two alternative learning-rate schemes; confirm against the H2O deep
# learning documentation which of them take effect together.
hyper_params <- list(
  activation = c(
    "Rectifier", "Maxout", "Tanh",
    "RectifierWithDropout", "MaxoutWithDropout", "TanhWithDropout"
  ),
  # Six candidate layouts, each a vector of three sizes.
  hidden = list(
    c(17, 16, 15),
    c(19, 15, 11),
    c(16, 14, 12),
    c(20, 15, 10),
    c(25, 17, 10),
    c(15, 10, 5)
  ),
  epochs = c(50, 100, 200),
  l1 = c(0, 0.001, 0.00001, 0.0001),
  l2 = c(0, 0.001, 0.00001, 0.0001),
  rate = c(0, 0.1, 0.005, 0.001),
  rate_annealing = c(1e-8, 1e-7, 1e-6),
  rho = c(0.9, 0.95, 0.99, 0.999),
  epsilon = c(1e-10, 1e-8, 1e-6, 1e-4),
  momentum_start = c(0, 0.5),
  momentum_stable = c(0.99, 0.5, 0),
  input_dropout_ratio = c(0, 0.1, 0.2),
  max_w2 = c(10, 100, 1000, 3.4028235e+38)
)
# Settings for the random search, grouped by purpose and joined into one list.
search_criteria <- c(
  list(strategy = "RandomDiscrete"),
  # Budget: model count and wall-clock limit.
  list(max_models = 100, max_runtime_secs = 900),
  # Early-stopping settings.
  list(stopping_tolerance = 0.001, stopping_rounds = 15)
)
# Run the deep-learning grid search over `hyper_params`.
#
# Each run gets its own grid id. H2O stores grids in the cluster by id, and
# calling h2o.grid() again with an id that already exists adds the new models
# to that grid. With a fixed id, the summary therefore also lists models left
# over from earlier runs, including `hidden` layouts that are no longer in
# `hyper_params`. The id actually used is available as dl_grid@grid_id.
dl_grid <- h2o.grid(
  algorithm = "deeplearning",
  x = X,
  y = Y,
  grid_id = paste0("dl_grid_", format(Sys.time(), "%Y%m%d%H%M%S")),
  training_frame = as_h2o(train.set),
  validation_frame = as_h2o(valid.set),
  nfolds = 25,
  fold_assignment = "Stratified",
  hyper_params = hyper_params,
  search_criteria = search_criteria
)
答案 1 :(得分:0)
运行 H2O 网格搜索时,您应该只会看到隐藏层配置(hidden)与您在网格搜索中提供的取值一致的模型。下面是一个代码示例,您可以运行并试用它,看看能否重现该问题。
#############################################################
library(h2o)
h2o.init()
# Import the MNIST training and test frames. The test frame has to be loaded
# here because its response column is converted below; without this import
# the script stops with "object 'test' not found".
train <- h2o.importFile("https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz")
test <- h2o.importFile("https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz")
# Specify the response and predictor columns
y <- "C785"
x <- setdiff(names(train), y)
# Encode the response column as categorical for multinomial classification
train[, y] <- as.factor(train[, y])
test[, y] <- as.factor(test[, y])
# Set up a random grid search
# Candidate values for the two searched arguments.
hidden_opt <- list(
  c(32, 32),
  c(32, 16, 8),
  c(100, 100)
)
l1_opt <- c(1e-4, 1e-3)
hyper_params <- setNames(list(hidden_opt, l1_opt), c("hidden", "l1"))
# Random search capped at ten models, with a fixed seed.
search_criteria <- list(
  strategy = "RandomDiscrete",
  max_models = 10,
  seed = 123456
)
# Train the grid, then print its summary.
# NOTE(review): the grid id is fixed, so re-running this in the same H2O
# session with different hyper_params presumably adds to the existing
# "mygrid_1" grid rather than replacing it. Confirm against the H2O grid
# search documentation.
model_grid <- h2o.grid(
  "deeplearning",
  # data and columns
  training_frame = train,
  x = x,
  y = y,
  distribution = "multinomial",
  # search definition
  grid_id = "mygrid_1",
  hyper_params = hyper_params,
  search_criteria = search_criteria,
  # cross-validation and scoring
  nfolds = 3,
  score_interval = 2,
  # early-stopping settings passed to each model
  stopping_metric = "misclassification",
  stopping_rounds = 3,
  stopping_tolerance = 0.05
)
model_grid
# Output
# H2O Grid Details
# ================
#
# Grid ID: mygrid_1
# Used hyper parameters:
# - hidden
# - l1
# Number of models: 6
# Number of failed models: 0
#
# Hyper-Parameter Search Summary: ordered by increasing logloss
# hidden l1 model_ids logloss
# 1 [100, 100] 1.0E-4 mygrid_1_model_0 0.11350390885225858
# 2 [100, 100] 0.001 mygrid_1_model_4 0.13184550642109982
# 3 [32, 32] 0.001 mygrid_1_model_3 0.13869444872607956
# 4 [32, 16, 8] 0.001 mygrid_1_model_5 0.16575514373784073
# 5 [32, 32] 1.0E-4 mygrid_1_model_2 0.17190959951587054
# 6 [32, 16, 8] 1.0E-4 mygrid_1_model_1 0.20832913000853842
注意,在输出中,您只会看到隐藏层等于设置的原始值的模型:c(32,32),c(32,16,8),c(100,100)