我正在R上运行h2o网格搜索。该模型是使用gamma分布的glm。我已经使用以下设置定义了网格:
hyper_parameters = list(alpha = c(0, .5), missing_values_handling = c("Skip", "MeanImputation"))
# Run a grid search over GLM models using a gamma family with a log link.
# `predictors`, `response`, `data`, `validate` and `hyper_parameters` must
# already exist in the calling environment. Logical flags are spelled
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
h2o.grid(
  algorithm = "glm",                          # Algorithm type
  grid_id = "grid.s",                         # Id used to retrieve the grid's models later
  x = predictors,                             # Predictive features
  y = response,                               # Target variable
  training_frame = data,                      # Training set
  validation_frame = validate,                # Validation set
  hyper_params = hyper_parameters,            # Hyperparameter values to iterate over
  remove_collinear_columns = TRUE,            # Remove collinear columns
  lambda_search = TRUE,                       # Search for the optimal lambda value
  seed = 1234,                                # Fixed seed for reproducible results
  keep_cross_validation_predictions = FALSE,  # Do not keep cross-validation predictions
  compute_p_values = FALSE,                   # Do not compute coefficient p-values
  family = "gamma",                           # Distribution type
  standardize = TRUE,                         # Standardize continuous variables
  nfolds = 2,                                 # Number of cross-validation folds
  fold_assignment = "Modulo",                 # Fold assignment scheme for cross-validation
  link = "log"                                # Link function
)
运行上面的脚本时,出现以下错误:Error in hyper_names[[index2]] : subscript out of bounds(下标超出范围)
请帮助我找出错误所在
答案 0 :(得分:0)
正如注释中所讨论的,在没有样本data和code的情况下,很难说出error的原因可能是什么。out-of-bounds error可能是因为code试图访问input中不存在的值。因此,问题可能出在传给h2o.grid()的任何一个inputs上。我会检查train和validation数据集中的列和行。问题中的hyperparameters与family="binomial"一起可以很好地运行。
下面的code与glm()可以很好地运行。我做了几个假设,例如:(1)基于创建的sample data,使用了family=binomial而不是family=gamma,(2)响应y是binary,(3)train和test的split ratio,(4)responses的数量限制为三个predictors或independent variables(x1,x2,x3),(5)一个binary response variable(y)。
导入库
library(h2o)
library(h2oEnsemble)
创建示例数据
# Build a 100-row toy data set: three non-negative numeric predictors and a
# two-level ("no"/"yes") factor response.
x1 <- abs(100 * rnorm(100))
x2 <- 10 + abs(100 * rnorm(100))
x3 <- 100 + abs(100 * rnorm(100))
# floor() of a uniform draw on [0, 1.5) is 0 for draws below 1 and 1 otherwise.
y <- floor(runif(100, 0, 1.5))
df <- data.frame(x1, x2, x3, y)
# Recode 1 -> "yes", 0 -> "no"; levels are given explicitly so both always
# exist, in a fixed order, even if one value happens not to be drawn.
df$y <- factor(ifelse(df$y == 1, "yes", "no"), levels = c("no", "yes"))
head(df)
初始化h2o
h2o.init()
以必需的h2o格式准备data
# Convert the R data frame to an H2O frame and name the response/predictors.
df <- as.h2o(df)
y <- "y"
x <- setdiff(names(df), y)
# Keep only rows whose response is one of the two expected labels.
df <- df[df$y %in% c("no", "yes"), ]
# NOTE(review): the return value is discarded here; confirm that
# h2o.setLevels() applies the levels in place for this h2o version.
h2o.setLevels(df$y, c("no", "yes"))
# Split the data; the three names assigned below correspond to the two
# requested ratios (0.6, 0.15) plus the remainder.
data <- h2o.splitFrame(df, ratios = c(0.6, 0.15))
names(data) <- c("train", "valid", "test")
data$train
设置parameters
# Identifier for the grid, and the hyperparameter values the grid search
# will iterate over.
grid_id <- "glm_grid"
hyper_parameters <- list(
  alpha = c(0, 0.5, 1),
  lambda = c(1, 0.5, 0.1, 0.01),
  missing_values_handling = c("Skip", "MeanImputation"),
  tweedie_variance_power = c(0, 1, 1.1, 1.8, 1.9, 2, 2.1, 2.5, 2.6, 3, 5, 7),
  seed = 1234
)
拟合h2o.grid()
输出