I've been reading Deep Learning with R, and Chapter 6 introduces generators. A generator that yields (samples, targets) works without issue in fit_generator or evaluate_generator:
generator <- function(data, lookback, delay, min_index, max_index,
                      shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index))
    max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index + lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i + batch_size - 1, max_index))
      rows
      length(rows)
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                dim(data)[[-1]]))
    targets <- array(0, dim = c(length(rows)))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,]
      targets[[j]] <- data[rows[[j]] + delay, 9]
    }
    list(samples, targets)
  }
}
test_gen <- generator(
  data,
  lookback = lookback,
  delay = delay,
  min_index = validation_index + 1,
  max_index = NULL,
  step = step,
  batch_size = batch_size
)

## no issues here
test_steps <- (nrow(data) - validation_index + 1 - lookback) / batch_size

perf <- my_model %>% evaluate_generator(test_gen, steps = test_steps)
However, when I try to change the generator so that it yields only samples:
generator_pred <- function(data, lookback, delay, min_index, max_index,
                           shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index))
    max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index + lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i + batch_size - 1, max_index))
      rows
      length(rows)
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                dim(data)[[-1]]))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,]
    }
    samples
  }
}
test_gen_pred <- generator_pred(
  data,
  lookback = lookback,
  delay = delay,
  min_index = validation_index + 1,
  max_index = NULL,
  step = step,
  batch_size = batch_size
)

test_steps <- (nrow(data) - validation_index + 1 - lookback) / batch_size

predict_generator(my_model, test_gen_pred, steps = test_steps)
I get an error from the reticulate wrapper around the equivalent Python generator: the value returned from R is compared against a completion sentinel (res == self.completed), and because a bare array is returned the comparison is element-wise and ambiguous:
Exception in thread Thread-1064:
Traceback (most recent call last):
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\data_utils.py", line 579, in data_generator_task
generator_output = next(self._generator)
File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 23, in __next__
return self.next()
File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 39, in next
if (res == self.completed):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I've read that the generator must return the same object that predict_on_batch takes as input. The following runs successfully:
test_gen_pred <- generator_pred(
  data,
  lookback = lookback,
  delay = delay,
  min_index = validation_index + 1,
  max_index = NULL,
  step = step,
  batch_size = batch_size
)

t <- test_gen_pred()
predict_on_batch(my_model, t)
Is the generator interface not implemented correctly? I checked that test_gen_pred() returns a tensor with the correct shape when called, and it does, since I can call predict_on_batch successfully on its output.
Answer 0 (score: 3)
You can also use series_generator() from the kerasgenerator package, which provides a return_target option if you want to use it for prediction.

A simple example:

First, some settings to frame the data as a supervised problem:
# example data
data <- data.frame(
  x = runif(80),
  y = runif(80),
  z = runif(80)
)
# variables
x <- c("x", "y")
y <- 2:3
# supervise settings
lookback <- 10
timesteps <- 10
# number of train sample
train_length <- 40
# data settings
batch_size <- 10
# train row indices
train_end <- nrow(data)
train_start <- train_end - train_length + 1
# number of steps to see full data
train_steps <- train_length / batch_size
Then you can define the generators like this:
# import libs
library(kerasgenerator)
# train generator
train_gen <- series_generator(
  data = data,
  y = y,
  x = x,
  lookback = lookback,
  timesteps = timesteps,
  start_index = train_start,
  end_index = train_end,
  batch_size = batch_size,
  return_target = TRUE
)

# predict generator
predict_gen <- series_generator(
  data = data,
  y = y,
  x = x,
  lookback = lookback,
  timesteps = timesteps,
  start_index = train_start,
  end_index = train_end,
  batch_size = batch_size,
  return_target = FALSE
)
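As a quick sanity check (this assumes the object returned by series_generator() is an ordinary R function you can call directly, like the generator in the question; check the package docs if unsure), you can pull one batch and inspect its shape before training:

# pull one batch from the train generator (assumption: it can be called directly)
batch <- train_gen()

# expected structure: list(samples, targets)
str(batch)

# samples should be batch_size x timesteps x length(x)
dim(batch[[1]])

# targets should match the model output, here likely batch_size x length(y)
dim(batch[[2]])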
Let's try the data generators on an example model:
# import libs
library(keras)
# initiate a sequential model
model <- keras_model_sequential()
# define the model
model %>%
  # layer lstm
  layer_lstm(
    name = "lstm",
    input_shape = list(timesteps, length(x)),
    units = 16,
    dropout = 0.1,
    recurrent_dropout = 0.1,
    return_sequences = FALSE
  ) %>%
  # layer output
  layer_dense(
    name = "output",
    units = length(y)
  )

# compile the model
model %>% compile(
  optimizer = "rmsprop",
  loss = "mse"
)
# model summary
summary(model)
# set number of epochs
epochs <- 10
# model fitting
history <- model %>% fit_generator(
  generator = train_gen,
  steps_per_epoch = train_steps,
  epochs = epochs
)

# history plot
plot(history)

# evaluate on train dataset
model %>% evaluate_generator(
  generator = train_gen,
  steps = train_steps
)

# predict on train dataset
model %>% predict_generator(
  generator = predict_gen,
  steps = train_steps
)
It also provides forecast_generator() if you're interested in forecasting. See the vignettes for complete examples.

Disclaimer: I'm the author of the package.
Answer 1 (score: 2)
I was looking for exactly the same answer for a few days, and finally got it working by having my pred_generator return a list (instead of the samples directly)!

In your case:
generator_pred <- function(data, lookback, delay, min_index, max_index,
                           shuffle = FALSE, batch_size = 60, step = 1) {
  <...>
    list(samples)
  }
}
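For completeness, here is a sketch of the question's generator_pred with that single change applied (everything else is taken unchanged from the code in the question):

generator_pred <- function(data, lookback, delay, min_index, max_index,
                           shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index))
    max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index + lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i + batch_size - 1, max_index))
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                dim(data)[[-1]]))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,]
    }
    # wrapping the array in a list keeps the reticulate generator wrapper
    # from comparing the raw array against its completion sentinel
    list(samples)
  }
}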