在Keras R界面中将predict_generator与自定义生成器一起使用时出错

时间:2018-01-31 22:04:44

标签: r keras

我一直在阅读《Deep Learning with R》(R语言深度学习)一书,其中第6章介绍了生成器。返回(样本,目标值)的生成器在fit_generator或evaluate_generator中使用时没有问题:

#' Build a batch generator that yields (samples, targets) pairs
#'
#' Returns a closure; every call to the closure produces one batch cut out
#' of `data`.
#'
#' @param data Two-dimensional, numerically indexable object (e.g. a matrix).
#'   The target value is read from column 9.
#' @param lookback How many rows before the sampled row each window starts.
#' @param delay How many rows after the sampled row the target is read from.
#' @param min_index,max_index Bounds on the rows to draw from. A `NULL`
#'   `max_index` is replaced by `nrow(data) - delay - 1`.
#' @param shuffle If `TRUE`, rows are drawn at random; otherwise they are
#'   served in order, restarting once a full batch would reach `max_index`.
#' @param batch_size Number of rows per batch.
#' @param step Divisor of `lookback`; each window holds `lookback / step` rows.
#' @return A zero-argument function returning `list(samples, targets)`.
generator <- function(data, lookback, delay, min_index, max_index,
                      shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index)) {
    max_index <- nrow(data) - delay - 1
  }
  # Cursor of the next row to serve; it lives in the closure environment and
  # is advanced with `<<-` on every sequential call.
  i <- min_index + lookback

  function() {
    if (shuffle) {
      rows <- sample(c((min_index + lookback):max_index), size = batch_size)
    } else {
      # Start over once a full batch would reach max_index.
      if (i + batch_size >= max_index) {
        i <<- min_index + lookback
      }
      rows <- c(i:min(i + batch_size - 1, max_index))
      i <<- i + length(rows)
    }

    samples <- array(0, dim = c(length(rows), lookback / step, ncol(data)))
    targets <- array(0, dim = c(length(rows)))

    # seq_along() stays safe even if `rows` were ever empty.
    for (j in seq_along(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j, , ] <- data[indices, ]
      targets[[j]] <- data[rows[[j]] + delay, 9]
    }

    list(samples, targets)
  }
}

# Generator over the rows that follow validation_index
test_gen <- generator(
  data,
  lookback = lookback,
  delay = delay,
  min_index = validation_index + 1,
  max_index = NULL,
  step = step,
  batch_size = batch_size
)
## no issues here
# min_index (validation_index + 1) is parenthesized so the whole offset is
# subtracted; without the parentheses the "+ 1" was added instead.
test_steps <- (nrow(data) - (validation_index + 1) - lookback) / batch_size
perf <- my_model %>% evaluate_generator(test_gen, steps = test_steps)

但是,当尝试将生成器更改为仅生成样本时:

#' Build a batch generator that yields samples only (no targets)
#'
#' Returns a closure; every call to the closure produces one batch of input
#' windows cut out of `data`.
#'
#' @param data Two-dimensional, numerically indexable object (e.g. a matrix).
#' @param lookback How many rows before the sampled row each window starts.
#' @param delay Only used to derive the default `max_index`.
#' @param min_index,max_index Bounds on the rows to draw from. A `NULL`
#'   `max_index` is replaced by `nrow(data) - delay - 1`.
#' @param shuffle If `TRUE`, rows are drawn at random; otherwise they are
#'   served in order, restarting once a full batch would reach `max_index`.
#' @param batch_size Number of rows per batch.
#' @param step Divisor of `lookback`; each window holds `lookback / step` rows.
#' @return A zero-argument function returning the `samples` array.
generator_pred <- function(data, lookback, delay, min_index, max_index,
                      shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index)) {
    max_index <- nrow(data) - delay - 1
  }
  # Cursor of the next row to serve; it lives in the closure environment and
  # is advanced with `<<-` on every sequential call.
  i <- min_index + lookback

  function() {
    if (shuffle) {
      rows <- sample(c((min_index + lookback):max_index), size = batch_size)
    } else {
      # Start over once a full batch would reach max_index.
      if (i + batch_size >= max_index) {
        i <<- min_index + lookback
      }
      rows <- c(i:min(i + batch_size - 1, max_index))
      i <<- i + length(rows)
    }

    samples <- array(0, dim = c(length(rows), lookback / step, ncol(data)))

    # seq_along() stays safe even if `rows` were ever empty.
    for (j in seq_along(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j, , ] <- data[indices, ]
    }

    # NOTE(review): the closure returns the bare array. The reticulate
    # traceback reported for this generator fails at `res == self.completed`;
    # wrapping the result as `list(samples)` is the reported workaround.
    # The return shape is left unchanged here.
    samples
  }
}

# Sample-only generator over the rows that follow validation_index
test_gen_pred <- generator_pred(
  data,
  lookback = lookback,
  delay = delay,
  min_index = validation_index + 1,
  max_index = NULL,
  step = step,
  batch_size = batch_size
)

# min_index (validation_index + 1) is parenthesized so the whole offset is
# subtracted; without the parentheses the "+ 1" was added instead.
test_steps <- (nrow(data) - (validation_index + 1) - lookback) / batch_size
predict_generator(my_model, test_gen_pred, steps = test_steps)

我得到如下错误,原因似乎是等效的Python生成器把返回的数组直接用于比较:

Exception in thread Thread-1064:
Traceback (most recent call last):
  File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\data_utils.py", line 579, in data_generator_task
    generator_output = next(self._generator)
  File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 23, in __next__
    return self.next()
  File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 39, in next
    if (res == self.completed):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all() 

我读到生成器返回的对象必须与predict_on_batch接受的输入相同。我成功运行了以下代码:

# Recreate the sample-only generator, this time naming every argument
test_gen_pred <- generator_pred(
  data = data,
  lookback = lookback,
  delay = delay,
  batch_size = batch_size,
  step = step,
  min_index = validation_index + 1,
  max_index = NULL
)

# Pull a single batch by hand and feed it straight to the model
t <- test_gen_pred()
my_model %>% predict_on_batch(t)

是生成器接口没有被正确实现吗?我检查过,调用test_gen_pred()时返回的是形状正确的张量——确实如此,因为我可以把它的返回值直接传给predict_on_batch并成功得到预测结果。

2 个答案:

答案 0 :(得分:3)

您还可以使用kerasgenerator软件包中的series_generator();如果您想将其用于预测,可以使用它提供的return_target选项。

一些简单示例

首先进行一些监督学习相关的设置:

# Toy data set: 80 rows of uniform random numbers in three columns
data <- data.frame(x = runif(80), y = runif(80), z = runif(80))

# Variables: a character vector for x, an integer range for y
x <- c("x", "y")
y <- 2:3

# Window settings for the supervised series
lookback <- 10
timesteps <- 10

# Rows per batch
batch_size <- 10

# Train on the last `train_length` rows of `data`
train_length <- 40
train_end <- nrow(data)
train_start <- train_end - train_length + 1

# Batches needed to go through the training rows once
train_steps <- train_length / batch_size

然后您可以这样定义生成器:

# import libs
library(kerasgenerator)

# Generator used for fitting and evaluation: targets are returned too
train_gen <- series_generator(
  data = data,
  x = x, y = y,
  lookback = lookback, timesteps = timesteps,
  batch_size = batch_size,
  start_index = train_start, end_index = train_end,
  return_target = TRUE
)

# Generator used for prediction: same settings, but return_target = FALSE
predict_gen <- series_generator(
  data = data,
  x = x, y = y,
  lookback = lookback, timesteps = timesteps,
  batch_size = batch_size,
  start_index = train_start, end_index = train_end,
  return_target = FALSE
)

让我们尝试使用示例模型上的数据生成器:

# import libs
library(keras)

# Start from an empty sequential model
model <- keras_model_sequential()

# Stack an LSTM layer followed by a dense output layer
model %>%
  layer_lstm(
    units = 16,
    input_shape = list(timesteps, length(x)),
    return_sequences = FALSE,
    dropout = 0.1,
    recurrent_dropout = 0.1,
    name = "lstm"
  ) %>%
  layer_dense(units = length(y), name = "output")

# Configure training: "rmsprop" optimizer with "mse" loss
compile(model, loss = "mse", optimizer = "rmsprop")

# Print the layer overview
summary(model)

# Number of passes over the training generator
epochs <- 10

# Fit the model from the training generator and keep the history object
history <- fit_generator(
  model,
  generator = train_gen,
  epochs = epochs,
  steps_per_epoch = train_steps
)

# Plot the recorded training history
plot(history)

# Evaluate the model on the batches served by the training generator
evaluate_generator(model, generator = train_gen, steps = train_steps)

# Predict for the same rows through the prediction generator
predict_generator(model, generator = predict_gen, steps = train_steps)

如果您对预测未来值(forecasting)感兴趣,该软件包还提供了forecast_generator()。完整示例请参见该软件包的vignettes。

免责声明:我是该软件包的作者。

答案 1 :(得分:2)

我几天来一直在寻找完全相同问题的答案,最后通过让我的pred_generator返回列表(而不是直接返回samples)解决了问题!

在你的情况下:

# Sketch of the adjusted generator: the elided body (`<...>`) is left out by
# the author; only the closure's final expression is shown.
generator_pred <- function(data, lookback, delay, min_index, max_index,
                  shuffle = FALSE, batch_size = 60, step = 1) {
    <...>            
    # Return the batch wrapped in a list instead of the bare `samples` array
    list(samples)
  }
}