这摘自Francois Chollet所著的《 R的深度学习》,第6.3章。
我可以使用其他模型(例如covnets)在2 gpu上成功训练NN。当我尝试使用顺序模型进行处理时,GPU的运行速度比CPU慢。
我监视GPU,实际上它们正在运行并被利用。我还尝试将批处理大小增加几倍,但是没有运气。
我想知道可能的瓶颈是什么。可复制的代码在下面作为R笔记本。我只更改了使用multi_gpu_model()的最后一块。
```{r, eval=FALSE, echo=TRUE}
dir.create("~/Downloads/jena_climate", recursive = TRUE)
download.file(
"https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip",
"~/Downloads/jena_climate/jena_climate_2009_2016.csv.zip"
)
unzip(
"~/Downloads/jena_climate/jena_climate_2009_2016.csv.zip",
exdir = "~/Downloads/jena_climate"
)
```
```{r, results='hide'}
library(tibble)
library(readr)
data_dir <- "~/Downloads/jena_climate"
fname <- file.path(data_dir, "jena_climate_2009_2016.csv")
data <- read_csv(fname)
```
```{r}
data <- data.matrix(data[,-1])
```
```{r}
train_data <- data[1:200000,]
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
data <- scale(data, center = mean, scale = std)
```
```{r}
generator <- function(data, lookback, delay, min_index, max_index,
shuffle = FALSE, batch_size = 128, step = 6) {
if (is.null(max_index))
max_index <- nrow(data) - delay - 1
i <- min_index + lookback
function() {
if (shuffle) {
rows <- sample(c((min_index+lookback):max_index), size = batch_size)
} else {
if (i + batch_size >= max_index)
i <<- min_index + lookback
rows <- c(i:min(i+batch_size, max_index))
i <<- i + length(rows)
}
samples <- array(0, dim = c(length(rows),
lookback / step,
dim(data)[[-1]]))
targets <- array(0, dim = c(length(rows)))
for (j in 1:length(rows)) {
indices <- seq(rows[[j]] - lookback, rows[[j]],
length.out = dim(samples)[[2]])
samples[j,,] <- data[indices,]
targets[[j]] <- data[rows[[j]] + delay,2]
}
list(samples, targets)
}
}
```
```{r}
lookback <- 1440
step <- 6
delay <- 144
batch_size <- 128
train_gen <- generator(
data,
lookback = lookback,
delay = delay,
min_index = 1,
max_index = 200000,
shuffle = TRUE,
step = step,
batch_size = batch_size
)
val_gen = generator(
data,
lookback = lookback,
delay = delay,
min_index = 200001,
max_index = 300000,
step = step,
batch_size = batch_size
)
test_gen <- generator(
data,
lookback = lookback,
delay = delay,
min_index = 300001,
max_index = NULL,
step = step,
batch_size = batch_size
)
# This is how many steps to draw from `val_gen`
# in order to see the whole validation set:
val_steps <- (300000 - 200001 - lookback) / batch_size
# This is how many steps to draw from `test_gen`
# in order to see the whole test set:
test_steps <- (nrow(data) - 300001 - lookback) / batch_size
```
```{}
model <- keras_model_sequential() %>%
layer_gru(units = 32, input_shape = list(NULL, dim(data)[[-1]])) %>%
layer_dense(units = 1)
parallel_model <- multi_gpu_model(model, gpus = 2)
parallel_model %>% compile(
optimizer = optimizer_rmsprop(),
loss = "mae"
)
history <- parallel_model %>% fit_generator(
train_gen,
steps_per_epoch = 500,
epochs = 20,
validation_data = val_gen,
validation_steps = val_steps
)
```