Mxnet RNN时间序列预测

时间:2017-01-02 10:20:08

标签: r recurrent-neural-network mxnet

我有一段代码,用来为时间序列的观测值构建一个带有 5 个滞后变量的 RNN 模型。代码如下:

# Packages for the download and the time-series handling.
library(Quandl)
library(xts)

# Register the Quandl API key.
key <- "*******************"
Quandl.api_key(key)

# Download the series as an xts object, drop its first row and pass
# column 5 through scale().
sh_stock_ex <- Quandl("YAHOO/SS_600292", type = "xts")
data <- scale(sh_stock_ex[-1, 5])

# Lags 1..5 of the scaled series, each trimmed with na.trim() and then
# merged with all = FALSE.
feat <- merge(
  na.trim(lag(data, 1)),
  na.trim(lag(data, 2)),
  na.trim(lag(data, 3)),
  na.trim(lag(data, 4)),
  na.trim(lag(data, 5)),
  all = FALSE
)

# Append the unlagged series as the target column.
dataset <- merge(feat, data, all = FALSE)
colnames(dataset) <- c("lag.1", "lag.2", "lag.3", "lag.4", "lag.5", "obj")

# Rows 1..4000 are used for training, every remaining row for testing.
index <- 1:4000
training <- as.data.frame(dataset[index, ])
testing <- as.data.frame(dataset[-index, ])

library(mxnet)

# Columns 1-5 are the lagged features, column 6 is the target.
train.x <- data.matrix(training[, -6])
train.y <- training[, 6]
test.x <- data.matrix(testing[, -6])
test.y <- testing[, 6]

# Build labels for a 2-D input by rotating its elements one position to the
# left in column-major order.
#
# Element k of the result holds element k + 1 of X, and the last addressed
# element wraps around to X[1].
#
# Args:
#   X: a matrix (2-D array); dim(X)[1] and dim(X)[2] are both read.
#
# Returns:
#   An array with the same dimensions as X holding the rotated values.
#   An input with zero rows or zero columns yields an empty array whose
#   dimensions are preserved.
get.label <- function(X) {
  label <- array(0, dim = dim(X))
  # Number of cells addressed: rows * columns.
  n <- dim(X)[1] * dim(X)[2]
  # seq_len() is empty when n == 0, so nothing is written for empty input
  # (a `1:0` style loop would run twice and destroy the dimensions).
  idx <- seq_len(n)
  # idx %% n + 1 maps 1..(n - 1) to 2..n and n back to 1.
  label[idx] <- X[idx %% n + 1]
  label
}
# Labels are derived from the transposed feature matrices via get.label().
X.train.label <- get.label(t(train.x))
X.val.label <- get.label(t(test.x))

# mx.rnn() receives each data set as a list with `data` and `label` entries;
# both hold the transposed layout (one original row per column).
X.train <- list(data = t(train.x), label = X.train.label)
X.val <- list(data = t(test.x), label = X.val.label)

# Hyper-parameters.
batch.size <- 5
seq.len <- 5
num.hidden <- 3
num.embed <- 3
num.rnn.layer <- 1
num.lstm.layer <- 1
num.round <- 1
update.period <- 1
learning.rate <- 0.1
wd <- 0.00001
clip_gradient <- 1

# Train the network with a fixed seed.
mx.set.seed(0)
model <- mx.rnn(
  X.train,
  X.val,
  num.rnn.layer = num.rnn.layer,
  seq.len = seq.len,
  num.hidden = num.hidden,
  num.embed = num.embed,
  num.label = 5,
  batch.size = batch.size,
  input.size = 5,
  ctx = mx.cpu(),
  num.round = num.round,
  update.period = update.period,
  initializer = mx.init.uniform(0.01),
  dropout = 0,
  optimizer = "sgd",
  batch.norm = FALSE,
  learning.rate = learning.rate,
  wd = wd,
  clip_gradient = clip_gradient
)

# preds = predict(model, t(test.x))

# Build an inference model from the trained parameters; the result is not
# assigned, so it is printed at top level.
mx.rnn.inference(
  num.rnn.layer = num.rnn.layer,
  input.size = 5,
  num.hidden = num.hidden,
  num.embed = num.embed,
  num.label = 5,
  batch.size = batch.size,
  ctx = mx.cpu(),
  dropout = 0,
  batch.norm = FALSE,
  arg.params = model$arg.params
)

在调用mx.rnn时会引发以下错误:

[15:36:29] src/operator/./reshape-inl.h:311: Using target_shape will be deprecated.
[15:36:29] src/operator/./reshape-inl.h:311: Using target_shape will be deprecated.
[15:36:29] src/operator/./reshape-inl.h:311: Using target_shape will be deprecated.
[15:36:29] src/operator/./reshape-inl.h:311: Using target_shape will be deprecated.
[15:36:29] C:/Users/qkou/mxnet/dmlc-core/include/dmlc/logging.h:235: [15:36:29] src/ndarray/ndarray.cc:231: Check failed: from.shape() == to->shape() operands shape mismatch
Error in exec$update.arg.arrays(arg.arrays, match.name, skip.null) : 
  [15:36:29] src/ndarray/ndarray.cc:231: Check failed: from.shape() == to->shape() operands shape mismatch

这个错误并不是每次都会出现:在此之前有几次运行,代码是能够正常执行的。你能帮我弄清楚发生了什么吗?

1 个答案:

答案 0 :(得分:1)

问题最有可能出在你从 Quandl 获取的数据上,或者出在你处理这些数据的方式上。

如果 NA 出现在序列中间,那么调用 na.trim() 之后这些 NA 仍会留在数组中(na.trim() 只去掉首尾的 NA)。这可能会在某些情况下导致形状不匹配的错误。下次再遇到失败时,建议你检查一下输入数据的状态。

除此之外,在补上几个必需的回调参数之后,你的代码是可以运行的。下面是把参数直接内联、并改用合成数据的版本:

library(mxnet)
library(zoo)

# Synthetic stand-in for the real data: six columns of 100 exponential
# draws each, named like the columns of the original data set.
dataset <- data.frame(
  lag.1 = rexp(100),
  lag.2 = rexp(100),
  lag.3 = rexp(100),
  lag.4 = rexp(100),
  lag.5 = rexp(100),
  obj = rexp(100)
)

# Rows 1..80 go to training, the remaining rows to testing. `dataset` is
# already a data.frame, so the row subsets are data.frames as well.
index <- 1:80
training <- dataset[index, ]
testing <- dataset[-index, ]

# Columns 1-5 are the features, column 6 is the target.
train.x <- data.matrix(training[, -6])
train.y <- training[, 6]
test.x <- data.matrix(testing[, -6])
test.y <- testing[, 6]

# Build labels for a 2-D input by rotating its elements one position to the
# left in column-major order.
#
# Element k of the result holds element k + 1 of X, and the last addressed
# element wraps around to X[1].
#
# Args:
#   X: a matrix (2-D array); dim(X)[1] and dim(X)[2] are both read.
#
# Returns:
#   An array with the same dimensions as X holding the rotated values.
#   An input with zero rows or zero columns yields an empty array whose
#   dimensions are preserved.
get.label <- function(X) {
  label <- array(0, dim = dim(X))
  # Number of cells addressed: rows * columns.
  n <- dim(X)[1] * dim(X)[2]
  # seq_len() is empty when n == 0, so nothing is written for empty input
  # (a `1:0` style loop would run twice and destroy the dimensions).
  idx <- seq_len(n)
  # idx %% n + 1 maps 1..(n - 1) to 2..n and n back to 1.
  label[idx] <- X[idx %% n + 1]
  label
}

# Labels are derived from the transposed feature matrices via get.label().
X.train.label <- get.label(t(train.x))
X.val.label <- get.label(t(test.x))

# Each data set is passed as a list with `data` and `label` entries, both in
# the transposed layout (one original row per column).
X.train <- list(data = t(train.x), label = X.train.label)
X.val <- list(data = t(test.x), label = X.val.label)

# Train with a fixed seed; every hyper-parameter is written inline.
mx.set.seed(0)
model <- mx.rnn(
  X.train, X.val,
  # network shape
  num.rnn.layer = 1, seq.len = 5, num.hidden = 3, num.embed = 3,
  num.label = 5, batch.size = 5, input.size = 5,
  # execution context and schedule
  ctx = mx.cpu(), num.round = 10, update.period = 1,
  # initialisation and regularisation
  initializer = mx.init.uniform(0.01), dropout = 0, batch.norm = FALSE,
  # optimiser settings
  optimizer = "sgd", learning.rate = 0.1, wd = 0.00001, clip_gradient = 1,
  # logging callbacks added on top of the question's original call
  batch.end.callback = mx.callback.log.train.metric(100),
  epoch.end.callback = mx.callback.log.train.metric(100)
)

# Wrap the trained parameters in an inference model.
inference_model <- mx.rnn.inference(
  num.rnn.layer = 1, input.size = 5, num.hidden = 3, num.embed = 3,
  num.label = 5, batch.size = 5,
  ctx = mx.cpu(), dropout = 0, batch.norm = FALSE,
  arg.params = model$arg.params
)

# Run one forward step on five fresh exponential draws.
forward_return <- mx.rnn.forward(inference_model, rexp(5), FALSE)

如果我运行它,我会得到:

Iter [1] Train: Time: 0.0565540790557861 sec, NLL=1.24340298398393, Perp=3.46739289327374
Iter [1] Val: NLL=0.972949155822989, Perp=2.6457356516903
Iter [2] Train: Time: 0.0448548793792725 sec, NLL=1.05087134012195, Perp=2.86014218926439
Iter [2] Val: NLL=0.933895505149184, Perp=2.5444016267033
Iter [3] Train: Time: 0.0413610935211182 sec, NLL=1.03151336330655, Perp=2.80530807416766
Iter [3] Val: NLL=0.928943917861239, Perp=2.53183394062849
Iter [4] Train: Time: 0.0391628742218018 sec, NLL=1.02448851825416, Perp=2.78567027662271
Iter [4] Val: NLL=0.929383579241022, Perp=2.53294733497252
Iter [5] Train: Time: 0.0570688247680664 sec, NLL=1.02105997322465, Perp=2.77613583461354
Iter [5] Val: NLL=0.931082768161547, Perp=2.53725494970957
Iter [6] Train: Time: 0.0490150451660156 sec, NLL=1.01910802455776, Perp=2.77072224521523
Iter [6] Val: NLL=0.932948367271994, Perp=2.54199286844103
Iter [7] Train: Time: 0.0413157939910889 sec, NLL=1.01788873616316, Perp=2.76734599446694
Iter [7] Val: NLL=0.934675413492902, Perp=2.54638680078803
Iter [8] Train: Time: 0.039240837097168 sec, NLL=1.01707631967569, Perp=2.76509866995983
Iter [8] Val: NLL=0.936200143876176, Perp=2.5502723155392
Iter [9] Train: Time: 0.054689884185791 sec, NLL=1.01650758366912, Perp=2.76352650590005
Iter [9] Val: NLL=0.937529913411492, Perp=2.55366584577767
Iter [10] Train: Time: 0.0455291271209717 sec, NLL=1.01609363851068, Perp=2.762382794216
Iter [10] Val: NLL=0.938689138057902, Perp=2.55662783463834

如果我执行预测,我会收到:

[1] "prob"
           [,1]       [,2]       [,3]       [,4]       [,5]
[1,] 0.64961344 0.64961344 0.64961344 0.64961344 0.64961469
[2,] 0.21320672 0.21320672 0.21320672 0.21320672 0.21320683
[3,] 0.08818325 0.08818325 0.08818325 0.08818325 0.08818299
[4,] 0.03633073 0.03633073 0.03633073 0.03633073 0.03633021
[5,] 0.01266575 0.01266575 0.01266575 0.01266575 0.01266531