Here is my code:
const cells = [
  tf.layers.simpleRNNCell({units: 4, activation: 'relu'}),
  tf.layers.simpleRNNCell({units: 3, activation: 'relu'}),
];
const startX = tf.tensor([
  [[10, 8, 5]],
  [[0, 2, 0]],
]);
const y = tf.tensor([
  [[0.3, 0.5, 0.4]],
  [[0.1, 0, 0]],
]);
const initRnn = ({ inputShape, ...rnnProps }) => {
  const rnn = tf.layers.rnn({
    ...rnnProps,
    returnState: true
  });
  const train = (x, state) => {
    if (!state) {
      state = rnnProps.cell
        .map(({ units }) => tf.ones([x.shape[0], units]))
        .reverse();
    }
    const [yPred, ...newState] = rnn.apply(x, { initialState: state });
    return [yPred, newState];
  };
  return { train };
};
const optimizer = tf.train.rmsprop(0.1);
const getLoss = (y, yPred) =>
  tf.losses.softmaxCrossEntropy(y, yPred).mean();
const { train } = initRnn({
  cell: cells,
  returnSequences: true,
  inputShape: startX.shape
});
train(tf.zeros(startX.shape)); // to initialize weights
for (let i = 0; i < 10; i++) {
  let state = undefined;
  let x = startX.clone();
  optimizer.minimize(() => {
    for (let ts = 0; ts < 10; ts++) {
      const [yPred, newState] = train(x, state);
      state = newState;
      x = yPred.clone();
      if (ts === 9) {
        const loss = getLoss(y, yPred);
        loss.print();
        return loss; // the error is thrown here
      }
    }
  });
}
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"></script>
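As a side note on shapes: startX and y both have shape [2, 1, 3] (a batch of 2, one time step, 3 features), and because the last cell has units: 3 and returnSequences is true, every prediction comes out with that same [2, 1, 3] shape, which is what makes it possible to feed a prediction back in as the next input. A quick check of that, assuming the definitions above (cells, startX, train) are in scope:

console.log(startX.shape);   // [2, 1, 3]
const [p] = train(startX);   // no state passed, so train fills it with ones
console.log(p.shape);        // [2, 1, 3], same shape as startX, so it can be fed back in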
Running the snippet above throws the error Uncaught Error: Argument tensors passed to stack must be a Tensor[] or TensorLike[]. But look at what happens if I keep using startX as the input the whole time instead:
const cells = [
  tf.layers.simpleRNNCell({units: 4, activation: 'relu'}),
  tf.layers.simpleRNNCell({units: 3, activation: 'relu'}),
];
const startX = tf.tensor([
  [[10, 8, 5]],
  [[0, 2, 0]],
]);
const y = tf.tensor([
  [[0.3, 0.5, 0.4]],
  [[0.1, 0, 0]],
]);
const initRnn = ({ inputShape, ...rnnProps }) => {
  const rnn = tf.layers.rnn({
    ...rnnProps,
    returnState: true
  });
  const train = (x, state) => {
    if (!state) {
      state = rnnProps.cell
        .map(({ units }) => tf.ones([x.shape[0], units]))
        .reverse();
    }
    const [yPred, ...newState] = rnn.apply(x, { initialState: state });
    return [yPred, newState];
  };
  return { train };
};
const optimizer = tf.train.rmsprop(0.1);
const getLoss = (y, yPred) =>
  tf.losses.softmaxCrossEntropy(y, yPred).mean();
const { train } = initRnn({
  cell: cells,
  returnSequences: true,
  inputShape: startX.shape
});
train(tf.zeros(startX.shape)); // to initialize weights
for (let i = 0; i < 10; i++) {
  let state = undefined;
  let x = startX.clone();
  optimizer.minimize(() => {
    for (let ts = 0; ts < 10; ts++) {
      const [yPred, newState] = train(startX, state);
      state = newState;
      x = yPred.clone();
      if (ts === 9) {
        const loss = getLoss(y, yPred);
        loss.print();
        return loss;
      }
    }
  });
}
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest"></script>
This version runs all the way through. The only difference between the two snippets is that in the failing one I use the previous time step's output as the input, while in the passing one startX is used as the input the whole time.

Surprisingly, the error is thrown at the return inside the optimizer callback. So reusing the previous output 10 times (up to the point where the loss is returned) causes no problem, and at that point the optimizer should adjust the weights. But if using the previous output as the input causes no issue across the 10 time steps, why does it fail when the gradients are computed?
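For reference, here is a minimal sketch of what I understand the feed-the-output-back pattern inside optimizer.minimize to amount to, with a plain dense layer standing in for the stacked simpleRNNCells and a made-up mean-squared-error target (layer, x0, target and opt are just placeholder names for this sketch, not part of the code above):

// Sketch only: a dense layer's own output is fed back in as its next
// input for 10 steps, and the loss is returned from inside minimize.
const layer = tf.layers.dense({units: 3, inputShape: [3]});
const x0 = tf.ones([2, 3]);
const target = tf.zeros([2, 3]);
const opt = tf.train.rmsprop(0.1);
layer.apply(x0); // build the weights once
opt.minimize(() => {
  let x = x0;
  for (let ts = 0; ts < 10; ts++) {
    x = layer.apply(x); // previous output becomes the next input
  }
  return tf.losses.meanSquaredError(target, x); // scalar loss
});

Apart from the loss function, the failing snippet differs from this mainly in that it also passes the RNN state returned by one call into the next call.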