我正在尝试使用tensorflow制作DQN代理。 我从这个资料库中汲取了灵感: https://github.com/seann999/dodge_tfjs/blob/master/agent.js
我写了一个类Agent,它由2个神经网络组成,并具有以下功能:
const DEFAULT_OPTS: any = {
activation: 'sigmoid',
};
export function createModel(inputSize: number, outputSize: number, opts: any = {}): tf.Model {
const model = tf.sequential();
opts = Object.assign({}, DEFAULT_OPTS, opts);
/* INPUT */
model.add(
tf.layers.dense({
inputDim: inputSize,
units: 64,
activation: opts.activation,
})
);
/* HIDDEN */
model.add(
tf.layers.dense({
inputDim: 64,
units: 128,
activation: opts.activation,
})
);
model.add(
tf.layers.dense({
inputDim: 128,
units: 64,
activation: opts.activation,
})
);
/* OUTPUT */
model.add(
tf.layers.dense({
inputDim: 64,
units: outputSize,
activation: 'relu',
})
);
return model;
}
然后我写了一个学习方法,可以计算损耗并尽量减少误差(仍然受seann999的启发)。
constructor(config: AgentConfig) {
this.config = Object.assign({}, DEFAULT_CONFIG, config);
this.memory = new Memory(config.memorySize);
this.Q = createModel(config.inputSize, config.outputSize);
this.QTarget = createModel(config.inputSize, config.outputSize);
this.optimizer = tf.train.adam(this.config.learningRate);
// Get weights refs
this.weights = [];
for (const w of this.Q.weights) {
this.weights.push((w as any).val);
}
this.updateTarget();
}
...
public async learn() {
if (this.stats.learnCount % this.config.refreshTargetEvery === 0) {
this.updateTarget();
}
const batchSize = 32;
if (this.memory.getLength() > batchSize) {
const batch = this.memory.getBatch(batchSize);
// Batch tensors
const batchState = tf.tensor2d(batch.map((el: any) => el.state)).asType('float32');
const batchAction = tf
.oneHot(tf.tensor1d(batch.map((el: any) => actions.indexOf(el.action)), 'int32'), actions.length)
.asType('float32');
const batchReward = tf.tensor1d(batch.map((el: any) => el.reward)).asType('float32');
const batchNextState = tf.tensor2d(batch.map((el: any) => el.nextState)).asType('float32');
const batchDone = tf.tensor1d(batch.map((el: any) => el.done)).asType('float32');
// prodict nextState with targetNet
const targets = this.calcTarget(batchReward, batchNextState, batchDone).asType('float32');
const loss = this.optimizer.minimize(
() => {
const x = tf.variable(batchState);
const predictions = (this.Q.predict(x) as tf.Tensor).argMax(1).asType('float32');
return tf.losses.meanSquaredError(targets, predictions) as any;
},
true,
this.weights
);
console.log('loss');
console.log(loss);
}
// qMaxNextState = reward + game + this.QTarget.predict()
this.stats.learnCount++;
return;
}
private calcTarget(batchReward: any, batchNextState: any, batchDone: any) {
return tf.tidy(() => {
const maxQ = (this.QTarget.predict(batchNextState) as tf.Tensor).argMax(1).asType('float32');
const targets = batchReward.add(maxQ.mul(tf.scalar(this.config.rewardDiscount)).mul(batchDone));
return targets;
});
}
但是当我执行我的代码时,我得到一个错误。 在optimizer.minimize函数期间发生错误。 跟踪:
TypeError: Cannot read property 'values' of undefined
at NodeJSKernelBackend.getInputTensorIds (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:99:22)
at NodeJSKernelBackend.executeSingleOutput (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:123:73)
at NodeJSKernelBackend.subtract (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:248:21)
at environment_1.ENV.engine.runKernel.$a (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:202:33)
at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:206:22
at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)
at Engine.runKernel (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:202:10)
at sub_ (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:201:21)
at Object.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/operation.ts:46:24)
at Tensor.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tensor.ts:842:22)
at Object.$x (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/unary_ops.ts:372:46)
at _loop_1 (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tape.ts:171:43)
at Object.backpropagateGradients (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/dist/tape.js:112:9)
at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:500:7
at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:156:20
at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)
当我在tensorflow中探索引发问题的代码行时,我试图找出为什么这样做的原因。
NodeJSKernelBackend.prototype.getInputTensorIds = function (tensors) {
var ids = [];
for (var i = 0; i < tensors.length; i++) {
var info = this.tensorMap.get(tensors[i].dataId);
/*if (!info) {
console.log('tensors[i]')
console.log(this.tensorMap)
console.log(tensors[i])
console.log(info)
}*/
if (info.values != null) {
info.id =
this.binding.createTensor(info.shape, info.dtype, info.values);
info.values = null;
this.tensorMap.set(tensors[i].dataId, info);
}
ids.push(info.id);
}
return ids;
};
我在info为null且看到张量时登录
Tensor {
isDisposedInternal: true,
shape: [ 32, 64 ],
dtype: 'float32',
size: 2048,
strides: [ 64 ],
dataId: {},
id: 1494,
rankType: '2' }
但是tensorMap.get方法无法检索它,这使得信息不确定。 我试图了解为什么这是他的问题来修复我的代码,但是我正在努力。
我是tensorflow的初学者,任何帮助都很好。
谢谢
编辑: 我不知道为什么,但是当我将激活功能从“ Sigmoid”更改为“ relu”时,它可以工作。如果有人理解,我想知道原因。