Question

我正在尝试使用tensorflow制作DQN代理。我从这个资料库中汲取了灵感： https://github.com/seann999/dodge_tfjs/blob/master/agent.js

我写了一个类Agent，它由2个神经网络组成，并具有以下功能：

const DEFAULT_OPTS: any = {
  activation: 'sigmoid',
};

export function createModel(inputSize: number, outputSize: number, opts: any = {}): tf.Model {
  const model = tf.sequential();
  opts = Object.assign({}, DEFAULT_OPTS, opts);
  /* INPUT */
  model.add(
    tf.layers.dense({
      inputDim: inputSize,
      units: 64,
      activation: opts.activation,
    })
  );

  /* HIDDEN */
  model.add(
    tf.layers.dense({
      inputDim: 64,
      units: 128,
      activation: opts.activation,
    })
  );
  model.add(
    tf.layers.dense({
      inputDim: 128,
      units: 64,
      activation: opts.activation,
    })
  );

  /* OUTPUT */
  model.add(
    tf.layers.dense({
      inputDim: 64,
      units: outputSize,
      activation: 'relu',
    })
  );

  return model;
}

然后我写了一个学习方法，可以计算损耗并尽量减少误差（仍然受seann999的启发）。

  constructor(config: AgentConfig) {
    this.config = Object.assign({}, DEFAULT_CONFIG, config);
    this.memory = new Memory(config.memorySize);
    this.Q = createModel(config.inputSize, config.outputSize);
    this.QTarget = createModel(config.inputSize, config.outputSize);
    this.optimizer = tf.train.adam(this.config.learningRate);
    // Get weights refs
    this.weights = [];
    for (const w of this.Q.weights) {
      this.weights.push((w as any).val);
    }
    this.updateTarget();
  }

...

  public async learn() {
    if (this.stats.learnCount % this.config.refreshTargetEvery === 0) {
      this.updateTarget();
    }
    const batchSize = 32;
    if (this.memory.getLength() > batchSize) {
      const batch = this.memory.getBatch(batchSize);
      // Batch tensors
      const batchState = tf.tensor2d(batch.map((el: any) => el.state)).asType('float32');
      const batchAction = tf
        .oneHot(tf.tensor1d(batch.map((el: any) => actions.indexOf(el.action)), 'int32'), actions.length)
        .asType('float32');
      const batchReward = tf.tensor1d(batch.map((el: any) => el.reward)).asType('float32');
      const batchNextState = tf.tensor2d(batch.map((el: any) => el.nextState)).asType('float32');
      const batchDone = tf.tensor1d(batch.map((el: any) => el.done)).asType('float32');

      // prodict nextState with targetNet
      const targets = this.calcTarget(batchReward, batchNextState, batchDone).asType('float32');

      const loss = this.optimizer.minimize(
        () => {
          const x = tf.variable(batchState);
          const predictions = (this.Q.predict(x) as tf.Tensor).argMax(1).asType('float32');
          return tf.losses.meanSquaredError(targets, predictions) as any;
        },
        true,
        this.weights
      );
      console.log('loss');
      console.log(loss);
    }
    // qMaxNextState = reward + game + this.QTarget.predict()
    this.stats.learnCount++;
    return;
  }

  private calcTarget(batchReward: any, batchNextState: any, batchDone: any) {
    return tf.tidy(() => {
      const maxQ = (this.QTarget.predict(batchNextState) as tf.Tensor).argMax(1).asType('float32');
      const targets = batchReward.add(maxQ.mul(tf.scalar(this.config.rewardDiscount)).mul(batchDone));
      return targets;
    });
  }

但是当我执行我的代码时，我得到一个错误。在optimizer.minimize函数期间发生错误。跟踪：

TypeError: Cannot read property 'values' of undefined
    at NodeJSKernelBackend.getInputTensorIds (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:99:22)
    at NodeJSKernelBackend.executeSingleOutput (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:123:73)
    at NodeJSKernelBackend.subtract (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:248:21)
    at environment_1.ENV.engine.runKernel.$a (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:202:33)
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:206:22
    at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)
    at Engine.runKernel (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:202:10)
    at sub_ (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/binary_ops.ts:201:21)
    at Object.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/operation.ts:46:24)
    at Tensor.sub (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tensor.ts:842:22)
    at Object.$x (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/ops/unary_ops.ts:372:46)
    at _loop_1 (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/tape.ts:171:43)
    at Object.backpropagateGradients (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/dist/tape.js:112:9)
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:500:7
    at /home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:156:20
    at Engine.scopedRun (/home/clement/DEV/Crypto/influx-crypto-trader/node_modules/@tensorflow/tfjs-core/src/engine.ts:167:19)

当我在tensorflow中探索引发问题的代码行时，我试图找出为什么这样做的原因。

NodeJSKernelBackend.prototype.getInputTensorIds = function (tensors) {
    var ids = [];
    for (var i = 0; i < tensors.length; i++) {
        var info = this.tensorMap.get(tensors[i].dataId);
        /*if (!info) {
            console.log('tensors[i]')
            console.log(this.tensorMap)
            console.log(tensors[i])
            console.log(info)
         }*/
        if (info.values != null) {
            info.id =
                this.binding.createTensor(info.shape, info.dtype, info.values);
            info.values = null;
            this.tensorMap.set(tensors[i].dataId, info);
        }
        ids.push(info.id);
    }
    return ids;
};

我在info为null且看到张量时登录

Tensor {
  isDisposedInternal: true,
  shape: [ 32, 64 ],
  dtype: 'float32',
  size: 2048,
  strides: [ 64 ],
  dataId: {},
  id: 1494,
  rankType: '2' }

但是tensorMap.get方法无法检索它，这使得信息不确定。我试图了解为什么这是他的问题来修复我的代码，但是我正在努力。

我是tensorflow的初学者，任何帮助都很好。

谢谢

编辑：我不知道为什么，但是当我将激活功能从“ Sigmoid”更改为“ relu”时，它可以工作。如果有人理解，我想知道原因。

tfjs tensorflowjs通过自定义损失getInputTensorIds问题进行优化

0 个答案: