自定义损失函数:NotImplementedError:无法将符号张量(truediv_2:0)转换为numpy数组

时间:2020-03-20 19:30:05

标签: python python-3.x numpy tensorflow keras

我正在尝试编写一个带有额外内部参数(delta)的自定义损失函数,用于实现 Actor-Critic(演员-评论家)算法:

            def loss(y_true, y_pred):
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

但是我得到了错误:

NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.

完整代码:

from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os


class Agent(object):
    """Actor-critic agent made of three Keras models that share layers.

    NOTE: this is the version from the question that raises
    ``NotImplementedError: Cannot convert a symbolic Tensor ... to a numpy
    array`` during ``actor.fit``; it is kept as-is to reproduce the error.
    """
    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        """Store the hyper-parameters and build the actor/critic/policy models.

        Args:
            alpha: learning rate passed to the actor's Adam optimizer.
            beta: learning rate passed to the critic's Adam optimizer.
            gamma: factor applied to the next-state critic value in ``learn``.
            n_action: number of softmax outputs of the actor.
            load: stored on the instance; not used by the code shown here.
            input_dims: size of the observation vector fed to ``main_input``.
            layer_shared: units of the hidden layer shared by actor and critic.
            layer_actor: units of the actor-only hidden layer.
            layer_critic: units of the critic-only hidden layer.
        """
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.load = load
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic
        # Action ids 0 .. n_action-1, sampled from in choose_action.
        self.action_space = [i for i in range(n_action)]

        self.actor, self.critic, self.policy = self.build_actor_critic_network()



    def build_actor_critic_network(self, load=False):
        """Build and compile the models.

        Args:
            load: accepted but not used by the code shown here.

        Returns:
            Tuple ``(model_actor, model_critic, model_policy)``. The actor takes
            ``[main_input, delta]``; critic and policy take ``main_input`` only.
            The policy model is not compiled and shares the actor's output.
        """

        main_input = Input(shape=(self.input_dims,), name='main_input')
        # NOTE: `(1)` is the integer 1, not a one-element tuple.
        # This symbolic Input is later captured by the loss closure below.
        delta = Input(shape=(1), name='delta')

        dense_shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)

        dense_actor = Dense(self.layer_actor, activation='relu', name='dense_actor')(dense_shared)
        output_actor = Dense(self.n_action, activation='softmax', name='output_actor')(dense_actor)

        dense_critic = Dense(self.layer_critic, activation='relu', name='dense_critic')(dense_shared)
        output_critic = Dense(1, activation='linear', name='output_critic')(dense_critic)

        def custom_loss(delta):
            # Closure factory: the returned loss multiplies the negative
            # log-likelihood of the taken action by `delta`.
            def loss(y_true, y_pred):
                # Clip probabilities away from 0 and 1 before taking the log.
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

        model_actor = Model(inputs=[main_input, delta], outputs=output_actor, name='model_actor')
        # NOTE: `delta` here is the symbolic Keras Input tensor; letting it leak
        # into the loss is what this post reports as the cause of the error.
        model_actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss(delta))

        model_critic = Model(inputs=[main_input], outputs=output_critic, name='model_critic')
        model_critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')

        model_policy = Model(inputs=[main_input], outputs=output_actor)
        # layers[1] of the critic is marked non-trainable after both compile()
        # calls (the run output names it "dense_shared").
        # NOTE(review): whether this takes effect without recompiling, and how it
        # affects the actor that shares the layer, should be confirmed.
        model_critic.layers[1].trainable = False
        print(f'layer "{model_critic.layers[1].name}" of the "model_critic" frozen')

        return model_actor, model_critic, model_policy

    def choose_action(self, state):
        """Sample an action id from the policy's predicted probabilities for `state`."""
        # Add a leading batch axis so a single observation can be predicted.
        state = state[np.newaxis,:]
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)
        return action

    def learn(self, state, action, reward, state_, done):
        """Run one fit step of the actor and the critic on a single transition.

        Args:
            state: observation before the step.
            action: index of the action that was taken.
            reward: reward received for the step.
            state_: observation after the step.
            done: truthy when the episode ended; zeroes the bootstrap term.
        """

        state = state[np.newaxis,:]
        state_ = state_[np.newaxis,:]

        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)

        # One-step target and its difference from the current critic estimate.
        target = reward + self.gamma * critic_value_ * (1 - int(done))
        delta = target - critic_value

        # One-hot encoding of the taken action, used as y_true for the actor.
        actions = np.zeros([1, self.n_action])
        actions[np.arange(1), action] = 1.0
        self.actor.fit([state, delta], actions, verbose=1)
        self.critic.fit([state], target, verbose=1)

运行器代码:

import gym

# Training driver: runs the agent on LunarLander and prints a running score.
env = gym.make('LunarLander-v2')
agent = Agent(alpha=0.00002, beta=0.0001, input_dims=8, n_action=4, load=False)

num_episodes = 2000
length_episode = 100  # hard cap on the number of steps per episode
score_history = []

log = 1  # report every `log` episodes, averaging the last `log` scores

for i in range(num_episodes):
    score = 0
    observation = env.reset()
    for _ in range(length_episode):
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        agent.learn(observation, action, reward, observation_, done)
        observation = observation_

        score += reward
        # Stop as soon as the environment reports the episode is over;
        # stepping a finished environment is not valid until reset().
        if done:
            break
    score_history.append(score)
    avg_score = np.mean(score_history[-log:])
    if i % log == 0:
        print(f'episode n°{i}, score {avg_score}')

错误:

layer "dense_shared" of the "model_critic" frozen
Train on 1 samples
1/1 [==============================]
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-14-1d63e256ee52> in <module>
     16         action = agent.choose_action(observation)
     17         observation_, reward, done, info = env.step(action)
---> 18         agent.learn(observation, action, reward, observation_, done)
     19         observation = observation_
     20         if done:

<ipython-input-13-92083476d368> in learn(self, state, action, reward, state_, done)
     82         actions = np.zeros([1, self.n_action])
     83         actions[np.arange(1), action] = 1.0
---> 84         self.actor.fit([state, delta_tensor], actions, verbose=1)
     85         self.critic.fit([state], target, verbose=1)

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    395                       total_epochs=1)
    396                   cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,
--> 397                                  prefix='val_')
    398 
    399     return model.history

C:\ProgramData\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
    117         if type is None:
    118             try:
--> 119                 next(self.gen)
    120             except StopIteration:
    121                 return False

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in on_epoch(self, epoch, mode)
    770         # Epochs only apply to `fit`.
    771         self.callbacks.on_epoch_end(epoch, epoch_logs)
--> 772       self.progbar.on_epoch_end(epoch, epoch_logs)
    773 
    774   @tf_contextlib.contextmanager

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\callbacks.py in on_epoch_end(self, epoch, logs)
    787         self.log_values.append((k, logs[k]))
    788     if self.verbose:
--> 789       self.progbar.update(self.seen, self.log_values)
    790 
    791 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\utils\generic_utils.py in update(self, current, values)
    557         info += ' - %s:' % k
    558         if isinstance(self._values[k], list):
--> 559           avg = np.mean(self._values[k][0] / max(1, self._values[k][1]))
    560           if abs(avg) > 1e-3:
    561             info += ' %.4f' % avg

<__array_function__ internals> in mean(*args, **kwargs)

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims)
   3333 
   3334     return _methods._mean(a, axis=axis, dtype=dtype,
-> 3335                           out=out, **kwargs)
   3336 
   3337 

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims)
    133 
    134 def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
--> 135     arr = asanyarray(a)
    136 
    137     is_float16_result = False

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_asarray.py in asanyarray(a, dtype, order)
    136 
    137     """
--> 138     return array(a, dtype, copy=False, order=order, subok=True)
    139 
    140 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in __array__(self)
    726   def __array__(self):
    727     raise NotImplementedError("Cannot convert a symbolic Tensor ({}) to a numpy"
--> 728                               " array.".format(self.name))
    729 
    730   def __len__(self):

NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.

我对这个错误感到有些困惑。我搜索过多个相关帖子,但没有一个能解决我的问题。我知道这与 delta 应该是张量有关,但既然我已经把它初始化为 `Input`,我觉得应该没有问题。我也尝试过在不同的位置对它进行类型转换,但都没有解决问题。如果您知道如何解决,我将不胜感激 :)

谢谢!

用于测试 Input((1)) 写法的简单示例模型:

from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Model
import numpy as np

# Random toy data: 1000 samples with one feature and a 0/1 label each.
x_train = np.random.random((1000, 1))
y_train = np.random.randint(2, size=(1000, 1))

# NOTE: `(1)` is the integer 1, not a one-element tuple; this snippet
# exists to try exactly that spelling of the input shape.
inp = Input((1))
dense = Dense(10)(inp)
out = Dense(1, activation='sigmoid')(dense)
model = Model(inp,out)

# Standard binary classifier setup; one epoch in batches of 100.
model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['accuracy'])
model.fit(x_train, y_train,epochs=1,batch_size=100)

版本:

import tensorflow as tf
# Report the installed TensorFlow version and the Keras version it bundles.
print('tf:', tf.__version__) 
print('keras:', tf.keras.__version__)

输出: tf:2.1.0 keras:2.2.4-tf

编辑:谢谢你,mdaoust!这个方法有效 :)

不过,我初始化 delta 变量的方式可能不太好:我不知道怎样才能像 `self.actor.delta.assign(delta)` 那样把变量挂到 “actor” 模型上。如果有更优雅的写法,请告诉我!

供遇到相同问题的人参考的可运行代码:

from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
# tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os


class Agent:
    """Actor-critic agent made of three Keras models that share layers.

    The actor's loss is weighted by ``delta``, which lives in a non-trainable
    ``tf.Variable`` that ``learn`` overwrites before every actor update. This
    keeps symbolic tensors out of the loss closure.
    """

    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        """Store the hyper-parameters and build the models.

        Args:
            alpha: learning rate of the actor's Adam optimizer.
            beta: learning rate of the critic's Adam optimizer.
            gamma: factor applied to the next-state critic value in ``learn``.
            n_action: number of softmax outputs of the actor.
            load: stored on the instance; not used by the code shown here.
            input_dims: size of the observation vector fed to ``main_input``.
            layer_shared: units of the hidden layer shared by actor and critic.
            layer_actor: units of the actor-only hidden layer.
            layer_critic: units of the critic-only hidden layer.
        """
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.load = load
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic
        # Action ids 0 .. n_action-1, sampled from in choose_action.
        self.action_space = list(range(n_action))

        (self.actor, self.critic,
         self.policy, self.delta) = self.build_actor_critic_network()

    def build_actor_critic_network(self, load=False):
        """Build and compile the models.

        Args:
            load: accepted for interface compatibility; not used here.

        Returns:
            Tuple ``(model_actor, model_critic, model_policy, delta)`` where
            ``delta`` is the ``tf.Variable`` read by the actor's loss. The
            policy model is not compiled and shares the actor's output.
        """
        main_input = Input(shape=(self.input_dims,), name='main_input')
        # Shape (1, 1) matches the `target - critic_value` array assigned in learn.
        delta = tf.Variable([[0.]], trainable=False)

        dense_shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)

        dense_actor = Dense(self.layer_actor, activation='relu', name='dense_actor')(dense_shared)
        output_actor = Dense(self.n_action, activation='softmax', name='output_actor')(dense_actor)

        dense_critic = Dense(self.layer_critic, activation='relu', name='dense_critic')(dense_shared)
        output_critic = Dense(1, activation='linear', name='output_critic')(dense_critic)

        def custom_loss(delta):
            """Return a loss that scales the negative log-likelihood by `delta`."""
            def loss(y_true, y_pred):
                # Clip probabilities away from 0 and 1 before taking the log.
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

        model_actor = Model(inputs=[main_input], outputs=output_actor, name='model_actor')
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        model_actor.compile(optimizer=Adam(learning_rate=self.alpha), loss=custom_loss(delta))

        model_critic = Model(inputs=[main_input], outputs=output_critic, name='model_critic')
        model_critic.compile(optimizer=Adam(learning_rate=self.beta), loss='mean_squared_error')

        model_policy = Model(inputs=[main_input], outputs=output_actor)
        # layers[1] of the critic is marked non-trainable after both compile()
        # calls (the run output names it "dense_shared").
        # NOTE(review): Keras documents that `trainable` changes need a new
        # compile() to take effect, and this layer is shared with the actor;
        # confirm the intended freezing behaviour.
        model_critic.layers[1].trainable = False
        print(f'layer "{model_critic.layers[1].name}" of the "model_critic" frozen')

        return model_actor, model_critic, model_policy, delta

    def choose_action(self, state):
        """Sample an action id from the policy's predicted probabilities for `state`."""
        # Add a leading batch axis so a single observation can be predicted.
        batch = state[np.newaxis, :]
        probabilities = self.policy.predict(batch)[0]
        return np.random.choice(self.action_space, p=probabilities)

    def learn(self, state, action, reward, state_, done):
        """Run one fit step of the actor and the critic on a single transition.

        Args:
            state: observation before the step.
            action: index of the action that was taken.
            reward: reward received for the step.
            state_: observation after the step.
            done: truthy when the episode ended; zeroes the bootstrap term.
        """
        state = state[np.newaxis, :]
        state_ = state_[np.newaxis, :]

        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)

        # One-step target and its difference from the current critic estimate.
        target = reward + self.gamma * critic_value_ * (1 - int(done))
        delta_numpy = target - critic_value

        # One-hot encoding of the taken action, used as y_true for the actor.
        actions = np.zeros([1, self.n_action])
        actions[0, action] = 1.0

        # Publish delta to the loss through the variable before fitting.
        self.delta.assign(delta_numpy)
        self.actor.fit(state, actions, verbose=1)
        self.critic.fit(state, target, verbose=1)

1 个答案:

答案 0 :(得分:1)

delta = Input(shape=(1), name='delta')

这就是问题所在:它是一个图(符号)张量。我猜 `Model` 并没有预料到会有图张量混入损失函数中。

我认为,如果您把 delta 设为一个不可训练的变量,事情会简单得多:
delta = tf.Variable(0.0, trainable=False)

然后,不要再尝试把它作为输入传入:

self.actor.fit([state, delta], actions, verbose=1)

而是在调用 fit 之前先更新该变量的值:

self.actor.delta.assign(delta)
self.actor.fit(state, actions, verbose=1)