Source code for stable_learning_control.algos.tf2.policies.critics.L_critic

"""Lyapunov critic policy.

This module contains a TensorFlow 2.x implementation of the Lyapunov Critic policy of
`Han et al. 2020 <https://arxiv.org/abs/2004.14288>`_.
"""

import tensorflow as tf
from tensorflow import nn

from stable_learning_control.algos.tf2.common.helpers import mlp


[docs]class LCritic(tf.keras.Model): """Soft Lyapunov critic Network. Attributes: L (tf.keras.Sequential): The layers of the network. """ def __init__( self, obs_dim, act_dim, hidden_sizes, activation=nn.relu, name="lyapunov_critic", **kwargs, ): """Initialise the LCritic object. Args: obs_dim (int): Dimension of the observation space. act_dim (int): Dimension of the action space. hidden_sizes (list): Sizes of the hidden layers. activation (:obj:`tf.keras.activations`, optional): The activation function. Defaults to :obj:`tf.nn.relu`. name (str, optional): The Lyapunov critic name. Defaults to ``lyapunov_critic``. **kwargs: All kwargs to pass to the :mod:`tf.keras.Model`. Can be used to add additional inputs or outputs. """ super().__init__(name=name, **kwargs)
[docs] self.L = mlp( [obs_dim + act_dim] + list(hidden_sizes), activation, activation, name=name )
# Build the model to initialise the (trainable) variables. self.build((None, obs_dim + act_dim)) @tf.function
[docs] def call(self, inputs): """Perform forward pass through the network. Args: inputs (tuple): tuple containing: - obs (tf.Tensor): The tensor of observations. - act (tf.Tensor): The tensor of actions. Returns: tf.Tensor: The tensor containing the lyapunov values of the input observations and actions. """ L_hid_out = self.L(tf.concat(inputs, axis=-1)) L_out = tf.math.square(L_hid_out) L_out = tf.reduce_sum(L_out, axis=1) return L_out