Source code for stable_learning_control.algos.pytorch.common.buffers

"""Contains several replay buffers used in the Pytorch algorithms."""

import torch

from stable_learning_control.algos.common.buffers import (
    FiniteHorizonReplayBuffer as CommonFiniteHorizonReplayBuffer,
)
from stable_learning_control.algos.common.buffers import (
    ReplayBuffer as CommonReplayBuffer,
)
from stable_learning_control.algos.common.buffers import (
    TrajectoryBuffer as CommonTrajectoryBuffer,
)
from stable_learning_control.algos.pytorch.common.helpers import np_to_torch


[docs]class ReplayBuffer(CommonReplayBuffer):
    """Wrapper around the general FIFO
    :obj:`~stable_learning_control.common.buffers.ReplayBuffer` which makes
    sure a :obj:`torch.tensor` is returned when sampling.

    Attributes:
        device (str): The device the experiences are placed on (options: ``cpu``,
            ``gpu``, ``gpu:0``, ``gpu:1``, etc.).
    """

    def __init__(self, device="cpu", *args, **kwargs):
        """Initialise the ReplayBuffer object.

        Args:
            device (str, optional): The computational device to put the sampled
                experiences on (options: ``cpu``, ``gpu``, ``gpu:0``, ``gpu:1``,
                etc.). Defaults to ``cpu``.
            *args: All args to pass to the :class:`ReplayBuffer` parent class.
            **kwargs: All kwargs to pass to the class:`ReplayBuffer` parent class.
        """
[docs]        self.device = device
        super().__init__(*args, **kwargs)

[docs]    def sample_batch(self, *args, **kwargs):
        """Retrieve a batch of experiences from buffer.

        Args:
            *args: All args to pass to the :meth:`~ReplayBuffer.sample_batch` parent
                method.
            **kwargs: All kwargs to pass to the :meth:`~ReplayBuffer.sample_batch`
                parent method.

        Returns:
            dict: A batch of experiences.
        """
        return np_to_torch(
            super().sample_batch(*args, **kwargs),
            dtype=torch.float32,
            device=self.device,
        )  # Make sure output is a torch tensor.


[docs]class FiniteHorizonReplayBuffer(CommonFiniteHorizonReplayBuffer):
    """Wrapper around the general FIFO
    :obj:`~stable_learning_control.common.buffers.FiniteHorizonReplayBuffer` which makes
    sure a :obj:`torch.tensor` is returned when sampling.

    Attributes:
        device (str): The device the experiences are placed on (options: ``cpu``,
            ``gpu``, ``gpu:0``, ``gpu:1``, etc.).
    """

    def __init__(self, device="cpu", *args, **kwargs):
        """Initialise the FiniteHorizonReplayBuffer object.

        Args:
            device (str, optional): The computational device to put the sampled
                experiences on (options: ``cpu``, ``gpu``, ``gpu:0``, ``gpu:1``,
                etc.). Defaults to ``cpu``.
            *args: All args to pass to the :class:`FiniteHorizonReplayBuffer` parent
                class.
            **kwargs: All kwargs to pass to the class:`FiniteHorizonReplayBuffer` parent
                class.
        """
[docs]        self.device = device
        super().__init__(*args, **kwargs)

[docs]    def sample_batch(self, *args, **kwargs):
        """Retrieve a batch of experiences from buffer.

        Args:
            *args: All args to pass to the
                :meth:`~FiniteHorizonReplayBuffer.sample_batch` parent method.
            **kwargs: All kwargs to pass to the :meth:`~ReplayBuffer.sample_batch`
                parent method.

        Returns:
            dict: A batch of experiences.
        """
        return np_to_torch(
            super().sample_batch(*args, **kwargs),
            dtype=torch.float32,
            device=self.device,
        )  # Make sure output is a torch tensor.


[docs]class TrajectoryBuffer(CommonTrajectoryBuffer):
    """Wrapper around the general
    :obj:`~stable_learning_control.common.buffers.TrajectoryBuffer` which
    makes sure a :obj:`torch.tensor` is returned when sampling.

    Attributes:
        device (str): The device the experiences are placed on (options: ``cpu``,
            ``gpu``, ``gpu:0``, ``gpu:1``, etc.).
    """

    def __init__(self, device="cpu", *args, **kwargs):
        """Initialise the TrajectoryBuffer object.

        Args:
            device (str, optional): The computational device to put the sampled
                experiences on (options: ``cpu``, ``gpu``, ``gpu:0``, ``gpu:1``, etc.).
                Defaults to ``cpu``.
            *args: All args to pass to the :class:`TrajectoryBuffer` parent class.
            **kwargs: All kwargs to pass to the :class:`TrajectoryBuffer` parent class.
        """
[docs]        self.device = device
        super().__init__(*args, **kwargs)

[docs]    def get(self, *args, **kwargs):
        """Retrieve the trajectory buffer.

        Call this at the end of an epoch to get all of the data from
        the buffer. Also, resets some pointers in the buffer.

        Args:
            *args: All args to pass to the :meth:`~TrajectoryBuffer.get` parent method.
            **kwargs: All kwargs to pass to the :meth:`~TrajectoryBuffer.get` parent
                method.

        Returns:
            dict: The trajectory buffer.
        """
        return np_to_torch(
            super().get(*args, **kwargs), dtype=torch.float32, device=self.device
        )  # Make sure output is a torch tensor.