# Source code for sbi.inference.trainers.vfpe.npse

# This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
# under the Apache License Version 2.0, see <https://www.apache.org/licenses/>

import warnings
from typing import Any, Dict, Literal, Optional, Union

from torch.distributions import Distribution
from torch.utils.tensorboard.writer import SummaryWriter

from sbi.inference.posteriors.base_posterior import NeuralPosterior
from sbi.inference.posteriors.posterior_parameters import VectorFieldPosteriorParameters
from sbi.inference.trainers.vfpe.base_vf_inference import (
    VectorFieldTrainer,
)
from sbi.neural_nets.estimators import ConditionalVectorFieldEstimator
from sbi.neural_nets.estimators.base import ConditionalEstimatorBuilder
from sbi.neural_nets.factory import posterior_score_nn
from sbi.sbi_types import Tracker



class NPSE(VectorFieldTrainer):
    r"""Neural Posterior Score Estimation (NPSE) [1, 2].

    NPSE trains a neural network to estimate the score function (gradient of the log
    posterior) $\nabla_\theta \log p(\theta|x)$ using denoising score matching. NPSE
    learns the score of a diffusion process that transforms the prior into the
    posterior. The neural network can be any expressive architecture. Sampling is
    performed using SDE solvers (e.g., Langevin dynamics) or ODE solvers, which can
    be slower than flow-based NPE, but expressiveness can be higher.

    NOTE: NPSE does not support multi-round inference with flexible proposals yet.
    You can try multi-round with truncated proposals, but this is not tested.

    [1] Score modeling for simulation-based inference, Geffner et al., ICML 2023.
    [2] Sequential neural score estimation: Likelihood-free inference with conditional
        score based diffusion models, Sharrock et al., ICML 2024.

    Example:
    --------

    ::

        import torch
        from sbi.inference import NPSE
        from sbi.utils import BoxUniform

        # 1. Setup prior and simulate data
        prior = BoxUniform(low=torch.zeros(3), high=torch.ones(3))
        theta = prior.sample((100,))
        x = theta + torch.randn_like(theta) * 0.1

        # 2. Train score estimator
        inference = NPSE(prior=prior, sde_type="ve")
        score_estimator = inference.append_simulations(theta, x).train()

        # 3. Build posterior (uses SDE solver by default)
        posterior = inference.build_posterior(score_estimator)

        # 4. Sample from posterior using Langevin dynamics
        x_o = torch.randn(1, 3)
        samples = posterior.sample((1000,), x=x_o)
    """

    def __init__(
        self,
        prior: Optional[Distribution] = None,
        vf_estimator: Union[
            Literal["mlp", "ada_mlp", "transformer", "transformer_cross_attn"],
            ConditionalEstimatorBuilder[ConditionalVectorFieldEstimator],
        ] = "mlp",
        score_estimator: Optional[
            Union[
                Literal["mlp", "ada_mlp", "transformer", "transformer_cross_attn"],
                ConditionalEstimatorBuilder[ConditionalVectorFieldEstimator],
            ]
        ] = None,
        density_estimator: Optional[
            ConditionalEstimatorBuilder[ConditionalVectorFieldEstimator]
        ] = None,
        sde_type: Literal["vp", "ve", "subvp"] = "ve",
        device: str = "cpu",
        logging_level: Union[int, str] = "WARNING",
        summary_writer: Optional[SummaryWriter] = None,
        tracker: Optional[Tracker] = None,
        show_progress_bars: bool = True,
    ):
        r"""Initialize Neural Posterior Score Estimation.

        Args:
            prior: Prior distribution.
            vf_estimator: Neural network architecture for the
                vector field estimator aiming to estimate the marginal scores of the
                target diffusion process. Can be a string (e.g. 'mlp', 'ada_mlp',
                'transformer' or 'transformer_cross_attn') or a callable that implements
                the `ConditionalEstimatorBuilder` protocol with `__call__` that receives
                `theta` and `x` and returns a `ConditionalVectorFieldEstimator`.
                To configure estimator-level options (e.g. noise schedules for VE),
                use `posterior_score_nn` to build a custom callable and pass it here.
            score_estimator: Deprecated, use `vf_estimator` instead. When given, it
                overrides `vf_estimator` and a `FutureWarning` is emitted.
            density_estimator: Deprecated, use `vf_estimator` instead. When given, it
                overrides `vf_estimator` (and `score_estimator`, if both are passed)
                and a `FutureWarning` is emitted.
            sde_type: Type of SDE to use. Must be one of ['vp', 've', 'subvp'].
                Only used when `vf_estimator` is a string (i.e. when using the
                default builder). Ignored when a custom callable is passed.
            device: Device to run the training on.
            logging_level: Logging level for the training. Can be an integer or a
                string.
            summary_writer: Deprecated alias for the TensorBoard summary writer.
                Use ``tracker`` instead.
            tracker: Tracking adapter used to log training metrics. If None, a
                TensorBoard tracker is used with a default log directory.
            show_progress_bars: Whether to show progress bars during training.

        References:
            - Geffner, Tomas, George Papamakarios, and Andriy Mnih. "Score modeling for
                simulation-based inference." ICML 2023.
            - Sharrock, Louis, et al. "Sequential neural score estimation: Likelihood-
                free inference with conditional score based diffusion models." ICML 2024
        """
        # Fold the deprecated aliases into `vf_estimator`. The order is significant:
        # aliases are applied in sequence, so `density_estimator` wins when both
        # deprecated arguments are supplied.
        deprecated_aliases = (
            ("score_estimator", score_estimator),
            ("density_estimator", density_estimator),
        )
        for alias_name, alias_value in deprecated_aliases:
            if alias_value is None:
                continue
            # stacklevel=2 attributes the warning to the code constructing NPSE.
            warnings.warn(
                f"`{alias_name}` is deprecated and will be removed in a future "
                "release. Use `vf_estimator` instead.",
                FutureWarning,
                stacklevel=2,
            )
            vf_estimator = alias_value

        super().__init__(
            prior=prior,
            vector_field_estimator_builder=vf_estimator,
            device=device,
            logging_level=logging_level,
            summary_writer=summary_writer,
            tracker=tracker,
            show_progress_bars=show_progress_bars,
        )

        # When vf_estimator is a string, build the default neural net using
        # the NPSE-specific builder which requires sde_type. This replaces
        # whatever builder the parent constructor installed.
        if isinstance(vf_estimator, str):
            self._build_neural_net = self._build_default_nn_fn(
                model=vf_estimator, sde_type=sde_type
            )

    def build_posterior(
        self,
        vector_field_estimator: Optional[ConditionalVectorFieldEstimator] = None,
        prior: Optional[Distribution] = None,
        sample_with: Literal["ode", "sde"] = "sde",
        vectorfield_sampling_parameters: Optional[Dict[str, Any]] = None,
        posterior_parameters: Optional[VectorFieldPosteriorParameters] = None,
    ) -> NeuralPosterior:
        r"""Build posterior from the vector field estimator.

        Note that this is the same as the FMPE posterior, but the sample_with
        method is set to "sde" by default.

        For NPSE, the posterior distribution that is returned here implements
        the following functionality over the raw neural density estimator:
            - correct the calculation of the log probability such that
              samples outside of the prior bounds have log probability -inf.
            - reject samples that lie outside of the prior bounds.

        All arguments are forwarded unchanged to the parent class'
        `build_posterior`; `vector_field_estimator` is passed as `estimator`.

        Args:
            vector_field_estimator: The vector field estimator that the posterior is
                based on. If `None`, use the latest vector field estimator that was
                trained.
            prior: Prior distribution.
            sample_with: Method to use for sampling from the posterior. Can be one of
                'sde' (default) or 'ode'. The 'sde' method uses the score to
                do a Langevin diffusion step, while the 'ode' method solves a
                probabilistic ODE with a numerical ODE solver.
            vectorfield_sampling_parameters: Additional keyword arguments passed to
                `VectorFieldPosterior`.
            posterior_parameters: Configuration passed to the init method for
                VectorFieldPosterior.

        Returns:
            Posterior $p(\theta|x)$  with `.sample()` and `.log_prob()` methods.
        """
        return super().build_posterior(
            estimator=vector_field_estimator,
            prior=prior,
            sample_with=sample_with,
            vectorfield_sampling_parameters=vectorfield_sampling_parameters,
            posterior_parameters=posterior_parameters,
        )

    def _build_default_nn_fn(
        self,
        model: Literal["mlp", "ada_mlp", "transformer", "transformer_cross_attn"],
        sde_type: Literal["vp", "ve", "subvp"] = "ve",
    ) -> ConditionalEstimatorBuilder[ConditionalVectorFieldEstimator]:
        """Return the default estimator builder for a named architecture.

        Args:
            model: Name of the network architecture, forwarded to
                `posterior_score_nn`.
            sde_type: Type of SDE, forwarded to `posterior_score_nn`.

        Returns:
            The builder produced by
            `posterior_score_nn(model=model, sde_type=sde_type)`.
        """
        return posterior_score_nn(model=model, sde_type=sde_type)