Source code for tsbootstrap.base_bootstrap

from __future__ import annotations

import inspect
from collections.abc import Callable
from multiprocessing import Pool
from numbers import Integral
from typing import Optional

import numpy as np
from skbase.base import BaseObject

from tsbootstrap.base_bootstrap_configs import (
    BaseDistributionBootstrapConfig,
    BaseMarkovBootstrapConfig,
    BaseResidualBootstrapConfig,
    BaseSieveBootstrapConfig,
    BaseStatisticPreservingBootstrapConfig,
    BaseTimeSeriesBootstrapConfig,
)
from tsbootstrap.tsfit import TSFitBestLag
from tsbootstrap.utils.odds_and_ends import time_series_split
from tsbootstrap.utils.types import (
    BlockCompressorTypes,
    ModelTypes,
    ModelTypesWithoutArch,
    OrderTypes,
)



[docs]
class BaseTimeSeriesBootstrap(BaseObject):
    """
    Base class for time series bootstrapping.

    Raises
    ------
    ValueError
        If n_bootstraps is not greater than 0.
    """

    _tags = {
        "object_type": "bootstrap",
        "bootstrap_type": "other",
        "capability:multivariate": True,
    }

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
    ) -> None:
        """
        Initialize self.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the bootstrap
            samples. It is stored unchanged on the instance and, when the
            object is exactly a ``BaseTimeSeriesBootstrap``, forwarded to
            ``BaseTimeSeriesBootstrapConfig``.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng

        super().__init__()
        # Exact-type check on purpose: the config is only created here when
        # the instance is the base class itself, never for a subclass.
        # Classes are compared by identity, hence ``is`` rather than ``==``.
        if type(self) is BaseTimeSeriesBootstrap:
            self.config = BaseTimeSeriesBootstrapConfig(
                n_bootstraps=n_bootstraps, rng=rng
            )


[docs]
    def bootstrap(
        self,
        X: np.ndarray,
        return_indices: bool = False,
        y=None,
        test_ratio: Optional[float] = None,  # noqa: UP007
    ):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered
        return_indices : bool, default=False
            If True, a second output is retured, integer locations of
            index references for the bootstrap sample, in reference to original indices.
            Indexed values do are not necessarily identical with bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.
        test_ratio : float, default=0.0
            The ratio of test samples to total samples.
            If provided, test_ratio fraction the data (rounded up)
            is removed from the end before applying the bootstrap logic.

        Yields
        ------
        X_boot_i : 2D np.ndarray-like of shape (n_timepoints_boot_i, n_features)
            i-th bootstrapped sample of X.
        indices_i : 1D np.nparray of shape (n_timepoints_boot_i,) integer values,
            only returned if return_indices=True.
            Index references for the i-th bootstrapped sample of X.
            Indexed values do are not necessarily identical with bootstrapped values.
        """
        X, y = self._check_X_y(X, y)

        if test_ratio is not None:
            X_inner, _ = time_series_split(X, test_ratio=test_ratio)
            if y is not None:
                y_inner, _ = time_series_split(y, test_ratio=test_ratio)
            else:
                y_inner = None
        else:
            X_inner = X
            y_inner = y

        yield from self._bootstrap(
            X=X_inner, return_indices=return_indices, y=y_inner
        )


    def _bootstrap(self, X: np.ndarray, return_indices: bool = False, y=None):
        """Generate indices to split data into training and test set.

        Private method to be implemented by derived classes.
        Input validation is not required in this method.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered
        return_indices : bool, default=False
            If True, a second output is retured, integer locations of
            index references for the bootstrap sample, in reference to original indices.
            Indexed values do are not necessarily identical with bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.

        Yields
        ------
        X_boot_i : 2D np.ndarray-like of shape (n_timepoints_boot_i, n_features)
            i-th bootstrapped sample of X.
        indices_i : 1D np.nparray of shape (n_timepoints_boot_i,) integer values,
            only returned if return_indices=True.
            Index references for the i-th bootstrapped sample of X.
            Indexed values do are not necessarily identical with bootstrapped values.
        """
        # default implementation for current classes using config
        yield from self._generate_samples(
            X=X, return_indices=return_indices, y=y
        )

    def _generate_samples(
        self,
        X: np.ndarray,
        return_indices: bool = False,
        y=None,
        n_jobs: int = 1,
    ):
        """Generate bootstrapped samples directly.

        Parameters
        ----------
        X : array-like of shape (n_timepoints, n_features)
            The input samples.
        return_indices : bool, default=False
            If True, a second output is retured, integer locations of
            index references for the bootstrap sample, in reference to original indices.
            Indexed values do are not necessarily identical with bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.
        n_jobs : int, default=1
            The number of jobs to run in parallel.

        Yields
        ------
        Iterator[np.ndarray]
            An iterator over the bootstrapped samples.
        """
        if n_jobs == 1:
            # Run bootstrap generation sequentially in the main process
            for _ in range(self.config.n_bootstraps):
                indices, data = self._generate_samples_single_bootstrap(X, y)
                data = np.concatenate(data, axis=0)
                if return_indices:
                    # hack to fix known issue with non-concatenated index sets
                    # see bug issue #81
                    if isinstance(indices, list):
                        indices = np.concatenate(indices, axis=0)
                    yield data, indices
                else:
                    yield data
        else:
            # Use multiprocessing to handle bootstrapping
            args = [(X, y) for _ in range(self.config.n_bootstraps)]
            with Pool(n_jobs) as pool:
                results = pool.starmap(
                    self._generate_samples_single_bootstrap, args
                )

            for indices, data in results:
                data = np.concatenate(data, axis=0)
                if return_indices:
                    # hack to fix known issue with non-concatenated index sets
                    # see bug issue #81
                    if isinstance(indices, list):
                        indices = np.concatenate(indices, axis=0)
                    yield data, indices
                else:
                    yield data

    def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None):
        """Generate list of bootstraps for a single bootstrap iteration."""
        raise NotImplementedError("abstract method")

    def _check_X_y(self, X, y):
        """Check X and y inputs, for bootstrap and get_n_bootstraps methods.

        Checks X to be a 2D array-like, and y to be a 2D array-like or None.
        If X is 1D np.ndarray, it is expanded to 2D via np.expand_dims.

        Parameters
        ----------
        X : checked 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered
        y : checked array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.

        Returns
        -------
        X : np.ndarray, coerced to 2D array-like of shape (n_timepoints, n_features)
            The checked endogenous time series.
        y : np.ndarray or None, identical with y
            The checked exogenous time series.

        Raises
        ------
        ValueError : If the input is not valid.
        """
        if X is not None:
            X = np.asarray(X)
            if len(X.shape) < 2:
                print(X)
                X = np.expand_dims(X, 1)

            X = self._check_input(X)
        if y is not None:
            y = self._check_input(y, enforce_univariate=False)

        return X, y

    def _check_input(self, X, enforce_univariate=True):
        """Checks if the input is valid.

        Parameters
        ----------
        X : list of np.ndarray
            The input to check.
        enforce_univariate : bool, default=True
            Whether to enforce univariate input.

        Returns
        -------
        object : The input object if it is valid.

        Raises
        ------
        ValueError
            If the input is not valid.
        """
        if np.any(np.diff([len(x) for x in X]) != 0):
            raise ValueError("All time series must be of the same length.")

        self_can_only_univariate = not self.get_tag("capability:multivariate")
        check_univariate = enforce_univariate and self_can_only_univariate
        if check_univariate and X.shape[1] > 1:
            raise ValueError(
                f"Unsupported input type: the estimator {type(self)} "
                "does not support multivariate endogeneous time series (X argument). "
                "Pass an 1D np.array, or a 2D np.array with a single column."
            )

        return X


[docs]
    def get_n_bootstraps(self, X=None, y=None) -> int:
        """Returns the number of bootstrap instances produced by the bootstrap.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.

        Returns
        -------
        int : The number of bootstrap instances produced by the bootstrap.
        """
        return self.n_bootstraps  # type: ignore





[docs]
class BaseResidualBootstrap(BaseTimeSeriesBootstrap):
    """Base class for residual bootstrap.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    model_type : str, default="ar"
        The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via TSFitBestLag.
        If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order
        for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA
        and (p, d, q, s) for SARIMAX. It is either a single Integral or a
        list of non-consecutive ints for AR, and an Integral for VAR and ARCH.
        If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag
        only chooses the best lag, not the best order, so for the tuple values,
        it only chooses the best p, not the best (p, o, q) or (p, d, q, s).
        The rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.
    rng : Integral or np.random.Generator, default=np.random.default_rng()
        The random number generator or seed used to generate the bootstrap samples.

    Attributes
    ----------
    fit_model : TSFitBestLag
        The fitted model.
    resids : np.ndarray
        The residuals of the fitted model.
    X_fitted : np.ndarray
        The fitted values of the fitted model.
    coefs : np.ndarray
        The coefficients of the fitted model.

    Methods
    -------
    __init__ : Initialize self.
    _fit_model : Fits the model to the data and stores the residuals.
    """

    _tags = {
        "python_dependencies": "statsmodels",
        "bootstrap_type": "residual",
        "capability:multivariate": False,
    }

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
    ):
        """
        Initialize self.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the bootstrap samples.
        model_type : str, default="ar"
            The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
        model_params : dict, default=None
            Additional keyword arguments to pass to the TSFit model.
        order : Integral or list or tuple, default=None
            The order of the model. If None, the best order is chosen via TSFitBestLag.
            If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag
            order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for
            ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a
            list of non-consecutive ints for AR, and an Integral for VAR and ARCH.
            Do note that TSFitBestLag only chooses the best lag, not the best order,
            so for the tuple values, it only chooses the best p, not the best
            (p, o, q) or (p, d, q, s). The rest of the values are set to 0.
        save_models : bool, default=False
            Whether to save the fitted models.

        Raises
        ------
        ValueError
            If model_type is not one of "ar", "arima", "sarima", "var", or "arch".
            NOTE(review): no such check is visible in this method; presumably
            it is raised by the config object - confirm.

        Notes
        -----
        The model_type and order parameters are passed to TSFitBestLag, which
        chooses the best lag and order for the model. The best lag and order are
        then used to fit the model to the data. The residuals are then stored
        for use in the bootstrap.

        References
        ----------
        .. [^1^] https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Residual_bootstrap
        """
        # Constructor arguments, stored verbatim.
        self.model_type = model_type
        self._model_type = model_type
        self.model_params = model_params
        self.order = order
        self.save_models = save_models

        # Fit results; they stay None until ``_fit_model`` fills them in.
        self.coefs = None
        self.X_fitted = None
        self.resids = None
        self.fit_model = None

        super().__init__(n_bootstraps=n_bootstraps, rng=rng)

        # Keep a config that was already attached to the instance before
        # this point; only create one when none exists yet.
        if hasattr(self, "config"):
            return

        self.config = BaseResidualBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            model_type=model_type,
            model_params=model_params,
            order=order,
            save_models=save_models,
        )

    def _fit_model(self, X: np.ndarray, y=None) -> None:
        """Fits the model to the data and stores the residuals."""
        if (
            self.resids is None
            or self.X_fitted is None
            or self.fit_model is None
            or self.coefs is None
        ):
            model_params = self.config.model_params
            if model_params is None:
                model_params = {}
            fit_obj = TSFitBestLag(
                model_type=self.config.model_type,
                order=self.config.order,
                save_models=self.config.save_models,
                **model_params,
            )
            self.fit_model = fit_obj.fit(X=X, y=y).model
            self.X_fitted = fit_obj.get_fitted_X()
            self.resids = fit_obj.get_residuals()
            self.order = fit_obj.get_order()
            self.coefs = fit_obj.get_coefs()




[docs]
class BaseMarkovBootstrap(BaseResidualBootstrap):
    """
    Base class for Markov bootstrap.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    method : str, default="middle"
        The method to use for compressing the blocks.
        Must be one of "first", "middle", "last", "mean", "mode", "median",
        "kmeans", "kmedians", "kmedoids".
    apply_pca_flag : bool, default=False
        Whether to apply PCA to the residuals before fitting the HMM.
    pca : PCA, default=None
        The PCA object to use for applying PCA to the residuals.
    n_iter_hmm : Integral, default=10
        Number of iterations for fitting the HMM.
    n_fits_hmm : Integral, default=1
        Number of times to fit the HMM.
    blocks_as_hidden_states_flag : bool, default=False
        Whether to use blocks as hidden states.
    n_states : Integral, default=2
        Number of states for the HMM.
    model_type : str, default="ar"
        The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via TSFitBestLag.
        If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order
        for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA
        and (p, d, q, s) for SARIMAX. It is either a single Integral or a
        list of non-consecutive ints for AR, and an Integral for VAR and ARCH.
        If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag
        only chooses the best lag, not the best order, so for the tuple values,
        it only chooses the best p, not the best (p, o, q) or (p, d, q, s).
        The rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.
    rng : Integral or np.random.Generator, default=np.random.default_rng()
        The random number generator or seed used to generate the bootstrap samples.

    Attributes
    ----------
    hmm_object : MarkovSampler or None
        The MarkovSampler object used for sampling.

    Methods
    -------
    __init__ : Initialize the Markov bootstrap.

    Notes
    -----
    Fitting Markov models is expensive, hence we do not allow refitting. Instead, we fit once to the residuals and generate new samples by changing the random seed.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        method: BlockCompressorTypes = "middle",
        apply_pca_flag: bool = False,
        pca=None,
        n_iter_hmm: Integral = 10,  # type: ignore
        n_fits_hmm: Integral = 1,  # type: ignore
        blocks_as_hidden_states_flag: bool = False,
        n_states: Integral = 2,  # type: ignore
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
        rng=None,
        **kwargs,
    ):
        """
        Initialize self.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        method : str, default="middle"
            The method to use for compressing the blocks. Must be one of "first", "middle", "last", "mean", "mode", "median", "kmeans", "kmedians", "kmedoids".
        apply_pca_flag : bool, default=False
            Whether to apply PCA to the residuals before fitting the HMM.
        pca : PCA, default=None
            The PCA object to use for applying PCA to the residuals.
        n_iter_hmm : Integral, default=10
            Number of iterations for fitting the HMM.
        n_fits_hmm : Integral, default=1
            Number of times to fit the HMM.
        blocks_as_hidden_states_flag : bool, default=False
            Whether to use blocks as hidden states.
        n_states : Integral, default=2
            Number of states for the HMM.
        model_type : str, default="ar"
            The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
        model_params : dict, default=None
            Additional keyword arguments to pass to the TSFit model.
        order : Integral or list or tuple, default=None
            The order of the model. If None, the best order is chosen via TSFitBestLag.
        save_models : bool, default=False
            Whether to save the fitted models.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the bootstrap samples.
        **kwargs
            Additional keyword arguments, forwarded both to the parent
            ``__init__`` and to ``BaseMarkovBootstrapConfig``.
            NOTE(review): the parent ``BaseResidualBootstrap.__init__`` shown
            in this file declares no ``**kwargs``, so any extra keyword looks
            like it would raise ``TypeError`` at the parent call - confirm
            whether that is intended.
        """
        # Arguments shared by the parent initializer and the Markov config.
        residual_args = {
            "n_bootstraps": n_bootstraps,
            "order": order,
            "model_type": model_type,
            "model_params": model_params,
            "save_models": save_models,
            "rng": rng,
        }
        super().__init__(**residual_args, **kwargs)

        # Markov-specific constructor arguments, stored verbatim.
        self.method = method
        self.apply_pca_flag = apply_pca_flag
        self.pca = pca
        self.n_iter_hmm = n_iter_hmm
        self.n_fits_hmm = n_fits_hmm
        self.blocks_as_hidden_states_flag = blocks_as_hidden_states_flag
        self.n_states = n_states

        # No sampler object exists at construction time.
        self.hmm_object = None

        # Assigned after the parent initializer has run, so this is the
        # config the instance ends up with.
        self.config = BaseMarkovBootstrapConfig(
            method=method,
            apply_pca_flag=apply_pca_flag,
            pca=pca,
            n_iter_hmm=n_iter_hmm,
            n_fits_hmm=n_fits_hmm,
            blocks_as_hidden_states_flag=blocks_as_hidden_states_flag,
            n_states=n_states,
            **residual_args,
            **kwargs,
        )




[docs]
class BaseStatisticPreservingBootstrap(BaseTimeSeriesBootstrap):
    """Bootstrap class that generates bootstrapped samples preserving a specific statistic.

    This class generates bootstrapped time series data, preserving a given statistic (such as mean, median, etc.)
    The statistic is calculated from the original data and then used as a parameter for generating the bootstrapped samples.
    For example, if the statistic is np.mean, then the mean of the original data is calculated and then used as a parameter for generating the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    statistic : Callable, default=np.mean
        A callable function to compute the statistic that should be preserved.
    statistic_axis : Integral, default=0
        The axis along which the statistic should be computed.
    statistic_keepdims : bool, default=False
        Whether to keep the dimensions of the statistic or not.
    rng :  Integral or np.random.Generator, default=np.random.default_rng()
        The random number generator or seed used to generate the bootstrap samples.

    Attributes
    ----------
    statistic_X : np.ndarray, default=None
        The statistic calculated from the original data. This is used as a parameter for generating the bootstrapped samples.

    Methods
    -------
    __init__ : Initialize the BaseStatisticPreservingBootstrap class.
    _calculate_statistic(X: np.ndarray) -> np.ndarray : Calculate the statistic from the input data.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        statistic: Optional[Callable] = None,  # noqa: UP007
        statistic_axis: Integral = 0,  # type: ignore
        statistic_keepdims: bool = False,
        rng=None,
    ) -> None:
        """
        Initialize the BaseStatisticPreservingBootstrap class.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        statistic : Callable, default=None
            A callable function to compute the statistic that should be
            preserved. If None, ``np.mean`` is used in the config; the
            ``statistic`` attribute itself keeps the value None.
        statistic_axis : Integral, default=0
            The axis along which the statistic should be computed.
        statistic_keepdims : bool, default=False
            Whether to keep the dimensions of the statistic or not.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the bootstrap samples.
        """
        # Constructor arguments, stored exactly as passed.
        self.statistic = statistic
        self.statistic_axis = statistic_axis
        self.statistic_keepdims = statistic_keepdims
        self.n_bootstraps = n_bootstraps
        self.rng = rng

        # The config always receives a concrete callable.
        effective_statistic = np.mean if statistic is None else statistic
        self.config = BaseStatisticPreservingBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            statistic=effective_statistic,
            statistic_axis=statistic_axis,
            statistic_keepdims=statistic_keepdims,
        )

        super().__init__(n_bootstraps=n_bootstraps, rng=rng)

        # No statistic has been computed at construction time.
        self.statistic_X = None


[docs]
    def _calculate_statistic(self, X: np.ndarray) -> np.ndarray:
        params = inspect.signature(self.config.statistic).parameters
        kwargs_stat = {
            "axis": self.config.statistic_axis,
            "keepdims": self.config.statistic_keepdims,
        }
        kwargs_stat = {k: v for k, v in kwargs_stat.items() if k in params}
        statistic_X = self.config.statistic(X, **kwargs_stat)
        return statistic_X




# We can only fit uni-variate distributions, so X must be a 1D array, and `model_type` in BaseResidualBootstrap must not be "var".

[docs]
class BaseDistributionBootstrap(BaseResidualBootstrap):
    r"""
    Implementation of the Distribution Bootstrap (DB) method for time series data.

    The DB method is a non-parametric method that generates bootstrapped samples by fitting a distribution to the residuals and then generating new residuals from the fitted distribution. The new residuals are then added to the fitted values to create the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    distribution: str, default='normal'
        The distribution to use for generating the bootstrapped samples.
        Must be one of 'poisson', 'exponential', 'normal', 'gamma', 'beta',
        'lognormal', 'weibull', 'pareto', 'geometric', or 'uniform'.
    refit: bool, default=False
        Whether to refit the distribution to the resampled residuals for each
        bootstrap. If False, the distribution is fit once to the residuals and
        the same distribution is used for all bootstraps.
    model_type : str, default="ar"
        The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via TSFitBestLag.
        If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order
        for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA
        and (p, d, q, s) for SARIMAX. It is either a single Integral or a
        list of non-consecutive ints for AR, and an Integral for VAR and ARCH.
        If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag
        only chooses the best lag, not the best order, so for the tuple values,
        it only chooses the best p, not the best (p, o, q) or (p, d, q, s).
        The rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.
    rng : Integral or np.random.Generator, default=np.random.default_rng()
        The random number generator or seed used to generate the bootstrap samples.

    Attributes
    ----------
    resids_dist : scipy.stats.rv_continuous or None
        The distribution object used to generate the bootstrapped samples. If None, the distribution has not been fit yet.
    resids_dist_params : tuple or None
        The parameters of the distribution used to generate the bootstrapped samples. If None, the distribution has not been fit yet.

    Methods
    -------
    __init__ : Initialize the BaseDistributionBootstrap class.
    _fit_distribution(resids: np.ndarray) -> tuple[rv_continuous, tuple]
        Fit the specified distribution to the residuals and return the distribution object and the parameters of the distribution.

    Notes
    -----
    The DB method is defined as:

    .. math::
        \hat{X}_t = \hat{\mu} + \epsilon_t

    where :math:`\epsilon_t \sim F_{\hat{\epsilon}}` is a random variable
    sampled from the distribution :math:`F_{\hat{\epsilon}}` fitted to the
    residuals :math:`\hat{\epsilon}`.

    References
    ----------
    .. [^1^] Politis, Dimitris N., and Joseph P. Romano. "The stationary bootstrap." Journal of the American Statistical Association 89.428 (1994): 1303-1313.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        distribution: str = "normal",
        refit: bool = False,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
        rng=None,
        **kwargs,
    ) -> None:
        """
        Initialize the BaseDistributionBootstrap class.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        distribution : str, default="normal"
            The distribution to use for generating the bootstrapped samples.
        refit : bool, default=False
            Whether to refit the distribution to the resampled residuals for
            each bootstrap.
        model_type : str, default="ar"
            The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch".
        model_params : dict, default=None
            Additional keyword arguments to pass to the TSFit model.
        order : Integral or list or tuple, default=None
            The order of the model. If None, the best order is chosen via TSFitBestLag.
        save_models : bool, default=False
            Whether to save the fitted models.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the bootstrap samples.
        **kwargs
            Additional keyword arguments, forwarded both to
            ``BaseDistributionBootstrapConfig`` and to the parent ``__init__``.
            NOTE(review): the parent ``BaseResidualBootstrap.__init__`` shown
            in this file declares no ``**kwargs``, so any extra keyword looks
            like it would raise ``TypeError`` at the parent call - confirm
            whether that is intended.
        """
        # Arguments shared by the config and the parent initializer.
        residual_args = {
            "n_bootstraps": n_bootstraps,
            "rng": rng,
            "save_models": save_models,
            "order": order,
            "model_type": model_type,
            "model_params": model_params,
        }

        # Constructor arguments, stored verbatim.
        self.distribution = distribution
        self.refit = refit
        self.n_bootstraps = n_bootstraps
        self.rng = rng

        # The config is attached BEFORE calling the parent initializer: the
        # parent only creates its own config when none is present yet.
        self.config = BaseDistributionBootstrapConfig(
            distribution=distribution,
            refit=refit,
            **residual_args,
            **kwargs,
        )

        super().__init__(**residual_args, **kwargs)

        # No distribution has been fitted at construction time.
        self.resids_dist = None
        self.resids_dist_params = ()

    def _fit_distribution(self, resids: np.ndarray):
        """
        Fit the specified distribution to the residuals and return the distribution object and the parameters of the distribution.

        Parameters
        ----------
        resids : np.ndarray
            The residuals to fit the distribution to.

        Returns
        -------
        resids_dist : scipy.stats.rv_continuous
            The distribution object used to generate the bootstrapped samples.
        resids_dist_params : tuple
            The parameters of the distribution used to generate the bootstrapped samples.
        """
        resids_dist = self.config.distribution_methods[
            self.config.distribution
        ]
        # Fit the distribution to the residuals
        resids_dist_params = resids_dist.fit(resids)
        return resids_dist, resids_dist_params




[docs]
class BaseSieveBootstrap(BaseResidualBootstrap):
    """
    Base class for Sieve bootstrap.

    This class provides the core functionalities for implementing the Sieve
    bootstrap method, allowing for the fitting of various models to the residuals
    and generation of bootstrapped samples. The Sieve bootstrap is a parametric method
    that generates bootstrapped samples by fitting a model to the residuals
    and then generating new residuals from the fitted model.
    The new residuals are then added to the fitted values to create
    the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap samples.
    resids_model_type : str, default="ar"
        The model type to use for fitting the residuals. Must be one of "ar", "arima", "sarima", "var", or "arch".
    resids_order : Integral or list or tuple, default=None
        The order of the model to use for fitting the residuals. If None, the order is automatically determined.
    save_resids_models : bool, default=False
        Whether to save the fitted models for the residuals.
    kwargs_base_sieve : dict, default=None
        Forwarded unchanged to ``BaseSieveBootstrapConfig``. Presumably the
        extra keyword arguments for the residual model (the fitting code reads
        them back as ``config.resids_model_params``) -- confirm against the
        config class.
    model_type : str, default="ar"
        The model type to use. The annotation (``ModelTypesWithoutArch``)
        indicates one of "ar", "arima", "sarima", or "var".
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via TSFitBestLag.
        If Integral, it is the lag order for AR, ARIMA, and SARIMA,
        and the lag order for ARCH. If list or tuple, the order is a
        tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX.
        It is either a single Integral or a list of non-consecutive ints for AR,
        and an Integral for VAR and ARCH. Do note that TSFitBestLag only
        chooses the best lag, not the best order, so for the tuple values,
        it only chooses the best p, not the best (p, o, q) or (p, d, q, s).
        The rest of the values are set to 0.
    **kwargs_base_residual
        Extra keyword arguments forwarded both to ``BaseSieveBootstrapConfig``
        and to the ``BaseResidualBootstrap`` constructor.

    Attributes
    ----------
    resids_coefs : object or None
        Coefficients of the fitted residual model, as returned by
        ``TSFitBestLag.get_coefs()``. None until ``_fit_resids_model`` has run.
    resids_fit_model : object or None
        Fitted residual model object (the ``model`` attribute of the fitted
        ``TSFitBestLag``). None until ``_fit_resids_model`` has run.
    resids_order : Integral or list or tuple or None
        Initially the constructor argument; replaced by
        ``TSFitBestLag.get_order()`` once the residual model has been fitted.

    Methods
    -------
    __init__ : Initialize the BaseSieveBootstrap class.
    _fit_resids_model : Fit the residual model to the residuals.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
        resids_model_type: ModelTypes = "ar",
        resids_order=None,
        save_resids_models: bool = False,
        kwargs_base_sieve=None,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        **kwargs_base_residual,
    ) -> None:
        """
        Initialize the BaseSieveBootstrap class.

        Stores the sieve-specific arguments on the instance, builds the
        ``BaseSieveBootstrapConfig`` used by the fitting code, and initializes
        the residual-bootstrap parent class.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed.
        resids_model_type : str, default="ar"
            The model type used to fit the residuals.
        resids_order : Integral or list or tuple, default=None
            The order of the residual model.
        save_resids_models : bool, default=False
            Whether to save the fitted residual models.
        kwargs_base_sieve : dict, default=None
            Passed through to the configuration object.
        model_type : str, default="ar"
            The model type of the main model.
        model_params : dict, default=None
            Additional keyword arguments for the main model.
        order : Integral or list or tuple, default=None
            The order of the main model.
        **kwargs_base_residual
            Extra keyword arguments forwarded to the configuration object and
            to the parent constructor.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng
        self.resids_model_type = resids_model_type
        self.resids_order = resids_order
        self.save_resids_models = save_resids_models
        self.kwargs_base_sieve = kwargs_base_sieve

        self.config = BaseSieveBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            resids_model_type=resids_model_type,
            resids_order=resids_order,
            save_resids_models=save_resids_models,
            kwargs_base_sieve=kwargs_base_sieve,
            model_type=model_type,
            model_params=model_params,
            order=order,
            **kwargs_base_residual,
        )
        # `order` must reach the parent as well as the config; otherwise the
        # parent is initialised with its default order and the instance
        # disagrees with its own configuration.
        super().__init__(
            n_bootstraps=n_bootstraps,
            model_type=model_type,
            model_params=model_params,
            order=order,
            rng=rng,
            **kwargs_base_residual,
        )

        # Populated lazily by `_fit_resids_model`.
        self.resids_coefs = None
        self.resids_fit_model = None

    def _fit_resids_model(self, X: np.ndarray) -> None:
        """
        Fit the residual model to the residuals.

        The fit is performed only while ``resids_fit_model`` or
        ``resids_coefs`` is still None; once both are set, later calls return
        immediately without refitting.

        Parameters
        ----------
        X : np.ndarray
            The residuals to fit the model to.

        Returns
        -------
        None
            Results are stored on the instance rather than returned:
            ``resids_fit_model`` (the fitted model object), ``resids_order``
            (the order reported by the fitter) and ``resids_coefs`` (the
            coefficients reported by the fitter).
        """
        already_fitted = (
            self.resids_fit_model is not None
            and self.resids_coefs is not None
        )
        if already_fitted:
            return

        fitter = TSFitBestLag(
            model_type=self.config.resids_model_type,
            order=self.config.resids_order,
            save_models=self.config.save_resids_models,
            **self.config.resids_model_params,
        )
        # Collect every result before touching the instance, so a failure in
        # any step leaves the previously stored state untouched.
        fitted_model = fitter.fit(X, y=None).model
        fitted_order = fitter.get_order()
        fitted_coefs = fitter.get_coefs()

        self.resids_fit_model = fitted_model
        self.resids_order = fitted_order
        self.resids_coefs = fitted_coefs