Source code for tsbootstrap.base_bootstrap

from __future__ import annotations

import inspect
from collections.abc import Callable
from multiprocessing import Pool
from numbers import Integral
from typing import Optional

import numpy as np
from skbase.base import BaseObject

from tsbootstrap.base_bootstrap_configs import (
    BaseDistributionBootstrapConfig,
    BaseMarkovBootstrapConfig,
    BaseResidualBootstrapConfig,
    BaseSieveBootstrapConfig,
    BaseStatisticPreservingBootstrapConfig,
    BaseTimeSeriesBootstrapConfig,
)
from tsbootstrap.tsfit import TSFitBestLag
from tsbootstrap.utils.odds_and_ends import time_series_split
from tsbootstrap.utils.types import (
    BlockCompressorTypes,
    ModelTypes,
    ModelTypesWithoutArch,
    OrderTypes,
)


class BaseTimeSeriesBootstrap(BaseObject):
    """
    Base class for time series bootstrapping.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples. The value is stored unchanged and forwarded to the
        configuration object.

    Raises
    ------
    ValueError
        If n_bootstraps is not greater than 0.
    """

    _tags = {
        "object_type": "bootstrap",
        "bootstrap_type": "other",
        "capability:multivariate": True,
    }

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
    ) -> None:
        """
        Initialize self.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng

        super().__init__()

        # Subclasses build their own config objects; the basic config is only
        # created when exactly this class is instantiated.
        if type(self) is BaseTimeSeriesBootstrap:
            self.config = BaseTimeSeriesBootstrapConfig(
                n_bootstraps=n_bootstraps, rng=rng
            )

    def bootstrap(
        self,
        X: np.ndarray,
        return_indices: bool = False,
        y=None,
        test_ratio: Optional[float] = None,  # noqa: UP007
    ):
        """Generate bootstrapped samples of a time series.

        This is a generator: input checking and splitting only run once the
        first sample is requested.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered.
        return_indices : bool, default=False
            If True, a second output is returned: integer locations of index
            references for the bootstrap sample, in reference to original
            indices. Indexed values are not necessarily identical with
            bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.
        test_ratio : float, default=None
            The ratio of test samples to total samples.
            If provided, a test_ratio fraction of the data (rounded up) is
            removed from the end before applying the bootstrap logic.
            If None, the full series is bootstrapped.

        Yields
        ------
        X_boot_i : 2D np.ndarray-like of shape (n_timepoints_boot_i, n_features)
            i-th bootstrapped sample of X.
        indices_i : 1D np.ndarray of shape (n_timepoints_boot_i,)
            Integer values, only returned if return_indices=True.
            Index references for the i-th bootstrapped sample of X.
            Indexed values are not necessarily identical with bootstrapped
            values.
        """
        X, y = self._check_X_y(X, y)

        if test_ratio is not None:
            # Only the leading (training) part of each series is bootstrapped.
            X_inner, _ = time_series_split(X, test_ratio=test_ratio)
            if y is not None:
                y_inner, _ = time_series_split(y, test_ratio=test_ratio)
            else:
                y_inner = None
        else:
            X_inner = X
            y_inner = y

        yield from self._bootstrap(
            X=X_inner, return_indices=return_indices, y=y_inner
        )

    def _bootstrap(self, X: np.ndarray, return_indices: bool = False, y=None):
        """Generate bootstrapped samples of a time series.

        Private method to be implemented by derived classes.
        Input validation is not required in this method.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered.
        return_indices : bool, default=False
            If True, a second output is returned: integer locations of index
            references for the bootstrap sample, in reference to original
            indices. Indexed values are not necessarily identical with
            bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.

        Yields
        ------
        X_boot_i : 2D np.ndarray-like of shape (n_timepoints_boot_i, n_features)
            i-th bootstrapped sample of X.
        indices_i : 1D np.ndarray of shape (n_timepoints_boot_i,)
            Integer values, only returned if return_indices=True.
            Index references for the i-th bootstrapped sample of X.
        """
        # default implementation for current classes using config
        yield from self._generate_samples(
            X=X, return_indices=return_indices, y=y
        )

    @staticmethod
    def _assemble_sample(indices, data, return_indices: bool):
        """Concatenate one bootstrap iteration's pieces into a single sample.

        Parameters
        ----------
        indices : np.ndarray or list of np.ndarray
            Index references produced for the iteration.
        data : sequence of np.ndarray
            Data pieces produced for the iteration, joined along axis 0.
        return_indices : bool
            Whether the indices are returned together with the data.

        Returns
        -------
        np.ndarray or tuple of (np.ndarray, np.ndarray)
            The concatenated sample, plus the indices if requested.
        """
        sample = np.concatenate(data, axis=0)
        if not return_indices:
            return sample
        # hack to fix known issue with non-concatenated index sets
        # see bug issue #81
        if isinstance(indices, list):
            indices = np.concatenate(indices, axis=0)
        return sample, indices

    def _generate_samples(
        self,
        X: np.ndarray,
        return_indices: bool = False,
        y=None,
        n_jobs: int = 1,
    ):
        """Generate bootstrapped samples directly.

        Parameters
        ----------
        X : array-like of shape (n_timepoints, n_features)
            The input samples.
        return_indices : bool, default=False
            If True, a second output is returned: integer locations of index
            references for the bootstrap sample, in reference to original
            indices. Indexed values are not necessarily identical with
            bootstrapped values.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.
        n_jobs : int, default=1
            The number of jobs to run in parallel. With 1, samples are
            generated lazily in the calling process; otherwise all samples
            are computed in a multiprocessing pool before the first is
            yielded.

        Yields
        ------
        np.ndarray or tuple of (np.ndarray, np.ndarray)
            The bootstrapped sample, together with its indices if
            return_indices=True.
        """
        n_samples = self.config.n_bootstraps

        if n_jobs == 1:
            # Run bootstrap generation sequentially in the main process
            for _ in range(n_samples):
                indices, data = self._generate_samples_single_bootstrap(X, y)
                yield self._assemble_sample(indices, data, return_indices)
        else:
            # Use multiprocessing to handle bootstrapping
            # NOTE(review): the bound method is sent to the workers, so each
            # task operates on a copy of self; confirm that subclasses draw
            # independent randomness per task.
            args = [(X, y) for _ in range(n_samples)]
            with Pool(n_jobs) as pool:
                results = pool.starmap(
                    self._generate_samples_single_bootstrap, args
                )

            for indices, data in results:
                yield self._assemble_sample(indices, data, return_indices)

    def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None):
        """Generate list of bootstraps for a single bootstrap iteration.

        Abstract; derived classes must return a pair ``(indices, data)``
        where ``data`` is a sequence of arrays that can be concatenated
        along axis 0.
        """
        raise NotImplementedError("abstract method")

    def _check_X_y(self, X, y):
        """Check X and y inputs, for bootstrap and get_n_bootstraps methods.

        Checks X to be a 2D array-like, and y to be a 2D array-like or None.
        If X is 1D np.ndarray, it is expanded to 2D via np.expand_dims.

        Parameters
        ----------
        X : checked 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap.
            Dimension 0 is assumed to be the time dimension, ordered.
        y : checked array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping.

        Returns
        -------
        X : np.ndarray, coerced to 2D array-like of shape (n_timepoints, n_features)
            The checked endogenous time series.
        y : np.ndarray or None, identical with y
            The checked exogenous time series.

        Raises
        ------
        ValueError
            If the input is not valid.
        """
        if X is not None:
            X = np.asarray(X)
            if X.ndim < 2:
                # A single series becomes one column: (n_timepoints, 1).
                X = np.expand_dims(X, 1)

            X = self._check_input(X)
        if y is not None:
            y = self._check_input(y, enforce_univariate=False)

        return X, y

    def _check_input(self, X, enforce_univariate=True):
        """Checks if the input is valid.

        Parameters
        ----------
        X : list of np.ndarray
            The input to check.
        enforce_univariate : bool, default=True
            Whether to enforce univariate input. Only has an effect if the
            "capability:multivariate" tag of the estimator is False.

        Returns
        -------
        object
            The input object if it is valid.

        Raises
        ------
        ValueError
            If the input is not valid.
        """
        if np.any(np.diff([len(x) for x in X]) != 0):
            raise ValueError("All time series must be of the same length.")

        self_can_only_univariate = not self.get_tag("capability:multivariate")
        check_univariate = enforce_univariate and self_can_only_univariate
        if check_univariate and X.shape[1] > 1:
            raise ValueError(
                f"Unsupported input type: the estimator {type(self)} "
                "does not support multivariate endogeneous time series (X argument). "
                "Pass an 1D np.array, or a 2D np.array with a single column."
            )

        return X

    def get_n_bootstraps(self, X=None, y=None) -> int:
        """Returns the number of bootstrap instances produced by the bootstrap.

        Parameters
        ----------
        X : 2D array-like of shape (n_timepoints, n_features)
            The endogenous time series to bootstrap. Not used by this
            implementation.
        y : array-like of shape (n_timepoints, n_features_exog), default=None
            Exogenous time series to use in bootstrapping. Not used by this
            implementation.

        Returns
        -------
        int
            The number of bootstrap instances produced by the bootstrap.
        """
        return self.n_bootstraps  # type: ignore
class BaseResidualBootstrap(BaseTimeSeriesBootstrap):
    """Base class for residual bootstrap.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples.
    model_type : str, default="ar"
        The model type to use, e.g. "ar", "arima", "sarima" or "var".
        NOTE(review): the annotation ``ModelTypesWithoutArch`` suggests that
        "arch" is not accepted here; confirm against the config validation.
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via
        TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and
        SARIMA, and the lag order for ARCH. If list or tuple, the order is a
        tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is
        either a single Integral or a list of non-consecutive ints for AR,
        and an Integral for VAR and ARCH. Do note that TSFitBestLag only
        chooses the best lag, not the best order, so for the tuple values, it
        only chooses the best p, not the best (p, o, q) or (p, d, q, s). The
        rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.

    Attributes
    ----------
    fit_model : TSFitBestLag
        The fitted model.
    resids : np.ndarray
        The residuals of the fitted model.
    X_fitted : np.ndarray
        The fitted values of the fitted model.
    coefs : np.ndarray
        The coefficients of the fitted model.

    Methods
    -------
    __init__ : Initialize self.
    _fit_model : Fits the model to the data and stores the residuals.

    References
    ----------
    .. [^1^] https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Residual_bootstrap
    """

    _tags = {
        "python_dependencies": "statsmodels",
        "bootstrap_type": "residual",
        "capability:multivariate": False,
    }

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
    ):
        """
        Initialize self.

        Stores the constructor arguments, resets the fit results to None and
        creates a BaseResidualBootstrapConfig unless a subclass has already
        attached its own ``config`` before delegating to this initializer.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        model_type : str, default="ar"
            The model type to use.
        model_params : dict, default=None
            Additional keyword arguments to pass to the TSFit model.
        order : Integral or list or tuple, default=None
            The order of the model. If None, the best order is chosen via
            TSFitBestLag.
        save_models : bool, default=False
            Whether to save the fitted models.

        Notes
        -----
        The model_type and order parameters are passed to TSFitBestLag, which
        chooses the best lag and order for the model. The best lag and order
        are then used to fit the model to the data. The residuals are then
        stored for use in the bootstrap.
        """
        # Constructor arguments, stored under their own names.
        self.model_type = model_type
        self._model_type = model_type
        self.model_params = model_params
        self.order = order
        self.save_models = save_models

        # Fit results; populated by _fit_model.
        self.fit_model = None
        self.X_fitted = None
        self.resids = None
        self.coefs = None

        super().__init__(n_bootstraps=n_bootstraps, rng=rng)

        if hasattr(self, "config"):
            # A subclass attached its own config before calling us.
            return

        self.config = BaseResidualBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            model_type=model_type,
            model_params=model_params,
            order=order,
            save_models=save_models,
        )

    def _fit_model(self, X: np.ndarray, y=None) -> None:
        """Fits the model to the data and stores the residuals.

        The fit is skipped when all of ``resids``, ``X_fitted``,
        ``fit_model`` and ``coefs`` are already set. Otherwise a
        TSFitBestLag is fitted using the settings in ``self.config`` and the
        results are stored on the instance; ``self.order`` is replaced by
        the order reported by the fitted object.

        Parameters
        ----------
        X : np.ndarray
            The endogenous time series to fit the model to.
        y : array-like, default=None
            Exogenous time series passed on to the fit.
        """
        cached = (self.resids, self.X_fitted, self.fit_model, self.coefs)
        if all(item is not None for item in cached):
            return

        extra_params = self.config.model_params
        if extra_params is None:
            extra_params = {}

        fitter = TSFitBestLag(
            model_type=self.config.model_type,
            order=self.config.order,
            save_models=self.config.save_models,
            **extra_params,
        )

        self.fit_model = fitter.fit(X=X, y=y).model
        self.X_fitted = fitter.get_fitted_X()
        self.resids = fitter.get_residuals()
        self.order = fitter.get_order()
        self.coefs = fitter.get_coefs()
class BaseMarkovBootstrap(BaseResidualBootstrap):
    """
    Base class for Markov bootstrap.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    method : str, default="middle"
        The method to use for compressing the blocks. Must be one of "first",
        "middle", "last", "mean", "mode", "median", "kmeans", "kmedians",
        "kmedoids".
    apply_pca_flag : bool, default=False
        Whether to apply PCA to the residuals before fitting the HMM.
    pca : PCA, default=None
        The PCA object to use for applying PCA to the residuals.
    n_iter_hmm : Integral, default=10
        Number of iterations for fitting the HMM.
    n_fits_hmm : Integral, default=1
        Number of times to fit the HMM.
    blocks_as_hidden_states_flag : bool, default=False
        Whether to use blocks as hidden states.
    n_states : Integral, default=2
        Number of states for the HMM.
    model_type : str, default="ar"
        The model type to use for the residual model.
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via
        TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and
        SARIMA, and the lag order for ARCH. If list or tuple, the order is a
        tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is
        either a single Integral or a list of non-consecutive ints for AR,
        and an Integral for VAR and ARCH. Do note that TSFitBestLag only
        chooses the best lag, not the best order, so for the tuple values, it
        only chooses the best p, not the best (p, o, q) or (p, d, q, s). The
        rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples.

    Attributes
    ----------
    hmm_object : MarkovSampler or None
        The MarkovSampler object used for sampling.

    Methods
    -------
    __init__ : Initialize the Markov bootstrap.

    Notes
    -----
    Fitting Markov models is expensive, hence we do not allow re-fitting. We
    instead fit once to the residuals and generate new samples by changing
    the random_seed.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        method: BlockCompressorTypes = "middle",
        apply_pca_flag: bool = False,
        pca=None,
        n_iter_hmm: Integral = 10,  # type: ignore
        n_fits_hmm: Integral = 1,  # type: ignore
        blocks_as_hidden_states_flag: bool = False,
        n_states: Integral = 2,  # type: ignore
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
        rng=None,
        **kwargs,
    ):
        """
        Initialize self.

        The residual-bootstrap arguments (plus any ``**kwargs``) are handed
        to the parent initializer; afterwards the Markov-specific settings
        are stored and ``self.config`` is replaced by a
        BaseMarkovBootstrapConfig built from all arguments.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        method : str, default="middle"
            The method to use for compressing the blocks.
        apply_pca_flag : bool, default=False
            Whether to apply PCA to the residuals before fitting the HMM.
        pca : PCA, default=None
            The PCA object to use for applying PCA to the residuals.
        n_iter_hmm : Integral, default=10
            Number of iterations for fitting the HMM.
        n_fits_hmm : Integral, default=1
            Number of times to fit the HMM.
        blocks_as_hidden_states_flag : bool, default=False
            Whether to use blocks as hidden states.
        n_states : Integral, default=2
            Number of states for the HMM.
        model_type, model_params, order, save_models
            Settings of the residual model, see the class docstring.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        **kwargs
            Additional keyword arguments, forwarded both to the parent
            initializer and to BaseMarkovBootstrapConfig.
        """
        # Arguments shared by the parent initializer and the Markov config.
        residual_settings = dict(
            n_bootstraps=n_bootstraps,
            order=order,
            model_type=model_type,
            model_params=model_params,
            save_models=save_models,
            rng=rng,
            **kwargs,
        )
        super().__init__(**residual_settings)

        # Markov-specific arguments, stored under their own names.
        markov_settings = dict(
            method=method,
            apply_pca_flag=apply_pca_flag,
            pca=pca,
            n_iter_hmm=n_iter_hmm,
            n_fits_hmm=n_fits_hmm,
            blocks_as_hidden_states_flag=blocks_as_hidden_states_flag,
            n_states=n_states,
        )
        self.method = method
        self.apply_pca_flag = apply_pca_flag
        self.pca = pca
        self.n_iter_hmm = n_iter_hmm
        self.n_fits_hmm = n_fits_hmm
        self.blocks_as_hidden_states_flag = blocks_as_hidden_states_flag
        self.n_states = n_states

        self.hmm_object = None

        # Replaces the residual config created by the parent initializer.
        self.config = BaseMarkovBootstrapConfig(
            **markov_settings, **residual_settings
        )
class BaseStatisticPreservingBootstrap(BaseTimeSeriesBootstrap):
    """Bootstrap class that generates bootstrapped samples preserving a specific statistic.

    This class generates bootstrapped time series data, preserving a given
    statistic (such as mean, median, etc.). The statistic is calculated from
    the original data and then used as a parameter for generating the
    bootstrapped samples. For example, if the statistic is np.mean, then the
    mean of the original data is calculated and then used as a parameter for
    generating the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    statistic : Callable, default=None
        A callable function to compute the statistic that should be
        preserved. If None, np.mean is used.
    statistic_axis : Integral, default=0
        The axis along which the statistic should be computed.
    statistic_keepdims : bool, default=False
        Whether to keep the dimensions of the statistic or not.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples.

    Attributes
    ----------
    statistic_X : np.ndarray, default=None
        The statistic calculated from the original data. This is used as a
        parameter for generating the bootstrapped samples.

    Methods
    -------
    __init__ : Initialize the BaseStatisticPreservingBootstrap class.
    _calculate_statistic(X: np.ndarray) -> np.ndarray : Calculate the statistic from the input data.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        statistic: Optional[Callable] = None,  # noqa: UP007
        statistic_axis: Integral = 0,  # type: ignore
        statistic_keepdims: bool = False,
        rng=None,
    ) -> None:
        """
        Initialize the BaseStatisticPreservingBootstrap class.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        statistic : Callable, default=None
            The statistic to preserve. Stored as passed; the configuration
            object receives np.mean when this is None.
        statistic_axis : Integral, default=0
            The axis along which the statistic should be computed.
        statistic_keepdims : bool, default=False
            Whether to keep the dimensions of the statistic or not.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng
        # The attribute keeps the raw argument (possibly None); only the
        # config sees the resolved default.
        self.statistic = statistic
        self.statistic_axis = statistic_axis
        self.statistic_keepdims = statistic_keepdims

        resolved_statistic = np.mean if statistic is None else statistic
        self.config = BaseStatisticPreservingBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            statistic=resolved_statistic,
            statistic_axis=statistic_axis,
            statistic_keepdims=statistic_keepdims,
        )

        super().__init__(n_bootstraps=n_bootstraps, rng=rng)

        self.statistic_X = None

    def _calculate_statistic(self, X: np.ndarray) -> np.ndarray:
        """Calculate the configured statistic of the input data.

        ``axis`` and ``keepdims`` are only passed to the statistic if its
        signature declares parameters with those names.

        Parameters
        ----------
        X : np.ndarray
            The data to compute the statistic on.

        Returns
        -------
        np.ndarray
            The value returned by the configured statistic callable.
        """
        accepted = inspect.signature(self.config.statistic).parameters
        optional_args = (
            ("axis", self.config.statistic_axis),
            ("keepdims", self.config.statistic_keepdims),
        )
        supported = {
            name: value for name, value in optional_args if name in accepted
        }
        return self.config.statistic(X, **supported)
# We can only fit uni-variate distributions, so X must be a 1D array, and `model_type` in BaseResidualBootstrap must not be "var".
class BaseDistributionBootstrap(BaseResidualBootstrap):
    r"""
    Implementation of the Distribution Bootstrap (DB) method for time series data.

    The DB method generates bootstrapped samples by fitting a distribution to
    the residuals and then generating new residuals from the fitted
    distribution. The new residuals are then added to the fitted values to
    create the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    distribution : str, default='normal'
        The distribution to use for generating the bootstrapped samples.
        Must be one of 'poisson', 'exponential', 'normal', 'gamma', 'beta',
        'lognormal', 'weibull', 'pareto', 'geometric', or 'uniform'.
    refit : bool, default=False
        Whether to refit the distribution to the resampled residuals for each
        bootstrap. If False, the distribution is fit once to the residuals
        and the same distribution is used for all bootstraps.
    model_type : str, default="ar"
        The model type to use for the residual model.
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via
        TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and
        SARIMA, and the lag order for ARCH. If list or tuple, the order is a
        tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is
        either a single Integral or a list of non-consecutive ints for AR,
        and an Integral for VAR and ARCH. Do note that TSFitBestLag only
        chooses the best lag, not the best order, so for the tuple values, it
        only chooses the best p, not the best (p, o, q) or (p, d, q, s). The
        rest of the values are set to 0.
    save_models : bool, default=False
        Whether to save the fitted models.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples.

    Attributes
    ----------
    resids_dist : scipy.stats.rv_continuous or None
        The distribution object used to generate the bootstrapped samples.
        If None, the distribution has not been fit yet.
    resids_dist_params : tuple
        The parameters of the distribution used to generate the bootstrapped
        samples. Empty until the distribution has been fit.

    Methods
    -------
    __init__ : Initialize the BaseDistributionBootstrap class.
    _fit_distribution(resids: np.ndarray) -> tuple[rv_continuous, tuple]
        Fit the specified distribution to the residuals and return the
        distribution object and the parameters of the distribution.

    Notes
    -----
    The DB method is defined as:

    .. math::
        \hat{X}_t = \hat{\mu} + \epsilon_t

    where :math:`\epsilon_t \sim F_{\hat{\epsilon}}` is a random variable
    sampled from the distribution :math:`F_{\hat{\epsilon}}` fitted to the
    residuals :math:`\hat{\epsilon}`.

    References
    ----------
    .. [^1^] Politis, Dimitris N., and Joseph P. Romano. "The stationary bootstrap." Journal of the American Statistical Association 89.428 (1994): 1303-1313.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        distribution: str = "normal",
        refit: bool = False,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        save_models: bool = False,
        rng=None,
        **kwargs,
    ) -> None:
        """
        Initialize the BaseDistributionBootstrap class.

        The config is attached before the parent initializer runs, so the
        parent keeps it instead of creating a residual-only config.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        distribution : str, default='normal'
            The distribution to fit to the residuals.
        refit : bool, default=False
            Whether to refit the distribution for each bootstrap.
        model_type, model_params, order, save_models
            Settings of the residual model, see the class docstring.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        **kwargs
            Additional keyword arguments, forwarded both to
            BaseDistributionBootstrapConfig and to the parent initializer.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng
        self.distribution = distribution
        self.refit = refit

        # Arguments shared by the config and the parent initializer.
        residual_settings = dict(
            n_bootstraps=n_bootstraps,
            rng=rng,
            save_models=save_models,
            order=order,
            model_type=model_type,
            model_params=model_params,
            **kwargs,
        )

        self.config = BaseDistributionBootstrapConfig(
            distribution=distribution,
            refit=refit,
            **residual_settings,
        )
        super().__init__(**residual_settings)

        self.resids_dist = None
        self.resids_dist_params = ()

    def _fit_distribution(self, resids: np.ndarray):
        """
        Fit the configured distribution to the residuals.

        The distribution object is looked up in
        ``self.config.distribution_methods`` under the configured
        distribution name and its ``fit`` method is called on the residuals.

        Parameters
        ----------
        resids : np.ndarray
            The residuals to fit the distribution to.

        Returns
        -------
        resids_dist : scipy.stats.rv_continuous
            The distribution object used to generate the bootstrapped samples.
        resids_dist_params : tuple
            The parameters of the distribution used to generate the
            bootstrapped samples.
        """
        dist_name = self.config.distribution
        fitted_dist = self.config.distribution_methods[dist_name]
        fitted_params = fitted_dist.fit(resids)
        return fitted_dist, fitted_params
class BaseSieveBootstrap(BaseResidualBootstrap):
    """
    Base class for Sieve bootstrap.

    This class provides the core functionalities for implementing the Sieve
    bootstrap method, allowing for the fitting of various models to the
    residuals and generation of bootstrapped samples. The Sieve bootstrap is
    a parametric method that generates bootstrapped samples by fitting a
    model to the residuals and then generating new residuals from the fitted
    model. The new residuals are then added to the fitted values to create
    the bootstrapped samples.

    Parameters
    ----------
    n_bootstraps : Integral, default=10
        The number of bootstrap samples to create.
    rng : Integral or np.random.Generator, default=None
        The random number generator or seed used to generate the bootstrap
        samples.
    resids_model_type : str, default="ar"
        The model type to use for fitting the residuals. Must be one of "ar",
        "arima", "sarima", "var", or "arch".
    resids_order : Integral or list or tuple, default=None
        The order of the model to use for fitting the residuals. If None, the
        order is automatically determined.
    save_resids_models : bool, default=False
        Whether to save the fitted models for the residuals.
    kwargs_base_sieve : dict, default=None
        Keyword arguments to pass to the SieveBootstrap class.
    model_type : str, default="ar"
        The model type to use for the main model.
    model_params : dict, default=None
        Additional keyword arguments to pass to the TSFit model.
    order : Integral or list or tuple, default=None
        The order of the model. If None, the best order is chosen via
        TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and
        SARIMA, and the lag order for ARCH. If list or tuple, the order is a
        tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is
        either a single Integral or a list of non-consecutive ints for AR,
        and an Integral for VAR and ARCH. Do note that TSFitBestLag only
        chooses the best lag, not the best order, so for the tuple values, it
        only chooses the best p, not the best (p, o, q) or (p, d, q, s). The
        rest of the values are set to 0.
    **kwargs_base_residual
        Further keyword arguments (e.g. ``save_models``), forwarded both to
        BaseSieveBootstrapConfig and to the BaseResidualBootstrap
        initializer.

    Attributes
    ----------
    resids_coefs : type or None
        Coefficients of the fitted residual model. Replace "type" with the
        specific type if known.
    resids_fit_model : type or None
        Fitted residual model object. Replace "type" with the specific type
        if known.

    Methods
    -------
    __init__ : Initialize the BaseSieveBootstrap class.
    _fit_resids_model : Fit the residual model to the residuals.
    """

    def __init__(
        self,
        n_bootstraps: Integral = 10,  # type: ignore
        rng=None,
        resids_model_type: ModelTypes = "ar",
        resids_order=None,
        save_resids_models: bool = False,
        kwargs_base_sieve=None,
        model_type: ModelTypesWithoutArch = "ar",
        model_params=None,
        order: OrderTypes = None,  # type: ignore
        **kwargs_base_residual,
    ) -> None:
        """
        Initialize the BaseSieveBootstrap class.

        The config is attached before the parent initializer runs, so the
        parent keeps it instead of creating a residual-only config.

        Parameters
        ----------
        n_bootstraps : Integral, default=10
            The number of bootstrap samples to create.
        rng : Integral or np.random.Generator, default=None
            The random number generator or seed used to generate the
            bootstrap samples.
        resids_model_type : str, default="ar"
            The model type to use for fitting the residuals.
        resids_order : Integral or list or tuple, default=None
            The order of the model to use for fitting the residuals.
        save_resids_models : bool, default=False
            Whether to save the fitted models for the residuals.
        kwargs_base_sieve : dict, default=None
            Keyword arguments to pass to the SieveBootstrap class.
        model_type, model_params, order
            Settings of the main model, see the class docstring.
        **kwargs_base_residual
            Further keyword arguments for the config and the parent
            initializer.
        """
        self.n_bootstraps = n_bootstraps
        self.rng = rng
        self.resids_model_type = resids_model_type
        self.resids_order = resids_order
        self.save_resids_models = save_resids_models
        self.kwargs_base_sieve = kwargs_base_sieve

        self.config = BaseSieveBootstrapConfig(
            n_bootstraps=n_bootstraps,
            rng=rng,
            resids_model_type=resids_model_type,
            resids_order=resids_order,
            save_resids_models=save_resids_models,
            kwargs_base_sieve=kwargs_base_sieve,
            model_type=model_type,
            model_params=model_params,
            order=order,
            **kwargs_base_residual,
        )

        # `order` must be forwarded explicitly: the parent initializer stores
        # it as `self.order`, and without it the attribute would silently be
        # reset to None even though the config holds the requested order.
        super().__init__(
            n_bootstraps=n_bootstraps,
            model_type=model_type,
            model_params=model_params,
            order=order,
            rng=rng,
            **kwargs_base_residual,
        )

        self.resids_coefs = None
        self.resids_fit_model = None

    def _fit_resids_model(self, X: np.ndarray) -> None:
        """
        Fit the residual model to the residuals.

        The fit is skipped when both ``resids_fit_model`` and
        ``resids_coefs`` are already set. Otherwise a TSFitBestLag is fitted
        using the residual-model settings in ``self.config`` and the results
        are stored on the instance. Nothing is returned.

        Parameters
        ----------
        X : np.ndarray
            The residuals to fit the model to.

        Notes
        -----
        After a fit the following attributes are updated:

        resids_fit_model
            The fitted residual model object.
        resids_order
            The order reported by the fitted object; this replaces the
            value passed to the constructor.
        resids_coefs
            The coefficients of the fitted residual model.
        """
        if self.resids_fit_model is not None and self.resids_coefs is not None:
            return

        # NOTE(review): `resids_model_params` is provided by the config;
        # presumably derived from `kwargs_base_sieve` - confirm.
        resids_fit_obj = TSFitBestLag(
            model_type=self.config.resids_model_type,
            order=self.config.resids_order,
            save_models=self.config.save_resids_models,
            **self.config.resids_model_params,
        )

        self.resids_fit_model = resids_fit_obj.fit(X, y=None).model
        self.resids_order = resids_fit_obj.get_order()
        self.resids_coefs = resids_fit_obj.get_coefs()