Source code for tsbootstrap.bootstrap

from __future__ import annotations

from numbers import Integral
from typing import Optional

import numpy as np

from tsbootstrap.base_bootstrap import (
    BaseDistributionBootstrap,
    BaseMarkovBootstrap,
    BaseResidualBootstrap,
    BaseSieveBootstrap,
    BaseStatisticPreservingBootstrap,
)
from tsbootstrap.block_bootstrap import (
    BlockBootstrap,
    MovingBlockBootstrap,
)
from tsbootstrap.markov_sampler import MarkovSampler
from tsbootstrap.time_series_simulator import TimeSeriesSimulator
from tsbootstrap.utils.odds_and_ends import generate_random_indices
from tsbootstrap.utils.types import (
    BlockCompressorTypes,
    ModelTypes,
    ModelTypesWithoutArch,
    OrderTypes,
    RngTypes,
)

# TODO: add a check if generated block is only one unit long
# TODO: ensure docstrings align with functionality
# TODO: test -- check len(returned_indices) == X.shape[0]
# TODO: ensure x is 2d only for var, otherwise 1d or 2d with 1 feature
# TODO: block_weights=p with block_length=1 should be equivalent to the iid bootstrap
# TODO: add test to fit_ar to ensure input lags, if list, are unique
# TODO: for `StatisticPreservingBootstrap`, see if the statistic on the bootstrapped
# sample is close to the statistic on the original sample
# TODO: in `DistributionBootstrap`, allow mixture of distributions


# Fit, then resample residuals.
[docs] class WholeResidualBootstrap(BaseResidualBootstrap): """ Whole Residual Bootstrap class for time series data. This class applies residual bootstrapping to the entire time series, without any block structure. This is the most basic form of residual bootstrapping. The residuals are resampled with replacement and added to the fitted values to generate new samples. Parameters ---------- n_bootstraps : Integral, default=10 The number of bootstrap samples to create. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : OrderTypes, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. save_models : bool, default=False Whether to save the fitted models. rng : RngTypes, default=None The random number generator or seed used to generate the bootstrap samples. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore rng: RngTypes = None, # type: ignore model_type: ModelTypesWithoutArch = "ar", model_params: Optional[dict] = None, order: OrderTypes = None, # type: ignore save_models: bool = False, ): self._model_type = model_type super().__init__( n_bootstraps=n_bootstraps, rng=rng, model_type=model_type, model_params=model_params, order=order, save_models=save_models, ) def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): self._fit_model(X=X, y=y) # Resample residuals resampled_indices = generate_random_indices( self.resids.shape[0], self.config.rng # type: ignore ) resampled_residuals = self.resids[resampled_indices] # type: ignore # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + resampled_residuals return [resampled_indices], [bootstrap_samples]
[docs] class BlockResidualBootstrap(BaseResidualBootstrap): """ Block Residual Bootstrap class for time series data. This class applies residual bootstrapping to blocks of the time series. The residuals are bootstrapped using the specified block structure and added to the fitted values to generate new samples. Parameters ---------- block_bootstrap : BlockBootstrap, default=MovingBlockBootstrap() The block bootstrap algorithm. n_bootstraps : Integral, default=10 The number of bootstrap samples to create. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : OrderTypes, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. save_models : bool, default=False Whether to save the fitted models. rng : RngTypes, default=None The random number generator or seed used to generate the bootstrap samples. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore block_bootstrap: Optional[BlockBootstrap] = None, model_type: ModelTypesWithoutArch = "ar", model_params: Optional[dict] = None, order: OrderTypes = None, # type: ignore save_models: bool = False, rng: RngTypes = None, # type: ignore ) -> None: super().__init__( n_bootstraps=n_bootstraps, rng=rng, model_type=model_type, model_params=model_params, order=order, save_models=save_models, ) if block_bootstrap is None: block_bootstrap = MovingBlockBootstrap() self.block_bootstrap = block_bootstrap def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and store residuals, fitted values, etc. BaseResidualBootstrap._fit_model(self, X=X, y=y) # Generate blocks of residuals ( block_indices, block_data, ) = self.block_bootstrap._generate_samples_single_bootstrap( X=self.resids # type: ignore ) # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + np.concatenate(block_data, axis=0) return block_indices, [bootstrap_samples]
[docs] @classmethod def get_test_params(cls, parameter_set="default"): from tsbootstrap.block_bootstrap import MovingBlockBootstrap bs = MovingBlockBootstrap() return {"block_bootstrap": bs}
[docs] class WholeMarkovBootstrap(BaseMarkovBootstrap): """ Whole Markov Bootstrap class for time series data. This class applies Markov bootstrapping to the entire time series, without any block structure. This is the most basic form of Markov bootstrapping. The residuals are fit to a Markov model, and then resampled using the Markov model. The resampled residuals are added to the fitted values to generate new samples. Methods ------- _generate_samples_single_bootstrap : Generate a single bootstrap sample. Notes ----- Fitting Markov models is expensive, hence we do not allow re-fititng. We instead fit once to the residuals and generate new samples by changing the random_seed. """ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and store residuals, fitted values, etc. self._fit_model(X=X, y=y) # Fit HMM to residuals, just once. random_seed = self.config.rng.integers(0, 1000) if self.hmm_object is None: markov_sampler = MarkovSampler( apply_pca_flag=self.config.apply_pca_flag, pca=self.config.pca, n_iter_hmm=self.config.n_iter_hmm, n_fits_hmm=self.config.n_fits_hmm, method=self.config.method, # type: ignore blocks_as_hidden_states_flag=self.config.blocks_as_hidden_states_flag, random_seed=random_seed, # type: ignore ) markov_sampler.fit( blocks=self.resids, n_states=self.config.n_states # type: ignore ) self.hmm_object = markov_sampler # Resample the fitted values using the HMM. bootstrapped_resids = self.hmm_object.sample( random_seed=random_seed + self.config.rng.integers(0, 1000) # type: ignore )[0] # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + bootstrapped_resids return [np.arange(X.shape[0])], [bootstrap_samples]
[docs] class BlockMarkovBootstrap(BaseMarkovBootstrap): """ Block Markov Bootstrap class for time series data. This class applies Markov bootstrapping to blocks of the time series. The residuals are fit to a Markov model, then resampled using the specified block structure. The resampled residuals are added to the fitted values to generate new samples. This class is a combination of the `BlockResidualBootstrap` and `WholeMarkovBootstrap` classes. Parameters ---------- block_bootstrap : BlockBootstrap, default=MovingBlockBootstrap() The block bootstrap algorithm. n_bootstraps : Integral, default=10 The number of bootstrap samples to create. method : str, default="middle" The method to use for compressing the blocks. Must be one of "first", "middle", "last", "mean", "mode", "median", "kmeans", "kmedians", "kmedoids". apply_pca_flag : bool, default=False Whether to apply PCA to the residuals before fitting the HMM. pca : PCA, default=None The PCA object to use for applying PCA to the residuals. n_iter_hmm : Integral, default=10 Number of iterations for fitting the HMM. n_fits_hmm : Integral, default=1 Number of times to fit the HMM. blocks_as_hidden_states_flag : bool, default=False Whether to use blocks as hidden states. n_states : Integral, default=2 Number of states for the HMM. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : Integral or list or tuple, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. save_models : bool, default=False Whether to save the fitted models. rng : Integral or np.random.Generator, default=np.random.default_rng() The random number generator or seed used to generate the bootstrap samples. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. Notes ----- Fitting Markov models is expensive, hence we do not allow re-fititng. We instead fit once to the residuals, resample using blocks once, and generate new samples by changing the random_seed. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore block_bootstrap: Optional[BlockBootstrap] = None, method: BlockCompressorTypes = "middle", apply_pca_flag: bool = False, pca=None, n_iter_hmm: Integral = 10, # type: ignore n_fits_hmm: Integral = 1, # type: ignore blocks_as_hidden_states_flag: bool = False, n_states: Integral = 2, # type: ignore model_type: ModelTypesWithoutArch = "ar", model_params: Optional[dict] = None, order=None, save_models: bool = False, rng: RngTypes = None, # type: ignore ) -> None: super().__init__( n_bootstraps=n_bootstraps, method=method, apply_pca_flag=apply_pca_flag, pca=pca, n_iter_hmm=n_iter_hmm, n_fits_hmm=n_fits_hmm, blocks_as_hidden_states_flag=blocks_as_hidden_states_flag, n_states=n_states, model_type=model_type, model_params=model_params, order=order, save_models=save_models, rng=rng, ) if block_bootstrap is None: block_bootstrap = MovingBlockBootstrap() self.block_bootstrap = block_bootstrap def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and store residuals, fitted values, etc. super()._fit_model(X=X, y=y) # Generate blocks of residuals ( block_indices, block_data, ) = self.block_bootstrap._generate_samples_single_bootstrap( X=self.resids # type: ignore ) random_seed = self.config.rng.integers(0, 1000) if self.hmm_object is None: markov_sampler = MarkovSampler( apply_pca_flag=self.config.apply_pca_flag, pca=self.config.pca, n_iter_hmm=self.config.n_iter_hmm, n_fits_hmm=self.config.n_fits_hmm, method=self.config.method, # type: ignore blocks_as_hidden_states_flag=self.config.blocks_as_hidden_states_flag, random_seed=random_seed, # type: ignore ) markov_sampler.fit( blocks=block_data, n_states=self.config.n_states ) self.hmm_object = markov_sampler # Resample the fitted values using the HMM. bootstrapped_resids = self.hmm_object.sample( random_seed=random_seed + self.config.rng.integers(0, 1000) # type: ignore )[0] # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + bootstrapped_resids return block_indices, [bootstrap_samples]
[docs] @classmethod def get_test_params(cls, parameter_set="default"): from tsbootstrap.block_bootstrap import MovingBlockBootstrap bs = MovingBlockBootstrap() return {"block_bootstrap": bs}
[docs] class WholeStatisticPreservingBootstrap(BaseStatisticPreservingBootstrap): """ Whole Statistic Preserving Bootstrap class for time series data. This class applies statistic-preserving bootstrapping to the entire time series, without any block structure. This is the most basic form of statistic-preserving bootstrapping. The residuals are resampled with replacement and added to the fitted values to generate new samples. Methods ------- _generate_samples_single_bootstrap : Generate a single bootstrap sample. """ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): if self.statistic_X is None: self.statistic_X = self._calculate_statistic(X=X) # Resample residuals resampled_indices = generate_random_indices( X.shape[0], self.config.rng # type: ignore ) bootstrapped_sample = X[resampled_indices] # Calculate the bootstrapped statistic statistic_bootstrapped = self._calculate_statistic(bootstrapped_sample) # Calculate the bias bias = self.statistic_X - statistic_bootstrapped # Add the bias to the bootstrapped sample bootstrap_sample_bias_corrected = bootstrapped_sample + bias return [resampled_indices], [bootstrap_sample_bias_corrected]
[docs] class BlockStatisticPreservingBootstrap(BaseStatisticPreservingBootstrap): """ Block Statistic Preserving Bootstrap class for time series data. This class applies statistic-preserving bootstrapping to blocks of the time series. The residuals are resampled using the specified block structure and added to the fitted values to generate new samples. Parameters ---------- block_bootstrap : BlockBootstrap, default=MovingBlockBootstrap() The block bootstrap algorithm. n_bootstraps : Integral, default=10 The number of bootstrap samples to create. statistic : Callable, default=np.mean A callable function to compute the statistic that should be preserved. statistic_axis : Integral, default=0 The axis along which the statistic should be computed. statistic_keepdims : bool, default=False Whether to keep the dimensions of the statistic or not. rng : Integral or np.random.Generator, default=np.random.default_rng() The random number generator or seed used to generate the bootstrap samples. Attributes ---------- statistic_X : np.ndarray, default=None The statistic calculated from the original data. This is used as a parameter for generating the bootstrapped samples. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore block_bootstrap: Optional[BlockBootstrap] = None, statistic=None, statistic_axis: Integral = 0, # type: ignore statistic_keepdims: bool = False, rng: RngTypes = None, # type: ignore ) -> None: """ Initialize self. Parameters ---------- statistic_config : BaseStatisticPreservingBootstrapConfig The configuration object for the bias corrected bootstrap. block_config : BaseBlockBootstrapConfig The configuration object for the block bootstrap. """ super().__init__( n_bootstraps=n_bootstraps, statistic=statistic, statistic_axis=statistic_axis, statistic_keepdims=statistic_keepdims, rng=rng, ) if block_bootstrap is None: block_bootstrap = MovingBlockBootstrap() self.block_bootstrap = block_bootstrap def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): if self.statistic_X is None: self.statistic_X = super()._calculate_statistic(X=X) ( block_indices, block_data, ) = self.block_bootstrap._generate_samples_single_bootstrap(X=X) block_data_concat = np.concatenate(block_data, axis=0) # Calculate the bootstrapped statistic statistic_bootstrapped = self._calculate_statistic(block_data_concat) # Calculate the bias bias = self.statistic_X - statistic_bootstrapped # Add the bias to the bootstrapped sample bootstrap_samples = block_data_concat + bias return block_indices, [bootstrap_samples]
[docs] @classmethod def get_test_params(cls, parameter_set="default"): from tsbootstrap.block_bootstrap import MovingBlockBootstrap bs = MovingBlockBootstrap() return {"block_bootstrap": bs}
[docs] class WholeDistributionBootstrap(BaseDistributionBootstrap): """ Whole Distribution Bootstrap class for time series data. This class applies distribution bootstrapping to the entire time series, without any block structure. This is the most basic form of distribution bootstrapping. The residuals are fit to a distribution, and then resampled using the distribution. The resampled residuals are added to the fitted values to generate new samples. Attributes ---------- resids_dist : scipy.stats.rv_continuous or None The distribution object used to generate the bootstrapped samples. If None, the distribution has not been fit yet. resids_dist_params : tuple or None The parameters of the distribution used to generate the bootstrapped samples. If None, the distribution has not been fit yet. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. Notes ----- We either fit the distribution to the residuals once and generate new samples from the fitted distribution with a new random seed, or resample the residuals once and fit the distribution to the resampled residuals, then generate new samples from the fitted distribution with the same random seed n_bootstrap times. """ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and residuals self._fit_model(X=X, y=y) # Fit the specified distribution to the residuals if not self.config.refit: if self.resids_dist is None or self.resids_dist_params == (): ( self.resids_dist, self.resids_dist_params, ) = super()._fit_distribution(self.resids) # Generate new residuals from the fitted distribution bootstrap_residuals = self.resids_dist.rvs( *self.resids_dist_params, size=X.shape[0], random_state=self.config.rng.integers(0, 2**32 - 1), ).reshape(-1, 1) # Add new residuals to the fitted values to create the bootstrap time series bootstrap_samples = self.X_fitted + bootstrap_residuals return [np.arange(0, X.shape[0])], [bootstrap_samples] else: # Resample residuals resampled_indices = generate_random_indices( self.resids.shape[0], self.config.rng # type: ignore ) resampled_residuals = self.resids[resampled_indices] resids_dist, resids_dist_params = super()._fit_distribution( resampled_residuals ) # Generate new residuals from the fitted distribution bootstrap_residuals = resids_dist.rvs( *resids_dist_params, size=X.shape[0], random_state=self.config.rng, ).reshape(-1, 1) # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + resampled_residuals return [resampled_indices], [bootstrap_samples]
[docs] class BlockDistributionBootstrap(BaseDistributionBootstrap): """ Block Distribution Bootstrap class for time series data. This class applies distribution bootstrapping to blocks of the time series. The residuals are fit to a distribution, then resampled using the specified block structure. Then new residuals are generated from the fitted distribution and added to the fitted values to generate new samples. Parameters ---------- block_bootstrap : BlockBootstrap, default=MovingBlockBootstrap() The block bootstrap algorithm. n_bootstraps : Integral, default=10 The number of bootstrap samples to create. distribution: str, default='normal' The distribution to use for generating the bootstrapped samples. Must be one of 'poisson', 'exponential', 'normal', 'gamma', 'beta', 'lognormal', 'weibull', 'pareto', 'geometric', or 'uniform'. refit: bool, default=False Whether to refit the distribution to the resampled residuals for each bootstrap. If False, the distribution is fit once to the residuals and the same distribution is used for all bootstraps. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : Integral or list or tuple, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. save_models : bool, default=False Whether to save the fitted models. rng : Integral or np.random.Generator, default=np.random.default_rng() The random number generator or seed used to generate the bootstrap samples. Attributes ---------- resids_dist : scipy.stats.rv_continuous or None The distribution object used to generate the bootstrapped samples. If None, the distribution has not been fit yet. resids_dist_params : tuple or None The parameters of the distribution used to generate the bootstrapped samples. If None, the distribution has not been fit yet. Methods ------- __init__ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrap sample. Notes ----- We either fit the distribution to the residuals once and generate new samples from the fitted distribution with a new random seed, or resample the residuals once and fit the distribution to the resampled residuals, then generate new samples from the fitted distribution with the same random seed n_bootstrap times. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore block_bootstrap: Optional[BlockBootstrap] = None, distribution: str = "normal", refit: bool = False, model_type: ModelTypesWithoutArch = "ar", model_params: Optional[dict] = None, order=None, save_models: bool = False, rng: RngTypes = None, # type: ignore ) -> None: """ Initialize self. Parameters ---------- distribution_config : BaseDistributionBootstrapConfig The configuration object for the distribution bootstrap. block_config : BaseBlockBootstrapConfig The configuration object for the block bootstrap. """ super().__init__( n_bootstraps=n_bootstraps, distribution=distribution, refit=refit, save_models=save_models, order=order, model_type=model_type, model_params=model_params, rng=rng, ) if block_bootstrap is None: block_bootstrap = MovingBlockBootstrap() self.block_bootstrap = block_bootstrap def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and residuals super()._fit_model(X=X, y=y) ( block_indices, block_data, ) = self.block_bootstrap._generate_samples_single_bootstrap( X=self.resids ) block_data_concat = np.concatenate(block_data, axis=0) # Fit the specified distribution to the residuals if not self.config.refit: if self.resids_dist is None or self.resids_dist_params == (): ( self.resids_dist, self.resids_dist_params, ) = super()._fit_distribution(block_data_concat) # Generate new residuals from the fitted distribution bootstrap_residuals = self.resids_dist.rvs( *self.resids_dist_params, size=block_data_concat.shape[0], random_state=self.config.rng.integers(0, 2**32 - 1), ).reshape(-1, 1) # Add new residuals to the fitted values to create the bootstrap time series bootstrap_samples = self.X_fitted + bootstrap_residuals return [np.arange(0, block_data_concat.shape[0])], [ bootstrap_samples ] else: # Resample residuals resids_dist, resids_dist_params = super()._fit_distribution( block_data_concat ) # Generate new residuals from the fitted distribution bootstrap_residuals = resids_dist.rvs( *resids_dist_params, size=block_data_concat.shape[0], random_state=self.config.rng, ).reshape(-1, 1) # Add the bootstrapped residuals to the fitted values bootstrap_samples = self.X_fitted + bootstrap_residuals return block_indices, [bootstrap_samples]
[docs] @classmethod def get_test_params(cls, parameter_set="default"): from tsbootstrap.block_bootstrap import MovingBlockBootstrap bs = MovingBlockBootstrap() return {"block_bootstrap": bs}
[docs] class WholeSieveBootstrap(BaseSieveBootstrap): """ Implementation of the Sieve bootstrap method for time series data. This class applies Sieve bootstrapping to the entire time series, without any block structure. This is the most basic form of Sieve bootstrapping. The residuals are fit to a second model, and then new samples are generated by adding the new residuals to the fitted values. Parameters ---------- resids_model_type : str, default="ar" The model type to use for fitting the residuals. Must be one of "ar", "arima", "sarima", "var", or "arch". resids_order : Integral or list or tuple, default=None The order of the model to use for fitting the residuals. If None, the order is automatically determined. save_resids_models : bool, default=False Whether to save the fitted models for the residuals. kwargs_base_sieve : dict, default=None Keyword arguments to pass to the SieveBootstrap class. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : Integral or list or tuple, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. Methods ------- _generate_samples_single_bootstrap : Generate a single bootstrapped sample. """ def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): self._fit_model(X=X, y=y) self._fit_resids_model(X=self.resids) ts_simulator = TimeSeriesSimulator( X_fitted=self.X_fitted, rng=self.config.rng, fitted_model=self.resids_fit_model, ) simulated_samples = ts_simulator.generate_samples_sieve( model_type=self.config.resids_model_type, # type: ignore resids_lags=self.resids_order, resids_coefs=self.resids_coefs, resids=self.resids, ) return [np.arange(X.shape[0])], [simulated_samples]
[docs] class BlockSieveBootstrap(BaseSieveBootstrap): """ Implementation of the Sieve bootstrap method for time series data. This class applies Sieve bootstrapping to blocks of the time series. The residuals are fit to a second model, then resampled using the specified block structure. The new residuals are then added to the fitted values to generate new samples. Parameters ---------- block_bootstrap : BlockBootstrap, default=MovingBlockBootstrap() The block bootstrap algorithm. resids_model_type : str, default="ar" The model type to use for fitting the residuals. Must be one of "ar", "arima", "sarima", "var", or "arch". resids_order : Integral or list or tuple, default=None The order of the model to use for fitting the residuals. If None, the order is automatically determined. save_resids_models : bool, default=False Whether to save the fitted models for the residuals. kwargs_base_sieve : dict, default=None Keyword arguments to pass to the SieveBootstrap class. model_type : str, default="ar" The model type to use. Must be one of "ar", "arima", "sarima", "var", or "arch". model_params : dict, default=None Additional keyword arguments to pass to the TSFit model. order : Integral or list or tuple, default=None The order of the model. If None, the best order is chosen via TSFitBestLag. If Integral, it is the lag order for AR, ARIMA, and SARIMA, and the lag order for ARCH. If list or tuple, the order is a tuple of (p, o, q) for ARIMA and (p, d, q, s) for SARIMAX. It is either a single Integral or a list of non-consecutive ints for AR, and an Integral for VAR and ARCH. If None, the best order is chosen via TSFitBestLag. Do note that TSFitBestLag only chooses the best lag, not the best order, so for the tuple values, it only chooses the best p, not the best (p, o, q) or (p, d, q, s). The rest of the values are set to 0. save_models : bool, default=False Whether to save the fitted models. Methods ------- _init_ : Initialize self. _generate_samples_single_bootstrap : Generate a single bootstrapped sample. """ def __init__( self, n_bootstraps: Integral = 10, # type: ignore block_bootstrap: Optional[BlockBootstrap] = None, resids_model_type: ModelTypes = "ar", resids_order=None, save_resids_models: bool = False, kwargs_base_sieve=None, model_type: ModelTypesWithoutArch = "ar", model_params: Optional[dict] = None, order=None, save_models: bool = False, rng: RngTypes = None, # type: ignore ) -> None: """ Initialize self. Parameters ---------- sieve_config : BaseSieveBootstrapConfig The configuration object for the sieve bootstrap. block_config : BaseBlockBootstrapConfig The configuration object for the block bootstrap. """ super().__init__( n_bootstraps=n_bootstraps, resids_model_type=resids_model_type, resids_order=resids_order, save_resids_models=save_resids_models, kwargs_base_sieve=kwargs_base_sieve, model_type=model_type, model_params=model_params, order=order, save_models=save_models, rng=rng, ) if block_bootstrap is None: block_bootstrap = MovingBlockBootstrap() self.block_bootstrap = block_bootstrap def _generate_samples_single_bootstrap(self, X: np.ndarray, y=None): # Fit the model and residuals super()._fit_model(X=X, y=y) super()._fit_resids_model(X=self.resids) ts_simulator = TimeSeriesSimulator( X_fitted=self.X_fitted, rng=self.config.rng, fitted_model=self.resids_fit_model, ) simulated_samples = ts_simulator.generate_samples_sieve( model_type=self.config.resids_model_type, # type: ignore resids_lags=self.resids_order, resids_coefs=self.resids_coefs, resids=self.resids, ) resids_resids = self.X_fitted - simulated_samples ( block_indices, resids_resids_resampled, ) = self.block_bootstrap._generate_samples_single_bootstrap( X=resids_resids ) resids_resids_resampled_concat = np.concatenate( resids_resids_resampled, axis=0 ) bootstrapped_samples = self.X_fitted + resids_resids_resampled_concat return block_indices, [bootstrapped_samples]
[docs] @classmethod def get_test_params(cls, parameter_set="default"): from tsbootstrap.block_bootstrap import MovingBlockBootstrap bs = MovingBlockBootstrap() return {"block_bootstrap": bs}