Source code for tsbootstrap.bootstrap

"""
Bootstrap Methods: Where Time Series Meet Uncertainty.

When we first started working with time series, we were struck by how often we make
predictions without acknowledging our uncertainty. That's why we created this module—to
give you the tools to honestly quantify how much you don't know.

We've organized these methods into two philosophical camps, each reflecting a different
way of thinking about time and randomness:

**Model-based approaches** (Residual, Sieve): Here, we help you separate the predictable
from the unpredictable. We fit a model to capture the patterns, then play with the
leftover randomness to understand your uncertainty. These methods shine when you have
a good grasp of your data's structure—think of them as precision instruments that
reward careful calibration.

**Model-free approaches** (Block methods): Sometimes, we prefer not to impose our
assumptions on your data. These methods preserve whatever correlation patterns exist,
without trying to model them explicitly. They're our go-to when the data's structure
is complex or unknown—robust workhorses that rarely let us down.

A Note on Our Journey Forward
-----------------------------
We're currently transitioning to a faster backend system. Here's what you need to know:
- Right now (v0.9.0): We're using the speedy new backends by default
- Coming soon (v0.10.0): We'll gently remind you if you're using the old system
- Eventually (v1.0.0): We'll bid farewell to the legacy code entirely

Examples
--------
Let us show you how we approach different scenarios:

>>> # When we know it's an AR(2) process—no need to be coy about it
>>> bootstrap = WholeResidualBootstrap(n_bootstraps=1000, model_type='ar', order=2)

>>> # When we're not sure about the order—we'll let the data tell its story
>>> bootstrap = WholeSieveBootstrap(n_bootstraps=1000, min_lag=1, max_lag=10)

>>> # When the dependencies are too complex for simple models—we preserve what we see
>>> bootstrap = BlockResidualBootstrap(n_bootstraps=1000, block_length=20)

We offer both 'whole' variants (where we treat residuals as exchangeable) and 'block'
variants (where we preserve local patterns even in the noise). Choose based on how
much structure you believe lurks in your residuals.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    from tsbootstrap.time_series_model import TimeSeriesModel

import numpy as np
from pydantic import Field, computed_field, model_validator

from tsbootstrap.base_bootstrap import (
    BaseTimeSeriesBootstrap,
    BlockBasedBootstrap,
    WholeDataBootstrap,
)
from tsbootstrap.services.service_container import BootstrapServices
from tsbootstrap.utils.types import ModelTypesWithoutArch
from tsbootstrap.validators import ModelOrder



[docs]
class ModelBasedBootstrap(BaseTimeSeriesBootstrap):
    """
    Foundation for bootstrap methods that trust in the power of models.

    Our core philosophy is simple yet profound: we believe every time series tells two
    stories—one of pattern and one of chance. When you give us your data, we carefully
    separate these narratives. The patterns (what we can predict) go into our model,
    while the surprises (the residuals) become the raw material for understanding
    uncertainty.

    Here's how we work our magic: First, we fit a model to capture your data's rhythm.
    Then we take the leftover randomness—the residuals—and reshuffle them like a
    deck of cards. By recombining these shuffled residuals with the original patterns,
    we create new possible histories for your data, each one slightly different but
    following the same underlying rules.

    We're particularly powerful when:
    - Your model captures the true dynamics well (we preserve those dynamics exactly)
    - You need efficient uncertainty estimates (we often converge faster than model-free cousins)
    - You want to peek into the future (we can extrapolate beyond what you've observed)
    - Consistency matters (our forecasts always respect your model's logic)

    But we'll be honest with you—we assume your model is right. That's a big assumption!
    Make sure to check the residuals for any patterns we might have missed. If you see
    structure there, we might be telling you an incomplete story.
    """

    # Model configuration fields
    model_type: ModelTypesWithoutArch = Field(
        default="ar",
        description="The model type to use. Options are 'ar', 'ma', 'arma', 'arima', 'sarima', 'var'.",
    )
    order: Optional[ModelOrder] = Field(default=None, description="The order of the model.")
    seasonal_order: Optional[ModelOrder] = Field(
        default=None, description="The seasonal order for SARIMA models."
    )
    save_models: bool = Field(
        default=False, description="Whether to save fitted models for each bootstrap."
    )
    use_backend: bool = Field(
        default=True,
        description="Whether to use the backend system (e.g., statsforecast) for model fitting.",
    )

    # Private attributes
    _fitted_model: Optional[TimeSeriesModel] = None
    _residuals: Optional[np.ndarray] = None
    _fitted_values: Optional[np.ndarray] = None

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with model-based services."""
        # Create appropriate services if not provided
        if services is None:
            # Extract use_backend from data if provided, otherwise use the field default
            use_backend = data.get("use_backend", True)  # Match the field default
            services = BootstrapServices.create_for_model_based_bootstrap(use_backend=use_backend)

        super().__init__(services=services, **data)

        # Update residual resampler with our RNG
        if self._services.residual_resampler:
            self._services.residual_resampler.rng = self.rng

    @computed_field
    @property
    def requires_model_fitting(self) -> bool:
        """Whether this bootstrap requires model fitting."""
        return True

    def _fit_model_if_needed(self, X: np.ndarray, y: Optional[np.ndarray] = None):
        """Fit model if not already fitted."""
        if self._fitted_model is None:
            # Default order for AR models if not specified
            order = self.order
            if order is None and self.model_type == "ar":
                order = 1  # Default AR(1)

            # Use model fitting service
            (
                self._fitted_model,
                self._fitted_values,
                self._residuals,
            ) = self._services.model_fitter.fit_model(
                X=X,
                model_type=self.model_type,
                order=order,
                seasonal_order=self.seasonal_order,
            )

    def _pad_to_original_length(self, bootstrapped_series: np.ndarray, X: np.ndarray) -> np.ndarray:
        """Pad bootstrapped series to match original length, handling shape mismatches."""
        if len(bootstrapped_series) >= len(X):
            return bootstrapped_series

        pad_length = len(X) - len(bootstrapped_series)

        # Handle 1D case
        if X.ndim == 1:
            padding = np.repeat(bootstrapped_series[-1], pad_length)
            return np.concatenate([bootstrapped_series, padding])

        # Handle 2D case - ensure bootstrapped_series matches X dimensionality
        if bootstrapped_series.ndim == 1 and X.ndim == 2:
            if X.shape[1] == 1:
                bootstrapped_series = bootstrapped_series.reshape(-1, 1)
            else:
                raise ValueError(
                    f"Shape mismatch: bootstrapped series is 1D but X has {X.shape[1]} columns"
                )

        # Now pad
        padding = np.tile(bootstrapped_series[-1], (pad_length, 1))
        return np.vstack([bootstrapped_series, padding])


[docs]
    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        # Abstract class - return empty list
        return []





[docs]
class WholeResidualBootstrap(ModelBasedBootstrap, WholeDataBootstrap):
    """
    Bootstrap time series by resampling model residuals independently.

    This method embodies the classical residual bootstrap approach: fit a time
    series model, extract the residuals (the unpredictable component), then
    create new series by adding resampled residuals back to the fitted values.
    It's the go-to method when you have confidence in your model specification
    and the residuals appear independent.

    The power of this approach lies in its simplicity and efficiency. By assuming
    residuals are exchangeable, we can resample them freely without worrying about
    preserving local structures. This makes it computationally fast and statistically
    straightforward, perfect for AR, MA, and ARMA models where the model captures
    all systematic patterns.

    Examples
    --------
    Bootstrap an AR(2) process to quantify parameter uncertainty:

    >>> ts = simulate_ar2_process(n=200)
    >>> bootstrap = WholeResidualBootstrap(
    ...     n_bootstraps=1000,
    ...     model_type='ar',
    ...     order=2
    ... )
    >>> samples = bootstrap.bootstrap(ts)
    >>>
    >>> # Estimate parameter confidence intervals
    >>> ar_params = [fit_ar2(sample) for sample in samples]
    >>> ci_lower, ci_upper = np.percentile(ar_params, [2.5, 97.5], axis=0)

    Notes
    -----
    This method assumes residuals are IID. Always check residual diagnostics:
    - Ljung-Box test for serial correlation
    - Jarque-Bera test for normality
    - ACF/PACF plots for remaining structure

    If residuals show patterns, consider BlockResidualBootstrap instead.
    """

    def _generate_samples_single_bootstrap(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """Generate a single bootstrap sample by resampling residuals."""
        # Ensure model is fitted
        self._fit_model_if_needed(X, y)

        if self._residuals is None:
            raise ValueError(
                "No residuals available for bootstrapping. "
                "This typically occurs when model fitting failed. "
                "Check that your data is appropriate for the specified model type."
            )

        # Handle multivariate case
        if X.ndim == 2 and X.shape[1] > 1 and self.model_type.lower() == "var":
            # For VAR models, residuals and fitted values have fewer rows due to lags
            # We need to pad to match original length
            n_samples = X.shape[0]
            n_fitted = self._fitted_values.shape[0]
            n_pad = n_samples - n_fitted

            # Resample residuals
            resampled_residuals = self._services.residual_resampler.resample_residuals_whole(
                residuals=self._residuals, n_samples=n_fitted
            )

            # Reconstruct for fitted portion
            bootstrapped = self._services.reconstructor.reconstruct_time_series(
                fitted_values=self._fitted_values, resampled_residuals=resampled_residuals
            )

            # Pad at the beginning to match original length
            if n_pad > 0:
                # Use the first values from X as padding
                padding = X[:n_pad]
                bootstrapped_series = np.vstack([padding, bootstrapped])
            else:
                bootstrapped_series = bootstrapped

            return bootstrapped_series
        else:
            # Standard case
            resampled_residuals = self._services.residual_resampler.resample_residuals_whole(
                residuals=self._residuals, n_samples=len(X)
            )

            # Use reconstruction service
            bootstrapped_series = self._services.reconstructor.reconstruct_time_series(
                fitted_values=self._fitted_values, resampled_residuals=resampled_residuals
            )

            # Handle length mismatch and shape for models that lose observations
            bootstrapped_series = self._pad_to_original_length(bootstrapped_series, X)

            # Reshape to match input
            return bootstrapped_series.reshape(X.shape)


[docs]
    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10}]





[docs]
class BlockResidualBootstrap(ModelBasedBootstrap, BlockBasedBootstrap):
    """
    Bootstrap time series by resampling model residuals in blocks.

    When model residuals exhibit serial correlation or heteroskedasticity,
    treating them as independent (as in WholeResidualBootstrap) can lead to
    severely biased inference. This method preserves local dependence structures
    by resampling residuals in contiguous blocks, maintaining the delicate
    patterns that remain after model fitting.

    Consider this the "safety net" of residual bootstrapping. Even if your model
    captures most of the structure, real-world residuals often contain subtle
    patterns - volatility clustering in financial data, seasonal heteroskedasticity
    in climate series, or complex error dependencies in sensor networks. Block
    resampling respects these nuances.

    Parameters
    ----------
    block_length : int
        Length of residual blocks to preserve. Too short destroys dependencies;
        too long reduces diversity. Common heuristics: n^(1/3) for general use,
        or match the residual correlation length.

    Examples
    --------
    Handle GARCH-type effects in financial returns:

    >>> returns = load_stock_returns()
    >>> bootstrap = BlockResidualBootstrap(
    ...     n_bootstraps=1000,
    ...     model_type='ar',
    ...     order=1,
    ...     block_length=20  # Preserve volatility clusters
    ... )
    >>> samples = bootstrap.bootstrap(returns)
    >>>
    >>> # Compute VaR with proper uncertainty quantification
    >>> var_estimates = [compute_var(sample, alpha=0.05) for sample in samples]

    See Also
    --------
    WholeResidualBootstrap : When residuals are truly independent
    MovingBlockBootstrap : For model-free block bootstrapping
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with appropriate services."""
        # Ensure we have model-based services
        if services is None:
            # Extract use_backend from data if provided, otherwise use the field default
            use_backend = data.get("use_backend", True)  # Match the field default
            services = BootstrapServices.create_for_model_based_bootstrap(use_backend=use_backend)

        super().__init__(services=services, **data)

    def _generate_samples_single_bootstrap(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """Generate a single bootstrap sample by resampling residuals in blocks."""
        # Ensure model is fitted
        self._fit_model_if_needed(X, y)

        if self._residuals is None:
            raise ValueError(
                "No residuals available for bootstrapping. "
                "This typically occurs when model fitting failed. "
                "Check that your data is appropriate for the specified model type."
            )

        # Use block residual resampling service
        # Resample to match the length of fitted values/residuals
        n_fitted = len(self._fitted_values)
        resampled_residuals = self._services.residual_resampler.resample_residuals_block(
            residuals=self._residuals, block_length=self.block_length, n_samples=n_fitted
        )

        # Use reconstruction service
        bootstrapped_series = self._services.reconstructor.reconstruct_time_series(
            fitted_values=self._fitted_values, resampled_residuals=resampled_residuals
        )

        # Handle length mismatch and shape for models that lose observations
        bootstrapped_series = self._pad_to_original_length(bootstrapped_series, X)

        # Reshape to match input
        return bootstrapped_series.reshape(X.shape)


[docs]
    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10}]





[docs]
class WholeSieveBootstrap(ModelBasedBootstrap, WholeDataBootstrap):
    """
    Bootstrap with automatic model order selection for each sample.

    The sieve bootstrap addresses a fundamental challenge in time series analysis:
    model uncertainty. Rather than assuming we know the true model order, this
    method acknowledges our ignorance by selecting the order anew for each
    bootstrap sample. This "honest" approach propagates both parameter uncertainty
    and model selection uncertainty into our final inference.

    The name "sieve" comes from the method's ability to filter through different
    model complexities, selecting the one that best fits each bootstrap sample.
    As sample size grows, the selected models become increasingly complex,
    asymptotically capturing the true (possibly infinite-order) dynamics.

    This is particularly powerful for:
    - Forecasting when model order is uncertain
    - Robust inference without pre-specifying model complexity
    - Capturing both model and parameter uncertainty
    """

    # Sieve-specific fields
    min_lag: int = Field(default=1, ge=1, description="Minimum lag for order selection")
    max_lag: int = Field(default=10, ge=1, description="Maximum lag for order selection")
    criterion: str = Field(default="aic", description="Information criterion for order selection")

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with sieve bootstrap services."""
        if services is None:
            # Extract use_backend from data if provided, otherwise use the field default
            use_backend = data.get("use_backend", True)  # Match the field default
            services = BootstrapServices.create_for_sieve_bootstrap(use_backend=use_backend)

        super().__init__(services=services, **data)


[docs]
    @model_validator(mode="after")
    def validate_lag_range(self):
        """Ensure max_lag >= min_lag."""
        if self.max_lag < self.min_lag:
            raise ValueError("max_lag must be >= min_lag")
        return self


    def _fit_model_if_needed(self, X: np.ndarray, y: Optional[np.ndarray] = None):
        """Override to use order selection for sieve bootstrap."""
        if self._fitted_model is None:
            # First select order using order selection service
            selected_order = self._services.order_selector.select_order(
                X=X, min_lag=self.min_lag, max_lag=self.max_lag, criterion=self.criterion
            )

            # Update order
            self.order = selected_order

            # Now fit model with selected order
            super()._fit_model_if_needed(X, y)

    def _generate_samples_single_bootstrap(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """Generate sieve bootstrap sample with order selection."""
        # For each bootstrap, reselect order
        selected_order = self._services.order_selector.select_order(
            X=X, min_lag=self.min_lag, max_lag=self.max_lag, criterion=self.criterion
        )

        # Fit model with new order
        fitted_model, fitted_values, residuals = self._services.model_fitter.fit_model(
            X=X,
            model_type=self.model_type,
            order=selected_order,
            seasonal_order=self.seasonal_order,
        )

        # Resample residuals
        resampled_residuals = self._services.residual_resampler.resample_residuals_whole(
            residuals=residuals, n_samples=len(X)
        )

        # Reconstruct
        bootstrapped_series = self._services.reconstructor.reconstruct_time_series(
            fitted_values=fitted_values, resampled_residuals=resampled_residuals
        )

        # Handle length mismatch and shape for models that lose observations
        bootstrapped_series = self._pad_to_original_length(bootstrapped_series, X)

        return bootstrapped_series.reshape(X.shape)


[docs]
    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10}]





[docs]
def demonstrate_service_architecture():
    """
    Showcase the power of service-based bootstrap architecture.

    This example illustrates how the service architecture enables
    flexible, testable, and extensible bootstrap implementations.
    Each service handles a specific responsibility, making the
    system both powerful and maintainable.
    """
    import numpy as np

    # Generate sample data
    np.random.seed(42)
    n = 100
    X = np.cumsum(np.random.randn(n)).reshape(-1, 1)

    # Standard usage with default services
    bootstrap = WholeResidualBootstrap(n_bootstraps=5, model_type="ar", order=2)
    samples = bootstrap.bootstrap(X)

    # Advanced usage with custom service configuration
    # This demonstrates the flexibility of dependency injection
    custom_services = BootstrapServices.create_for_model_based_bootstrap()

    # Services can be customized for specific needs:
    # - Different model fitting algorithms
    # - Alternative resampling strategies
    # - Custom reconstruction methods
    # - Specialized validation rules

    bootstrap_custom = WholeResidualBootstrap(
        services=custom_services, n_bootstraps=5, model_type="ar", order=2
    )
    samples_custom = bootstrap_custom.bootstrap(X)

    # The architecture benefits:
    # 1. Each service is independently testable
    # 2. Services can be mocked for unit testing
    # 3. New functionality via service composition
    # 4. Clear interfaces and responsibilities
    # 5. Performance optimizations per service

    return samples, samples_custom




[docs]
class BlockSieveBootstrap(BlockBasedBootstrap, WholeSieveBootstrap):
    """
    The most conservative bootstrap: block resampling with order selection.

    This method combines two forms of uncertainty quantification: model selection
    uncertainty (through order selection) and residual dependence uncertainty
    (through block resampling). It's the "belt and suspenders" approach to
    time series bootstrap, appropriate when you need the most honest assessment
    of uncertainty.

    Use this when:
    - Model order is unknown AND residuals may be dependent
    - Working with complex real-world data where assumptions are questionable
    - Conservative inference is more important than efficiency
    - Publishing results that must withstand scrutiny

    The computational cost is higher than simpler methods, but the robustness
    gained often justifies the expense in critical applications.

    Examples
    --------
    Robust forecasting with full uncertainty quantification:

    >>> series = load_complex_time_series()
    >>> bootstrap = BlockSieveBootstrap(
    ...     n_bootstraps=1000,
    ...     min_lag=1,
    ...     max_lag=15,
    ...     block_length=25,
    ...     criterion='bic'  # More conservative than AIC
    ... )
    >>> samples = bootstrap.bootstrap(series)
    >>>
    >>> # Generate honest prediction intervals
    >>> forecasts = [forecast_series(s) for s in samples]
    >>> # These intervals account for both model and residual uncertainty
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with sieve bootstrap services."""
        if services is None:
            # Extract use_backend from data if provided, otherwise use the field default
            use_backend = data.get("use_backend", True)  # Match the field default
            services = BootstrapServices.create_for_sieve_bootstrap(use_backend=use_backend)

        super().__init__(services=services, **data)

    def _generate_samples_single_bootstrap(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """Generate sieve bootstrap sample with block resampling."""
        # Select order for this bootstrap
        selected_order = self._services.order_selector.select_order(
            X=X, min_lag=self.min_lag, max_lag=self.max_lag, criterion=self.criterion
        )

        # Fit model with selected order
        fitted_model, fitted_values, residuals = self._services.model_fitter.fit_model(
            X=X,
            model_type=self.model_type,
            order=selected_order,
            seasonal_order=self.seasonal_order,
        )

        # Resample residuals in blocks
        n_fitted = len(fitted_values)
        resampled_residuals = self._services.residual_resampler.resample_residuals_block(
            residuals=residuals, block_length=self.block_length, n_samples=n_fitted
        )

        # Reconstruct
        bootstrapped_series = self._services.reconstructor.reconstruct_time_series(
            fitted_values=fitted_values, resampled_residuals=resampled_residuals
        )

        # Handle length mismatch and shape for models that lose observations
        bootstrapped_series = self._pad_to_original_length(bootstrapped_series, X)

        return bootstrapped_series.reshape(X.shape)


[docs]
    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10}]