Source code for tsbootstrap.block_bootstrap

"""
Block bootstrap methods for preserving temporal dependencies in time series.

This module provides a comprehensive suite of block bootstrap methods, each
designed to handle different aspects of temporal correlation. From simple
moving blocks to sophisticated tapered methods, these implementations enable
valid statistical inference for dependent data.

The block bootstrap philosophy is elegant: rather than assuming independence
(as in IID bootstrap), we preserve local temporal structures by resampling
contiguous data segments. This respects the "grammar" of time series - the
patterns, cycles, and dependencies that make temporal data unique.

Key innovations in this implementation:
- Service-based architecture for maximum flexibility
- Efficient block generation with minimal memory overhead
- Support for variable block lengths and weighted sampling
- Tapered block methods for smooth transitions
- Circular variants for periodic/seasonal data

Examples
--------
Choose the right block method for your data:

>>> # For general time series with unknown dependence structure
>>> bootstrap = MovingBlockBootstrap(n_bootstraps=1000, block_length=30)
>>>
>>> # For series where a fixed block length would impose artificial periodicity
>>> bootstrap = StationaryBlockBootstrap(n_bootstraps=1000, block_length=25)
>>>
>>> # For seasonal/periodic data
>>> bootstrap = CircularBlockBootstrap(n_bootstraps=1000, block_length=12)
>>>
>>> # For smooth spectral estimation
>>> bootstrap = BartlettsBootstrap(n_bootstraps=1000, block_length=20)

Notes
-----
Block length selection is crucial. Common approaches:
- n^(1/3) for general use (Hall et al., 1995)
- Match dominant periodicity for seasonal data
- Use cross-validation for data-driven selection
- Consider multiple lengths for robustness

See Also
--------
BlockGenerationService : Core block generation algorithms
BlockResamplingService : Efficient block resampling strategies
WindowFunctionService : Tapering functions for smooth transitions
"""

from __future__ import annotations

from typing import Callable, List, Optional, Union

import numpy as np
from pydantic import Field, PrivateAttr

from tsbootstrap.base_bootstrap import (
    BlockBasedBootstrap,
)
from tsbootstrap.services.block_bootstrap_services import (
    BlockGenerationService,
    BlockResamplingService,
    WindowFunctionService,
)
from tsbootstrap.services.service_container import BootstrapServices



[docs]
class BlockBootstrap(BlockBasedBootstrap):
    """
    Foundation for all block bootstrap methods.

    A bootstrap sample is produced in two steps, each delegated to a service
    object: a ``BlockGenerationService`` turns the input series into a list of
    blocks, and a ``BlockResamplingService`` draws blocks and returns their
    data. The drawn data is concatenated, cut to the length of the input and
    reshaped to the input's shape. Resampling contiguous blocks rather than
    single observations keeps the dependence structure inside each block.

    Parameters
    ----------
    block_length_distribution : str, optional
        Distribution for variable block lengths, forwarded unchanged to the
        block generation service. ``None`` means fixed-length blocks; the
        subclasses in this module also use ``'geometric'``.

    wrap_around_flag : bool, default=False
        Whether to treat the data as circular, i.e. allow blocks to run past
        the end of the series and continue at its beginning.

    combine_generation_and_sampling_flag : bool, default=False
        If True, blocks are regenerated for every bootstrap sample. If False,
        blocks are generated once and cached; the cache is rebuilt whenever
        the series length or any block generation setting changes.

    block_weights : array-like or callable, optional
        Weights for block selection, forwarded to the resampling service.
        The field is excluded from serialization, and callable weights are
        dropped by ``get_params`` / ignored by ``set_params``.

    overlap_length : int, optional
        Length of overlap between blocks (must be >= 1 when given).

    min_block_length : int, optional
        Minimum block length (must be >= 1 when given).

    Notes
    -----
    ``block_length``, ``overlap_flag`` and ``rng`` are read from the instance
    but are not declared in this class; they are presumably provided by the
    parent class.

    If an instance exposes a ``tapered_weights`` attribute (the windowed
    subclasses do), it is forwarded to the resampling service; otherwise
    ``None`` is passed.
    """

    # Block bootstrap configuration
    block_length_distribution: Optional[str] = Field(
        default=None, description="Distribution for variable block lengths"
    )
    wrap_around_flag: bool = Field(
        default=False, description="Whether to wrap around data when generating blocks"
    )
    combine_generation_and_sampling_flag: bool = Field(
        default=False, description="Whether to regenerate blocks for each bootstrap"
    )
    block_weights: Optional[Union[np.ndarray, Callable]] = Field(
        default=None,
        description="Weights for block sampling",
        exclude=True,  # Kept out of pydantic serialization (may be a callable)
    )
    overlap_length: Optional[int] = Field(
        default=None, ge=1, description="Length of overlap between blocks"
    )
    min_block_length: Optional[int] = Field(default=None, ge=1, description="Minimum block length")

    # Private attributes. The services are only assigned at the end of
    # __init__, so they are genuinely None until then.
    _block_gen_service: Optional[BlockGenerationService] = PrivateAttr(default=None)
    _block_resample_service: Optional[BlockResamplingService] = PrivateAttr(default=None)
    _blocks: Optional[List[np.ndarray]] = PrivateAttr(default=None)
    # Settings the cached ``_blocks`` were generated with (see
    # ``_generate_blocks_if_needed``); None while nothing is cached.
    _blocks_cache_key: Optional[tuple] = PrivateAttr(default=None)

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize the model and create the block services."""
        super().__init__(services=services, **data)

        # Create block services
        self._block_gen_service = BlockGenerationService()
        self._block_resample_service = BlockResamplingService()

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10, "block_length": 10}]

    def _generate_blocks_if_needed(self, X: np.ndarray) -> List[np.ndarray]:
        """
        Return the blocks for ``X``, generating them when required.

        Blocks are regenerated on every call when
        ``combine_generation_and_sampling_flag`` is set. Otherwise they are
        cached, and the cache is reused only while the length of ``X`` and all
        block generation settings are unchanged. Without that check, blocks
        built for one series or configuration would silently be applied to
        another.
        """
        regenerate_each_time = self.combine_generation_and_sampling_flag

        # Everything (apart from the RNG) that is passed to generate_blocks.
        cache_key = (
            len(X),
            self.block_length,
            self.block_length_distribution,
            self.wrap_around_flag,
            self.overlap_flag,
            self.overlap_length,
            self.min_block_length,
        )

        cache_is_valid = self._blocks is not None and self._blocks_cache_key == cache_key
        if not regenerate_each_time and cache_is_valid:
            return self._blocks

        blocks = self._block_gen_service.generate_blocks(
            X=X,
            block_length=self.block_length,
            block_length_distribution=self.block_length_distribution,
            wrap_around_flag=self.wrap_around_flag,
            overlap_flag=self.overlap_flag,
            overlap_length=self.overlap_length,
            min_block_length=self.min_block_length,
            rng=self.rng,
        )

        # Cache blocks if not regenerating each time
        if not regenerate_each_time:
            self._blocks = blocks
            self._blocks_cache_key = cache_key

        return blocks

    def _generate_samples_single_bootstrap(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """
        Generate a single bootstrap sample using block resampling.

        Parameters
        ----------
        X : np.ndarray
            Series to resample.
        y : np.ndarray, optional
            Accepted for interface compatibility; not used here.

        Returns
        -------
        np.ndarray
            Resampled data with the same shape as ``X``. If the resampling
            service returns no block data, an uninitialized array shaped like
            ``X`` is returned.

        Raises
        ------
        ValueError
            Raised by ``reshape`` if the concatenated block data holds fewer
            elements than ``X``.
        """
        blocks = self._generate_blocks_if_needed(X)

        # Only windowed subclasses define tapered_weights; everything else
        # passes None.
        tapered_weights = getattr(self, "tapered_weights", None)
        _, block_data = self._block_resample_service.resample_blocks(
            X=X,
            blocks=blocks,
            n=len(X),
            block_weights=self.block_weights,
            tapered_weights=tapered_weights,
            rng=self.rng,
        )

        if not block_data:
            return np.empty_like(X)

        result = np.concatenate(block_data, axis=0)
        # The last block may overshoot the target length
        if len(result) > len(X):
            result = result[: len(X)]
        # Drop trailing singleton dimensions the input did not have
        while result.ndim > 1 and result.shape[-1] == 1 and len(result.shape) > len(X.shape):
            result = result.squeeze(-1)
        return result.reshape(X.shape)

    def get_params(self, deep=True):
        """Get parameters, leaving out a callable ``block_weights``."""
        params = super().get_params(deep=deep)
        # Remove callable fields that can't be cloned
        if "block_weights" in params and callable(params.get("block_weights")):
            params.pop("block_weights", None)
        return params

    def set_params(self, **params):
        """
        Set parameters, handling excluded fields.

        A callable ``block_weights`` is removed from ``params`` before
        delegating to the parent, i.e. it is ignored rather than applied.
        """
        # Don't try to set callable fields directly
        if "block_weights" in params and callable(params["block_weights"]):
            params.pop("block_weights")
        return super().set_params(**params)





[docs]
class MovingBlockBootstrap(BlockBootstrap):
    """
    Moving block bootstrap for general time series.

    Blocks slide across the series, and new series are assembled by drawing
    those blocks with replacement, so that local correlation structure
    survives resampling. No distributional assumptions or model
    specifications are needed, which makes the method a reasonable first
    choice for:
    - Stationary time series with unknown correlation structure
    - Moderate to strong serial dependence
    - General-purpose uncertainty quantification

    This class only supplies defaults: ``wrap_around_flag=False`` and
    ``overlap_flag=True``. Both can be overridden by the caller.

    Examples
    --------
    Confidence intervals for autocorrelated data:

    >>> series = load_temperature_anomalies()  # Daily data with serial correlation
    >>> bootstrap = MovingBlockBootstrap(
    ...     n_bootstraps=2000,
    ...     block_length=30  # Monthly blocks for daily data
    ... )
    >>> samples = bootstrap.bootstrap(series)
    >>> trend_ci = compute_trend_confidence_interval(samples)

    Notes
    -----
    Overlapping blocks give interior observations many chances to be
    selected. Because blocks do not wrap around by default, this can lead to
    slight bias at the series boundaries.
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize, filling in the moving block defaults."""
        # Caller-supplied values take precedence over both defaults.
        settings = {"wrap_around_flag": False, "overlap_flag": True, **data}
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class StationaryBlockBootstrap(BlockBootstrap):
    """
    Block bootstrap with random, geometrically distributed block lengths.

    Fixed block lengths can create artificial periodicity in the resampled
    series. Drawing block lengths from a geometric distribution avoids this:
    the distribution is memoryless, so the probability of ending the current
    block is the same at every step and block boundaries do not impose
    structure of their own on the resampled data.

    Suited to:
    - Time series with varying dependence scales
    - Avoiding artifacts from fixed block lengths
    - Theoretical work requiring stationarity of the resampled series
    - Long-range dependent processes

    This class only supplies defaults:
    ``block_length_distribution="geometric"``, ``wrap_around_flag=False`` and
    ``overlap_flag=True``. All three can be overridden by the caller.

    Examples
    --------
    Financial returns with time-varying volatility:

    >>> returns = load_stock_returns()
    >>> bootstrap = StationaryBlockBootstrap(
    ...     n_bootstraps=5000,
    ...     block_length=20  # Expected block length
    ... )
    >>> samples = bootstrap.bootstrap(returns)
    >>> # Sharpe ratio CI accounting for serial correlation
    >>> sharpe_ratios = [compute_sharpe(s) for s in samples]
    >>> ci = np.percentile(sharpe_ratios, [2.5, 97.5])

    Notes
    -----
    The block_length parameter represents the expected (mean) block length.
    Actual blocks follow a geometric distribution with parameter p = 1/block_length.
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize, filling in the stationary bootstrap defaults."""
        # Caller-supplied values take precedence over all three defaults.
        settings = {
            "block_length_distribution": "geometric",
            "wrap_around_flag": False,
            "overlap_flag": True,
            **data,
        }
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class CircularBlockBootstrap(BlockBootstrap):
    """
    Block bootstrap that treats the series as a closed loop.

    The end of the data is joined to its beginning, so blocks may span the
    end-to-beginning boundary. This removes edge effects and captures
    patterns that a non-circular method would cut in two.

    Typical uses:
    - Seasonal data (monthly, quarterly patterns)
    - Daily cycles (hourly data through a day)
    - Any series with natural periodicity
    - Reducing boundary bias in finite samples

    ``wrap_around_flag`` is always forced to True, overriding any value the
    caller passes. ``overlap_flag`` defaults to True and can be overridden.

    Examples
    --------
    Monthly seasonal patterns in sales data:

    >>> monthly_sales = load_monthly_sales(years=10)  # 120 observations
    >>> bootstrap = CircularBlockBootstrap(
    ...     n_bootstraps=1000,
    ...     block_length=12  # Full year blocks
    ... )
    >>> samples = bootstrap.bootstrap(monthly_sales)
    >>> # Analyze seasonal patterns with proper uncertainty
    >>> seasonal_effects = estimate_seasonal_effects(samples)

    Daily patterns in electricity demand:

    >>> hourly_demand = load_hourly_electricity()  # 24-hour cycles
    >>> bootstrap = CircularBlockBootstrap(
    ...     n_bootstraps=2000,
    ...     block_length=6  # 6-hour blocks
    ... )
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with wrap-around enforced."""
        # overlap_flag is only a default; wrap_around_flag is placed last so
        # that it replaces whatever the caller supplied.
        settings = {"overlap_flag": True, **data, "wrap_around_flag": True}
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class NonOverlappingBlockBootstrap(BlockBootstrap):
    """
    Block bootstrap that resamples disjoint blocks.

    The series is divided into non-overlapping blocks and these disjoint
    pieces are resampled. This is useful when:
    - Blocks represent natural units (weeks, quarters, regimes)
    - Specific boundary conditions must be preserved
    - Computational efficiency matters
    - Working with multi-scale data structures

    The price is less freedom in where blocks can start, which can increase
    variance in small samples.

    ``overlap_flag`` is always forced to False, overriding any value the
    caller passes. ``wrap_around_flag`` defaults to False and can be
    overridden.

    Examples
    --------
    Weekly patterns in daily data:

    >>> daily_activity = load_user_activity(days=364)  # 52 weeks
    >>> bootstrap = NonOverlappingBlockBootstrap(
    ...     n_bootstraps=1000,
    ...     block_length=7  # Weekly blocks
    ... )
    >>> samples = bootstrap.bootstrap(daily_activity)
    >>> # Preserves weekly structure exactly
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with overlapping disabled."""
        # wrap_around_flag is only a default; overlap_flag is placed last so
        # that it replaces whatever the caller supplied.
        settings = {"wrap_around_flag": False, **data, "overlap_flag": False}
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]




# Window function bootstraps



[docs]
class WindowedBlockBootstrap(BlockBootstrap):
    """
    Foundation for tapered block methods with smooth transitions.

    Standard blocks have sharp cutoffs that can introduce discontinuities
    where two blocks meet. Windowed methods supply a tapering function that
    down-weights observations near block edges. The function is exposed as
    the ``tapered_weights`` property, which the parent class forwards to the
    block resampling service.

    Parameters
    ----------
    window_type : str, default="hanning"
        Name of the window function, resolved through
        ``WindowFunctionService.get_window_function``.

    Notes
    -----
    The tapering callable is created lazily and cached. The cache is rebuilt
    when ``window_type`` changes, so updating the window on an existing
    instance takes effect on the next access.
    """

    window_type: str = Field(default="hanning", description="Type of window function")

    # Private attributes
    _window_service: Optional[WindowFunctionService] = PrivateAttr(default=None)
    _tapered_weights_cache: Optional[Callable] = PrivateAttr(default=None)
    # window_type the cached callable was built for; None while nothing is cached
    _tapered_weights_window_type: Optional[str] = PrivateAttr(default=None)

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize the model and create the window service."""
        super().__init__(services=services, **data)

        # Create window service
        self._window_service = WindowFunctionService()
        # Don't set tapered_weights here - use property instead

    @property
    def tapered_weights(self) -> Optional[Callable]:
        """
        Get the tapered weights function for the current window type.

        Returns
        -------
        callable or None
            Function mapping a block length to an array of weights. While the
            window service has not been created yet, the cached value is
            returned as is (None unless a callable was cached earlier).
        """
        if self._window_service is None:
            return self._tapered_weights_cache

        # Rebuild when nothing is cached or when the cached callable belongs
        # to a different window type than the one currently configured.
        cache_is_stale = (
            self._tapered_weights_cache is None
            or self._tapered_weights_window_type != self.window_type
        )
        if cache_is_stale:
            self._tapered_weights_cache = self._create_tapered_weights()
            self._tapered_weights_window_type = self.window_type
        return self._tapered_weights_cache

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        # Abstract base for windowed methods - return empty
        return []

    def _create_tapered_weights(self) -> Callable:
        """
        Create the tapered weights function for ``window_type``.

        The window function is looked up once, here; the returned callable
        simply applies it to a block length.
        """
        if self._window_service is None:
            self._window_service = WindowFunctionService()

        window_func = self._window_service.get_window_function(self.window_type)

        def tapered_weights(block_length: int) -> np.ndarray:
            return window_func(block_length)

        return tapered_weights




[docs]
class BartlettsBootstrap(WindowedBlockBootstrap):
    """
    Tapered block bootstrap using Bartlett's (triangular) window.

    The window type is fixed to ``"bartletts"``. Bartlett's window is
    tent-shaped: weight falls off linearly from the block center towards the
    edges, which gives simple and cheap tapering at block boundaries. It is a
    common choice for spectral density estimation and linear statistics.

    ``window_type`` is always forced to ``"bartletts"``, overriding any value
    the caller passes. ``wrap_around_flag`` defaults to False and
    ``overlap_flag`` to True; both can be overridden.

    Examples
    --------
    Spectral analysis with proper uncertainty:

    >>> signal = load_vibration_data()
    >>> bootstrap = BartlettsBootstrap(
    ...     n_bootstraps=1000,
    ...     block_length=50
    ... )
    >>> samples = bootstrap.bootstrap(signal)
    >>> spectra = [compute_spectrum(s) for s in samples]
    >>> # Confidence bands for spectral density
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with the Bartlett window enforced."""
        # The two flags are defaults; window_type is placed last so that it
        # replaces whatever the caller supplied.
        settings = {
            "wrap_around_flag": False,
            "overlap_flag": True,
            **data,
            "window_type": "bartletts",
        }
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class BlackmanBootstrap(WindowedBlockBootstrap):
    """
    Tapered block bootstrap using the Blackman window.

    ``window_type`` is always forced to ``"blackman"``, overriding any value
    the caller passes. ``wrap_around_flag`` defaults to False and
    ``overlap_flag`` to True; both can be overridden.
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with the Blackman window enforced."""
        # The two flags are defaults; window_type is placed last so that it
        # replaces whatever the caller supplied.
        settings = {
            "wrap_around_flag": False,
            "overlap_flag": True,
            **data,
            "window_type": "blackman",
        }
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class HammingBootstrap(WindowedBlockBootstrap):
    """
    Tapered block bootstrap using the Hamming window.

    The Hamming window balances main lobe width against side lobe
    suppression, which makes it a frequent choice for frequency domain work:
    the smooth taper reduces spectral leakage while keeping temporal
    resolution.

    Typical uses:
    - Frequency domain bootstrap
    - Harmonic analysis
    - Signal processing applications
    - Reducing edge artifacts

    ``window_type`` is always forced to ``"hamming"``, overriding any value
    the caller passes. ``wrap_around_flag`` defaults to False and
    ``overlap_flag`` to True; both can be overridden.

    Examples
    --------
    >>> audio = load_audio_signal()
    >>> bootstrap = HammingBootstrap(
    ...     n_bootstraps=500,
    ...     block_length=256  # FFT-friendly size
    ... )
    >>> samples = bootstrap.bootstrap(audio)
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with the Hamming window enforced."""
        # The two flags are defaults; window_type is placed last so that it
        # replaces whatever the caller supplied.
        settings = {
            "wrap_around_flag": False,
            "overlap_flag": True,
            **data,
            "window_type": "hamming",
        }
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class HanningBootstrap(WindowedBlockBootstrap):
    """
    Tapered block bootstrap using the Hanning window.

    ``window_type`` is always forced to ``"hanning"``, overriding any value
    the caller passes. ``wrap_around_flag`` defaults to False and
    ``overlap_flag`` to True; both can be overridden.
    """

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with the Hanning window enforced."""
        # The two flags are defaults; window_type is placed last so that it
        # replaces whatever the caller supplied.
        settings = {
            "wrap_around_flag": False,
            "overlap_flag": True,
            **data,
            "window_type": "hanning",
        }
        super().__init__(services=services, **settings)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [dict(n_bootstraps=10, block_length=10)]





[docs]
class TukeyBootstrap(WindowedBlockBootstrap):
    """
    Flexible tapered bootstrap with adjustable edge smoothing.

    The Tukey window (tapered cosine) has an adjustable parameter that
    controls the proportion of the block that is tapered. This flexibility
    makes it adaptable to different dependency structures and sample sizes.

    In the standard definition of the Tukey window, alpha controls tapering:
    - alpha = 0: Rectangular window (no tapering)
    - alpha = 1: Hann window (maximum tapering)
    - alpha = 0.5: Common default, tapers 50% of block

    This makes Tukey a good fit for:
    - Exploratory analysis with unknown structure
    - Adaptive methods that tune alpha
    - Transitioning between block methods

    Parameters
    ----------
    alpha : float, default=0.5
        Tukey window shape parameter, restricted to the interval [0, 1]. It
        is read each time weights are computed, so changing it on an existing
        instance takes effect immediately.

    Notes
    -----
    ``window_type`` is always forced to ``"tukey"``, overriding any value the
    caller passes. ``wrap_around_flag`` defaults to False and ``overlap_flag``
    to True; both can be overridden.

    Examples
    --------
    Adaptive tapering based on correlation structure:

    >>> series = load_complex_series()
    >>> # Estimate optimal alpha from ACF
    >>> alpha_opt = estimate_tukey_alpha(series)
    >>> bootstrap = TukeyBootstrap(
    ...     n_bootstraps=2000,
    ...     block_length=40,
    ...     alpha=alpha_opt
    ... )
    """

    alpha: float = Field(default=0.5, ge=0.0, le=1.0, description="Tukey window shape parameter")

    def __init__(self, services: Optional[BootstrapServices] = None, **data):
        """Initialize with Tukey window."""
        data["window_type"] = "tukey"
        data.setdefault("wrap_around_flag", False)
        data.setdefault("overlap_flag", True)

        super().__init__(services=services, **data)

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator."""
        return [{"n_bootstraps": 10, "block_length": 10}]

    def _create_tapered_weights(self) -> Callable:
        """
        Create Tukey tapered weights with alpha parameter.

        Returns a callable mapping a block length to the window service's
        ``tukey_window`` for that length and the current ``alpha``.
        """
        # Same guard as the parent implementation: without it the returned
        # callable would fail on a None service if this method is reached
        # before the service has been created.
        if self._window_service is None:
            self._window_service = WindowFunctionService()

        def tapered_weights(block_length: int) -> np.ndarray:
            return self._window_service.tukey_window(block_length, alpha=self.alpha)

        return tapered_weights