# Source code for tsbootstrap.diagnostics

"""``diagnose(X)``: inspect a series and recommend bootstrap methods.

A lightweight, honest advisor: it measures serial dependence and stationarity
and maps them to suitable method specs. It does not choose for you; it explains
what it sees and what fits.
"""

from __future__ import annotations

from dataclasses import dataclass

import numpy as np
from numpy.typing import NDArray

from tsbootstrap.validation import coerce_observations

# Lag-1 autocorrelation magnitude above which ``diagnose`` flags the data as
# serially dependent (the comparison is strict: ``lag1 > threshold``).
_DEPENDENCE_THRESHOLD = 0.2


[docs] @dataclass(frozen=True, slots=True) class Diagnosis: """What ``diagnose`` found and what it recommends.""" n_obs: int n_series: int lag1_autocorr: float dependent: bool nonstationary: bool recommended_methods: tuple[str, ...] notes: tuple[str, ...]
# Fallback rule used when the ADF test is unavailable or cannot run: a lag-1
# autocorrelation above this value is treated as a sign of a unit root.
_FALLBACK_UNIT_ROOT_LAG1 = 0.95

# Significance level for the ADF test; a p-value above it fails to reject the
# unit-root null hypothesis.
_ADF_SIGNIFICANCE = 0.05


def _max_lag1_autocorr(arr: NDArray[np.float64]) -> float:
    """Return the largest absolute lag-1 autocorrelation over the columns of ``arr``.

    Each column is correlated with itself shifted by one observation. Columns
    for which that correlation is undefined (fewer than three observations, or
    a lagged slice with no variance) are skipped, so they contribute nothing
    and raise no NumPy warnings.

    Args:
        arr: 2-D array; each column is treated as one series.

    Returns:
        The maximum absolute lag-1 correlation, or ``0.0`` when no column
        yields a defined value.
    """
    best = 0.0
    for col in arr.T:
        # With fewer than three points each lagged slice has at most one
        # element, so a correlation cannot be computed.
        if col.size < 3:
            continue
        head, tail = col[:-1], col[1:]
        # A column can vary overall while one lagged slice is constant
        # (e.g. [1, 1, 1, 2]); the correlation is then 0/0.
        if not (head.std() > 0 and tail.std() > 0):
            continue
        # Belt and braces: silence floating-point noise and drop non-finite
        # results explicitly instead of relying on how ``max`` treats NaN.
        with np.errstate(divide="ignore", invalid="ignore"):
            rho = float(np.corrcoef(head, tail)[0, 1])
        if np.isfinite(rho):
            best = max(best, abs(rho))
    return best


def _looks_nonstationary(arr: NDArray[np.float64], lag1: float) -> bool:
    """Report whether any column of ``arr`` looks like it has a unit root.

    Runs the augmented Dickey-Fuller test from statsmodels on every
    non-constant column and returns ``True`` as soon as one fails to reject
    the unit-root null. When statsmodels is not installed, or the test raises
    ``ValueError``, the answer falls back to a threshold on ``lag1``.

    Args:
        arr: 2-D array; each column is treated as one series.
        lag1: Lag-1 autocorrelation summary used by the fallback rule.

    Returns:
        ``True`` if the data looks non-stationary, ``False`` otherwise.
    """
    fallback = lag1 > _FALLBACK_UNIT_ROOT_LAG1
    try:
        from statsmodels.tsa.stattools import adfuller
    except ImportError:  # pragma: no cover - fall back without statsmodels
        return fallback

    for col in arr.T:
        if col.std() == 0:
            continue  # constant column: nothing to test
        try:
            pvalue = float(adfuller(col, autolag="AIC")[1])
        except ValueError:  # series too short for the test
            return fallback
        if pvalue > _ADF_SIGNIFICANCE:  # fail to reject the unit-root null
            return True
    return False
def diagnose(X: object) -> Diagnosis:
    """Inspect ``X`` and report which bootstrap methods suit it.

    The input is coerced to an observations array (rows are observations,
    columns are series). Its strongest lag-1 autocorrelation and a
    stationarity check then select one of three recommendation sets:
    non-stationary, serially dependent, or weakly dependent. Multivariate
    input additionally puts a VAR-based residual bootstrap first, and
    dependent stationary data gets a suggested automatic block length.

    Args:
        X: Data accepted by ``coerce_observations``.

    Returns:
        A ``Diagnosis`` holding the measurements, the de-duplicated method
        recommendations in order, and explanatory notes.
    """
    data, _ = coerce_observations(X)
    rows, cols = data.shape
    autocorr = _max_lag1_autocorr(data)
    is_dependent = autocorr > _DEPENDENCE_THRESHOLD
    is_nonstationary = _looks_nonstationary(data, autocorr)

    # Non-stationarity takes precedence over plain serial dependence.
    if is_nonstationary:
        methods = ["ResidualBootstrap(model=ARIMA(...))", "SieveAR"]
        remarks = [
            "Series looks non-stationary (unit root): difference it via ARIMA, or use the sieve."
        ]
    elif is_dependent:
        methods = ["StationaryBlock", "MovingBlock", "SieveAR"]
        remarks = [
            f"Serial dependence present (lag-1 autocorrelation {autocorr:.2f}): use a block method or the sieve."
        ]
    else:
        methods = ["IID", "MovingBlock"]
        remarks = [
            "Serial dependence is weak: i.i.d. resampling is acceptable; a block method is a safe default."
        ]

    if cols > 1:
        # The VAR-based option goes to the front of the list.
        methods = ["ResidualBootstrap(model=VAR(...))", *methods]
        remarks.append(
            "Multivariate input: VAR captures cross-series dependence; block methods preserve it by resampling whole rows."
        )

    if is_dependent and not is_nonstationary:
        # Imported lazily, only on the path that needs it.
        from tsbootstrap.block.pwsd import optimal_block_length

        block_length = optimal_block_length(data, kind="stationary")
        remarks.append(f"Suggested automatic block length (Politis-White): {block_length}.")

    return Diagnosis(
        n_obs=rows,
        n_series=cols,
        lag1_autocorr=autocorr,
        dependent=is_dependent,
        nonstationary=is_nonstationary,
        # dict.fromkeys drops duplicates while keeping first-seen order.
        recommended_methods=tuple(dict.fromkeys(methods)),
        notes=tuple(remarks),
    )
# Names exported by ``from tsbootstrap.diagnostics import *``; the helper
# functions prefixed with an underscore stay private.
__all__ = ["Diagnosis", "diagnose"]