# Source code for tsbootstrap.diagnostics

"""``diagnose(X)``: inspect a series and recommend bootstrap methods.

A lightweight, honest advisor: it measures serial dependence and stationarity
and maps them to suitable method specs. It does not choose for you; it explains
what it sees and what fits.
"""

from __future__ import annotations

from dataclasses import dataclass

import numpy as np
from numpy.typing import NDArray

from tsbootstrap.validation import coerce_observations

# ``diagnose`` flags a series as serially dependent when its largest absolute
# lag-1 autocorrelation is strictly greater than this value.
_DEPENDENCE_THRESHOLD = 0.2



# [docs]
@dataclass(frozen=True, slots=True)
class Diagnosis:
    """What ``diagnose`` found and what it recommends.

    Immutable record; every field is filled in by ``diagnose``.

    Attributes:
        n_obs: First dimension of the coerced observation array.
        n_series: Second dimension of the coerced observation array.
        lag1_autocorr: Largest absolute lag-1 autocorrelation found across
            the columns.
        dependent: ``True`` when ``lag1_autocorr`` exceeds the module's
            dependence threshold.
        nonstationary: ``True`` when the stationarity check flagged the data.
        recommended_methods: Method specs, as strings, in the order they were
            recommended and with duplicates removed.
        notes: Human-readable explanations for the recommendations.
    """

    n_obs: int
    n_series: int
    lag1_autocorr: float
    dependent: bool
    nonstationary: bool
    recommended_methods: tuple[str, ...]
    notes: tuple[str, ...]



def _max_lag1_autocorr(arr: NDArray[np.float64]) -> float:
    """Return the largest absolute lag-1 autocorrelation over the columns of ``arr``.

    Each column is correlated with itself shifted by one step, i.e. the
    Pearson correlation of ``col[:-1]`` against ``col[1:]``.  A column
    contributes nothing when that correlation is undefined: fewer than three
    observations, or a lagged/leading slice whose standard deviation is zero
    or not a number.

    Args:
        arr: Two-dimensional array; the second axis indexes the columns that
            are examined one by one.

    Returns:
        The maximum of ``abs(correlation)`` over the usable columns, or
        ``0.0`` when no column yields a usable value.
    """
    best = 0.0
    for j in range(arr.shape[1]):
        col = arr[:, j]
        if col.size < 3:
            # Each of the two shifted slices needs at least two points.
            continue
        lagged, leading = col[:-1], col[1:]
        # Test the slices rather than the whole column: a column such as
        # [1, 1, 1, 2] has non-zero overall spread but a constant lagged
        # slice, which makes ``np.corrcoef`` divide by zero, emit a
        # RuntimeWarning and return NaN.  ``not (... > 0)`` also skips NaN.
        if not (lagged.std() > 0 and leading.std() > 0):
            continue
        rho = float(np.corrcoef(lagged, leading)[0, 1])
        # Be explicit about non-finite results instead of relying on how
        # ``max`` happens to order NaN.
        if np.isfinite(rho):
            best = max(best, abs(rho))
    return best


def _looks_nonstationary(arr: NDArray[np.float64], lag1: float) -> bool:
    """Report whether any column of ``arr`` appears to have a unit root.

    The augmented Dickey-Fuller test from statsmodels is applied to each
    column whose standard deviation is not zero, and ``True`` is returned as
    soon as one column's p-value exceeds 0.05.  When statsmodels cannot be
    imported, or the test raises ``ValueError``, the answer is the heuristic
    ``lag1 > 0.95`` instead.

    Args:
        arr: Two-dimensional array whose columns are tested one by one.
        lag1: Lag-1 autocorrelation used only by the fallback heuristic.

    Returns:
        ``True`` if the data look non-stationary, ``False`` otherwise.
    """
    try:
        from statsmodels.tsa.stattools import adfuller as adf_test
    except ImportError:  # pragma: no cover - fall back without statsmodels
        return lag1 > 0.95

    for index in range(arr.shape[1]):
        series = arr[:, index]
        if series.std() != 0:
            try:
                adf_result = adf_test(series, autolag="AIC")
                p_value = float(adf_result[1])
            except ValueError:  # series too short for the test
                return lag1 > 0.95
            if p_value > 0.05:
                # The unit-root null was not rejected for this column.
                return True
    return False



# [docs]
def diagnose(X: object) -> Diagnosis:
    """Summarise the dependence structure of ``X`` and suggest bootstrap methods.

    ``X`` is passed through ``coerce_observations``; the resulting
    two-dimensional array is measured for lag-1 autocorrelation and checked
    for non-stationarity.  Those two findings select the recommended method
    specs and the notes that explain them.

    Args:
        X: The observations, in any form ``coerce_observations`` accepts.

    Returns:
        A ``Diagnosis`` holding the measurements, the recommended method
        specs (duplicates removed, order kept) and the explanatory notes.
    """
    observations, _ = coerce_observations(X)
    n_obs, n_series = observations.shape
    lag1 = _max_lag1_autocorr(observations)
    dependent = lag1 > _DEPENDENCE_THRESHOLD
    nonstationary = _looks_nonstationary(observations, lag1)
    wants_block_length = dependent and not nonstationary

    # Non-stationarity takes precedence over plain serial dependence.
    if nonstationary:
        methods = ["ResidualBootstrap(model=ARIMA(...))", "SieveAR"]
        remarks = [
            "Series looks non-stationary (unit root): difference it via ARIMA, or use the sieve."
        ]
    elif dependent:
        methods = ["StationaryBlock", "MovingBlock", "SieveAR"]
        remarks = [
            f"Serial dependence present (lag-1 autocorrelation {lag1:.2f}): use a block method or the sieve."
        ]
    else:
        methods = ["IID", "MovingBlock"]
        remarks = [
            "Serial dependence is weak: i.i.d. resampling is acceptable; a block method is a safe default."
        ]

    if n_series > 1:
        # The VAR-based spec goes to the front of the list for multivariate input.
        methods = ["ResidualBootstrap(model=VAR(...))", *methods]
        remarks.append(
            "Multivariate input: VAR captures cross-series dependence; block methods preserve it by resampling whole rows."
        )

    if wants_block_length:
        # Imported here, only on the path that actually needs it.
        from tsbootstrap.block.pwsd import optimal_block_length

        block_length = optimal_block_length(observations, kind="stationary")
        remarks.append(f"Suggested automatic block length (Politis-White): {block_length}.")

    return Diagnosis(
        n_obs=n_obs,
        n_series=n_series,
        lag1_autocorr=lag1,
        dependent=dependent,
        nonstationary=nonstationary,
        # dict.fromkeys drops duplicates while keeping first-seen order.
        recommended_methods=tuple(dict.fromkeys(methods)),
        notes=tuple(remarks),
    )



# Public API: the names exported by a star-import of this module.
__all__ = ["Diagnosis", "diagnose"]