Source code for tsbootstrap.ranklags

from __future__ import annotations

import logging
from numbers import Integral

import numpy as np

from tsbootstrap.utils.types import ModelTypes
from tsbootstrap.utils.validate import validate_integers, validate_literal_type

# Module-level logger registered under the "tsbootstrap" name; used below to
# report lags whose model fit failed.
logger = logging.getLogger("tsbootstrap")


class RankLags:
    """
    A class that uses several metrics to rank lags for time series models.

    Methods
    -------
    rank_lags_by_aic_bic()
        Rank lags based on Akaike information criterion (AIC) and Bayesian
        information criterion (BIC).
    rank_lags_by_pacf()
        Select lags whose Partial Autocorrelation Function (PACF) value is
        statistically significant.
    estimate_conservative_lag()
        Estimate a conservative lag value by considering various metrics.
    get_model(order)
        Retrieve a previously fitted model given an order.

    Examples
    --------
    The outputs below depend on the random input and are illustrative only.

    >>> from tsbootstrap import RankLags
    >>> import numpy as np
    >>> X = np.random.normal(size=(100, 1))
    >>> rank_obj = RankLags(X, model_type='ar')
    >>> rank_obj.estimate_conservative_lag()  # doctest: +SKIP
    2
    >>> rank_obj.rank_lags_by_aic_bic()  # doctest: +SKIP
    (array([2, 1]), array([2, 1]))
    >>> rank_obj.rank_lags_by_pacf()  # doctest: +SKIP
    array([1, 2])
    """

    _tags = {"python_dependencies": "statsmodels"}

    def __init__(
        self,
        X: np.ndarray,
        model_type: ModelTypes,
        max_lag: Integral = 10,
        y=None,
        save_models: bool = False,
    ) -> None:
        """
        Initialize the RankLags object.

        Parameters
        ----------
        X : np.ndarray
            The input data.
        model_type : str
            The type of model to fit. One of 'ar', 'arima', 'sarima', 'var',
            'arch'.
        max_lag : int, optional, default=10
            Maximum lag to consider.
        y : np.ndarray, optional, default=None
            Exogenous variables to include in the model.
        save_models : bool, optional, default=False
            Whether to save the models.
        """
        # Each assignment below goes through the validating property setter.
        self.X = X
        self.max_lag = max_lag
        self.model_type = model_type
        self.y = y
        self.save_models = save_models
        self.models = []

    @property
    def X(self) -> np.ndarray:
        """
        The input data.

        Returns
        -------
        np.ndarray
            The input data.
        """
        return self._X

    @X.setter
    def X(self, value: np.ndarray) -> None:
        """
        Set the input data.

        Parameters
        ----------
        value : np.ndarray
            The input data.

        Raises
        ------
        TypeError
            If ``value`` is not a numpy array.
        """
        if not isinstance(value, np.ndarray):
            raise TypeError("X must be a numpy array.")
        self._X = value

    @property
    def max_lag(self) -> Integral:
        """
        Maximum lag to consider.

        Returns
        -------
        int
            Maximum lag to consider.
        """
        return self._max_lag

    @max_lag.setter
    def max_lag(self, value: Integral) -> None:
        """
        Set the maximum lag to consider.

        Parameters
        ----------
        value : int
            Maximum lag to consider; checked with ``validate_integers``
            using ``min_value=1``.
        """
        validate_integers(value, min_value=1)
        self._max_lag = value

    @property
    def model_type(self) -> ModelTypes:
        """
        The type of model to fit.

        Returns
        -------
        str
            The type of model to fit.
        """
        return self._model_type

    @model_type.setter
    def model_type(self, value: ModelTypes) -> None:
        """
        Set the type of model to fit.

        Parameters
        ----------
        value : ModelTypes
            The type of model to fit. One of 'ar', 'arima', 'sarima', 'var',
            'arch'. Stored lower-cased.
        """
        validate_literal_type(value, ModelTypes)
        self._model_type = value.lower()

    @property
    def y(self) -> np.ndarray | None:
        """
        Exogenous variables to include in the model.

        Returns
        -------
        np.ndarray or None
            Exogenous variables to include in the model.
        """
        return self._y

    @y.setter
    def y(self, value: np.ndarray | None) -> None:
        """
        Set the exogenous variables to include in the model.

        Parameters
        ----------
        value : np.ndarray or None
            Exogenous variables to include in the model.

        Raises
        ------
        TypeError
            If ``value`` is neither ``None`` nor a numpy array.
        """
        if value is not None and not isinstance(value, np.ndarray):
            raise TypeError("y must be a numpy array.")
        self._y = value

    def rank_lags_by_aic_bic(self) -> tuple[np.ndarray, np.ndarray]:
        """
        Rank lags based on Akaike information criterion (AIC) and Bayesian
        information criterion (BIC).

        Models are fitted for lags ``1..max_lag`` in order. Fitting stops at
        the first lag that fails, so only the lags fitted before that point
        are ranked. When ``save_models`` is true, the fitted models from this
        run replace any models saved by a previous run.

        Returns
        -------
        Tuple[np.ndarray, np.ndarray]
            aic_ranked_lags: Lags ranked by AIC (best first).
            bic_ranked_lags: Lags ranked by BIC (best first).
        """
        from tsbootstrap.tsfit import TSFit

        # Start from a clean slate so that get_model() never returns a model
        # fitted during an earlier run (possibly on different data/settings).
        self.models = []

        aic_values = []
        bic_values = []
        for lag in range(1, self.max_lag + 1):
            try:
                fit_obj = TSFit(order=lag, model_type=self.model_type)
                model = fit_obj.fit(X=self.X, y=self.y).model
            except Exception as e:
                # Best-effort: a failed fit ends the scan instead of raising.
                logger.warning(
                    "An error occurred during fitting for lag %d. "
                    "Skipping remaining lags.",
                    lag,
                )
                logger.debug("%s", e)
                break
            if self.save_models:
                self.models.append(model)
            aic_values.append(model.aic)
            bic_values.append(model.bic)

        # argsort yields 0-based positions; lags are 1-based.
        aic_ranked_lags = np.argsort(aic_values) + 1
        bic_ranked_lags = np.argsort(bic_values) + 1
        return aic_ranked_lags, bic_ranked_lags

    def rank_lags_by_pacf(self) -> np.ndarray:
        """
        Select lags based on Partial Autocorrelation Function (PACF) values.

        Returns
        -------
        np.ndarray
            The lags, in ascending order, whose absolute PACF value exceeds
            ``1.96 / sqrt(len(X))``. The array may be empty.
        """
        from statsmodels.tsa.stattools import pacf

        # Can only compute partial correlations for lags up to 50% of the
        # sample size. We use the minimum of max_lag and a third of the
        # sample size, to allow for other parameters and trends to be
        # included in the model.
        n_lags = max(min(self.max_lag, self.X.shape[0] // 3 - 1), 1)
        # Drop the lag-0 entry so index i corresponds to lag i + 1.
        pacf_values = pacf(self.X, nlags=n_lags)[1:]
        ci = 1.96 / np.sqrt(len(self.X))
        significant_lags = np.where(np.abs(pacf_values) > ci)[0] + 1
        return significant_lags

    def estimate_conservative_lag(self) -> int:
        """
        Estimate a conservative lag value by considering various metrics.

        The smallest lag present in the AIC ranking, the BIC ranking and,
        for univariate data, the PACF-significant lags is returned. If no
        lag is common to all of them, the last entry of the AIC ranking is
        returned instead.

        Returns
        -------
        int
            A conservative lag value.

        Raises
        ------
        IndexError
            If there is no common lag and no model could be fitted for any
            lag, so the AIC ranking is empty.
        """
        aic_ranked_lags, bic_ranked_lags = self.rank_lags_by_aic_bic()

        # PACF is only available for univariate data. A 1-D array is treated
        # as univariate as well instead of failing on ``shape[1]``.
        is_univariate = self.X.ndim == 1 or self.X.shape[1] == 1
        if is_univariate:
            pacf_ranked_lags = self.rank_lags_by_pacf()
            highest_ranked_lags = set(aic_ranked_lags).intersection(
                bic_ranked_lags, pacf_ranked_lags
            )
        else:
            highest_ranked_lags = set(aic_ranked_lags).intersection(
                bic_ranked_lags
            )

        if not highest_ranked_lags:
            return int(aic_ranked_lags[-1])
        # Convert numpy integers to a plain int, as the signature promises.
        return int(min(highest_ranked_lags))

    def get_model(self, order: int):
        """
        Retrieve a previously fitted model given an order.

        Parameters
        ----------
        order : int
            Order of the model to retrieve (1-based).

        Returns
        -------
        Union[AutoRegResultsWrapper, ARIMAResultsWrapper, SARIMAXResultsWrapper, VARResultsWrapper, ARCHModelResult]
            The fitted model, or ``None`` when ``save_models`` is false.

        Raises
        ------
        IndexError
            If ``save_models`` is true and no model is stored for ``order``.
        """
        if not self.save_models:
            return None
        # Reject order <= 0 explicitly: a negative list index would otherwise
        # wrap around and silently return a model of a different order.
        if not 1 <= order <= len(self.models):
            raise IndexError(
                f"No saved model for order {order}; "
                f"{len(self.models)} model(s) are available."
            )
        return self.models[order - 1]