Source code for tsbootstrap.utils.odds_and_ends

"""
Utility functions: Essential tools refined through production experience.

This module contains utility functions that have proven indispensable across
our bootstrap implementations. Each function represents a crystallization of
patterns we've encountered repeatedly—abstracted, optimized, and battle-tested.

These utilities embody the principle that good infrastructure makes the right
thing easy and the wrong thing hard. From random number generation with proper
seeding to output suppression for clean interfaces, each tool addresses a
specific need identified through real-world usage.
"""

import os
from contextlib import contextmanager

import numpy as np

from tsbootstrap.utils.types import RngTypes
from tsbootstrap.utils.validate import validate_rng



[docs]
def generate_random_indices(num_samples: int, rng: RngTypes = None) -> np.ndarray:  # type: ignore
    """
    Draw ``num_samples`` bootstrap indices, sampling with replacement.

    The indices are drawn from ``0 .. num_samples - 1`` so they can be used
    directly to resample a data set of length ``num_samples``.

    Parameters
    ----------
    num_samples : int
        How many indices to draw; also the size of the index population.
        Passed to ``validate_integers`` with ``min_value=1``.

    rng : RngTypes, optional
        Source of randomness, normalised through ``validate_rng`` with
        ``allow_seed=True``. Intended to be an integer seed, a configured
        Generator, or None for system entropy.

    Returns
    -------
    np.ndarray
        Array of shape (num_samples,) holding the drawn indices. Values may
        repeat because sampling is done with replacement.

    Examples
    --------
    >>> # Seeded call for reproducible results
    >>> generate_random_indices(5, rng=42)
    array([4, 0, 3, 3, 3])

    >>> # Unseeded call
    >>> indices = generate_random_indices(1000)
    """
    # Function-scope import of the integer validator
    from tsbootstrap.utils.validate import validate_integers

    # Validate the requested size, then turn ``rng`` into a usable generator
    validate_integers(num_samples, min_value=1)  # type: ignore
    generator = validate_rng(rng, allow_seed=True)

    # Population of candidate positions: 0, 1, ..., num_samples - 1
    population = np.arange(num_samples)

    return generator.choice(population, size=num_samples, replace=True)  # type: ignore




[docs]
@contextmanager
def suppress_output(verbose: int = 2):
    """A context manager for controlling the suppression of stdout and stderr.

    Suppression works at the file-descriptor level: descriptor 1 (stdout) and,
    if requested, descriptor 2 (stderr) are pointed at ``os.devnull`` for the
    duration of the ``with`` block and restored afterwards, even if the block
    raises.

    Parameters
    ----------
    verbose : int, optional
        Verbosity level controlling suppression.
        2 - No suppression (default)
        1 - Suppress stdout only
        0 - Suppress both stdout and stderr
        Any value other than 0 or 2 is treated like 1.

    Yields
    ------
    None

    Raises
    ------
    OSError
        If duplicating or redirecting the file descriptors fails.

    Examples
    --------
    with suppress_output(verbose=1):
        print('This will not be printed to stdout')
    """
    import sys  # function-scope import; only needed to flush the Python-level streams

    # No suppression required
    if verbose == 2:
        yield
        return

    def _flush_python_streams():
        # Best effort: a stream may be None, closed, or lack flush(); none of
        # these should prevent the descriptors from being redirected/restored.
        for stream in (sys.stdout, sys.stderr):
            try:
                stream.flush()
            except (AttributeError, OSError, ValueError):
                pass

    # Descriptors to silence: stdout always, stderr only when verbose == 0
    target_fds = (1, 2) if verbose == 0 else (1,)
    null_fds = []
    saved_fds = []
    try:
        # Acquire inside the try so a partial failure still closes what was opened
        for target_fd in target_fds:
            saved_fds.append(os.dup(target_fd))
            null_fds.append(os.open(os.devnull, os.O_RDWR))

        # Push out anything buffered before the block so it is not swallowed
        _flush_python_streams()
        for null_fd, target_fd in zip(null_fds, target_fds):
            os.dup2(null_fd, target_fd)
        yield
    finally:
        # Drain text buffered inside the block while the descriptors still
        # point at the null device, so it cannot leak out after restoration
        _flush_python_streams()
        # Re-assign the real stdout/stderr back onto descriptors 1/2
        for saved_fd, target_fd in zip(saved_fds, target_fds):
            os.dup2(saved_fd, target_fd)
        # Close the null files and saved file descriptors
        for fd in null_fds + saved_fds:
            os.close(fd)



def _check_nan_inf_locations(a: np.ndarray, b: np.ndarray, check_same: bool) -> bool:
    """
    Check the locations of NaNs and Infs in both arrays.

    Parameters
    ----------
    a, b : np.ndarray
        The arrays to be compared.
    check_same : bool
        If True, checks if NaNs and Infs are in the same locations.

    Returns
    -------
    bool
        True if locations do not match and check_same is False, otherwise False.

    Raises
    ------
    ValueError
        If check_same is True and the arrays have NaNs or Infs in different locations.
    """
    a_nan_locs = np.isnan(a)
    b_nan_locs = np.isnan(b)
    a_inf_locs = np.isinf(a)
    b_inf_locs = np.isinf(b)

    if not np.array_equal(a_nan_locs, b_nan_locs) or not np.array_equal(a_inf_locs, b_inf_locs):
        if check_same:
            raise ValueError(
                "Arrays have NaN or infinity values at different positions. "
                "For arrays to be considered equal, special values (NaN, inf, -inf) "
                "must appear at the same indices in both arrays. Check your data "
                "for inconsistent handling of missing or infinite values."
            )
        else:
            return True

    return False


def _check_inf_signs(a: np.ndarray, b: np.ndarray, check_same: bool) -> bool:
    """
    Check the signs of Infs in both arrays.

    Parameters
    ----------
    a, b : np.ndarray
        The arrays to be compared.
    check_same : bool
        If True, checks if Infs have the same signs.

    Returns
    -------
    bool
        True if signs do not match and check_same is False, otherwise False.

    Raises
    ------
    ValueError
        If check_same is True and the arrays have Infs with different signs.
    """
    a_inf_locs = np.isinf(a)
    b_inf_locs = np.isinf(b)

    if not np.array_equal(np.sign(a[a_inf_locs]), np.sign(b[b_inf_locs])):
        if check_same:
            raise ValueError(
                "Arrays contain infinities with different signs at the same position. "
                "One array has positive infinity while the other has negative infinity "
                "at corresponding indices. These values cannot be considered approximately equal."
            )
        else:
            return True

    return False


def _check_close_values(
    a: np.ndarray, b: np.ndarray, rtol: float, atol: float, check_same: bool
) -> bool:
    """
    Check that the finite values in the arrays are close.

    Parameters
    ----------
    a, b : np.ndarray
        The arrays to be compared.
    rtol : float
        The relative tolerance parameter for the np.allclose function.
    atol : float
        The absolute tolerance parameter for the np.allclose function.
    check_same : bool
        If True, checks if the arrays are almost equal.

    Returns
    -------
    bool
        True if values are not close and check_same is False, otherwise False.

    Raises
    ------
    ValueError
        If check_same is True and the arrays are not almost equal.
    """
    a_nan_locs = np.isnan(a)
    b_nan_locs = np.isnan(b)
    a_inf_locs = np.isinf(a)
    b_inf_locs = np.isinf(b)
    a_masked = np.ma.masked_where(a_nan_locs | a_inf_locs, a)
    b_masked = np.ma.masked_where(b_nan_locs | b_inf_locs, b)

    if check_same:
        if not np.allclose(a_masked, b_masked, rtol=rtol, atol=atol):
            raise ValueError(
                f"Arrays are not approximately equal within tolerance. "
                f"The relative tolerance is rtol={rtol} and absolute tolerance is atol={atol}. "
                f"Some values differ by more than these tolerances allow. "
                f"Consider increasing tolerance if small differences are acceptable."
            )
    else:
        if np.any(~np.isclose(a_masked, b_masked, rtol=rtol, atol=atol)):
            return True

    return False



[docs]
def assert_arrays_compare(
    a: np.ndarray, b: np.ndarray, rtol=1e-5, atol=1e-8, check_same=True
) -> bool:
    """
    Assert that two arrays are almost equal.

    This function compares two arrays for equality, allowing for NaNs and Infs in the arrays.
    The arrays are considered equal if the following conditions are satisfied:
    1. The locations of NaNs and Infs in both arrays are the same.
    2. The signs of the infinite values in both arrays are the same.
    3. The finite values are almost equal.

    The three conditions are checked in that order and checking stops at the
    first one that fails.

    Parameters
    ----------
    a, b : np.ndarray
        The arrays to be compared.
    rtol : float, optional
        The relative tolerance used when comparing finite values.
        Default is 1e-5.
    atol : float, optional
        The absolute tolerance used when comparing finite values.
        Default is 1e-8.
    check_same : bool, optional
        If True, raise a ValueError if the arrays are not almost equal.
        If False, return True if the arrays are not almost equal and False otherwise.
        Default is True.

    Returns
    -------
    bool
        If check_same is True, returns True when the arrays are almost equal
        (a mismatch raises instead of returning).
        If check_same is False, returns True if the arrays are not almost equal
        and False otherwise.

    Raises
    ------
    ValueError
        If check_same is True and the arrays have NaNs or Infs in different locations.
        If check_same is True and the arrays have Infs with different signs.
        If check_same is True and the finite values are not almost equal.
    """
    # Each helper raises on a mismatch when check_same is True, and returns
    # True only when check_same is False and it detected a difference.
    difference_found = (
        _check_nan_inf_locations(a, b, check_same)
        or _check_inf_signs(a, b, check_same)
        or _check_close_values(a, b, rtol, atol, check_same)
    )
    if difference_found:
        return True

    # No difference detected: True in "assert same" mode, False in
    # "report difference" mode.
    return bool(check_same)