Source code for tseda.core.timeseries

"""
Core :class:`TimeSeries` data structure.

This module defines the single object that every tseda analysis operates on.
It wraps a :class:`pandas.Series` with a :class:`pandas.DatetimeIndex`,
validates inputs, infers frequency, and exposes a clean, chainable API for
basic transforms.

Design Principles
-----------------
* **Effectively immutable** — transform methods return new :class:`TimeSeries`
  objects; the original is never modified.
* **Single source of truth** — all data lives in one private ``pd.Series``;
  ``values`` and ``index`` are read-only views.
* **Explicit over implicit** — every parameter has a clear type and a
  descriptive error message when violated.
* **Minimal dependencies** — only numpy, pandas, and scipy at runtime.

Examples
--------
Build from a numpy array and a date range:

>>> import numpy as np, pandas as pd
>>> from tseda import TimeSeries
>>> idx = pd.date_range("2020-01-01", periods=365, freq="D")
>>> ts = TimeSeries(np.random.randn(365), index=idx, name="returns", unit="USD")
>>> ts.n
365
>>> ts.freq
'D'

Build from an existing :class:`pandas.Series`:

>>> s = pd.Series([1.0, 2.0, 3.0], index=pd.date_range("2020", periods=3, freq="D"))
>>> ts2 = TimeSeries.from_series(s, name="price")
>>> ts2.start
Timestamp('2020-01-01 00:00:00')
"""
from __future__ import annotations

import warnings
from typing import Callable, Optional, Union

import numpy as np
import pandas as pd

from tseda.core.types import AggMethod, ArrayLike, DatetimeLike, DiffMethod, Frequency
from tseda.core.validator import (
    validate_data_array,
    validate_datetime_index,
    validate_freq_string,
    validate_positive_int,
)

__all__ = ["TimeSeries"]

# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------

# Lookup table for the median-gap fallback in ``_infer_freq``: each entry pairs
# a nominal period length (in seconds) with the pandas offset alias reported
# for it.  Entries are listed in ascending order of period length.
_SECONDS_PER_DAY: float = 86_400.0

_GAP_TO_ALIAS: list[tuple[float, str]] = [
    # Sub-daily periods are spelled out directly in seconds.
    (1.0, "s"),
    (60.0, "min"),
    (3_600.0, "h"),
    # Daily and longer periods are expressed as a whole number of days.
    *(
        (n_days * _SECONDS_PER_DAY, alias)
        for n_days, alias in (
            (1, "D"),
            (7, "W"),
            (30, "MS"),
            (91, "QS"),
            (365, "YS"),
        )
    ),
]

# Human-readable label for each supported pandas offset alias (multiplier
# already stripped).  Both legacy spellings ("T", "H", "M", "A", ...) and their
# newer replacements ("min", "h", "ME", "YE", ...) are listed.
_ALIAS_TO_LABEL: dict[str, str] = dict(
    (
        ("s", "Secondly"),
        ("T", "Minutely"),
        ("min", "Minutely"),
        ("H", "Hourly"),
        ("h", "Hourly"),
        ("D", "Daily"),
        ("B", "Business daily"),
        ("W", "Weekly"),
        ("M", "Monthly (end)"),
        ("MS", "Monthly (start)"),
        ("ME", "Monthly (end)"),
        ("Q", "Quarterly (end)"),
        ("QS", "Quarterly (start)"),
        ("QE", "Quarterly (end)"),
        ("A", "Annual (end)"),
        ("AS", "Annual (start)"),
        ("Y", "Annual (end)"),
        ("YS", "Annual (start)"),
        ("YE", "Annual (end)"),
    )
)


def _infer_freq(index: pd.DatetimeIndex) -> Optional[str]:
    """Guess the pandas offset alias that best describes *index*.

    :func:`pandas.infer_freq` is consulted first.  When it cannot decide, the
    median spacing between consecutive timestamps is compared against the
    periods in ``_GAP_TO_ALIAS`` and the first one within 10 % is returned.

    Returns ``None`` for indexes with fewer than three timestamps, when the
    gaps cannot be computed, or when no known period is close enough.
    """
    if len(index) < 3:
        return None

    exact = pd.infer_freq(index)
    if exact is not None:
        return exact

    # Work with timedelta64 values divided by one second so the result does
    # not depend on the resolution of the underlying datetime64 storage
    # (the integer representation is not guaranteed to be nanoseconds).
    try:
        deltas = np.diff(index.to_numpy())
        typical_gap = float(np.median(deltas / np.timedelta64(1, "s")))
    except Exception:
        return None

    return next(
        (
            alias
            for period, alias in _GAP_TO_ALIAS
            if abs(typical_gap - period) / period < 0.10
        ),
        None,
    )


def _freq_label(freq: Optional[str]) -> str:
    """Return a human-readable label for a pandas freq alias.

    Parameters
    ----------
    freq:
        Pandas offset alias, optionally with a leading integer multiplier
        (``"15min"``) and/or an anchor suffix (``"W-SUN"``, ``"QS-OCT"``).
        ``None`` denotes an unknown frequency.

    Returns
    -------
    str
        The label from ``_ALIAS_TO_LABEL``; ``"Irregular / unknown"`` when
        *freq* is ``None``; the alias itself when it is not recognised.
    """
    if freq is None:
        return "Irregular / unknown"

    # Strip leading multiplier digits (e.g. "15min" → "min").
    base = freq.lstrip("0123456789")
    label = _ALIAS_TO_LABEL.get(base)
    if label is None:
        # pandas.infer_freq reports anchored aliases such as "W-SUN" or
        # "QS-OCT"; retry the lookup with the anchor suffix removed.
        label = _ALIAS_TO_LABEL.get(base.partition("-")[0])
    return freq if label is None else label


# ---------------------------------------------------------------------------
# Main class
# ---------------------------------------------------------------------------



[docs]
class TimeSeries:
    """Univariate time series with a :class:`pandas.DatetimeIndex`.

    Parameters
    ----------
    data:
        Numeric values.  Accepted types:

        * 1-D :class:`numpy.ndarray`
        * :class:`pandas.Series` — values are extracted; the Series index
          is used unless *index* is also provided.
        * :class:`list` or :class:`tuple` of numbers

    index:
        Datetime timestamps aligned with *data*.  When *data* is a
        :class:`pandas.Series` with a :class:`pandas.DatetimeIndex` this
        argument may be omitted.  Accepted types:

        * :class:`pandas.DatetimeIndex`
        * :class:`list` / :class:`numpy.ndarray` of datetime-like strings
          or :class:`numpy.datetime64` objects

    name:
        Short identifier for the series (used in plots and reports).
        Default ``"value"``.
    freq:
        Pandas offset alias (e.g., ``"D"``, ``"h"``, ``"MS"``).
        When ``None`` (default) the frequency is inferred automatically.
    unit:
        Physical unit of the values (e.g., ``"USD"``, ``"°C"``).
        Purely informational — used in axis labels.
    description:
        Free-text description stored in :attr:`metadata`.

    Raises
    ------
    TypeError
        If *data* or *index* have an unsupported type.
    ValueError
        If *data* and *index* have different lengths, if *index* is not
        monotonically increasing, or if *index* contains duplicates.

    Examples
    --------
    From a numpy array:

    >>> import numpy as np, pandas as pd
    >>> from tseda import TimeSeries
    >>> idx = pd.date_range("2020-01-01", periods=5, freq="D")
    >>> ts = TimeSeries([10.0, 11.5, 9.8, 12.0, 11.0], index=idx)
    >>> ts.n
    5

    From a pandas Series:

    >>> s = pd.Series([1, 2, 3], index=pd.date_range("2020", periods=3, freq="D"))
    >>> ts = TimeSeries.from_series(s)
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------


[docs]
    def __init__(
        self,
        data: Union[ArrayLike, pd.Series],
        *,
        index: Optional[DatetimeLike] = None,
        name: str = "value",
        freq: Optional[str] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Validate the inputs and build the internal float Series.

        Parameter semantics are described in the class docstring.
        """
        # Step 1: decide which index to use.  Without an explicit index the
        # data must be a Series that already carries a DatetimeIndex.
        if index is None:
            if not isinstance(data, pd.Series):
                raise ValueError(
                    "'index' is required when 'data' is not a pandas Series "
                    "with a DatetimeIndex."
                )
            if not isinstance(data.index, pd.DatetimeIndex):
                raise ValueError(
                    "When constructing from a pandas Series without an explicit "
                    "'index', the Series must already have a DatetimeIndex.  "
                    "Pass 'index=' or use TimeSeries.from_series()."
                )
            index = data.index

        # Step 2: run values and timestamps through the shared validators.
        values: np.ndarray = validate_data_array(data, name="data")
        dti: pd.DatetimeIndex = validate_datetime_index(index, name="index")

        # Step 3: both sides must line up one-to-one.
        n_values, n_stamps = len(values), len(dti)
        if n_values != n_stamps:
            raise ValueError(
                f"'data' and 'index' must have the same length; "
                f"got {n_values} values and {n_stamps} timestamps."
            )

        # Step 4: the single private Series that holds all data.
        self._data: pd.Series = pd.Series(values, index=dti, dtype=float)

        # Step 5: metadata is normalised to str (or None where optional).
        self._name: str = str(name)
        self._unit: Optional[str] = None if unit is None else str(unit)
        self._description: Optional[str] = (
            None if description is None else str(description)
        )

        # Step 6: an explicit frequency is validated; otherwise it is inferred.
        self._freq: Optional[str] = (
            _infer_freq(dti)
            if freq is None
            else validate_freq_string(freq, name="freq")
        )

        # Step 7: regularity is computed once and cached.
        self._is_regular: bool = self._compute_is_regular()


    # ------------------------------------------------------------------
    # Class-method constructors
    # ------------------------------------------------------------------


[docs]
    @classmethod
    def from_series(
        cls,
        series: pd.Series,
        *,
        name: Optional[str] = None,
        freq: Optional[str] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
    ) -> "TimeSeries":
        """Construct a :class:`TimeSeries` from a :class:`pandas.Series`.

        Parameters
        ----------
        series:
            Must have a :class:`pandas.DatetimeIndex`.
        name:
            Override the Series' ``.name`` attribute.  When ``None`` the
            Series name (if any) is used, falling back to ``"value"``.
        freq, unit, description:
            Forwarded to :class:`TimeSeries.__init__`.

        Returns
        -------
        TimeSeries

        Examples
        --------
        >>> s = pd.Series([1.0, 2.0], index=pd.date_range("2020", periods=2, freq="D"))
        >>> TimeSeries.from_series(s, name="x").name
        'x'
        """
        if not isinstance(series, pd.Series):
            raise TypeError(
                f"'series' must be a pandas.Series, got {type(series).__name__!r}."
            )
        _name = name if name is not None else (str(series.name) if series.name is not None else "value")
        return cls(
            series,
            name=_name,
            freq=freq,
            unit=unit,
            description=description,
        )



[docs]
    @classmethod
    def from_arrays(
        cls,
        values: ArrayLike,
        index: DatetimeLike,
        *,
        name: str = "value",
        freq: Optional[str] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
    ) -> "TimeSeries":
        """Build a :class:`TimeSeries` from a value array and a timestamp array.

        This is a thin convenience wrapper around the constructor that takes
        the index positionally.

        Parameters
        ----------
        values:
            1-D numeric array.
        index:
            Datetime-like array with one entry per value.
        name, freq, unit, description:
            Passed through unchanged to the constructor.

        Returns
        -------
        TimeSeries

        Examples
        --------
        >>> import numpy as np, pandas as pd
        >>> vals = np.array([1.0, 2.0, 3.0])
        >>> idx  = pd.date_range("2021-01-01", periods=3, freq="D")
        >>> TimeSeries.from_arrays(vals, idx).n
        3
        """
        metadata = {
            "name": name,
            "freq": freq,
            "unit": unit,
            "description": description,
        }
        return cls(values, index=index, **metadata)



[docs]
    @classmethod
    def from_dataframe(
        cls,
        df: pd.DataFrame,
        column: str,
        *,
        name: Optional[str] = None,
        freq: Optional[str] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
    ) -> "TimeSeries":
        """Extract one column from a :class:`pandas.DataFrame`.

        Parameters
        ----------
        df:
            Source DataFrame.  Must have a :class:`pandas.DatetimeIndex`.
        column:
            Column name to extract.
        name:
            Override the column name as the series name.
        freq, unit, description:
            Forwarded to :class:`TimeSeries.__init__`.

        Returns
        -------
        TimeSeries

        Raises
        ------
        KeyError
            If *column* is not in *df*.

        Examples
        --------
        >>> import pandas as pd
        >>> df = pd.DataFrame({"temp": [20.0, 21.0, 19.5]},
        ...                    index=pd.date_range("2020", periods=3, freq="D"))
        >>> TimeSeries.from_dataframe(df, "temp").name
        'temp'
        """
        if not isinstance(df, pd.DataFrame):
            raise TypeError(
                f"'df' must be a pandas.DataFrame, got {type(df).__name__!r}."
            )
        if column not in df.columns:
            raise KeyError(
                f"Column {column!r} not found in DataFrame. "
                f"Available columns: {list(df.columns)}"
            )
        series = df[column]
        _name = name if name is not None else column
        return cls.from_series(series, name=_name, freq=freq, unit=unit,
                               description=description)


    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _compute_is_regular(self) -> bool:
        """Return ``True`` when all consecutive time gaps are identical."""
        if len(self._data) < 2:
            return True
        gaps_td = np.diff(self._data.index.to_numpy())   # timedelta64[*] — unit-agnostic
        return bool(np.all(gaps_td == gaps_td[0]))

    def _copy_with(self, new_data: pd.Series, **meta_overrides) -> "TimeSeries":
        """Return a new :class:`TimeSeries` sharing metadata with *self*.

        Parameters
        ----------
        new_data:
            Series supplying the values and index of the new object.
        **meta_overrides:
            Optional replacements for ``name``, ``freq``, ``unit`` and
            ``description``.  Any key that is not supplied is taken from
            *self*, except ``freq`` (see below).

        Returns
        -------
        TimeSeries

        Notes
        -----
        Frequency handling when ``freq`` is not overridden:

        * if *new_data* has exactly the same index as *self*, the current
          frequency is carried over, so a frequency declared by the user is
          not lost by value-only transforms;
        * otherwise ``None`` is passed and the constructor re-infers the
          frequency from the new index.
        """
        if "freq" in meta_overrides:
            freq = meta_overrides["freq"]
        elif new_data.index.equals(self._data.index):
            # Timestamps are unchanged, so the known frequency still applies.
            freq = self._freq
        else:
            freq = None  # re-infer from the new index

        return TimeSeries(
            new_data.values,
            index=new_data.index,
            name=meta_overrides.get("name", self._name),
            freq=freq,
            unit=meta_overrides.get("unit", self._unit),
            description=meta_overrides.get("description", self._description),
        )

    # ------------------------------------------------------------------
    # Core properties — data access
    # ------------------------------------------------------------------

    @property
    def values(self) -> np.ndarray:
        """1-D ``float64`` array of observed values.

        Returns
        -------
        numpy.ndarray
            A *copy* to protect the internal state.
        """
        return self._data.values.copy()

    @property
    def index(self) -> pd.DatetimeIndex:
        """Datetime index of the series.

        Returns
        -------
        pandas.DatetimeIndex
        """
        return self._data.index

    @property
    def n(self) -> int:
        """Number of observations.

        Returns
        -------
        int
        """
        return len(self._data)

    @property
    def start(self) -> pd.Timestamp:
        """Timestamp of the first observation.

        Returns
        -------
        pandas.Timestamp
        """
        return self._data.index[0]

    @property
    def end(self) -> pd.Timestamp:
        """Timestamp of the last observation.

        Returns
        -------
        pandas.Timestamp
        """
        return self._data.index[-1]

    @property
    def duration(self) -> pd.Timedelta:
        """Wall-clock span from the first to the last observation.

        Returns
        -------
        pandas.Timedelta
        """
        return self.end - self.start

    # ------------------------------------------------------------------
    # Metadata properties
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Short identifier for the series.

        Returns
        -------
        str
        """
        return self._name

    @property
    def unit(self) -> Optional[str]:
        """Physical unit of the values, or ``None`` if unspecified.

        Returns
        -------
        str or None
        """
        return self._unit

    @property
    def description(self) -> Optional[str]:
        """Free-text description, or ``None`` if unspecified.

        Returns
        -------
        str or None
        """
        return self._description

    # ------------------------------------------------------------------
    # Frequency properties
    # ------------------------------------------------------------------

    @property
    def freq(self) -> Optional[str]:
        """Pandas offset alias (e.g., ``"D"``), or ``None`` for irregular data.

        Returns
        -------
        str or None
        """
        return self._freq

    @property
    def freq_label(self) -> str:
        """Descriptive label for :attr:`freq` (e.g., ``"Daily"``).

        Returns
        -------
        str
            Whatever the module-level ``_freq_label`` helper returns for the
            stored alias.
        """
        alias = self._freq
        return _freq_label(alias)

    # ------------------------------------------------------------------
    # Quality properties
    # ------------------------------------------------------------------

    @property
    def has_nan(self) -> bool:
        """``True`` when at least one value is NaN.

        Returns
        -------
        bool
        """
        return bool(self._data.isna().any())

    @property
    def n_nan(self) -> int:
        """Number of NaN values.

        Returns
        -------
        int
        """
        return int(self._data.isna().sum())

    @property
    def is_regular(self) -> bool:
        """``True`` when all consecutive time gaps are identical.

        A regular series has no missing timestamps (assuming a fixed
        sampling interval).  An irregular series may be the result of
        market holidays, sensor outages, or event-driven sampling.

        Returns
        -------
        bool
        """
        return self._is_regular

    # ------------------------------------------------------------------
    # Conversion
    # ------------------------------------------------------------------


[docs]
    def to_series(self) -> pd.Series:
        """Return the data as a :class:`pandas.Series`.

        The returned Series uses the same DatetimeIndex and the
        :attr:`name` attribute as its Series name.

        Returns
        -------
        pandas.Series
        """
        s = self._data.copy()
        s.name = self._name
        return s



[docs]
    def to_frame(self) -> pd.DataFrame:
        """Return the data as a single-column :class:`pandas.DataFrame`.

        Returns
        -------
        pandas.DataFrame
            Column name equals :attr:`name`.
        """
        return self._data.rename(self._name).to_frame()



[docs]
    def to_numpy(self) -> np.ndarray:
        """Return a copy of the raw values as a 1-D numpy array.

        Returns
        -------
        numpy.ndarray
        """
        return self.values



[docs]
    def copy(self) -> "TimeSeries":
        """Create an independent duplicate of this :class:`TimeSeries`.

        Values and index are copied; name, frequency, unit and description
        are carried over as they are.

        Returns
        -------
        TimeSeries
        """
        metadata = {
            "name": self._name,
            "freq": self._freq,
            "unit": self._unit,
            "description": self._description,
        }
        values_copy = self._data.values.copy()
        index_copy = self._data.index.copy()
        return TimeSeries(values_copy, index=index_copy, **metadata)


    # ------------------------------------------------------------------
    # Slicing
    # ------------------------------------------------------------------


[docs]
    def slice(
        self,
        start: Optional[Union[str, pd.Timestamp]] = None,
        end: Optional[Union[str, pd.Timestamp]] = None,
    ) -> "TimeSeries":
        """Return a time-bounded subset of the series.

        Both *start* and *end* are **inclusive**.  Either may be ``None``
        to leave that boundary open.

        Parameters
        ----------
        start:
            Start timestamp (inclusive).  Accepts any value parseable by
            :func:`pandas.Timestamp` (e.g., ``"2020-01-01"``).
        end:
            End timestamp (inclusive).

        Returns
        -------
        TimeSeries

        Raises
        ------
        ValueError
            If the resulting slice is empty.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020-01-01", periods=365, freq="D")
        >>> ts = TimeSeries(np.arange(365.0), index=idx)
        >>> q1 = ts.slice("2020-01-01", "2020-03-31")
        >>> q1.n
        91
        """
        sliced = self._data.loc[start:end]
        if sliced.empty:
            raise ValueError(
                f"Slice [{start!r} : {end!r}] produced an empty series. "
                "Check that the bounds fall within the series range "
                f"[{self.start} : {self.end}]."
            )
        return self._copy_with(sliced)


    # ------------------------------------------------------------------
    # Resampling
    # ------------------------------------------------------------------


[docs]
    def resample(
        self,
        freq: str,
        *,
        agg: Union[str, AggMethod] = AggMethod.MEAN,
    ) -> "TimeSeries":
        """Resample the series to a new frequency.

        Parameters
        ----------
        freq:
            Target pandas offset alias (e.g., ``"W"``, ``"MS"``).
        agg:
            Aggregation method.  Either an :class:`~tseda.core.AggMethod`
            member or its string value.  Default ``"mean"``.

        Returns
        -------
        TimeSeries
            Bins whose aggregated value is NaN are dropped.  The result
            carries *freq* as its frequency.

        Raises
        ------
        ValueError
            If *freq* is not recognised by pandas.
        AttributeError
            If *agg* does not name a callable method of the resampler.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020-01-01", periods=365, freq="D")
        >>> ts = TimeSeries(np.ones(365), index=idx)
        >>> ts.resample("MS").n    # 12 monthly values
        12
        """
        # Use the validator's return value, as the constructor does.
        freq = validate_freq_string(freq, name="freq")
        agg_str = agg.value if isinstance(agg, AggMethod) else str(agg)

        resampler = self._data.resample(freq)
        # Look the method up separately from calling it, so an AttributeError
        # raised inside the aggregation is not reported as a bad *agg*.
        aggregate = getattr(resampler, agg_str, None)
        if not callable(aggregate):
            raise AttributeError(
                f"'{agg_str}' is not a valid resampling aggregation. "
                f"Valid options: {[m.value for m in AggMethod]}."
            )

        resampled: pd.Series = aggregate()
        resampled = resampled.dropna()
        return self._copy_with(resampled, freq=freq)


    # ------------------------------------------------------------------
    # Transforms — all return new TimeSeries objects
    # ------------------------------------------------------------------


[docs]
    def diff(
        self,
        periods: int = 1,
        *,
        method: Union[str, DiffMethod] = DiffMethod.SIMPLE,
    ) -> "TimeSeries":
        """Difference the series.

        Parameters
        ----------
        periods:
            Number of periods to lag.  Default 1 (first difference).
        method:
            A :class:`DiffMethod` member or its string value.  One of:

            * ``"simple"`` — ``y[t] - y[t-k]``
            * ``"log"``    — ``log(y[t]) - log(y[t-k])``
            * ``"percent"``— ``(y[t] - y[t-k]) / y[t-k]``

        Returns
        -------
        TimeSeries
            All NaN rows of the differenced series are dropped: the leading
            rows introduced by differencing and any row whose difference
            involves a NaN observation.  The name gets the suffix
            ``_diff{periods}`` for the simple method and
            ``_{method}{periods}`` otherwise.

        Raises
        ------
        ValueError
            If *method* is ``"log"`` or ``"percent"`` and the series
            contains non-positive values.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=5, freq="D")
        >>> ts = TimeSeries([10.0, 11.0, 12.0, 11.0, 13.0], index=idx)
        >>> ts.diff().values
        array([ 1.,  1., -1.,  2.])
        """
        periods = validate_positive_int(periods, name="periods")
        m = DiffMethod(method) if isinstance(method, str) else method

        if m in (DiffMethod.LOG, DiffMethod.PERCENT):
            # NaN compares False here, so missing values do not trip the check.
            if np.any(self._data.values <= 0):
                raise ValueError(
                    f"DiffMethod.{m.name} requires strictly positive values; "
                    "the series contains zero or negative observations."
                )

        if m == DiffMethod.SIMPLE:
            result = self._data.diff(periods)
        elif m == DiffMethod.LOG:
            result = np.log(self._data).diff(periods)
        else:  # PERCENT
            # fill_method=None turns off pandas' implicit forward-fill (now
            # deprecated), so NaN gaps propagate as in the other methods
            # instead of showing up as 0 % changes.
            result = self._data.pct_change(periods, fill_method=None)

        result = result.dropna()
        suffix = f"_diff{periods}" if m == DiffMethod.SIMPLE else f"_{m.value}{periods}"
        return self._copy_with(result, name=self._name + suffix)



[docs]
    def log(self) -> "TimeSeries":
        """Apply the natural logarithm element-wise.

        Returns
        -------
        TimeSeries

        Raises
        ------
        ValueError
            If the series contains non-positive values.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=3, freq="D")
        >>> TimeSeries([1.0, np.e, np.e**2], index=idx).log().values
        array([0., 1., 2.])
        """
        if np.any(self._data.values <= 0):
            raise ValueError(
                "log() requires strictly positive values; "
                "the series contains zero or negative observations."
            )
        result = np.log(self._data)
        return self._copy_with(result, name=f"log({self._name})")



[docs]
    def standardize(self) -> "TimeSeries":
        """Standardise to zero mean and unit variance (z-score).

        The transform is ``(x - mean) / std``.  NaN values are ignored
        when computing statistics but preserved in position.

        Returns
        -------
        TimeSeries

        Raises
        ------
        ValueError
            If the standard deviation is zero (constant series).

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=4, freq="D")
        >>> ts = TimeSeries([2.0, 4.0, 6.0, 8.0], index=idx)
        >>> z = ts.standardize()
        >>> round(float(z.values.mean()), 10)
        0.0
        """
        mean = float(np.nanmean(self._data.values))
        std  = float(np.nanstd(self._data.values, ddof=1))
        if std == 0.0:
            raise ValueError(
                "standardize() requires a non-constant series (std == 0)."
            )
        result = (self._data - mean) / std
        return self._copy_with(result, name=f"z({self._name})")



[docs]
    def normalize(
        self,
        *,
        lower: float = 0.0,
        upper: float = 1.0,
    ) -> "TimeSeries":
        """Min-max normalise the series to [*lower*, *upper*].

        Parameters
        ----------
        lower:
            Target minimum value.  Default ``0.0``.
        upper:
            Target maximum value.  Default ``1.0``.

        Returns
        -------
        TimeSeries

        Raises
        ------
        ValueError
            If the series has zero range (max == min) or *lower* >= *upper*.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=3, freq="D")
        >>> ts = TimeSeries([0.0, 5.0, 10.0], index=idx)
        >>> ts.normalize().values
        array([0. , 0.5, 1. ])
        """
        if lower >= upper:
            raise ValueError(
                f"'lower' ({lower}) must be less than 'upper' ({upper})."
            )
        mn = float(np.nanmin(self._data.values))
        mx = float(np.nanmax(self._data.values))
        if mx == mn:
            raise ValueError(
                "normalize() requires a non-constant series (max == min)."
            )
        result = lower + (self._data - mn) / (mx - mn) * (upper - lower)
        return self._copy_with(result, name=f"norm({self._name})")



[docs]
    def rolling(
        self,
        window: int,
        *,
        agg: Union[str, AggMethod] = AggMethod.MEAN,
        center: bool = False,
        min_periods: Optional[int] = None,
    ) -> "TimeSeries":
        """Apply a rolling-window aggregation.

        Parameters
        ----------
        window:
            Size of the rolling window in number of observations.
        agg:
            Aggregation method (default ``"mean"``).  Either an
            :class:`AggMethod` member or its string value.
        center:
            Whether to set the window labels as the centre of the window
            (default ``False`` — trailing window).
        min_periods:
            Minimum number of non-NaN observations required to produce a
            value.  Defaults to *window*.

        Returns
        -------
        TimeSeries
            Every NaN in the rolled series is dropped, which removes the
            incomplete leading/trailing windows.  The result is named
            ``rolling_{window}_{agg}({name})``.

        Raises
        ------
        AttributeError
            If *agg* does not name a callable method of the rolling object.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=6, freq="D")
        >>> ts = TimeSeries([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], index=idx)
        >>> ts.rolling(3).values
        array([2., 3., 4., 5.])
        """
        # Use the validated value, as diff() does for ``periods``.
        window = validate_positive_int(window, name="window")
        agg_str = agg.value if isinstance(agg, AggMethod) else str(agg)

        roller = self._data.rolling(
            window=window, center=center, min_periods=min_periods
        )
        # Look the method up separately from calling it, so an AttributeError
        # raised inside the aggregation is not reported as a bad *agg*.
        aggregate = getattr(roller, agg_str, None)
        if not callable(aggregate):
            raise AttributeError(
                f"'{agg_str}' is not a valid rolling aggregation. "
                f"Valid options: {[m.value for m in AggMethod]}."
            )

        result: pd.Series = aggregate()
        result = result.dropna()
        return self._copy_with(
            result,
            name=f"rolling_{window}_{agg_str}({self._name})",
        )



[docs]
    def apply(self, func: Callable[[np.ndarray], np.ndarray], *, name: Optional[str] = None) -> "TimeSeries":
        """Apply an arbitrary element-wise function to the values.

        Parameters
        ----------
        func:
            Callable that takes a 1-D ``numpy.ndarray`` and returns a 1-D
            array of the same length.
        name:
            Name for the resulting series.  Defaults to
            ``"f({self.name})"``.

        Returns
        -------
        TimeSeries

        Raises
        ------
        ValueError
            If *func* changes the length of the array.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=3, freq="D")
        >>> ts = TimeSeries([1.0, 4.0, 9.0], index=idx)
        >>> ts.apply(np.sqrt).values
        array([1., 2., 3.])
        """
        result_vals = func(self._data.values.copy())
        if len(result_vals) != len(self._data):
            raise ValueError(
                f"'func' must return an array of the same length as the input "
                f"({len(self._data)}); got {len(result_vals)}."
            )
        result = pd.Series(result_vals, index=self._data.index, dtype=float)
        _name = name if name is not None else f"f({self._name})"
        return self._copy_with(result, name=_name)


    # ------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------


[docs]
    def __len__(self) -> int:
        return len(self._data)



[docs]
    def __contains__(self, timestamp: object) -> bool:
        """Check whether a timestamp exists in the index.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=3, freq="D")
        >>> ts = TimeSeries([1.0, 2.0, 3.0], index=idx)
        >>> pd.Timestamp("2020-01-02") in ts
        True
        """
        try:
            ts = pd.Timestamp(timestamp)  # type: ignore[arg-type]
        except Exception:
            return False
        return ts in self._data.index



[docs]
    def __getitem__(self, key: Union[int, slice]) -> Union[float, "TimeSeries"]:
        """Positional indexing by integer or slice.

        Parameters
        ----------
        key:
            * ``int`` — return the scalar value at that position.
            * ``slice`` — return a new :class:`TimeSeries` for that range.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> idx = pd.date_range("2020", periods=5, freq="D")
        >>> ts = TimeSeries([10.0, 20.0, 30.0, 40.0, 50.0], index=idx)
        >>> ts[0]
        10.0
        >>> ts[-1]
        50.0
        >>> ts[1:3].values
        array([20., 30.])
        """
        if isinstance(key, int):
            return float(self._data.iloc[key])
        if isinstance(key, slice):
            sliced = self._data.iloc[key]
            return self._copy_with(sliced)
        raise TypeError(
            f"Indices must be integers or slices, not {type(key).__name__!r}. "
            "For datetime-based slicing use ts.slice()."
        )


    def __eq__(self, other: object) -> bool:
        """Two :class:`TimeSeries` objects are equal when values and index match."""
        if not isinstance(other, TimeSeries):
            return NotImplemented
        return (
            self._name == other._name
            and self._data.index.equals(other._data.index)
            and np.array_equal(self._data.values, other._data.values, equal_nan=True)
        )


[docs]
    def __repr__(self) -> str:
        """Return a multi-line summary of the series.

        The summary lists name, length, time span, frequency, regularity
        and NaN count; unit and description (truncated to 60 characters)
        are appended only when set.  For an empty series the start, end
        and duration are shown as ``n/a`` instead of raising ``IndexError``.
        """
        unit_line = f"\n  unit        : {self._unit}" if self._unit else ""

        desc_line = ""
        if self._description:
            tail = "..." if len(self._description) > 60 else ""
            desc_line = f"\n  description : {self._description[:60]}{tail}"

        # An empty index has no first or last element to read.
        if self.n:
            start, end, duration = self.start, self.end, self.duration
        else:
            start = end = duration = "n/a"

        # max(..., 1) keeps the percentage defined for an empty series.
        nan_pct = 100.0 * self.n_nan / max(self.n, 1)
        return (
            f"TimeSeries(\n"
            f"  name        : {self._name}\n"
            f"  n           : {self.n:,}\n"
            f"  start       : {start}\n"
            f"  end         : {end}\n"
            f"  duration    : {duration}\n"
            f"  freq        : {self._freq or 'unknown'} ({self.freq_label})\n"
            f"  is_regular  : {self.is_regular}\n"
            f"  has_nan     : {self.has_nan} ({self.n_nan} / {nan_pct:.1f}%)"
            f"{unit_line}{desc_line}\n"
            f")"
        )