# Source code for tseda.features.temporal

"""
Temporal feature extraction for time series.

Extracts calendar-based and time-index features from a
:class:`~tseda.core.TimeSeries`.  All features are deterministic functions
of the datetime index — no statistical estimation required.

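Three categories are produced:

* **Calendar features** — year, month, day, day-of-week, hour, quarter,
  ISO week-of-year, and 0/1 flags (is_weekend, is_month_start,
  is_month_end).
* **Cyclic encodings** — sine/cosine projections of periodic calendar
  fields (month, day-of-week, hour) so that ``month 12`` and ``month 1``
  are close in feature space.
* **Time-index features** — ``days_since_start`` (elapsed days since the
  first timestamp) and ``time_norm`` (the same quantity rescaled by the
  series span).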

Classes
-------
TemporalFeatureExtractor
    Stateless extractor returning a :class:`pandas.DataFrame`.

Examples
--------
>>> import pandas as pd, numpy as np
>>> from tseda import TimeSeries
>>> from tseda.features.temporal import TemporalFeatureExtractor

>>> idx = pd.date_range("2020-01-01", periods=10, freq="D")
>>> ts  = TimeSeries(np.arange(10.0), index=idx)
>>> df  = TemporalFeatureExtractor().extract(ts)
>>> list(df.columns[:4])
['year', 'month', 'day', 'dayofweek']
"""
from __future__ import annotations

import numpy as np
import pandas as pd

from tseda.core.timeseries import TimeSeries

__all__ = ["TemporalFeatureExtractor"]


class TemporalFeatureExtractor:
    """Extract calendar and cyclic time features from a
    :class:`~tseda.core.TimeSeries`.

    The extractor holds no state; every feature is computed from the
    datetime index of the series passed to :meth:`extract`.

    Methods
    -------
    extract(ts, cyclic, time_index)
        Return a :class:`pandas.DataFrame` with one row per timestamp of
        ``ts.index`` and one column per feature.

    Examples
    --------
    >>> import pandas as pd, numpy as np
    >>> from tseda import TimeSeries
    >>> from tseda.features.temporal import TemporalFeatureExtractor

    >>> idx = pd.date_range("2020-01-01", periods=5, freq="D")
    >>> ts  = TimeSeries([10.0, 11.0, 12.0, 11.5, 10.5], index=idx)
    >>> df  = TemporalFeatureExtractor().extract(ts)
    >>> int(df["year"].iloc[0])
    2020
    >>> int(df["month"].iloc[0])
    1
    """

    def extract(
        self,
        ts: TimeSeries,
        *,
        cyclic: bool = True,
        time_index: bool = True,
    ) -> pd.DataFrame:
        """Extract temporal features aligned to ``ts.index``.

        Parameters
        ----------
        ts : TimeSeries
            Input series.  Only its ``index`` is read.
        cyclic : bool, optional
            When ``True`` (default), add sine/cosine encodings for
            ``month``, ``dayofweek``, and ``hour``.
        time_index : bool, optional
            When ``True`` (default), add ``days_since_start`` and
            ``time_norm`` (0 → 1 over the series span).

        Returns
        -------
        pandas.DataFrame
            Index matches ``ts.index``.  Columns:

            Always present:
            ``year``, ``month``, ``day``, ``dayofweek`` (0 = Monday,
            6 = Sunday), ``hour``, ``quarter``, ``weekofyear`` (ISO week),
            ``is_weekend``, ``is_month_start``, ``is_month_end``.
            The three ``is_*`` flags are stored as floats (0.0 / 1.0).

            When ``cyclic=True``:
            ``month_sin``, ``month_cos``, ``dow_sin``, ``dow_cos``,
            ``hour_sin``, ``hour_cos``.

            When ``time_index=True``:
            ``days_since_start`` (fractional days elapsed since the first
            timestamp) and ``time_norm`` (``days_since_start`` divided by
            its value at the last timestamp).  If the series is empty,
            has a single observation, or spans zero time, the divisor
            falls back to 1.0 so ``time_norm`` stays finite.

        Raises
        ------
        TypeError
            If *ts* is not a :class:`~tseda.core.TimeSeries`.

        Examples
        --------
        >>> import pandas as pd, numpy as np
        >>> from tseda import TimeSeries
        >>> from tseda.features.temporal import TemporalFeatureExtractor

        >>> idx = pd.date_range("2020-01-01", periods=7, freq="D")
        >>> ts  = TimeSeries(np.ones(7), index=idx)
        >>> df  = TemporalFeatureExtractor().extract(ts, cyclic=False, time_index=False)
        >>> set(df.columns) >= {"year", "month", "day", "dayofweek", "is_weekend"}
        True
        """
        if not isinstance(ts, TimeSeries):
            raise TypeError(
                f"'ts' must be a TimeSeries, got {type(ts).__name__!r}."
            )

        idx = ts.index
        features: dict[str, np.ndarray] = {}

        # ── Calendar ──────────────────────────────────────────────────
        features["year"] = idx.year.to_numpy()
        features["month"] = idx.month.to_numpy()
        features["day"] = idx.day.to_numpy()
        features["dayofweek"] = idx.dayofweek.to_numpy()  # 0=Mon, 6=Sun
        features["hour"] = idx.hour.to_numpy()
        features["quarter"] = idx.quarter.to_numpy()
        features["weekofyear"] = idx.isocalendar().week.to_numpy(dtype=int)
        # Flags are kept as floats so the frame is uniformly numeric.
        features["is_weekend"] = (idx.dayofweek >= 5).astype(float)
        features["is_month_start"] = idx.is_month_start.astype(float)
        features["is_month_end"] = idx.is_month_end.astype(float)

        # ── Cyclic encodings ──────────────────────────────────────────
        if cyclic:
            # Month (1–12): shifted to 0–11 so January sits at angle 0.
            m = features["month"]
            features["month_sin"] = np.sin(2 * np.pi * (m - 1) / 12)
            features["month_cos"] = np.cos(2 * np.pi * (m - 1) / 12)
            # Day-of-week (0–6)
            d = features["dayofweek"]
            features["dow_sin"] = np.sin(2 * np.pi * d / 7)
            features["dow_cos"] = np.cos(2 * np.pi * d / 7)
            # Hour (0–23)
            h = features["hour"]
            features["hour_sin"] = np.sin(2 * np.pi * h / 24)
            features["hour_cos"] = np.cos(2 * np.pi * h / 24)

        # ── Time index ────────────────────────────────────────────────
        if time_index:
            if len(idx) == 0:
                # No first timestamp to measure from; emit empty columns
                # instead of failing on ``idx[0]``.
                days = np.empty(0, dtype=float)
            else:
                days = np.asarray(
                    (idx - idx[0]).total_seconds() / 86_400.0, dtype=float
                )
            features["days_since_start"] = days

            span = float(days[-1]) if days.size > 1 else 1.0
            if span == 0.0:
                # All timestamps coincide: avoid 0/0 -> NaN in time_norm.
                span = 1.0
            features["time_norm"] = days / span

        return pd.DataFrame(features, index=idx)