StanfordBDHG · KarlDeck · Apr 9, 2026 · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026
diff --git a/captionizer.py b/captionizer.py
@@ -53,6 +53,10 @@ def run(
     from mhc.dataset import MHCDataset
     from mhc.transformer import MHCTransformer
     from mhc.constants import MHC_CHANNEL_CONFIG
+    from extractors.cross_channel import CrossChannelExtractor
+    from synthesizers.cardio import CardioSynthesizer
+    from synthesizers.sleep import SleepSynthesizer
+    from synthesizers.stationary_activity import StationaryActivitySynthesizer
     from extractors.statistical import StatisticalExtractor
     from extractors.structural import StructuralExtractor
     from models.local import LocalConfig, LocalModel
@@ -66,6 +70,14 @@ def run(
         StatisticalExtractor(MHC_CHANNEL_CONFIG),
         StructuralExtractor(MHC_CHANNEL_CONFIG),
         SemanticExtractor(MHC_CHANNEL_CONFIG),
+        CrossChannelExtractor(
+            MHC_CHANNEL_CONFIG,
+            synthesizers=[
+                SleepSynthesizer(min_duration=5),
+                StationaryActivitySynthesizer(min_duration=5),
+                CardioSynthesizer(min_duration=5),
+            ],
+        ),
     ])
 
     captionizer = Captionizer(dataset, MHCTransformer(), annotator)

diff --git a/explorer.py b/explorer.py
diff --git a/extractors/__init__.py b/extractors/__init__.py
@@ -9,16 +9,16 @@
 import abc
 import pathlib
 import re
-import zlib
 from dataclasses import dataclass, field
 
 from aggregators import MetricAggregator
 from detectors import StructuralDetector
 from timef.schema import Annotation, Recording
+from util import seed_from_key
 
 DEFAULT_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates.json"
 
-VALID_CAPTION_TYPES = ("statistical", "structural", "semantic")
+VALID_CAPTION_TYPES = ("statistical", "structural", "semantic", "cross_channel")
 
 
 _ACTIVITY_RE = re.compile(r"HKWorkoutActivityType(.+)$")
@@ -59,7 +59,7 @@ def __init__(self, config: ChannelConfig):
 
     @staticmethod
     def _seed(key: str) -> int:
-        return zlib.crc32(key.encode("utf-8")) & 0xFFFFFFFF
+        return seed_from_key(key)  # delegate to the shared util helper; synthesizers/cardio.py seeds from it too
 
     @abc.abstractmethod
     def extract(self, row: Recording) -> list[Annotation]:

diff --git a/extractors/cross_channel.py b/extractors/cross_channel.py
@@ -0,0 +1,25 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+from extractors import CaptionExtractor, ChannelConfig
+from synthesizers import CrossChannelSynthesizer
+from timef.schema import Annotation, Recording
+
+
+class CrossChannelExtractor(CaptionExtractor):
+    """Caption extractor that chains the annotations of several cross-channel synthesizers."""
+
+    caption_type = "cross_channel"
+
+    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
+        super().__init__(config)
+        self.synthesizers = synthesizers
+
+    def extract(self, row: Recording) -> list[Annotation]:
+        """Run every synthesizer on ``row`` and return their annotations in synthesizer order."""
+        return [note for synth in self.synthesizers for note in synth.synthesize(row, self.config)]
diff --git a/mhc/constants.py b/mhc/constants.py
@@ -58,6 +58,40 @@
 ]
 
 SLEEP_CHANNELS = ["sleep:asleep", "sleep:inbed"]
+WATCH_HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
+WATCH_DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
+WATCH_STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"
+
+CARDIO_WORKOUT_CHANNELS = (  # entries: (workout channel name, annotation label, activity name used in captions)
+    (
+        "workout:HKWorkoutActivityTypeRunning",
+        "cardio_running",
+        "running",
+    ),
+    (
+        "workout:HKWorkoutActivityTypeCycling",
+        "cardio_cycling",
+        "cycling",
+    ),
+)
+
+STATIONARY_WORKOUT_CHANNELS = (  # same triple layout; presumably read by the stationary-activity synthesizer — confirm
+    (
+        "workout:HKWorkoutActivityTypeHighIntensityIntervalTraining",
+        "stationary_hiit",
+        "stationary HIIT",
+    ),
+    (
+        "workout:HKWorkoutActivityTypeTraditionalStrengthTraining",
+        "stationary_strength",
+        "traditional strength",
+    ),
+    (
+        "workout:HKWorkoutActivityTypeFunctionalStrengthTraining",
+        "stationary_functional",
+        "functional",
+    ),
+)
 
 
 MHC_CHANNEL_CONFIG = ChannelConfig(

diff --git a/mhc_weekly/constants.py b/mhc_weekly/constants.py
@@ -9,8 +9,8 @@
 
 from extractors import ChannelConfig
 from aggregators import NonZeroAggregator
-from detectors.spike import SpikeDetector
 from detectors.trend import TrendDetector
+from detectors.spike import SpikeDetector
 from mhc.constants import ACTIVITY_CHANNELS, CHANNEL_NAMES, CONTINUOUS_CHANNELS, SLEEP_CHANNELS
 
 HOURLY_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates_hourly.json"

diff --git a/synthesizers/__init__.py b/synthesizers/__init__.py
@@ -0,0 +1,80 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+import abc
+
+import numpy as np
+
+from extractors import ChannelConfig
+from timef.schema import Annotation, Recording
+
+
+class CrossChannelSynthesizer(abc.ABC):
+    """Base class for synthesizers that derive annotations from several channels of one recording."""
+
+    @abc.abstractmethod
+    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]: ...
+
+    @staticmethod
+    def _index_or_none(row: Recording, channel_name: str) -> int | None:
+        """Return the position of ``channel_name`` in ``row.channel_names``, or ``None`` if absent."""
+        try:
+            return row.channel_names.index(channel_name)
+        except ValueError:
+            return None
+
+    @staticmethod
+    def _positive_metric_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
+        """Return finite, strictly positive samples of channel ``idx`` in ``[start, end)``, or ``None`` if none."""
+        if idx is None:
+            return None
+        values = np.asarray(row.values[idx][start:end], dtype=float)
+        kept = values[np.isfinite(values) & (values > 0)]
+        return kept if kept.size else None
+
+    @classmethod
+    def _metric_mean(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Return the mean of the window's positive samples, or ``None`` if there are none."""
+        values = cls._positive_metric_values(row, idx, start, end)
+        return None if values is None else float(np.mean(values))
+
+    @classmethod
+    def _metric_peak(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Return the maximum of the window's positive samples, or ``None`` if there are none."""
+        values = cls._positive_metric_values(row, idx, start, end)
+        return None if values is None else float(np.max(values))
+
+    @classmethod
+    def _metric_total(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Return the sum of the window's positive samples, or ``None`` if there are none."""
+        values = cls._positive_metric_values(row, idx, start, end)
+        return None if values is None else float(np.sum(values))
+
+    @classmethod
+    def _metric_day_mean_delta(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Return the window mean minus the mean over the channel's whole series, or ``None``."""
+        window_mean = cls._metric_mean(row, idx, start, end)
+        if window_mean is None or idx is None:
+            return None
+
+        # Take the series length from len() rather than row.values.shape[1] so a plain
+        # nested sequence works exactly like a 2-D ndarray, as in the other helpers.
+        day_values = cls._positive_metric_values(row, idx, 0, len(row.values[idx]))
+        return None if day_values is None else float(window_mean - np.mean(day_values))
+
+
+def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
+    """Return half-open ``(start, end)`` runs of truthy ``mask`` entries at least ``min_duration`` long."""
+    flags = np.asarray(mask, dtype=bool)  # coerce so integer or list masks behave like boolean arrays
+    if not flags.any():
+        return []
+    # Zero-pad both ends so runs touching either boundary still produce a rising and a falling edge.
+    edges = np.diff(np.concatenate(([0], flags.astype(np.int8), [0])))
+    starts, ends = np.flatnonzero(edges == 1), np.flatnonzero(edges == -1)
+    keep = (ends - starts) >= min_duration
+    return list(zip(starts[keep].tolist(), ends[keep].tolist()))
diff --git a/synthesizers/cardio.py b/synthesizers/cardio.py
@@ -0,0 +1,136 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+import json
+
+import numpy as np
+
+from extractors import ChannelConfig
+from mhc.constants import CARDIO_WORKOUT_CHANNELS, WATCH_DISTANCE_CHANNEL, WATCH_HR_CHANNEL, WATCH_STEP_CHANNEL
+from synthesizers import CrossChannelSynthesizer, contiguous_windows
+from timef.schema import Annotation, Recording
+from util import seed_from_key
+
+
+class CardioSynthesizer(CrossChannelSynthesizer):
+    """Caption cardio workout windows with heart-rate, distance and step metrics.
+
+    Args:
+        min_duration: Minimum window length, in samples, passed to ``contiguous_windows``.
+        hr_elevated_threshold_bpm: Mean heart rate above which a window is called elevated.
+    """
+
+    def __init__(self, min_duration: int = 0, hr_elevated_threshold_bpm: float = 100.0):
+        self.min_duration = min_duration
+        self.hr_elevated_threshold_bpm = hr_elevated_threshold_bpm
+
+    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
+        """Return one ``cross_channel`` annotation per qualifying workout window in ``row``."""
+        hr_idx = self._index_or_none(row, WATCH_HR_CHANNEL)
+        distance_idx = self._index_or_none(row, WATCH_DISTANCE_CHANNEL)
+        step_idx = self._index_or_none(row, WATCH_STEP_CHANNEL)
+
+        # JSON is UTF-8 by specification; do not depend on the platform's default encoding.
+        templates = json.loads(config.templates_path.read_text(encoding="utf-8"))["cross_channel"]["cardio"]
+        time_unit = "hour" if config.time_unit == "hours" else "minute"
+        seed = seed_from_key(row.row_id)
+
+        results: list[Annotation] = []
+        for workout_channel, label, activity_name in CARDIO_WORKOUT_CHANNELS:
+            workout_idx = self._index_or_none(row, workout_channel)
+            if workout_idx is None:
+                continue
+            workout = np.asarray(row.values[workout_idx], dtype=float)
+            workout_active = np.isfinite(workout) & (workout > 0)
+            if not workout_active.any():
+                continue
+
+            windows = contiguous_windows(workout_active, self.min_duration)
+            for i, (start, end) in enumerate(windows):
+                template = templates[(seed + i) % len(templates)]
+                channel_idxs = [workout_idx]
+                metrics_suffix = self._metrics_suffix(
+                    label=label,
+                    row=row,
+                    start=start,
+                    end=end,
+                    hr_idx=hr_idx,
+                    distance_idx=distance_idx,
+                    step_idx=step_idx,
+                    channel_idxs=channel_idxs,
+                )
+                text = template.format(
+                    activity_name=activity_name,
+                    time_unit=time_unit,
+                    start=start,
+                    end=max(start, end - 1),  # captions report the inclusive last sample
+                    metrics_suffix=metrics_suffix,
+                )
+                results.append(
+                    Annotation(
+                        caption_type="cross_channel",
+                        text=text,
+                        channel_idxs=tuple(channel_idxs),
+                        window=(start, end),
+                        label=label,
+                    )
+                )
+        return results
+
+    def _metrics_suffix(
+        self,
+        label: str,
+        row: Recording,
+        start: int,
+        end: int,
+        hr_idx: int | None,
+        distance_idx: int | None,
+        step_idx: int | None,
+        channel_idxs: list[int],
+    ) -> str:
+        """Build the ``", ..."`` metrics tail for the window ``[start, end)``.
+
+        Appends to ``channel_idxs`` in place the index of each channel that contributed
+        a metric. Returns ``""`` when no channel has positive samples in the window.
+        """
+        # NOTE(review): averages are labelled "/min" even though synthesize() also
+        # supports an hourly time unit — confirm the per-sample unit for hourly data.
+        parts: list[str] = []
+
+        # Mean, peak and total share one filtered sample set, so they are None together.
+        hr_mean = self._metric_mean(row, hr_idx, start, end)
+        hr_peak = self._metric_peak(row, hr_idx, start, end)
+        if hr_mean is not None and hr_peak is not None and hr_idx is not None:
+            hr_summary = [f"avg HR {hr_mean:.0f} bpm"]
+            if hr_mean > self.hr_elevated_threshold_bpm:
+                hr_summary.append("the heartrate was elevated during this phase")
+            hr_summary.append(f"peak HR {hr_peak:.0f} bpm")
+            parts.append(", ".join(hr_summary))
+
+            hr_day_delta = self._metric_day_mean_delta(row, hr_idx, start, end)
+            if hr_day_delta is not None:
+                direction = "higher" if hr_day_delta >= 0 else "lower"
+                parts.append(f"this means HR is {abs(hr_day_delta):.0f} bpm {direction} than the mean of the day")
+            channel_idxs.append(hr_idx)
+
+        distance_mean = self._metric_mean(row, distance_idx, start, end)
+        distance_total = self._metric_total(row, distance_idx, start, end)
+        if distance_mean is not None and distance_total is not None and distance_idx is not None:
+            parts.append(
+                f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
+            )
+            channel_idxs.append(distance_idx)
+
+        if label == "cardio_running":
+            step_mean = self._metric_mean(row, step_idx, start, end)
+            step_total = self._metric_total(row, step_idx, start, end)
+            if step_mean is not None and step_total is not None and step_idx is not None:
+                parts.append(f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}")
+                channel_idxs.append(step_idx)
+
+        return (", " + ", ".join(parts)) if parts else ""