StanfordBDHG · KarlDeck · Apr 9, 2026 · Apr 12, 2026 · Apr 12, 2026 · Apr 12, 2026
diff --git a/captionizer.py b/captionizer.py
@@ -53,6 +53,10 @@ def run(
     from mhc.dataset import MHCDataset
     from mhc.transformer import MHCTransformer
     from mhc.constants import MHC_CHANNEL_CONFIG
+    from extractors.cross_channel import CrossChannelExtractor
+    from synthesizers.cardio import CardioSynthesizer
+    from synthesizers.sleep import SleepSynthesizer
+    from synthesizers.stationary_activity import StationaryActivitySynthesizer
     from extractors.statistical import StatisticalExtractor
     from extractors.structural import StructuralExtractor
     from models.local import LocalConfig, LocalModel
@@ -66,6 +70,10 @@ def run(
         StatisticalExtractor(MHC_CHANNEL_CONFIG),
         StructuralExtractor(MHC_CHANNEL_CONFIG),
         SemanticExtractor(MHC_CHANNEL_CONFIG),
+        CrossChannelExtractor(
+            MHC_CHANNEL_CONFIG,
+            synthesizers=[SleepSynthesizer(), StationaryActivitySynthesizer(), CardioSynthesizer()],
+        ),
     ])
 
     captionizer = Captionizer(dataset, MHCTransformer(), annotator)

diff --git a/explorer.py b/explorer.py
diff --git a/extractors/__init__.py b/extractors/__init__.py
@@ -18,7 +18,7 @@
 
 DEFAULT_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates.json"
 
-VALID_CAPTION_TYPES = ("statistical", "structural", "semantic")
+VALID_CAPTION_TYPES = ("statistical", "structural", "semantic", "cross_channel")
 
 
 _ACTIVITY_RE = re.compile(r"HKWorkoutActivityType(.+)$")

diff --git a/extractors/cross_channel.py b/extractors/cross_channel.py
@@ -0,0 +1,25 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+from extractors import CaptionExtractor, ChannelConfig
+from synthesizers import CrossChannelSynthesizer
+from timef.schema import Annotation, Recording
+
+
+class CrossChannelExtractor(CaptionExtractor):
+    """Caption extractor that delegates to a list of cross-channel synthesizers.
+
+    Every synthesizer receives the recording together with this extractor's
+    ``config``; the annotations they return are concatenated in synthesizer
+    order.
+    """
+
+    caption_type = "cross_channel"
+
+    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
+        """Initialise the base extractor with ``config`` and keep ``synthesizers``."""
+        super().__init__(config)
+        self.synthesizers = synthesizers
+
+    def extract(self, row: Recording) -> list[Annotation]:
+        """Return all annotations the synthesizers produce for ``row``."""
+        return [
+            annotation
+            for synthesizer in self.synthesizers
+            for annotation in synthesizer.synthesize(row, self.config)
+        ]
diff --git a/mhc_weekly/constants.py b/mhc_weekly/constants.py
@@ -9,8 +9,8 @@
 
 from extractors import ChannelConfig
 from aggregators import NonZeroAggregator
-from detectors.spike import SpikeDetector
 from detectors.trend import TrendDetector
+from detectors.spike import SpikeDetector
 from mhc.constants import ACTIVITY_CHANNELS, CHANNEL_NAMES, CONTINUOUS_CHANNELS, SLEEP_CHANNELS
 
 HOURLY_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates_hourly.json"

diff --git a/synthesizers/__init__.py b/synthesizers/__init__.py
@@ -0,0 +1,31 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+import abc
+
+import numpy as np
+
+from extractors import ChannelConfig
+from timef.schema import Annotation, Recording
+
+
+class CrossChannelSynthesizer(abc.ABC):
+    """Interface for components that derive annotations from one recording."""
+
+    @abc.abstractmethod
+    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
+        """Return the annotations derived from ``row`` under ``config``.
+
+        Abstract: concrete subclasses must override this method.
+        """
+
+
+def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
+    """Locate runs of consecutive truthy samples in a 1-D mask.
+
+    Args:
+        mask: Array whose truthy entries mark the samples of interest.
+        min_duration: Minimum run length, in samples, for a run to be kept.
+
+    Returns:
+        ``(start, end)`` index pairs in ascending order, where ``start`` is the
+        first sample of a run and ``end`` is one past its last sample.
+    """
+    if not mask.any():
+        return []
+
+    # Bracket the mask with False so runs touching either border still yield
+    # both a rising (+1) and a falling (-1) edge.
+    bracketed = np.concatenate(([False], mask, [False])).astype(np.int8)
+    edges = np.diff(bracketed)
+    run_starts = np.flatnonzero(edges == 1).tolist()
+    run_ends = np.flatnonzero(edges == -1).tolist()
+    return [
+        (first, stop)
+        for first, stop in zip(run_starts, run_ends)
+        if stop - first >= min_duration
+    ]
diff --git a/synthesizers/cardio.py b/synthesizers/cardio.py
@@ -0,0 +1,180 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+import json
+
+import numpy as np
+
+from extractors import CaptionExtractor, ChannelConfig
+from synthesizers import CrossChannelSynthesizer, contiguous_windows
+from timef.schema import Annotation, Recording
+
+
+class CardioSynthesizer(CrossChannelSynthesizer):
+    """Caption running and cycling workouts together with watch metrics.
+
+    For every workout channel listed in ``WORKOUT_CHANNELS`` the synthesizer
+    finds contiguous runs of active samples (finite and > 0) that last at
+    least ``min_duration`` samples and emits one ``cross_channel`` annotation
+    per run.  When the heart-rate, distance or step channels are present and
+    hold positive data inside a run, summary metrics are appended to the
+    caption and the channel index is added to the annotation's
+    ``channel_idxs``.
+    """
+
+    HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
+    DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
+    STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"
+    # Each entry is (workout channel name, annotation label, template key).
+    WORKOUT_CHANNELS = (
+        (
+            "workout:HKWorkoutActivityTypeRunning",
+            "cardio_running",
+            "running",
+        ),
+        (
+            "workout:HKWorkoutActivityTypeCycling",
+            "cardio_cycling",
+            "cycling",
+        ),
+    )
+
+    def __init__(self, min_duration: int = 5):
+        """Keep ``min_duration``, the minimum workout length in samples."""
+        self.min_duration = min_duration
+
+    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
+        """Build one annotation per sufficiently long workout window in ``row``.
+
+        Args:
+            row: Recording providing ``channel_names``, ``values`` and ``row_id``.
+            config: Supplies ``templates_path`` (JSON file holding
+                ``["cross_channel"]["cardio"][<template key>]`` template lists)
+                and ``time_unit``.
+
+        Returns:
+            Annotations ordered by ``WORKOUT_CHANNELS`` entry, then by window
+            start.  Empty when no workout channel has a qualifying window; in
+            that case the templates file is not read at all.
+        """
+        # First pass: collect qualifying windows so that rows without any
+        # workout never pay for reading and parsing the templates file.
+        active: list[tuple[int, str, str, list[tuple[int, int]]]] = []
+        for workout_channel, label, template_key in self.WORKOUT_CHANNELS:
+            workout_idx = self._index_or_none(row, workout_channel)
+            if workout_idx is None:
+                continue
+            workout = np.asarray(row.values[workout_idx], dtype=float)
+            windows = contiguous_windows(np.isfinite(workout) & (workout > 0), self.min_duration)
+            if windows:
+                active.append((workout_idx, label, template_key, windows))
+        if not active:
+            return []
+
+        hr_idx = self._index_or_none(row, self.HR_CHANNEL)
+        distance_idx = self._index_or_none(row, self.DISTANCE_CHANNEL)
+        step_idx = self._index_or_none(row, self.STEP_CHANNEL)
+
+        templates = json.loads(config.templates_path.read_text())["cross_channel"]["cardio"]
+        time_unit = "hour" if config.time_unit == "hours" else "minute"
+        seed = CaptionExtractor._seed(row.row_id)
+
+        results: list[Annotation] = []
+        # Running count of windows already captioned, so successive windows
+        # keep rotating through the templates across workout types.
+        template_offset = 0
+        for workout_idx, label, template_key, windows in active:
+            subtype_templates = templates[template_key]
+            for i, (start, end) in enumerate(windows):
+                template = subtype_templates[(seed + template_offset + i) % len(subtype_templates)]
+                # _metrics_suffix appends every metric channel it reports on.
+                channel_idxs = [workout_idx]
+                metrics_suffix = self._metrics_suffix(
+                    template_key=template_key,
+                    row=row,
+                    start=start,
+                    end=end,
+                    hr_idx=hr_idx,
+                    distance_idx=distance_idx,
+                    step_idx=step_idx,
+                    channel_idxs=channel_idxs,
+                )
+                text = template.format(
+                    time_unit=time_unit,
+                    start=start,
+                    # ``end`` is exclusive; the caption shows the last sample.
+                    end=max(start, end - 1),
+                    metrics_suffix=metrics_suffix,
+                )
+                results.append(
+                    Annotation(
+                        caption_type="cross_channel",
+                        text=text,
+                        channel_idxs=tuple(channel_idxs),
+                        window=(start, end),
+                        label=label,
+                    )
+                )
+            template_offset += len(windows)
+        return results
+
+    @staticmethod
+    def _index_or_none(row: Recording, channel_name: str) -> int | None:
+        """Return the position of ``channel_name`` in ``row.channel_names``, or ``None``."""
+        try:
+            return row.channel_names.index(channel_name)
+        except ValueError:
+            return None
+
+    @staticmethod
+    def _positive_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
+        """Return the finite, strictly positive samples of channel ``idx`` in ``[start, end)``.
+
+        Returns ``None`` when ``idx`` is ``None`` or no sample qualifies.
+        """
+        if idx is None:
+            return None
+        values = np.asarray(row.values[idx][start:end], dtype=float)
+        positive = values[np.isfinite(values) & (values > 0)]
+        return positive if positive.size else None
+
+    @staticmethod
+    def _metric_mean(row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Mean of the finite, positive samples in ``[start, end)``, or ``None`` if there are none."""
+        positive = CardioSynthesizer._positive_values(row, idx, start, end)
+        return None if positive is None else float(np.mean(positive))
+
+    @staticmethod
+    def _metric_peak(row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Maximum of the finite, positive samples in ``[start, end)``, or ``None`` if there are none."""
+        positive = CardioSynthesizer._positive_values(row, idx, start, end)
+        return None if positive is None else float(np.max(positive))
+
+    @staticmethod
+    def _metric_total(row: Recording, idx: int | None, start: int, end: int) -> float | None:
+        """Sum of the finite, positive samples in ``[start, end)``, or ``None`` if there are none."""
+        positive = CardioSynthesizer._positive_values(row, idx, start, end)
+        return None if positive is None else float(np.sum(positive))
+
+    def _metrics_suffix(
+        self,
+        template_key: str,
+        row: Recording,
+        start: int,
+        end: int,
+        hr_idx: int | None,
+        distance_idx: int | None,
+        step_idx: int | None,
+        channel_idxs: list[int],
+    ) -> str:
+        """Format the metric summary appended to a workout caption.
+
+        Heart rate and distance are reported for every template key; steps
+        only for ``"running"``.  A metric is skipped when its channel is
+        missing or has no positive sample in ``[start, end)``.
+
+        Side effect: the index of every reported channel is appended to
+        ``channel_idxs``.
+
+        Returns:
+            ``""`` when nothing can be reported, otherwise the parts joined by
+            ``", "`` and prefixed with ``", "``.
+        """
+        # NOTE(review): the "/min" units below are hard-coded although captions
+        # switch to "hour" when config.time_unit == "hours" -- confirm the
+        # per-sample unit for hourly recordings.
+        parts: list[str] = []
+
+        hr_mean = self._metric_mean(row, hr_idx, start, end)
+        if hr_mean is not None and hr_idx is not None:
+            hr_peak = self._metric_peak(row, hr_idx, start, end)
+            if hr_peak is not None:
+                parts.append(f"avg HR {hr_mean:.0f} bpm, peak HR {hr_peak:.0f} bpm")
+            else:
+                parts.append(f"avg HR {hr_mean:.0f} bpm")
+            channel_idxs.append(hr_idx)
+
+        distance_mean = self._metric_mean(row, distance_idx, start, end)
+        if distance_mean is not None and distance_idx is not None:
+            # Only running and cycling captions report the total distance.
+            distance_total = (
+                self._metric_total(row, distance_idx, start, end)
+                if template_key in ("cycling", "running")
+                else None
+            )
+            if distance_total is not None:
+                parts.append(
+                    f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
+                )
+            else:
+                parts.append(f"avg watch distance {distance_mean:.1f} m/min")
+            channel_idxs.append(distance_idx)
+
+        if template_key == "running":
+            step_mean = self._metric_mean(row, step_idx, start, end)
+            if step_mean is not None and step_idx is not None:
+                step_total = self._metric_total(row, step_idx, start, end)
+                if step_total is not None:
+                    parts.append(f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}")
+                else:
+                    parts.append(f"avg watch steps {step_mean:.1f} steps/min")
+                channel_idxs.append(step_idx)
+
+        if not parts:
+            return ""
+        return ", " + ", ".join(parts)
diff --git a/synthesizers/sleep.py b/synthesizers/sleep.py
@@ -0,0 +1,53 @@
+#
+# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
+#
+# SPDX-License-Identifier: MIT
+#
+from __future__ import annotations
+
+import json
+
+import numpy as np
+
+from extractors import CaptionExtractor, ChannelConfig
+from synthesizers import CrossChannelSynthesizer, contiguous_windows
+from timef.schema import Annotation, Recording
+
+
+class SleepSynthesizer(CrossChannelSynthesizer):
+    """Caption stretches where the in-bed channel is active but the asleep channel is not.
+
+    A sample counts as active when it is not NaN and greater than zero.
+    Recordings that lack either channel, or that contain no active asleep
+    sample at all, produce no annotations.
+    """
+
+    IN_BED_CHANNEL = "sleep:inbed"
+    ASLEEP_CHANNEL = "sleep:asleep"
+
+    def __init__(self, min_duration: int = 5):
+        """Keep ``min_duration``, the minimum window length in samples."""
+        self.min_duration = min_duration
+
+    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
+        """Build one ``in_bed_not_sleeping`` annotation per qualifying window.
+
+        Args:
+            row: Recording providing ``channel_names``, ``values`` and ``row_id``.
+            config: Supplies ``templates_path`` (JSON file holding the
+                ``["cross_channel"]["sleep"]`` template list) and ``time_unit``.
+
+        Returns:
+            Annotations ordered by window start.  Empty when a channel is
+            missing, nothing is ever asleep, or no window reaches
+            ``min_duration``; the templates file is not read in those cases.
+        """
+        try:
+            in_bed_idx = row.channel_names.index(self.IN_BED_CHANNEL)
+            asleep_idx = row.channel_names.index(self.ASLEEP_CHANNEL)
+        except ValueError:
+            return []
+
+        in_bed = np.asarray(row.values[in_bed_idx], dtype=float)
+        asleep = np.asarray(row.values[asleep_idx], dtype=float)
+        asleep_active = ~np.isnan(asleep) & (asleep > 0)
+        if not asleep_active.any():
+            return []
+        in_bed_active = ~np.isnan(in_bed) & (in_bed > 0)
+
+        windows = contiguous_windows(in_bed_active & ~asleep_active, self.min_duration)
+        if not windows:
+            # Nothing to caption: skip reading and parsing the templates file.
+            return []
+
+        time_unit = "hour" if config.time_unit == "hours" else "minute"
+        templates = json.loads(config.templates_path.read_text())["cross_channel"]["sleep"]
+        seed = CaptionExtractor._seed(row.row_id)
+
+        results: list[Annotation] = []
+        for i, (start, end) in enumerate(windows):
+            # ``end`` is exclusive; the caption shows the last sample instead.
+            end_inclusive = max(start, end - 1)
+            # Rotate through the templates, offset by the per-row seed.
+            template = templates[(seed + i) % len(templates)]
+            text = template.format(time_unit=time_unit, start=start, end=end_inclusive)
+            results.append(
+                Annotation(
+                    caption_type="cross_channel",
+                    text=text,
+                    channel_idxs=(asleep_idx, in_bed_idx),
+                    window=(start, end),
+                    label="in_bed_not_sleeping",
+                )
+            )
+        return results