Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
71cb0b8
In Bed but not Asleep annotation
KarlDeck Apr 9, 2026
e457736
Refactor CrossChannelExtractor into driver with pluggable synthesizers
max-rosenblattl Apr 12, 2026
7596a1b
made stationary activity synthesizer
KarlDeck Apr 12, 2026
e4320e5
added totals
KarlDeck Apr 12, 2026
08af225
added cardio synthesizers with totals
KarlDeck Apr 12, 2026
48927f1
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
7596829
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
5e3842d
put static methods into parent
KarlDeck Apr 13, 2026
43e3711
made min duration for synthesizer visible to users. Adresses Coderabb…
KarlDeck Apr 13, 2026
9e525e0
adressed coderabbit comment #2 comment
KarlDeck Apr 13, 2026
5158020
put variables into mhc/constants.py
KarlDeck Apr 13, 2026
3601edd
added HR delta
KarlDeck Apr 13, 2026
e7e5d0a
rephrased HR delta
KarlDeck Apr 13, 2026
fc3590a
solved duplication issue in templates/templates.json
KarlDeck Apr 13, 2026
52b8063
fixed --weekly issue
KarlDeck Apr 13, 2026
396e5b1
added 100 bpm threshold
KarlDeck Apr 13, 2026
05fb0ac
put _seed into util.py
KarlDeck Apr 13, 2026
0e09456
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 13, 2026
9f72309
reprased the synthesizer outputs
KarlDeck Apr 14, 2026
8a0956e
rephrase 2
KarlDeck Apr 14, 2026
df174d1
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
3ee312b
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
e6ecd2a
small cleanup
KarlDeck Apr 14, 2026
5cb38ef
Refactored _metrics_suffix into sub functions and transfered into parent
KarlDeck Apr 14, 2026
56d2fad
minor fix
KarlDeck Apr 14, 2026
9e39ee3
added docstrings
KarlDeck Apr 14, 2026
5de955d
comment added
KarlDeck Apr 14, 2026
c2be771
transfer functions from init to _helper
KarlDeck Apr 16, 2026
2fe7c84
created _workout base for cardio, stationary and furutre workouts
KarlDeck Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions captionizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def run(
from mhc.dataset import MHCDataset
from mhc.transformer import MHCTransformer
from mhc.constants import MHC_CHANNEL_CONFIG
from extractors.cross_channel import CrossChannelExtractor
from synthesizers.cardio import CardioSynthesizer
from synthesizers.sleep import SleepSynthesizer
from synthesizers.stationary_activity import StationaryActivitySynthesizer
from extractors.statistical import StatisticalExtractor
from extractors.structural import StructuralExtractor
from models.local import LocalConfig, LocalModel
Expand All @@ -66,6 +70,14 @@ def run(
StatisticalExtractor(MHC_CHANNEL_CONFIG),
StructuralExtractor(MHC_CHANNEL_CONFIG),
SemanticExtractor(MHC_CHANNEL_CONFIG),
CrossChannelExtractor(
MHC_CHANNEL_CONFIG,
synthesizers=[
SleepSynthesizer(min_duration=5),
StationaryActivitySynthesizer(min_duration=5),
CardioSynthesizer(min_duration=5),
],
),
])

captionizer = Captionizer(dataset, MHCTransformer(), annotator)
Expand Down
277 changes: 215 additions & 62 deletions explorer.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions extractors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
import abc
import pathlib
import re
import zlib
from dataclasses import dataclass, field

from aggregators import MetricAggregator
from detectors import StructuralDetector
from timef.schema import Annotation, Recording
from util import seed_from_key

DEFAULT_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates.json"

VALID_CAPTION_TYPES = ("statistical", "structural", "semantic")
VALID_CAPTION_TYPES = ("statistical", "structural", "semantic", "cross_channel")


_ACTIVITY_RE = re.compile(r"HKWorkoutActivityType(.+)$")
Expand Down Expand Up @@ -59,7 +59,7 @@ def __init__(self, config: ChannelConfig):

@staticmethod
def _seed(key: str) -> int:
return zlib.crc32(key.encode("utf-8")) & 0xFFFFFFFF
return seed_from_key(key)

@abc.abstractmethod
def extract(self, row: Recording) -> list[Annotation]:
Expand Down
25 changes: 25 additions & 0 deletions extractors/cross_channel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

from extractors import CaptionExtractor, ChannelConfig
from synthesizers import CrossChannelSynthesizer
from timef.schema import Annotation, Recording


class CrossChannelExtractor(CaptionExtractor):
    """Caption extractor that drives a list of cross-channel synthesizers.

    Every synthesizer is asked, in the order supplied, to synthesize
    annotations for a recording; the results are concatenated.
    """

    caption_type = "cross_channel"

    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
        """Initialise the base extractor and remember the synthesizers.

        Args:
            config: Channel configuration handed to the base extractor.
            synthesizers: Synthesizers that ``extract`` calls in order.
        """
        super().__init__(config)
        self.synthesizers = synthesizers

    def extract(self, row: Recording) -> list[Annotation]:
        """Return the annotations of all synthesizers for ``row``.

        Each synthesizer receives ``row`` and ``self.config``; the output
        keeps synthesizer order, then each synthesizer's own order.
        """
        return [
            annotation
            for synthesizer in self.synthesizers
            for annotation in synthesizer.synthesize(row, self.config)
        ]
34 changes: 34 additions & 0 deletions mhc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,40 @@
]

SLEEP_CHANNELS = ["sleep:asleep", "sleep:inbed"]
# Channel names of the "hk_watch" heart-rate, walking/running-distance and
# step-count series.
WATCH_HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
WATCH_DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
WATCH_STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"

# Cardio workouts as (workout channel name, annotation label, activity name)
# triples. CardioSynthesizer unpacks them in exactly this order and uses the
# activity name when formatting the caption template.
CARDIO_WORKOUT_CHANNELS = (
(
"workout:HKWorkoutActivityTypeRunning",
"cardio_running",
"running",
),
(
"workout:HKWorkoutActivityTypeCycling",
"cardio_cycling",
"cycling",
),
)

# Stationary workouts in the same three-field layout.
# NOTE(review): presumably consumed by the stationary-activity synthesizer,
# which is not visible here - confirm against that module.
STATIONARY_WORKOUT_CHANNELS = (
(
"workout:HKWorkoutActivityTypeHighIntensityIntervalTraining",
"stationary_hiit",
"stationary HIIT",
),
(
"workout:HKWorkoutActivityTypeTraditionalStrengthTraining",
"stationary_strength",
"traditional strength",
),
(
"workout:HKWorkoutActivityTypeFunctionalStrengthTraining",
"stationary_functional",
"functional",
),
)


MHC_CHANNEL_CONFIG = ChannelConfig(
Expand Down
2 changes: 1 addition & 1 deletion mhc_weekly/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

from extractors import ChannelConfig
from aggregators import NonZeroAggregator
from detectors.spike import SpikeDetector
from detectors.trend import TrendDetector
from detectors.spike import SpikeDetector
from mhc.constants import ACTIVITY_CHANNELS, CHANNEL_NAMES, CONTINUOUS_CHANNELS, SLEEP_CHANNELS

HOURLY_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates_hourly.json"
Expand Down
80 changes: 80 additions & 0 deletions synthesizers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import abc

import numpy as np

from extractors import ChannelConfig
from timef.schema import Annotation, Recording


class CrossChannelSynthesizer(abc.ABC):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all the helper methods in __init__.py are better suited to a _helper.py

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in c2be771

@abc.abstractmethod
def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]: ...

@staticmethod
def _index_or_none(row: Recording, channel_name: str) -> int | None:
try:
return row.channel_names.index(channel_name)
except ValueError:
return None

@staticmethod
def _positive_metric_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
if idx is None:
return None
values = np.asarray(row.values[idx][start:end], dtype=float)
valid = np.isfinite(values) & (values > 0)
if not valid.any():
return None
return values[valid]

@classmethod
def _metric_mean(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
values = cls._positive_metric_values(row, idx, start, end)
if values is None:
return None
return float(np.mean(values))

@classmethod
def _metric_peak(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
values = cls._positive_metric_values(row, idx, start, end)
if values is None:
return None
return float(np.max(values))

@classmethod
def _metric_total(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
values = cls._positive_metric_values(row, idx, start, end)
if values is None:
return None
return float(np.sum(values))

@classmethod
def _metric_day_mean_delta(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just a question: should we exclude the window mean from the daily mean? probably not though

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I don't think so. But something I've been wanting to do for a while now is to calculate the Resting HR for the person and then the RR because that's a very informative medical measurement. But this might include calculating the resting HR over multiple days to be more accurate. So either we do multi day values or just do it for one day which is an approximation. For the weekly data we can definitely do it. Even for one time series

window_mean = cls._metric_mean(row, idx, start, end)
if window_mean is None or idx is None:
return None

day_values = cls._positive_metric_values(row, idx, 0, row.values.shape[1])
if day_values is None:
return None
return float(window_mean - np.mean(day_values))


def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
    """Return the index ranges of contiguous truthy runs in ``mask``.

    Args:
        mask: One-dimensional activity mask. Any array-like is accepted and
            coerced to bool, so non-binary truthy values (e.g. counts
            greater than one) are treated as active.
        min_duration: Minimum run length, in samples, for a run to be kept.

    Returns:
        ``(start, end)`` tuples of Python ints in ascending order, where
        ``start`` is the first active index and ``end`` is one past the last
        active index. Empty when nothing is active.
    """
    # Coerce first: the edge detection below relies on steps of exactly +1/-1,
    # which only holds for a 0/1 mask.
    active = np.asarray(mask, dtype=bool)
    if not active.any():
        return []

    # Pad with False on both sides so runs touching either end of the mask
    # still produce both a rising and a falling edge.
    padded = np.concatenate(([False], active, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0]
    keep = (ends - starts) >= min_duration
    return list(zip(starts[keep].tolist(), ends[keep].tolist()))
136 changes: 136 additions & 0 deletions synthesizers/cardio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import json

import numpy as np

from extractors import ChannelConfig
from mhc.constants import CARDIO_WORKOUT_CHANNELS, WATCH_DISTANCE_CHANNEL, WATCH_HR_CHANNEL, WATCH_STEP_CHANNEL
from synthesizers import CrossChannelSynthesizer, contiguous_windows
from timef.schema import Annotation, Recording
from util import seed_from_key


class CardioSynthesizer(CrossChannelSynthesizer):
def __init__(self, min_duration: int = 0, hr_elevated_threshold_bpm: float = 100.0):
    """Store the synthesizer's tunable thresholds.

    Args:
        min_duration: Minimum length of a contiguous workout window for
            it to be annotated; passed to ``contiguous_windows``.
        hr_elevated_threshold_bpm: Window-mean heart rate above which the
            caption states that the heart rate was elevated.
    """
    self.hr_elevated_threshold_bpm = hr_elevated_threshold_bpm
    self.min_duration = min_duration

def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
    """Create one cross-channel annotation per contiguous cardio workout window.

    For every entry of ``CARDIO_WORKOUT_CHANNELS`` present in ``row``, the
    workout channel is turned into an activity mask (finite and > 0), split
    into contiguous windows of at least ``self.min_duration`` samples, and
    each window is rendered with a caption template plus a metrics suffix.

    Args:
        row: Recording providing ``channel_names``, ``values`` and ``row_id``.
        config: Channel configuration providing ``templates_path`` and
            ``time_unit``.

    Returns:
        The annotations in workout-channel order, then window order. Empty
        when no cardio workout channel is present or active.
    """
    hr_idx = self._index_or_none(row, WATCH_HR_CHANNEL)
    distance_idx = self._index_or_none(row, WATCH_DISTANCE_CHANNEL)
    step_idx = self._index_or_none(row, WATCH_STEP_CHANNEL)

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    raw_templates = config.templates_path.read_text(encoding="utf-8")
    templates = json.loads(raw_templates)["cross_channel"]["cardio"]
    time_unit = "hour" if config.time_unit == "hours" else "minute"
    seed = seed_from_key(row.row_id)

    results: list[Annotation] = []
    for workout_channel, label, activity_name in CARDIO_WORKOUT_CHANNELS:
        # Same lookup helper as for the metric channels above.
        workout_idx = self._index_or_none(row, workout_channel)
        if workout_idx is None:
            continue

        workout = np.asarray(row.values[workout_idx], dtype=float)
        workout_active = np.isfinite(workout) & (workout > 0)

        # contiguous_windows returns [] for an all-False mask, so inactive
        # channels fall through without a separate check.
        windows = contiguous_windows(workout_active, self.min_duration)
        for i, (start, end) in enumerate(windows):
            # ``end`` is exclusive; the caption reports the last active index.
            end_inclusive = max(start, end - 1)
            # Rotate through the templates, offset by the per-row seed.
            template = templates[(seed + i) % len(templates)]
            # _metrics_suffix appends the index of every metric channel it
            # reports, so the annotation lists all channels it draws on.
            channel_idxs = [workout_idx]
            metrics_suffix = self._metrics_suffix(
                label=label,
                row=row,
                start=start,
                end=end,
                hr_idx=hr_idx,
                distance_idx=distance_idx,
                step_idx=step_idx,
                channel_idxs=channel_idxs,
            )
            text = template.format(
                activity_name=activity_name,
                time_unit=time_unit,
                start=start,
                end=end_inclusive,
                metrics_suffix=metrics_suffix,
            )
            results.append(
                Annotation(
                    caption_type="cross_channel",
                    text=text,
                    channel_idxs=tuple(channel_idxs),
                    window=(start, end),
                    label=label,
                )
            )
    return results

def _metrics_suffix(
self,
label: str,
row: Recording,
start: int,
end: int,
hr_idx: int | None,
distance_idx: int | None,
step_idx: int | None,
channel_idxs: list[int],
) -> str:
parts: list[str] = []
Comment thread
max-rosenblattl marked this conversation as resolved.

hr_mean = self._metric_mean(row, hr_idx, start, end)
if hr_mean is not None and hr_idx is not None:
hr_summary = []
hr_peak = self._metric_peak(row, hr_idx, start, end)
if hr_peak is not None:
hr_summary.append(f"avg HR {hr_mean:.0f} bpm")
if hr_mean > self.hr_elevated_threshold_bpm:
hr_summary.append("the heartrate was elevated during this phase")
hr_summary.append(f"peak HR {hr_peak:.0f} bpm")
else:
hr_summary.append(f"avg HR {hr_mean:.0f} bpm")
Copy link
Copy Markdown
Collaborator

@max-rosenblattl max-rosenblattl Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hr_summary.append(f"avg HR {hr_mean:.0f} bpm") this line is added twice in each if branch

I think more fluent German sentences fit the current captions better than the current abbreviations.
"averaging a heart rate of HR bpm, peaking at HR..."

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets use the meta channel config for that, we defined it somewhere

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Made sentences more fluid in 9f72309

if hr_mean > self.hr_elevated_threshold_bpm:
hr_summary.append("the heartrate was elevated during this phase")
parts.append(", ".join(hr_summary))

hr_day_delta = self._metric_day_mean_delta(row, hr_idx, start, end)
if hr_day_delta is not None:
direction = "higher" if hr_day_delta >= 0 else "lower"
parts.append(f"this means HR is {abs(hr_day_delta):.0f} bpm {direction} than the mean of the day")
channel_idxs.append(hr_idx)

distance_mean = self._metric_mean(row, distance_idx, start, end)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we split this up into two functions for readability?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in df174d1

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored and put into parent in 5cb38ef

distance_total = self._metric_total(row, distance_idx, start, end)
if distance_mean is not None and distance_idx is not None:
if distance_total is not None:
parts.append(
f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
Comment thread
KarlDeck marked this conversation as resolved.
Outdated
)
else:
parts.append(f"avg watch distance {distance_mean:.1f} m/min")
channel_idxs.append(distance_idx)

if label == "cardio_running":
Comment thread
KarlDeck marked this conversation as resolved.
step_mean = self._metric_mean(row, step_idx, start, end)
step_total = self._metric_total(row, step_idx, start, end)
if step_mean is not None and step_idx is not None:
if step_total is not None:
parts.append(f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}")
else:
parts.append(f"avg watch steps {step_mean:.1f} steps/min")
channel_idxs.append(step_idx)

if not parts:
return ""
return ", " + ", ".join(parts)
Loading
Loading