Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
71cb0b8
In Bed but not Asleep annotation
KarlDeck Apr 9, 2026
e457736
Refactor CrossChannelExtractor into driver with pluggable synthesizers
max-rosenblattl Apr 12, 2026
7596a1b
made stationary activity synthesizer
KarlDeck Apr 12, 2026
e4320e5
added totals
KarlDeck Apr 12, 2026
08af225
added cardio synthesizers with totals
KarlDeck Apr 12, 2026
48927f1
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
7596829
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
5e3842d
put static methods into parent
KarlDeck Apr 13, 2026
43e3711
made min duration for synthesizer visible to users. Addresses Coderabb…
KarlDeck Apr 13, 2026
9e525e0
addressed coderabbit comment #2
KarlDeck Apr 13, 2026
5158020
put variables into mhc/constants.py
KarlDeck Apr 13, 2026
3601edd
added HR delta
KarlDeck Apr 13, 2026
e7e5d0a
rephrased HR delta
KarlDeck Apr 13, 2026
fc3590a
solved duplication issue in templates/templates.json
KarlDeck Apr 13, 2026
52b8063
fixed --weekly issue
KarlDeck Apr 13, 2026
396e5b1
added 100 bpm threshold
KarlDeck Apr 13, 2026
05fb0ac
put _seed into util.py
KarlDeck Apr 13, 2026
0e09456
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 13, 2026
9f72309
rephrased the synthesizer outputs
KarlDeck Apr 14, 2026
8a0956e
rephrase 2
KarlDeck Apr 14, 2026
df174d1
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
3ee312b
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
e6ecd2a
small cleanup
KarlDeck Apr 14, 2026
5cb38ef
Refactored _metrics_suffix into sub functions and transferred into parent
KarlDeck Apr 14, 2026
56d2fad
minor fix
KarlDeck Apr 14, 2026
9e39ee3
added docstrings
KarlDeck Apr 14, 2026
5de955d
comment added
KarlDeck Apr 14, 2026
c2be771
transfer functions from init to _helper
KarlDeck Apr 16, 2026
2fe7c84
created _workout base for cardio, stationary and future workouts
KarlDeck Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions captionizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def run(
from mhc.dataset import MHCDataset
from mhc.transformer import MHCTransformer
from mhc.constants import MHC_CHANNEL_CONFIG
from extractors.cross_channel import CrossChannelExtractor
from synthesizers.cardio import CardioSynthesizer
from synthesizers.sleep import SleepSynthesizer
from synthesizers.stationary_activity import StationaryActivitySynthesizer
from extractors.statistical import StatisticalExtractor
from extractors.structural import StructuralExtractor
from models.local import LocalConfig, LocalModel
Expand All @@ -66,6 +70,14 @@ def run(
StatisticalExtractor(MHC_CHANNEL_CONFIG),
StructuralExtractor(MHC_CHANNEL_CONFIG),
SemanticExtractor(MHC_CHANNEL_CONFIG),
CrossChannelExtractor(
MHC_CHANNEL_CONFIG,
synthesizers=[
SleepSynthesizer(min_duration=5),
StationaryActivitySynthesizer(min_duration=5),
CardioSynthesizer(min_duration=5),
],
),
])

captionizer = Captionizer(dataset, MHCTransformer(), annotator)
Expand Down
277 changes: 215 additions & 62 deletions explorer.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions extractors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
import abc
import pathlib
import re
import zlib
from dataclasses import dataclass, field

from aggregators import MetricAggregator
from detectors import StructuralDetector
from timef.schema import Annotation, Recording
from util import seed_from_key

DEFAULT_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates.json"

VALID_CAPTION_TYPES = ("statistical", "structural", "semantic")
VALID_CAPTION_TYPES = ("statistical", "structural", "semantic", "cross_channel")


_ACTIVITY_RE = re.compile(r"HKWorkoutActivityType(.+)$")
Expand Down Expand Up @@ -59,7 +59,7 @@ def __init__(self, config: ChannelConfig):

@staticmethod
def _seed(key: str) -> int:
return zlib.crc32(key.encode("utf-8")) & 0xFFFFFFFF
return seed_from_key(key)

@abc.abstractmethod
def extract(self, row: Recording) -> list[Annotation]:
Expand Down
25 changes: 25 additions & 0 deletions extractors/cross_channel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

from extractors import CaptionExtractor, ChannelConfig
from synthesizers import CrossChannelSynthesizer
from timef.schema import Annotation, Recording


class CrossChannelExtractor(CaptionExtractor):
    """Caption extractor that delegates to a list of cross-channel synthesizers.

    Each synthesizer is asked to ``synthesize`` annotations for a recording;
    the extractor simply concatenates their results in synthesizer order.
    """

    caption_type = "cross_channel"

    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
        """Store the synthesizers to run; *config* is forwarded to the base class."""
        super().__init__(config)
        self.synthesizers = synthesizers

    def extract(self, row: Recording) -> list[Annotation]:
        """Return the annotations of every synthesizer for *row*, in order.

        NOTE(review): relies on ``self.config`` being set by
        ``CaptionExtractor.__init__`` - confirm against the base class.
        """
        results: list[Annotation] = []
        for synth in self.synthesizers:
            results.extend(synth.synthesize(row, self.config))
        return results
34 changes: 34 additions & 0 deletions mhc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,40 @@
]

SLEEP_CHANNELS = ["sleep:asleep", "sleep:inbed"]
# Channel-name strings for the watch-sourced HealthKit quantities.
# WATCH_HR_CHANNEL is the channel whose metadata synthesizers/_helper.py looks
# up when it formats heart-rate sentences.
WATCH_HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
WATCH_DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
WATCH_STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"

# One 3-tuple of strings per cardio workout type. The first element is a
# "workout:..." channel name.
# NOTE(review): the second and third elements look like an annotation key and
# a human-readable label - confirm against the synthesizer that consumes this.
CARDIO_WORKOUT_CHANNELS = (
(
"workout:HKWorkoutActivityTypeRunning",
"cardio_running",
"running",
),
(
"workout:HKWorkoutActivityTypeCycling",
"cardio_cycling",
"cycling",
),
)

# Same 3-tuple layout as CARDIO_WORKOUT_CHANNELS, for stationary workout types.
# NOTE(review): field meanings beyond the channel name are assumed - confirm.
STATIONARY_WORKOUT_CHANNELS = (
(
"workout:HKWorkoutActivityTypeHighIntensityIntervalTraining",
"stationary_hiit",
"stationary HIIT",
),
(
"workout:HKWorkoutActivityTypeTraditionalStrengthTraining",
"stationary_strength",
"traditional strength",
),
(
"workout:HKWorkoutActivityTypeFunctionalStrengthTraining",
"stationary_functional",
"functional",
),
)


MHC_CHANNEL_CONFIG = ChannelConfig(
Expand Down
2 changes: 1 addition & 1 deletion mhc_weekly/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

from extractors import ChannelConfig
from aggregators import NonZeroAggregator
from detectors.spike import SpikeDetector
from detectors.trend import TrendDetector
from detectors.spike import SpikeDetector
from mhc.constants import ACTIVITY_CHANNELS, CHANNEL_NAMES, CONTINUOUS_CHANNELS, SLEEP_CHANNELS

HOURLY_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates_hourly.json"
Expand Down
18 changes: 18 additions & 0 deletions synthesizers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import abc

from extractors import ChannelConfig
from timef.schema import Annotation, Recording

from synthesizers._workout import WorkoutSynthesizer

class CrossChannelSynthesizer(abc.ABC):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all the helper methods in __init__.py are better suited to a _helper.py

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in c2be771

@abc.abstractmethod
def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]: ...
203 changes: 203 additions & 0 deletions synthesizers/_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import numpy as np

from extractors import ChannelConfig
from mhc.constants import WATCH_HR_CHANNEL
from timef.schema import Recording


def index_or_none(row: Recording, channel_name: str) -> int | None:
    """Return the position of *channel_name* in ``row.channel_names``.

    Returns ``None`` instead of raising when the channel is not present.
    """
    try:
        return row.channel_names.index(channel_name)
    except ValueError:
        # ``.index`` signals a missing entry with ValueError.
        return None


def positive_metric_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
    """Return the finite, strictly positive samples of one channel window.

    The window is ``row.values[idx][start:end]`` converted to a float array.
    Returns ``None`` when *idx* is ``None`` or when no sample in the window is
    both finite and greater than zero.
    """
    if idx is None:
        return None
    window = np.asarray(row.values[idx][start:end], dtype=float)
    # NaN/inf and non-positive samples are dropped before any statistic is taken.
    kept = window[np.isfinite(window) & (window > 0)]
    return kept if kept.size else None


def metric_mean(row: Recording, idx: int | None, start: int, end: int) -> float | None:
    """Return the mean of the window's finite, positive samples.

    Returns ``None`` when ``positive_metric_values`` yields nothing for the
    window (missing channel or no usable sample).
    """
    values = positive_metric_values(row, idx, start, end)
    if values is None:
        return None
    return float(np.mean(values))


def metric_peak(row: Recording, idx: int | None, start: int, end: int) -> float | None:
    """Return the maximum of the window's finite, positive samples.

    Returns ``None`` when ``positive_metric_values`` yields nothing for the
    window (missing channel or no usable sample).
    """
    values = positive_metric_values(row, idx, start, end)
    if values is None:
        return None
    return float(np.max(values))


def metric_total(row: Recording, idx: int | None, start: int, end: int) -> float | None:
    """Return the sum of the window's finite, positive samples.

    Returns ``None`` when ``positive_metric_values`` yields nothing for the
    window (missing channel or no usable sample).
    """
    values = positive_metric_values(row, idx, start, end)
    if values is None:
        return None
    return float(np.sum(values))


def metric_day_mean_delta(row: Recording, idx: int | None, start: int, end: int) -> float | None:
    """Return the window mean minus the mean over the channel's full length.

    Both means are taken over finite, positive samples only. Returns ``None``
    when *idx* is ``None`` or when either mean cannot be computed.
    """
    if idx is None:
        return None

    window_mean = metric_mean(row, idx, start, end)
    if window_mean is None:
        return None

    # Reuse the shared whole-recording mean rather than re-deriving it here.
    day_mean = metric_day_mean(row, idx)
    if day_mean is None:
        return None
    return float(window_mean - day_mean)


def metric_day_mean(row: Recording, idx: int | None) -> float | None:
    """Return the mean of the channel's finite, positive samples over its full length.

    The full length is ``row.values.shape[1]``. Returns ``None`` when *idx* is
    ``None`` or the channel has no usable sample.
    """
    if idx is None:
        return None
    day_values = positive_metric_values(row, idx, 0, row.values.shape[1])
    if day_values is None:
        return None
    return float(np.mean(day_values))


def channel_meta(config: ChannelConfig, channel_name: str) -> tuple[str, str]:
    """Return ``(display_name, unit)`` for *channel_name*.

    The pair comes from the first two fields of ``config.meta[channel_name]``.
    When the channel has no entry, the name falls back to
    ``config.display_name(channel_name)`` and the unit to an empty string.
    """
    display_name, unit, _ = config.meta.get(channel_name, (config.display_name(channel_name), "", 0))
    return display_name, unit


def format_metric_summary(
    config: ChannelConfig,
    channel_name: str,
    mean: float,
    peak: float | None = None,
    elevated_threshold: float | None = None,
) -> str:
    """Build a sentence fragment summarising a channel's mean and optional peak.

    Args:
        config: Channel configuration used to resolve display name and unit.
        channel_name: Channel whose metadata labels the numbers.
        mean: Mean value, rendered with no decimals.
        peak: Optional peak value, rendered with no decimals.
        elevated_threshold: When given and ``mean`` exceeds it, an
            "elevated" remark is appended.

    Returns:
        The fragment, without a trailing period.
    """
    display_name, unit = channel_meta(config, channel_name)
    # Only attach the unit when there is one; otherwise the text would carry a
    # dangling space (e.g. "of 72 , peaking at 80 ").
    unit_suffix = f" {unit}" if unit else ""

    summary = f"averaging a {display_name} of {mean:.0f}{unit_suffix}"
    if peak is not None:
        summary += f", peaking at {peak:.0f}{unit_suffix}"
    if elevated_threshold is not None and mean > elevated_threshold:
        summary += f", with an elevated {display_name} during this phase"
    return summary


def sentence(text: str) -> str:
    """Return *text* stripped of surrounding whitespace and ending in a period.

    Empty or whitespace-only input yields an empty string; text that already
    ends with "." is returned without a second period.
    """
    stripped = text.strip()
    if not stripped:
        return ""
    if stripped.endswith("."):
        return stripped
    return f"{stripped}."


def finalize_caption_text(text: str, metrics_suffix: str) -> str:
    """Normalise the end of *text* to a single period when a metrics suffix exists.

    With a non-empty *metrics_suffix*, all trailing periods are collapsed into
    exactly one. With an empty suffix, *text* is returned untouched. The suffix
    itself is not appended here.
    """
    if not metrics_suffix:
        return text
    return text.rstrip(".") + "."


def append_hr_metrics(
    parts: list[str],
    channel_idxs: list[int],
    config: ChannelConfig,
    row: Recording,
    start: int,
    end: int,
    hr_idx: int | None,
    elevated_threshold: float,
    include_space_before_day_unit: bool = True,
) -> None:
    """Append heart-rate summary sentences and include the HR channel when present.

    Mutates *parts* (sentences) and *channel_idxs* (contributing channels) in
    place. Does nothing when *hr_idx* is ``None`` or the window has no usable
    heart-rate sample.

    Args:
        parts: Sentence list to extend.
        channel_idxs: Channel-index list to extend with *hr_idx*.
        config: Channel configuration used for display name and unit.
        row: Recording holding the samples.
        start: Inclusive window start index.
        end: Exclusive window end index.
        hr_idx: Row index of the heart-rate channel, or ``None``.
        elevated_threshold: Mean above which the summary notes an elevated rate.
        include_space_before_day_unit: Whether a space separates the day-mean
            number from its unit in the comparison sentence.
    """
    hr_mean = metric_mean(row, hr_idx, start, end)
    if hr_mean is None or hr_idx is None:
        return

    hr_peak = metric_peak(row, hr_idx, start, end)
    parts.append(
        sentence(
            format_metric_summary(
                config=config,
                channel_name=WATCH_HR_CHANNEL,
                mean=hr_mean,
                peak=hr_peak,
                elevated_threshold=elevated_threshold,
            )
        )
    )

    # The day mean is computed once; the delta is derived from the window mean
    # already in hand instead of re-scanning the window and the full channel.
    hr_day_mean = metric_day_mean(row, hr_idx)
    if hr_day_mean is not None:
        hr_day_delta = hr_mean - hr_day_mean
        hr_name, hr_unit = channel_meta(config, WATCH_HR_CHANNEL)
        direction = "higher" if hr_day_delta >= 0 else "lower"
        # Skip the unit (and its separating space) entirely when it is empty,
        # so the sentence never contains a double space.
        delta_unit = f" {hr_unit}" if hr_unit else ""
        day_mean_unit = f" {hr_unit}" if include_space_before_day_unit and hr_unit else hr_unit
        parts.append(
            sentence(
                f"The {hr_name} was {abs(hr_day_delta):.0f}{delta_unit} {direction} than the day's mean of {hr_day_mean:.0f}{day_mean_unit}"
            )
        )

    channel_idxs.append(hr_idx)


def append_distance_metrics(
    parts: list[str],
    channel_idxs: list[int],
    row: Recording,
    start: int,
    end: int,
    distance_idx: int | None,
) -> None:
    """Append distance summary sentences and include the distance channel when present.

    Mutates *parts* and *channel_idxs* in place. Does nothing when
    *distance_idx* is ``None`` or the window has no usable distance sample.
    """
    distance_mean = metric_mean(row, distance_idx, start, end)
    if distance_mean is None or distance_idx is None:
        return

    parts.append(sentence(f"The watch recorded an average distance of {distance_mean:.1f} m/min during this period"))

    # The total is only needed once the window is known to have usable data.
    distance_total = metric_total(row, distance_idx, start, end)
    if distance_total is not None:
        parts.append(sentence(f"The total distance recorded by the watch in that interval was {distance_total:.1f} m"))

    channel_idxs.append(distance_idx)


def append_step_metrics(
    parts: list[str],
    channel_idxs: list[int],
    row: Recording,
    start: int,
    end: int,
    step_idx: int | None,
) -> None:
    """Append step-count summary sentences and include the step channel when present.

    Mutates *parts* and *channel_idxs* in place. Does nothing when *step_idx*
    is ``None`` or the window has no usable step sample.
    """
    step_mean = metric_mean(row, step_idx, start, end)
    if step_mean is None or step_idx is None:
        return

    parts.append(sentence(f"The watch recorded an average step count of {step_mean:.1f} steps/min during this period"))

    # The total is only needed once the window is known to have usable data.
    step_total = metric_total(row, step_idx, start, end)
    if step_total is not None:
        parts.append(sentence(f"The total step count recorded by the watch during that time was {step_total:.0f}"))

    channel_idxs.append(step_idx)


def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
    """Return ``(start, end)`` index pairs for each run of truthy values in *mask*.

    ``start`` is inclusive and ``end`` exclusive. Runs shorter than
    *min_duration* samples are dropped.

    Args:
        mask: 1-D sequence; any non-zero entry counts as "on".
        min_duration: Minimum run length, in samples, to keep.

    Returns:
        Runs in ascending order as plain-int tuples; empty when nothing is on.
    """
    # Coerce to bool so that integer/float masks (or plain lists) produce
    # clean 0/1 edges; otherwise a value like 2 would yield a diff of +2/-2
    # and the run would be missed.
    flags = np.asarray(mask, dtype=bool)
    if not flags.any():
        return []

    # Padding with False on both sides guarantees every run has a rising and
    # a falling edge, even when it touches the array boundary.
    padded = np.concatenate(([False], flags, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)
    keep = (ends - starts) >= min_duration
    return list(zip(starts[keep].tolist(), ends[keep].tolist()))
Loading
Loading