Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
71cb0b8
In Bed but not Asleep annotation
KarlDeck Apr 9, 2026
e457736
Refactor CrossChannelExtractor into driver with pluggable synthesizers
max-rosenblattl Apr 12, 2026
7596a1b
made stationary activity synthesizer
KarlDeck Apr 12, 2026
e4320e5
added totals
KarlDeck Apr 12, 2026
08af225
added cardio synthesizers with totals
KarlDeck Apr 12, 2026
48927f1
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
7596829
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 12, 2026
5e3842d
put static methods into parent
KarlDeck Apr 13, 2026
43e3711
made min duration for synthesizer visible to users. Addresses Coderabb…
KarlDeck Apr 13, 2026
9e525e0
addressed coderabbit comment #2
KarlDeck Apr 13, 2026
5158020
put variables into mhc/constants.py
KarlDeck Apr 13, 2026
3601edd
added HR delta
KarlDeck Apr 13, 2026
e7e5d0a
rephrased HR delta
KarlDeck Apr 13, 2026
fc3590a
solved duplication issue in templates/templates.json
KarlDeck Apr 13, 2026
52b8063
fixed --weekly issue
KarlDeck Apr 13, 2026
396e5b1
added 100 bpm threshold
KarlDeck Apr 13, 2026
05fb0ac
put _seed into util.py
KarlDeck Apr 13, 2026
0e09456
Merge remote-tracking branch 'origin/main' into KarlDeck/Sleep-Bundles
KarlDeck Apr 13, 2026
9f72309
rephrased the synthesizer outputs
KarlDeck Apr 14, 2026
8a0956e
rephrase 2
KarlDeck Apr 14, 2026
df174d1
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
3ee312b
split up _metrics_suffix to make it easier to read
KarlDeck Apr 14, 2026
e6ecd2a
small cleanup
KarlDeck Apr 14, 2026
5cb38ef
Refactored _metrics_suffix into sub functions and transferred into parent
KarlDeck Apr 14, 2026
56d2fad
minor fix
KarlDeck Apr 14, 2026
9e39ee3
added docstrings
KarlDeck Apr 14, 2026
5de955d
comment added
KarlDeck Apr 14, 2026
c2be771
transfer functions from init to _helper
KarlDeck Apr 16, 2026
2fe7c84
created _workout base for cardio, stationary and future workouts
KarlDeck Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions captionizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def run(
from mhc.dataset import MHCDataset
from mhc.transformer import MHCTransformer
from mhc.constants import MHC_CHANNEL_CONFIG
from extractors.cross_channel import CrossChannelExtractor
from synthesizers.cardio import CardioSynthesizer
from synthesizers.sleep import SleepSynthesizer
from synthesizers.stationary_activity import StationaryActivitySynthesizer
from extractors.statistical import StatisticalExtractor
from extractors.structural import StructuralExtractor
from models.local import LocalConfig, LocalModel
Expand All @@ -66,6 +70,10 @@ def run(
StatisticalExtractor(MHC_CHANNEL_CONFIG),
StructuralExtractor(MHC_CHANNEL_CONFIG),
SemanticExtractor(MHC_CHANNEL_CONFIG),
CrossChannelExtractor(
MHC_CHANNEL_CONFIG,
synthesizers=[SleepSynthesizer(), StationaryActivitySynthesizer(), CardioSynthesizer()],
),
])

captionizer = Captionizer(dataset, MHCTransformer(), annotator)
Expand Down
263 changes: 203 additions & 60 deletions explorer.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion extractors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

DEFAULT_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates.json"

VALID_CAPTION_TYPES = ("statistical", "structural", "semantic")
VALID_CAPTION_TYPES = ("statistical", "structural", "semantic", "cross_channel")


_ACTIVITY_RE = re.compile(r"HKWorkoutActivityType(.+)$")
Expand Down
25 changes: 25 additions & 0 deletions extractors/cross_channel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

from extractors import CaptionExtractor, ChannelConfig
from synthesizers import CrossChannelSynthesizer
from timef.schema import Annotation, Recording


class CrossChannelExtractor(CaptionExtractor):
    """Caption extractor that delegates to a list of cross-channel synthesizers.

    The extractor itself produces no text; it hands the recording and its own
    channel configuration to every synthesizer and concatenates what they
    return, in synthesizer order.
    """

    caption_type = "cross_channel"

    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
        """Store the synthesizers to run.

        Args:
            config: Channel configuration forwarded to the base extractor and
                to every synthesizer call.
            synthesizers: Synthesizers to run, in the order their annotations
                should appear in the output.
        """
        super().__init__(config)
        self.synthesizers = synthesizers

    def extract(self, row: Recording) -> list[Annotation]:
        """Return the annotations of all synthesizers for *row*, concatenated."""
        return [
            annotation
            for synthesizer in self.synthesizers
            for annotation in synthesizer.synthesize(row, self.config)
        ]
2 changes: 1 addition & 1 deletion mhc_weekly/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

from extractors import ChannelConfig
from aggregators import NonZeroAggregator
from detectors.spike import SpikeDetector
from detectors.trend import TrendDetector
from detectors.spike import SpikeDetector
from mhc.constants import ACTIVITY_CHANNELS, CHANNEL_NAMES, CONTINUOUS_CHANNELS, SLEEP_CHANNELS

HOURLY_TEMPLATES_PATH = pathlib.Path(__file__).resolve().parent.parent / "templates" / "templates_hourly.json"
Expand Down
31 changes: 31 additions & 0 deletions synthesizers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import abc

import numpy as np

from extractors import ChannelConfig
from timef.schema import Annotation, Recording


class CrossChannelSynthesizer(abc.ABC):
    """Abstract interface for components that caption several channels jointly."""

    @abc.abstractmethod
    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Build the annotations this synthesizer derives from *row*.

        Args:
            row: Recording to inspect.
            config: Channel configuration supplied by the calling extractor.

        Returns:
            The annotations produced for *row*. Subclasses must override this
            method; the base implementation has no behavior.
        """


def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
    """Return the runs of consecutive truthy samples in a 1-D mask.

    Args:
        mask: One-dimensional mask. Boolean arrays are used as-is; any other
            array-like is coerced to bool first, so every non-zero entry
            counts as active. NaN converts to ``True`` under NumPy's bool
            cast, so float data should be turned into a comparison mask by
            the caller.
        min_duration: Minimum run length, in samples, for a run to be kept.

    Returns:
        Half-open ``(start, end)`` index pairs, in ascending order, one per
        run whose length ``end - start`` is at least ``min_duration``.
    """
    # Coerce once so that integer/float masks such as [0, 2, 2, 0] produce
    # edges of exactly +1/-1 below; without this, their runs go undetected.
    active = np.asarray(mask, dtype=bool)
    if not active.any():
        return []

    # Pad with False on both sides so runs touching either boundary still
    # produce a rising and a falling edge.
    padded = np.concatenate(([False], active, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0]
    long_enough = (ends - starts) >= min_duration
    return list(zip(starts[long_enough].tolist(), ends[long_enough].tolist()))
180 changes: 180 additions & 0 deletions synthesizers/cardio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import json

import numpy as np

from extractors import CaptionExtractor, ChannelConfig
from synthesizers import CrossChannelSynthesizer, contiguous_windows
from timef.schema import Annotation, Recording


class CardioSynthesizer(CrossChannelSynthesizer):
    """Caption running and cycling workout windows with watch-derived metrics.

    For every workout channel listed in ``WORKOUT_CHANNELS`` that is present
    in the recording, each contiguous run of active samples lasting at least
    ``min_duration`` samples becomes one ``cross_channel`` annotation. The
    caption text comes from the ``cross_channel -> cardio -> <template key>``
    section of the templates file and is extended with heart-rate, distance
    and (for running) step metrics when those channels hold usable data.
    """

    HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
    DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
    STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"
    # Each entry: (workout channel name, annotation label, template key).
    WORKOUT_CHANNELS = (
        (
            "workout:HKWorkoutActivityTypeRunning",
            "cardio_running",
            "running",
        ),
        (
            "workout:HKWorkoutActivityTypeCycling",
            "cardio_cycling",
            "cycling",
        ),
    )

    def __init__(self, min_duration: int = 5):
        """Configure the minimum window length, in samples, worth captioning."""
        self.min_duration = min_duration

    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return one annotation per sufficiently long workout window in *row*.

        Args:
            row: Recording whose ``channel_names``, ``values`` and ``row_id``
                are read.
            config: Supplies ``templates_path`` and ``time_unit``.

        Returns:
            Annotations ordered by ``WORKOUT_CHANNELS`` entry, then by window
            start. ``window`` is the half-open ``(start, end)`` range, while
            the caption text shows the inclusive last index.
        """
        hr_idx = self._index_or_none(row, self.HR_CHANNEL)
        distance_idx = self._index_or_none(row, self.DISTANCE_CHANNEL)
        step_idx = self._index_or_none(row, self.STEP_CHANNEL)

        templates = json.loads(config.templates_path.read_text())["cross_channel"]["cardio"]
        time_unit = "hour" if config.time_unit == "hours" else "minute"
        seed = CaptionExtractor._seed(row.row_id)

        results: list[Annotation] = []
        # Running count of windows already captioned, so template rotation
        # continues across workout types instead of restarting at the seed.
        template_offset = 0
        for workout_channel, label, template_key in self.WORKOUT_CHANNELS:
            workout_idx = self._index_or_none(row, workout_channel)
            if workout_idx is None:
                continue

            workout = np.asarray(row.values[workout_idx], dtype=float)
            workout_active = np.isfinite(workout) & (workout > 0)
            windows = contiguous_windows(workout_active, self.min_duration)
            if not windows:
                continue

            # Looked up only once there is something to caption, so a missing
            # template key cannot fail a recording without qualifying windows.
            subtype_templates = templates[template_key]
            for i, (start, end) in enumerate(windows):
                # `end` is exclusive; the caption reports the last covered index.
                end_inclusive = max(start, end - 1)
                template = subtype_templates[(seed + template_offset + i) % len(subtype_templates)]
                # _metrics_suffix appends every metric channel it reports on.
                channel_idxs = [workout_idx]
                metrics_suffix = self._metrics_suffix(
                    template_key=template_key,
                    row=row,
                    start=start,
                    end=end,
                    hr_idx=hr_idx,
                    distance_idx=distance_idx,
                    step_idx=step_idx,
                    channel_idxs=channel_idxs,
                )
                text = template.format(
                    time_unit=time_unit,
                    start=start,
                    end=end_inclusive,
                    metrics_suffix=metrics_suffix,
                )
                results.append(
                    Annotation(
                        caption_type="cross_channel",
                        text=text,
                        channel_idxs=tuple(channel_idxs),
                        window=(start, end),
                        label=label,
                    )
                )
            template_offset += len(windows)
        return results

    @staticmethod
    def _index_or_none(row: Recording, channel_name: str) -> int | None:
        """Return the position of *channel_name* in the row, or None if absent."""
        try:
            return row.channel_names.index(channel_name)
        except ValueError:
            return None

    @staticmethod
    def _valid_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
        """Return the finite, strictly positive samples of channel *idx* in ``[start, end)``.

        Returns None when the channel is absent or no sample in the slice
        qualifies, so callers can treat "no data" uniformly.
        """
        if idx is None:
            return None
        values = np.asarray(row.values[idx][start:end], dtype=float)
        valid = values[np.isfinite(values) & (values > 0)]
        if valid.size == 0:
            return None
        return valid

    @staticmethod
    def _metric_mean(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Mean of the finite, positive samples in the window, or None if there are none."""
        valid = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if valid is None else float(np.mean(valid))

    @staticmethod
    def _metric_peak(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Maximum of the finite, positive samples in the window, or None if there are none."""
        valid = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if valid is None else float(np.max(valid))

    @staticmethod
    def _metric_total(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Sum of the finite, positive samples in the window, or None if there are none."""
        valid = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if valid is None else float(np.sum(valid))

    def _metrics_suffix(
        self,
        template_key: str,
        row: Recording,
        start: int,
        end: int,
        hr_idx: int | None,
        distance_idx: int | None,
        step_idx: int | None,
        channel_idxs: list[int],
    ) -> str:
        """Build the metric clause appended to a workout caption.

        Args:
            template_key: Workout template key; selects which metrics apply.
            row: Recording supplying the metric samples.
            start: First index of the window (inclusive).
            end: End index of the window (exclusive).
            hr_idx: Heart-rate channel index, or None if absent.
            distance_idx: Distance channel index, or None if absent.
            step_idx: Step-count channel index, or None if absent.
            channel_idxs: Mutated in place: the index of every channel that
                contributes a clause is appended, in heart-rate, distance,
                step order.

        Returns:
            ``""`` when no metric has usable data, otherwise the clauses
            joined by ``", "`` and prefixed with ``", "``.
        """
        parts: list[str] = []

        # Mean, peak and total all derive from the same filtered slice, so
        # whenever one exists they all do; no partial fallback is needed.
        hr = self._valid_values(row, hr_idx, start, end)
        if hr is not None and hr_idx is not None:
            parts.append(f"avg HR {float(np.mean(hr)):.0f} bpm, peak HR {float(np.max(hr)):.0f} bpm")
            channel_idxs.append(hr_idx)

        distance = self._valid_values(row, distance_idx, start, end)
        if distance is not None and distance_idx is not None:
            distance_mean = float(np.mean(distance))
            if template_key in ("cycling", "running"):
                distance_total = float(np.sum(distance))
                parts.append(
                    f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
                )
            else:
                # Any other workout type reports the average only.
                parts.append(f"avg watch distance {distance_mean:.1f} m/min")
            channel_idxs.append(distance_idx)

        if template_key == "running":
            steps = self._valid_values(row, step_idx, start, end)
            if steps is not None and step_idx is not None:
                step_mean = float(np.mean(steps))
                step_total = float(np.sum(steps))
                parts.append(f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}")
                channel_idxs.append(step_idx)

        if not parts:
            return ""
        return ", " + ", ".join(parts)
53 changes: 53 additions & 0 deletions synthesizers/sleep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project.
#
# SPDX-License-Identifier: MIT
#
from __future__ import annotations

import json

import numpy as np

from extractors import CaptionExtractor, ChannelConfig
from synthesizers import CrossChannelSynthesizer, contiguous_windows
from timef.schema import Annotation, Recording


class SleepSynthesizer(CrossChannelSynthesizer):
    """Caption stretches where the wearer is in bed but not asleep.

    Reads the ``sleep:inbed`` and ``sleep:asleep`` channels and emits one
    ``cross_channel`` annotation, labelled ``in_bed_not_sleeping``, for each
    contiguous run of at least ``min_duration`` samples.
    """

    def __init__(self, min_duration: int = 5):
        """Configure the minimum window length, in samples, worth captioning."""
        self.min_duration = min_duration

    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return one annotation per in-bed-but-awake window in *row*.

        Returns an empty list when either sleep channel is missing or when
        the recording contains no asleep sample at all.
        """
        names = row.channel_names
        try:
            in_bed_idx = names.index("sleep:inbed")
            asleep_idx = names.index("sleep:asleep")
        except ValueError:
            return []

        in_bed = np.asarray(row.values[in_bed_idx], dtype=float)
        asleep = np.asarray(row.values[asleep_idx], dtype=float)

        is_asleep = ~np.isnan(asleep) & (asleep > 0)
        if not np.any(is_asleep):
            return []
        is_in_bed = ~np.isnan(in_bed) & (in_bed > 0)
        mask = is_in_bed & ~is_asleep

        time_unit = "hour" if config.time_unit == "hours" else "minute"
        templates = json.loads(config.templates_path.read_text())["cross_channel"]["sleep"]
        seed = CaptionExtractor._seed(row.row_id)

        # `end` is exclusive in `window`; the caption text shows the last
        # covered index instead.
        return [
            Annotation(
                caption_type="cross_channel",
                text=templates[(seed + i) % len(templates)].format(
                    time_unit=time_unit,
                    start=start,
                    end=max(start, end - 1),
                ),
                channel_idxs=(asleep_idx, in_bed_idx),
                window=(start, end),
                label="in_bed_not_sleeping",
            )
            for i, (start, end) in enumerate(contiguous_windows(mask, self.min_duration))
        ]
Loading
Loading