-
-
Notifications
You must be signed in to change notification settings - Fork 1
Cross-Channel Extractors #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 18 commits
71cb0b8
e457736
7596a1b
e4320e5
08af225
48927f1
7596829
5e3842d
43e3711
9e525e0
5158020
3601edd
e7e5d0a
fc3590a
52b8063
396e5b1
05fb0ac
0e09456
9f72309
8a0956e
df174d1
3ee312b
e6ecd2a
5cb38ef
56d2fad
9e39ee3
5de955d
c2be771
2fe7c84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| from extractors import CaptionExtractor, ChannelConfig | ||
| from synthesizers import CrossChannelSynthesizer | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class CrossChannelExtractor(CaptionExtractor):
    """Caption extractor that delegates to a list of cross-channel synthesizers.

    Every configured synthesizer is run on the recording, in order, and the
    annotations they return are concatenated into a single list.
    """

    caption_type = "cross_channel"

    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
        """Store the synthesizers to run; ``config`` is forwarded to the base class.

        Args:
            config: Channel configuration handed to ``CaptionExtractor.__init__``.
            synthesizers: Synthesizers to apply, in the order given.
        """
        super().__init__(config)
        self.synthesizers = synthesizers

    def extract(self, row: Recording) -> list[Annotation]:
        """Return the annotations of all synthesizers for ``row``, in synthesizer order."""
        # NOTE(review): relies on the base class exposing the config as
        # ``self.config`` -- confirm against CaptionExtractor.
        return [
            annotation
            for synthesizer in self.synthesizers
            for annotation in synthesizer.synthesize(row, self.config)
        ]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| import abc | ||
|
|
||
| import numpy as np | ||
|
|
||
| from extractors import ChannelConfig | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class CrossChannelSynthesizer(abc.ABC):
    """Base class for synthesizers that caption a recording across several channels.

    Subclasses implement :meth:`synthesize`. The helpers below look a channel up
    by name and summarise the finite, strictly positive samples of one channel
    inside the slice ``[start:end]`` of that channel.
    """

    @abc.abstractmethod
    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return the cross-channel annotations produced for ``row``."""

    @staticmethod
    def _index_or_none(row: Recording, channel_name: str) -> int | None:
        """Return the position of ``channel_name`` in ``row.channel_names``, or ``None`` if absent."""
        try:
            return row.channel_names.index(channel_name)
        except ValueError:
            return None

    @staticmethod
    def _positive_metric_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
        """Return the finite, strictly positive samples of channel ``idx`` in ``[start:end]``.

        Returns ``None`` when ``idx`` is ``None`` or when no sample in the slice
        is both finite and greater than zero.
        """
        if idx is None:
            return None
        values = np.asarray(row.values[idx][start:end], dtype=float)
        valid = np.isfinite(values) & (values > 0)
        if not valid.any():
            return None
        return values[valid]

    @classmethod
    def _reduce_metric(cls, row: Recording, idx: int | None, start: int, end: int, reducer) -> float | None:
        """Apply ``reducer`` to the valid window samples; ``None`` when there are none."""
        values = cls._positive_metric_values(row, idx, start, end)
        if values is None:
            return None
        return float(reducer(values))

    @classmethod
    def _metric_mean(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Mean of the valid samples in the window, or ``None`` if there are none."""
        return cls._reduce_metric(row, idx, start, end, np.mean)

    @classmethod
    def _metric_peak(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Maximum of the valid samples in the window, or ``None`` if there are none."""
        return cls._reduce_metric(row, idx, start, end, np.max)

    @classmethod
    def _metric_total(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Sum of the valid samples in the window, or ``None`` if there are none."""
        return cls._reduce_metric(row, idx, start, end, np.sum)

    @classmethod
    def _metric_day_mean_delta(cls, row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Return the window mean minus the mean over the channel's full length.

        Positive results mean the window sits above the whole-recording mean.
        Returns ``None`` when the channel is missing or has no valid samples in
        the window.
        """
        if idx is None:
            return None
        window_mean = cls._metric_mean(row, idx, start, end)
        if window_mean is None:
            return None

        # Review decision: the window's own samples are deliberately NOT excluded
        # from the reference mean.
        # NOTE(review): "day" assumes one recording spans one day -- confirm.
        # The channel length is taken from the channel itself rather than from
        # ``row.values.shape`` so that list-of-lists values work here exactly as
        # they do in the other helpers.
        day_values = cls._positive_metric_values(row, idx, 0, len(row.values[idx]))
        if day_values is None:
            return None
        return float(window_mean - np.mean(day_values))
|
|
||
|
|
||
def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
    """Return the half-open ``(start, end)`` index ranges of consecutive truthy runs.

    Args:
        mask: One-dimensional array-like; any truthy element counts as active.
        min_duration: Minimum run length (``end - start``) for a run to be kept.

    Returns:
        A list of ``(start, end)`` tuples of plain ints, in ascending order,
        where ``end`` is one past the last active index of the run.
    """
    # Coerce first: a list has no ``.any()``, and an integer mask with values
    # other than 0/1 would yield edge differences that never equal +1/-1.
    active = np.asarray(mask, dtype=bool)
    if not active.any():
        return []

    # Padding with False on both sides guarantees every run has a rising and a
    # falling edge, including runs touching the array boundaries.
    padded = np.concatenate(([False], active, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0]
    keep = (ends - starts) >= min_duration
    return list(zip(starts[keep].tolist(), ends[keep].tolist()))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| import json | ||
|
|
||
| import numpy as np | ||
|
|
||
| from extractors import ChannelConfig | ||
| from mhc.constants import CARDIO_WORKOUT_CHANNELS, WATCH_DISTANCE_CHANNEL, WATCH_HR_CHANNEL, WATCH_STEP_CHANNEL | ||
| from synthesizers import CrossChannelSynthesizer, contiguous_windows | ||
| from timef.schema import Annotation, Recording | ||
| from util import seed_from_key | ||
|
|
||
|
|
||
class CardioSynthesizer(CrossChannelSynthesizer):
    """Caption cardio workout windows together with watch HR, distance and step metrics.

    For each channel listed in ``CARDIO_WORKOUT_CHANNELS`` that is present in the
    recording, every contiguous run of finite, positive samples becomes one
    annotation. The caption text comes from the ``cross_channel`` / ``cardio``
    templates and is extended with a metrics suffix.
    """

    def __init__(self, min_duration: int = 0, hr_elevated_threshold_bpm: float = 100.0):
        """Configure the synthesizer.

        Args:
            min_duration: Minimum run length, in samples, for a workout window
                to be captioned.
            hr_elevated_threshold_bpm: Window-mean heart rate above which the
                caption states that the heart rate was elevated.
        """
        self.min_duration = min_duration
        self.hr_elevated_threshold_bpm = hr_elevated_threshold_bpm

    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return one annotation per cardio workout window found in ``row``.

        Args:
            row: Recording to caption.
            config: Supplies ``templates_path`` (a JSON file) and ``time_unit``.

        Returns:
            Annotations in ``CARDIO_WORKOUT_CHANNELS`` order, then window order.
        """
        hr_idx = self._index_or_none(row, WATCH_HR_CHANNEL)
        distance_idx = self._index_or_none(row, WATCH_DISTANCE_CHANNEL)
        step_idx = self._index_or_none(row, WATCH_STEP_CHANNEL)

        templates = json.loads(config.templates_path.read_text())["cross_channel"]["cardio"]
        time_unit = "hour" if config.time_unit == "hours" else "minute"
        # The per-recording seed makes the template choice deterministic per row.
        seed = seed_from_key(row.row_id)

        results: list[Annotation] = []
        for workout_channel, label, activity_name in CARDIO_WORKOUT_CHANNELS:
            workout_idx = self._index_or_none(row, workout_channel)
            if workout_idx is None:
                continue

            workout = np.asarray(row.values[workout_idx], dtype=float)
            workout_active = np.isfinite(workout) & (workout > 0)
            if not workout_active.any():
                continue

            windows = contiguous_windows(workout_active, self.min_duration)
            for i, (start, end) in enumerate(windows):
                # Windows are half-open; the caption shows an inclusive end.
                end_inclusive = max(start, end - 1)
                template = templates[(seed + i) % len(templates)]
                # ``_metrics_suffix`` appends every metric channel it reports on.
                channel_idxs = [workout_idx]
                metrics_suffix = self._metrics_suffix(
                    label=label,
                    row=row,
                    start=start,
                    end=end,
                    hr_idx=hr_idx,
                    distance_idx=distance_idx,
                    step_idx=step_idx,
                    channel_idxs=channel_idxs,
                )
                text = template.format(
                    activity_name=activity_name,
                    time_unit=time_unit,
                    start=start,
                    end=end_inclusive,
                    metrics_suffix=metrics_suffix,
                )
                results.append(
                    Annotation(
                        caption_type="cross_channel",
                        text=text,
                        channel_idxs=tuple(channel_idxs),
                        window=(start, end),
                        label=label,
                    )
                )
        return results

    def _metrics_suffix(
        self,
        label: str,
        row: Recording,
        start: int,
        end: int,
        hr_idx: int | None,
        distance_idx: int | None,
        step_idx: int | None,
        channel_idxs: list[int],
    ) -> str:
        """Build the ``", ..."`` metrics suffix for one workout window.

        ``channel_idxs`` is extended in place with the index of every metric
        channel that contributed text (HR, then distance, then steps). Step
        metrics are only reported for the ``"cardio_running"`` label.

        Returns:
            An empty string when no metric is available, otherwise the parts
            joined by ``", "`` and prefixed with ``", "``.
        """
        parts: list[str] = []
        parts.extend(self._hr_parts(row, start, end, hr_idx, channel_idxs))
        parts.extend(self._distance_parts(row, start, end, distance_idx, channel_idxs))
        if label == "cardio_running":
            parts.extend(self._step_parts(row, start, end, step_idx, channel_idxs))

        if not parts:
            return ""
        return ", " + ", ".join(parts)

    def _hr_parts(
        self, row: Recording, start: int, end: int, hr_idx: int | None, channel_idxs: list[int]
    ) -> list[str]:
        """Return the heart-rate phrases for the window and record ``hr_idx`` as used."""
        if hr_idx is None:
            return []
        hr_mean = self._metric_mean(row, hr_idx, start, end)
        if hr_mean is None:
            return []

        # Average and the optional "elevated" remark are emitted once, ahead of
        # the peak, instead of being repeated in both peak/no-peak branches.
        hr_summary = [f"avg HR {hr_mean:.0f} bpm"]
        if hr_mean > self.hr_elevated_threshold_bpm:
            hr_summary.append("the heartrate was elevated during this phase")
        hr_peak = self._metric_peak(row, hr_idx, start, end)
        if hr_peak is not None:
            hr_summary.append(f"peak HR {hr_peak:.0f} bpm")
        parts = [", ".join(hr_summary)]

        hr_day_delta = self._metric_day_mean_delta(row, hr_idx, start, end)
        if hr_day_delta is not None:
            direction = "higher" if hr_day_delta >= 0 else "lower"
            parts.append(f"this means HR is {abs(hr_day_delta):.0f} bpm {direction} than the mean of the day")
        channel_idxs.append(hr_idx)
        return parts

    def _distance_parts(
        self, row: Recording, start: int, end: int, distance_idx: int | None, channel_idxs: list[int]
    ) -> list[str]:
        """Return the watch-distance phrase for the window and record ``distance_idx`` as used."""
        if distance_idx is None:
            return []
        distance_mean = self._metric_mean(row, distance_idx, start, end)
        if distance_mean is None:
            return []

        # NOTE(review): the "m/min" wording is fixed even when the configured
        # time unit is hours -- confirm this is intended.
        distance_total = self._metric_total(row, distance_idx, start, end)
        if distance_total is not None:
            text = f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
        else:
            text = f"avg watch distance {distance_mean:.1f} m/min"
        channel_idxs.append(distance_idx)
        return [text]

    def _step_parts(
        self, row: Recording, start: int, end: int, step_idx: int | None, channel_idxs: list[int]
    ) -> list[str]:
        """Return the watch-step phrase for the window and record ``step_idx`` as used."""
        if step_idx is None:
            return []
        step_mean = self._metric_mean(row, step_idx, start, end)
        if step_mean is None:
            return []

        step_total = self._metric_total(row, step_idx, start, end)
        if step_total is not None:
            text = f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}"
        else:
            text = f"avg watch steps {step_mean:.1f} steps/min"
        channel_idxs.append(step_idx)
        return [text]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think all the helper methods in `__init__.py` are better suited to a `_helper.py`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
addressed in c2be771