-
-
Notifications
You must be signed in to change notification settings - Fork 1
Cross-Channel Extractors #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
71cb0b8
e457736
7596a1b
e4320e5
08af225
48927f1
7596829
5e3842d
43e3711
9e525e0
5158020
3601edd
e7e5d0a
fc3590a
52b8063
396e5b1
05fb0ac
0e09456
9f72309
8a0956e
df174d1
3ee312b
e6ecd2a
5cb38ef
56d2fad
9e39ee3
5de955d
c2be771
2fe7c84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| from extractors import CaptionExtractor, ChannelConfig | ||
| from synthesizers import CrossChannelSynthesizer | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class CrossChannelExtractor(CaptionExtractor):
    """Caption extractor that delegates to a list of cross-channel synthesizers.

    Each configured synthesizer is run against the recording and the
    resulting annotations are concatenated in synthesizer order.
    """

    caption_type = "cross_channel"

    def __init__(self, config: ChannelConfig, synthesizers: list[CrossChannelSynthesizer]):
        """Store the configuration and the synthesizers to run.

        Args:
            config: Channel configuration; passed to the base extractor and
                handed to every synthesizer on each ``extract`` call.
            synthesizers: Synthesizers to run, in output order.
        """
        super().__init__(config)
        # Snapshot the sequence so later mutation of the caller's list cannot
        # silently change what this extractor runs.
        self.synthesizers = list(synthesizers)

    def extract(self, row: Recording) -> list[Annotation]:
        """Return the annotations of every synthesizer for ``row``.

        Args:
            row: Recording to annotate.

        Returns:
            All annotations, grouped by synthesizer in configuration order;
            empty when no synthesizer produced anything.
        """
        results: list[Annotation] = []
        for synth in self.synthesizers:
            results.extend(synth.synthesize(row, self.config))
        return results
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| import abc | ||
|
|
||
| import numpy as np | ||
|
|
||
| from extractors import ChannelConfig | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class CrossChannelSynthesizer(abc.ABC):
    """Abstract interface for components that produce annotations from a recording."""

    @abc.abstractmethod
    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return the annotations this synthesizer finds in ``row``.

        Args:
            row: Recording to analyze.
            config: Channel configuration supplied by the caller.

        Returns:
            Zero or more annotations.
        """
|
|
||
|
|
||
def contiguous_windows(mask: np.ndarray, min_duration: int) -> list[tuple[int, int]]:
    """Return the runs of consecutive truthy samples in ``mask``.

    Args:
        mask: 1-D array-like; every truthy element counts as active. The
            input is coerced to ``bool`` so integer/float masks and plain
            lists behave like boolean arrays.
        min_duration: Minimum run length, in samples, for a run to be kept.

    Returns:
        Runs in ascending order as half-open ``(start, end)`` index pairs
        (``end`` is one past the last active sample). Empty when no run is
        long enough.
    """
    # Coerce first: on a non-boolean mask the transitions below would be
    # +/-k instead of +/-1 and every run would be silently dropped.
    active = np.asarray(mask, dtype=bool)
    if not active.any():
        return []

    # Pad with False on both sides so runs touching either edge of the mask
    # still produce one rising (+1) and one falling (-1) transition.
    padded = np.concatenate(([False], active, [False]))
    diffs = np.diff(padded.astype(np.int8))
    starts = np.flatnonzero(diffs == 1)
    ends = np.flatnonzero(diffs == -1)
    keep = (ends - starts) >= min_duration
    return list(zip(starts[keep].tolist(), ends[keep].tolist()))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| import json | ||
|
|
||
| import numpy as np | ||
|
|
||
| from extractors import CaptionExtractor, ChannelConfig | ||
| from synthesizers import CrossChannelSynthesizer, contiguous_windows | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class CardioSynthesizer(CrossChannelSynthesizer):
    """Caption running and cycling workouts, enriched with watch metrics.

    For every supported workout channel present in a recording, each
    sufficiently long run of active samples becomes one annotation. The
    caption text comes from the ``cross_channel`` / ``cardio`` templates and
    is extended with heart-rate, distance and (for running) step metrics
    whenever those channels have valid samples inside the window.
    """

    HR_CHANNEL = "hk_watch:HKQuantityTypeIdentifierHeartRate"
    DISTANCE_CHANNEL = "hk_watch:HKQuantityTypeIdentifierDistanceWalkingRunning"
    STEP_CHANNEL = "hk_watch:HKQuantityTypeIdentifierStepCount"

    # One entry per supported workout: (channel name, annotation label, template key).
    WORKOUT_CHANNELS = (
        (
            "workout:HKWorkoutActivityTypeRunning",
            "cardio_running",
            "running",
        ),
        (
            "workout:HKWorkoutActivityTypeCycling",
            "cardio_cycling",
            "cycling",
        ),
    )

    def __init__(self, min_duration: int = 5):
        """Create a synthesizer.

        Args:
            min_duration: Minimum number of consecutive active samples a
                workout window needs in order to be captioned.
        """
        self.min_duration = min_duration

    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return one annotation per workout window found in ``row``.

        Args:
            row: Recording whose ``channel_names`` / ``values`` are scanned.
            config: Supplies ``templates_path`` (JSON file holding the
                ``cross_channel`` -> ``cardio`` templates) and ``time_unit``.

        Returns:
            Annotations ordered by workout type (``WORKOUT_CHANNELS`` order),
            then by window start. Empty when no workout channel is present
            or none has a long enough active window.
        """
        hr_idx = self._index_or_none(row, self.HR_CHANNEL)
        distance_idx = self._index_or_none(row, self.DISTANCE_CHANNEL)
        step_idx = self._index_or_none(row, self.STEP_CHANNEL)

        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        templates = json.loads(config.templates_path.read_text(encoding="utf-8"))["cross_channel"]["cardio"]
        time_unit = "hour" if config.time_unit == "hours" else "minute"
        seed = CaptionExtractor._seed(row.row_id)

        results: list[Annotation] = []
        # Running count of windows already captioned, so template rotation
        # continues across workout types instead of restarting for each one.
        template_offset = 0
        for workout_channel, label, template_key in self.WORKOUT_CHANNELS:
            workout_idx = self._index_or_none(row, workout_channel)
            if workout_idx is None:
                continue

            workout = np.asarray(row.values[workout_idx], dtype=float)
            workout_active = np.isfinite(workout) & (workout > 0)
            windows = contiguous_windows(workout_active, self.min_duration)
            if not windows:
                continue

            subtype_templates = templates[template_key]
            for i, (start, end) in enumerate(windows):
                # ``end`` is exclusive; the caption reports the last active index.
                end_inclusive = max(start, end - 1)
                template = subtype_templates[(seed + template_offset + i) % len(subtype_templates)]
                # ``_metrics_suffix`` appends every metric channel it used.
                channel_idxs = [workout_idx]
                metrics_suffix = self._metrics_suffix(
                    template_key=template_key,
                    row=row,
                    start=start,
                    end=end,
                    hr_idx=hr_idx,
                    distance_idx=distance_idx,
                    step_idx=step_idx,
                    channel_idxs=channel_idxs,
                )
                text = template.format(
                    time_unit=time_unit,
                    start=start,
                    end=end_inclusive,
                    metrics_suffix=metrics_suffix,
                )
                results.append(
                    Annotation(
                        caption_type="cross_channel",
                        text=text,
                        channel_idxs=tuple(channel_idxs),
                        window=(start, end),
                        label=label,
                    )
                )
            template_offset += len(windows)
        return results

    @staticmethod
    def _index_or_none(row: Recording, channel_name: str) -> int | None:
        """Return the position of ``channel_name`` in ``row``, or ``None`` if absent."""
        try:
            return row.channel_names.index(channel_name)
        except ValueError:
            return None

    @staticmethod
    def _valid_values(row: Recording, idx: int | None, start: int, end: int) -> np.ndarray | None:
        """Return the finite, strictly positive samples of channel ``idx`` in ``[start, end)``.

        Returns ``None`` when the channel is missing (``idx is None``) or the
        window holds no valid sample.
        """
        if idx is None:
            return None
        values = np.asarray(row.values[idx][start:end], dtype=float)
        valid = np.isfinite(values) & (values > 0)
        if not valid.any():
            return None
        return values[valid]

    @staticmethod
    def _metric_mean(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Return the mean of the valid samples in the window, or ``None`` if there are none."""
        values = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if values is None else float(np.mean(values))

    @staticmethod
    def _metric_peak(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Return the maximum of the valid samples in the window, or ``None`` if there are none."""
        values = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if values is None else float(np.max(values))

    @staticmethod
    def _metric_total(row: Recording, idx: int | None, start: int, end: int) -> float | None:
        """Return the sum of the valid samples in the window, or ``None`` if there are none."""
        values = CardioSynthesizer._valid_values(row, idx, start, end)
        return None if values is None else float(np.sum(values))

    def _hr_part(self, row: Recording, hr_idx: int | None, start: int, end: int) -> str | None:
        """Return the heart-rate fragment for the window, or ``None`` without valid samples."""
        hr_mean = self._metric_mean(row, hr_idx, start, end)
        if hr_mean is None:
            return None
        # Mean and peak share the same valid-sample filter, so the peak
        # exists whenever the mean does.
        hr_peak = self._metric_peak(row, hr_idx, start, end)
        return f"avg HR {hr_mean:.0f} bpm, peak HR {hr_peak:.0f} bpm"

    def _distance_part(
        self,
        template_key: str,
        row: Recording,
        distance_idx: int | None,
        start: int,
        end: int,
    ) -> str | None:
        """Return the distance fragment for the window, or ``None`` without valid samples."""
        distance_mean = self._metric_mean(row, distance_idx, start, end)
        if distance_mean is None:
            return None
        # NOTE(review): the "m/min" label is hard-coded even when
        # config.time_unit is "hours" -- confirm the intended unit.
        if template_key in ("cycling", "running"):
            distance_total = self._metric_total(row, distance_idx, start, end)
            return f"avg watch distance {distance_mean:.1f} m/min, total watch distance {distance_total:.1f} m"
        return f"avg watch distance {distance_mean:.1f} m/min"

    def _step_part(self, row: Recording, step_idx: int | None, start: int, end: int) -> str | None:
        """Return the step-count fragment for the window, or ``None`` without valid samples."""
        step_mean = self._metric_mean(row, step_idx, start, end)
        if step_mean is None:
            return None
        step_total = self._metric_total(row, step_idx, start, end)
        return f"avg watch steps {step_mean:.1f} steps/min, total watch steps {step_total:.0f}"

    def _metrics_suffix(
        self,
        template_key: str,
        row: Recording,
        start: int,
        end: int,
        hr_idx: int | None,
        distance_idx: int | None,
        step_idx: int | None,
        channel_idxs: list[int],
    ) -> str:
        """Build the metrics text appended to a workout caption.

        Args:
            template_key: Workout kind; steps are only reported for
                ``"running"``, distance totals for running and cycling.
            row: Recording the metrics are read from.
            start: First sample of the window (inclusive).
            end: End of the window (exclusive).
            hr_idx: Heart-rate channel index, or ``None`` if absent.
            distance_idx: Distance channel index, or ``None`` if absent.
            step_idx: Step-count channel index, or ``None`` if absent.
            channel_idxs: Mutated in place: the index of every channel that
                contributed a metric is appended (heart rate, distance, steps).

        Returns:
            ``", "`` followed by the comma-separated fragments, or ``""``
            when no metric has valid samples in the window.
        """
        candidates: list[tuple[int | None, str | None]] = [
            (hr_idx, self._hr_part(row, hr_idx, start, end)),
            (distance_idx, self._distance_part(template_key, row, distance_idx, start, end)),
        ]
        if template_key == "running":
            candidates.append((step_idx, self._step_part(row, step_idx, start, end)))

        parts: list[str] = []
        for idx, part in candidates:
            if idx is not None and part is not None:
                parts.append(part)
                channel_idxs.append(idx)

        if not parts:
            return ""
        return ", " + ", ".join(parts)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: 2026 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md) | ||
| # SPDX-FileCopyrightText: 2026 This source file is part of the SensorTSLM open-source project. | ||
| # | ||
| # SPDX-License-Identifier: MIT | ||
| # | ||
| from __future__ import annotations | ||
|
|
||
| import json | ||
|
|
||
| import numpy as np | ||
|
|
||
| from extractors import CaptionExtractor, ChannelConfig | ||
| from synthesizers import CrossChannelSynthesizer, contiguous_windows | ||
| from timef.schema import Annotation, Recording | ||
|
|
||
|
|
||
class SleepSynthesizer(CrossChannelSynthesizer):
    """Caption windows in which the in-bed channel is active but the asleep channel is not."""

    IN_BED_CHANNEL = "sleep:inbed"
    ASLEEP_CHANNEL = "sleep:asleep"

    def __init__(self, min_duration: int = 5):
        """Create a synthesizer.

        Args:
            min_duration: Minimum number of consecutive samples an
                in-bed-but-awake window needs in order to be captioned.
        """
        self.min_duration = min_duration

    def synthesize(self, row: Recording, config: ChannelConfig) -> list[Annotation]:
        """Return one annotation per in-bed-but-not-asleep window in ``row``.

        Args:
            row: Recording whose ``channel_names`` / ``values`` are scanned.
            config: Supplies ``templates_path`` (JSON file holding the
                ``cross_channel`` -> ``sleep`` templates) and ``time_unit``.

        Returns:
            Annotations ordered by window start. Empty when either sleep
            channel is missing, when the asleep channel is never active, or
            when no window is long enough.
        """
        try:
            in_bed_idx = row.channel_names.index(self.IN_BED_CHANNEL)
            asleep_idx = row.channel_names.index(self.ASLEEP_CHANNEL)
        except ValueError:
            return []

        in_bed = np.asarray(row.values[in_bed_idx], dtype=float)
        asleep = np.asarray(row.values[asleep_idx], dtype=float)
        # A sample is active when it is finite and strictly positive.
        is_asleep = np.isfinite(asleep) & (asleep > 0)
        # Recordings with no asleep sample at all yield no captions.
        if not is_asleep.any():
            return []
        is_in_bed = np.isfinite(in_bed) & (in_bed > 0)
        mask = is_in_bed & ~is_asleep

        time_unit = "hour" if config.time_unit == "hours" else "minute"
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        templates = json.loads(config.templates_path.read_text(encoding="utf-8"))["cross_channel"]["sleep"]
        seed = CaptionExtractor._seed(row.row_id)

        results: list[Annotation] = []
        for i, (start, end) in enumerate(contiguous_windows(mask, self.min_duration)):
            # ``end`` is exclusive; the caption reports the last matching index.
            end_inclusive = max(start, end - 1)
            template = templates[(seed + i) % len(templates)]
            text = template.format(time_unit=time_unit, start=start, end=end_inclusive)
            results.append(
                Annotation(
                    caption_type="cross_channel",
                    text=text,
                    channel_idxs=(asleep_idx, in_bed_idx),
                    window=(start, end),
                    label="in_bed_not_sleeping",
                )
            )
        return results
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think all the helper methods in
`__init__.py` are better suited in a `_helper.py`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
addressed in c2be771