Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ Dataset-agnostic captioning pipeline for sensor time-series data.

## Setup

Install dependencies and set the dataset path before running:

```bash
python3 -m pip install -r requirements.txt
```

```bash
export MHC_DATASET_DIR="../hf-daily_max-nonwear=50"
Expand All @@ -13,5 +17,25 @@ export MHC_DATASET_DIR="../hf-daily_max-nonwear=50"
## Usage

```bash
python3 captionizer.py
```

## Explorer

Use the interactive explorer to inspect one row at a time, switch signals, and see which detector events fired where on the time series.

Start it with:

```bash
python3 explorer.py
```

Useful controls:

- Use the bottom row slider or `<` / `>` buttons to move between dataset rows.
- Click a signal in the right-hand signal list or in the channel overview heatmap to switch channels.
- Use the Matplotlib zoom and pan tools on the main plot to inspect parts of the signal in detail.
- Click `reset` or press `home` to reset the zoom.
- Use the overlay buttons to toggle `trend`, `spike`, `drop`, `gap`, and `nonwear` overlays.
- Use the `stats`, `events`, `captions`, and `help` tabs in the details panel to switch what metadata is shown.
- Scroll inside the details panel with the mouse wheel or the `^` / `v` buttons.
48 changes: 46 additions & 2 deletions detectors/spike.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from __future__ import annotations

import numpy as np
from scipy.signal import find_peaks

from detectors import DetectionResult, StructuralDetector

Expand All @@ -15,8 +16,51 @@ class SpikeDetector(StructuralDetector):
"""Detects spikes and drops.
"""

def __init__(
    self,
    filter_zeros: bool = False,
    prominence_scale: float = 3.0,
    min_prominence: float = 0.0,
    min_distance: int = 1,
) -> None:
    """Configure the spike/drop detector.

    Args:
        filter_zeros: Forwarded to StructuralDetector; presumably drops
            zero-valued samples before detection — confirm in the base class.
        prominence_scale: Multiplier applied to the robust scale estimate
            (MAD-derived sigma) to obtain the peak-prominence threshold.
        min_prominence: Absolute floor for the prominence threshold.
        min_distance: Minimum sample distance between reported peaks;
            clamped to >= 1 because scipy's find_peaks requires distance >= 1.
    """
    super().__init__(filter_zeros=filter_zeros)
    self.prominence_scale = prominence_scale
    self.min_prominence = min_prominence
    self.min_distance = max(1, min_distance)

def _detect(self, series: np.ndarray, indices: np.ndarray) -> list[DetectionResult]:
    """Find spike and drop events using scipy peak-prominence detection.

    Args:
        series: Signal values, aligned one-to-one with ``indices``.
        indices: Minute index of each sample in ``series``.

    Returns:
        DetectionResults for spikes (positive peaks) and drops (negative
        peaks), sorted by minute. Each minute is reported at most once;
        spikes win ties because they are scanned first.
    """
    prominence = self._prominence_threshold(series)
    if prominence <= 0:
        # Flat or near-constant signal: no peak can be prominent.
        return []

    results: list[DetectionResult] = []
    seen_minutes: set[int] = set()

    # Positive peaks -> spikes.
    for peak_idx in find_peaks(series, prominence=prominence, distance=self.min_distance)[0]:
        minute = int(indices[peak_idx])
        if minute in seen_minutes:
            continue
        results.append(DetectionResult(event_type="spike", spike_minute=minute))
        seen_minutes.add(minute)

    # Negating the series turns troughs into peaks -> drops.
    for peak_idx in find_peaks(-series, prominence=prominence, distance=self.min_distance)[0]:
        minute = int(indices[peak_idx])
        if minute in seen_minutes:
            continue
        results.append(DetectionResult(event_type="drop", spike_minute=minute))
        seen_minutes.add(minute)

    results.sort(key=lambda result: int(result.spike_minute))
    return results

def _prominence_threshold(self, series: np.ndarray) -> float:
    """Derive the peak-prominence threshold from the series' robust spread.

    Uses the median absolute deviation when it is positive; otherwise falls
    back to half the 5th-95th percentile spread. Returns 0.0 for signals
    with no usable spread (the caller treats that as "detect nothing").
    """
    deviations = np.abs(series - np.median(series))
    mad = float(np.median(deviations))
    if mad > 0:
        # 1.4826 rescales MAD to the stddev of a normal distribution.
        return max(self.min_prominence, self.prominence_scale * (1.4826 * mad))

    # Degenerate MAD (at least half the samples share one value):
    # fall back to the inter-percentile spread.
    p5, p95 = np.percentile(series, [5, 95])
    spread = float(p95 - p5)
    return 0.0 if spread <= 1e-12 else max(self.min_prominence, 0.5 * spread)
164 changes: 160 additions & 4 deletions detectors/trend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,174 @@
# SPDX-License-Identifier: MIT
#
from __future__ import annotations
from dataclasses import dataclass

import numpy as np

from detectors import DetectionResult, StructuralDetector


@dataclass
class _TrendSegment:
    """Internal record for one trend window, prior to merging."""

    # Either "increasing" or "decreasing".
    direction: str
    # Inclusive minute bounds of the trend (taken from the window's indices).
    start_minute: int
    end_minute: int
    # effect * r2 of the linear fit; higher means a stronger, cleaner trend.
    score: float


class TrendDetector(StructuralDetector):
    """Detects increasing/decreasing trends across multiple relative window sizes."""

    def __init__(
        self,
        filter_zeros: bool = False,
        window_sizes: tuple[int, ...] | None = None,
        window_fracs: tuple[float, ...] = (0.1, 0.25, 1.0),
        min_window: int = 12,
        max_window: int | None = None,
        stride_frac: float = 0.25,
        min_coverage: float = 0.75,
        min_effect: float = 1.25,
        min_r2: float = 0.5,
        min_span: int = 10,
        merge_gap: int = 5,
    ) -> None:
        """Configure the trend detector.

        Args:
            filter_zeros: Forwarded to StructuralDetector; presumably drops
                zero-valued samples before detection — confirm in the base class.
            window_sizes: Explicit window sizes (in samples) to scan, in
                addition to sizes derived from ``window_fracs``.
            window_fracs: Fractions of the series length used to derive
                window sizes; non-positive fractions are ignored.
            min_window: Smallest allowed window, clamped to >= 3 samples
                (below that a linear fit is meaningless).
            max_window: Largest allowed window; defaults to series length.
            stride_frac: Window stride as a fraction of the window size.
            min_coverage: Minimum fraction of minutes within a window's span
                that must have samples for the window to be classified.
            min_effect: Minimum |slope * span| / std(y) for a trend.
            min_r2: Minimum R^2 of the linear fit for a trend.
            min_span: Minimum trend span in minutes (applied per window and
                again after merging).
            merge_gap: Maximum gap (minutes) between same-direction segments
                that are still merged into one event.
        """
        super().__init__(filter_zeros=filter_zeros)
        self.window_sizes = window_sizes
        self.window_fracs = tuple(window_fracs)
        self.min_window = max(3, min_window)
        self.max_window = max_window
        self.stride_frac = stride_frac
        self.min_coverage = min_coverage
        self.min_effect = min_effect
        self.min_r2 = min_r2
        self.min_span = min_span
        self.merge_gap = merge_gap

    def _detect(self, series: np.ndarray, indices: np.ndarray) -> list[DetectionResult]:
        """Slide windows of several sizes over the series and emit merged trends."""
        window_sizes = self._resolve_window_sizes(len(series))
        segments: list[_TrendSegment] = []
        for window_size in window_sizes:
            stride = max(1, int(round(window_size * self.stride_frac)))
            for start_idx in self._window_starts(len(series), window_size, stride):
                end_idx = start_idx + window_size
                segment = self._classify_window(series[start_idx:end_idx], indices[start_idx:end_idx])
                if segment is not None:
                    segments.append(segment)

        merged = self._merge_segments(segments)
        # Re-apply the span filter: merging never shrinks a segment, but
        # individual qualifying windows could still be shorter than min_span
        # if min_span was raised between classification and emission.
        return [
            DetectionResult(
                event_type="trend",
                start_minute=segment.start_minute,
                end_minute=segment.end_minute,
                direction=segment.direction,
            )
            for segment in merged
            if segment.end_minute - segment.start_minute >= self.min_span
        ]

    def _resolve_window_sizes(self, n_samples: int) -> list[int]:
        """Combine explicit and fraction-derived window sizes, clamped to bounds."""
        sizes = set(self.window_sizes or ())
        for frac in self.window_fracs:
            if frac <= 0:
                continue
            sizes.add(int(round(n_samples * frac)))

        resolved: list[int] = []
        max_window = self.max_window or n_samples
        for size in sorted(sizes):
            # Clamp into [min_window, min(max_window, n_samples)]; a size
            # pushed above n_samples by min_window is dropped below.
            size = max(self.min_window, min(size, max_window, n_samples))
            if size <= n_samples:
                resolved.append(size)

        # Clamping can collapse distinct sizes into duplicates.
        return sorted(set(resolved))

    @staticmethod
    def _window_starts(n_samples: int, window_size: int, stride: int) -> list[int]:
        """Start offsets covering the series, always including the last window."""
        if window_size >= n_samples:
            return [0]

        starts = list(range(0, n_samples - window_size + 1, stride))
        last_start = n_samples - window_size
        # Guarantee the tail of the series is covered even when the stride
        # does not land exactly on the final start position.
        if starts[-1] != last_start:
            starts.append(last_start)
        return starts

    def _classify_window(
        self,
        series: np.ndarray,
        indices: np.ndarray,
    ) -> _TrendSegment | None:
        """Fit a line to one window; return a segment if it qualifies as a trend.

        Returns None when the window is too small, too sparse, degenerate
        (no x- or y-variance), or fails the effect-size / R^2 thresholds.
        """
        if len(series) < self.min_window:
            return None

        start_minute = int(indices[0])
        end_minute = int(indices[-1])
        span = end_minute - start_minute
        if span < self.min_span:
            return None

        # Fraction of minutes in [start, end] that actually have samples.
        coverage = len(indices) / (span + 1)
        if coverage < self.min_coverage:
            return None

        x = indices.astype(float)
        y = series.astype(float)
        x_centered = x - np.mean(x)
        y_centered = y - np.mean(y)

        denom = float(np.dot(x_centered, x_centered))
        if denom <= 0:
            # All samples at the same minute: slope undefined.
            return None

        # Ordinary least-squares slope and fitted line.
        slope = float(np.dot(x_centered, y_centered) / denom)
        fitted = np.mean(y) + slope * x_centered

        ss_tot = float(np.dot(y_centered, y_centered))
        if ss_tot <= 1e-12:
            # Constant signal: R^2 undefined.
            return None

        ss_res = float(np.dot(y - fitted, y - fitted))
        r2 = max(0.0, 1.0 - ss_res / ss_tot)
        delta = slope * span
        scale = float(np.std(y))
        if scale <= 1e-12:
            return None

        # Effect size: total predicted change relative to signal variability.
        effect = abs(delta) / scale
        if effect < self.min_effect or r2 < self.min_r2:
            return None

        direction = "increasing" if slope > 0 else "decreasing"
        return _TrendSegment(
            direction=direction,
            start_minute=start_minute,
            end_minute=end_minute,
            score=effect * r2,
        )

    def _merge_segments(self, segments: list[_TrendSegment]) -> list[_TrendSegment]:
        """Merge overlapping/nearby same-direction segments (mutates input order)."""
        if not segments:
            return []

        segments.sort(key=lambda s: (s.start_minute, s.end_minute, -s.score))
        merged: list[_TrendSegment] = []

        for segment in segments:
            if not merged:
                merged.append(segment)
                continue

            prev = merged[-1]
            # Same direction and within merge_gap minutes of the previous
            # segment's end: absorb into the previous segment.
            if (
                segment.direction == prev.direction
                and segment.start_minute <= prev.end_minute + self.merge_gap
            ):
                prev.end_minute = max(prev.end_minute, segment.end_minute)
                prev.score = max(prev.score, segment.score)
                continue

            merged.append(segment)

        return merged
Loading
Loading