diff --git a/docs/assets/SNMOT-118-single-track-xyxy-vs-xcycsr.mp4 b/docs/assets/SNMOT-118-single-track-xyxy-vs-xcycsr.mp4 new file mode 100644 index 00000000..143de170 Binary files /dev/null and b/docs/assets/SNMOT-118-single-track-xyxy-vs-xcycsr.mp4 differ diff --git a/docs/assets/iou_vs_BIoU_v_9MHDmAMxO5I_c004.mp4 b/docs/assets/iou_vs_BIoU_v_9MHDmAMxO5I_c004.mp4 new file mode 100644 index 00000000..de18a871 Binary files /dev/null and b/docs/assets/iou_vs_BIoU_v_9MHDmAMxO5I_c004.mp4 differ diff --git a/docs/assets/iou_vs_CIoU_v_0kUtTtmLaJA_c006.mp4 b/docs/assets/iou_vs_CIoU_v_0kUtTtmLaJA_c006.mp4 new file mode 100644 index 00000000..e31ece01 Binary files /dev/null and b/docs/assets/iou_vs_CIoU_v_0kUtTtmLaJA_c006.mp4 differ diff --git a/docs/assets/iou_vs_DIoU_v_0kUtTtmLaJA_c006.mp4 b/docs/assets/iou_vs_DIoU_v_0kUtTtmLaJA_c006.mp4 new file mode 100644 index 00000000..495756c4 Binary files /dev/null and b/docs/assets/iou_vs_DIoU_v_0kUtTtmLaJA_c006.mp4 differ diff --git a/docs/assets/iou_vs_GIoU_v_0kUtTtmLaJA_c006.mp4 b/docs/assets/iou_vs_GIoU_v_0kUtTtmLaJA_c006.mp4 new file mode 100644 index 00000000..26f1d64f Binary files /dev/null and b/docs/assets/iou_vs_GIoU_v_0kUtTtmLaJA_c006.mp4 differ diff --git a/docs/index.md b/docs/index.md index edf7cdec..f82f128a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -171,4 +171,14 @@ Try trackers in your browser with our [Hugging Face Playground](https://huggingf [:simple-googlecolab: Run Google Colab](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-track-objects-with-bytetrack-tracker.ipynb) +- **How to Track Objects with OC-SORT** + + --- + + [![](url-to-image)](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-track-objects-with-ocsort-tracker.ipynb) + + End-to-end example showing how to run RF-DETR detection with the OC-SORT tracker. 
+ + [:simple-googlecolab: Run Google Colab](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-track-objects-with-ocsort-tracker.ipynb) + diff --git a/docs/javascripts/mathjax.js b/docs/javascripts/mathjax.js new file mode 100644 index 00000000..be1e8a19 --- /dev/null +++ b/docs/javascripts/mathjax.js @@ -0,0 +1,12 @@ +window.MathJax = { + tex: { + inlineMath: [["\\(", "\\)"], ["$", "$"]], + displayMath: [["\\[", "\\]"], ["$$", "$$"]], + processEscapes: true, + processEnvironments: true, + }, + options: { + ignoreHtmlClass: "^((?!arithmatex).)*$", + processHtmlClass: "arithmatex", + }, +}; diff --git a/docs/learn/iou.md b/docs/learn/iou.md new file mode 100644 index 00000000..5168a268 --- /dev/null +++ b/docs/learn/iou.md @@ -0,0 +1,251 @@ +# IoU variants + +IoU variants are pluggable similarity metrics used during detection-to-track +association. You pass one of these classes to a tracker via the `iou=` argument. + +## Quick Start + +```python +from trackers import SORTTracker +from trackers.utils.iou import IoU + +tracker = SORTTracker( + iou=IoU(), + minimum_iou_threshold=0.3, +) +``` + +## Overview + +| Variant | Score range | When to use | +| :------ | :---------- | :---------- | +| `IoU` | `[0, 1]` | Default — strong baseline for most scenes | +| `GIoU` | `[-1, 1]` | Scenes where boxes frequently lose overlap (occlusion, re-entry) | +| `DIoU` | `[-1, 1]` | Fast-moving objects; centre-distance signal without aspect sensitivity | +| `CIoU` | `(−∞, 1]` | Same as DIoU plus aspect-ratio consistency | +| `BIoU` | `[0, 1]` | Very small or very fast objects where raw boxes rarely overlap | + +Negative thresholds are meaningful for `GIoU`, `DIoU`, and `CIoU` because they extend their range to give a signal even when there is no pixel overlap. For `IoU` and `BIoU` thresholds +must be non-negative. + +--- + +## IoU + +**Standard Intersection over Union** — the classic baseline. 
+ +\[ +\mathrm{IoU}(A, B) = \frac{|A \cap B|}{|A \cup B|} +\] + +Scores are `0` (no overlap) to `1` (perfect overlap). Because it returns `0` whenever +boxes do not intersect, the tracker gets no gradient to recover a lost track; a +variant from the list below can help in those cases. + +```python +from trackers import SORTTracker +from trackers.utils.iou import IoU + +tracker = SORTTracker(iou=IoU(), minimum_iou_threshold=0.3) +``` + +--- + +## GIoU + +**Generalised IoU** (Rezatofighi et al., 2019) — penalises the gap inside the +smallest enclosing box `C` that neither `A` nor `B` fills. + +\[ +\mathrm{GIoU}(A, B) = \mathrm{IoU} - \frac{|C \setminus (A \cup B)|}{|C|} +\] + +When boxes do not overlap at all, IoU is flat at `0`, but the penalty term still +changes as boxes move closer or farther apart — giving the tracker a meaningful +signal to bridge short gaps. + +```python +from trackers import OCSORTTracker +from trackers.utils.iou import GIoU + +# Negative thresholds are valid and often optimal for GIoU +tracker = OCSORTTracker(iou=GIoU(), minimum_iou_threshold=-0.3) +``` + +**Example — SportsMOT `v_0kUtTtmLaJA_c006`** + +| | HOTA (%) | Δ (pts) | +| :- | ------: | ------: | +| Best IoU | 73.07 | — | +| Best GIoU | 89.31 | **+16.24** | + + +Left: IoU. Right: GIoU. Camera movements confuses IoU by introducing an external movement, producing ID switches when this lands in other object. GIoU gives a partial solution to this by considering how similar the boxes are, which keeps most of the tracks that with IoU are confused or lost due direction changes and non linear motion. E.g: tracks 5, 12/13. + + + + +--- + +## DIoU + +**Distance IoU** (Zheng et al., 2019) — adds a centre-distance penalty to IoU, +normalised by the enclosing box diagonal. + +\[ +\mathrm{DIoU}(A, B) = \mathrm{IoU} - \frac{d^2}{c^2 + \epsilon} +\] + +where `d` is the Euclidean distance between box centres and `c` is the diagonal of +the smallest enclosing rectangle. 
This encourages centre alignment independently of +aspect ratio and tends to produce smoother associations in fast-motion sequences. + +```python +from trackers import OCSORTTracker +from trackers.utils.iou import DIoU + +tracker = OCSORTTracker(iou=DIoU(), minimum_iou_threshold=-0.3) +``` + +**Example — SportsMOT `v_0kUtTtmLaJA_c006`** + +| | HOTA (%) | Δ (pts) | +| :- | ------: | ------: | +| Best IoU | 73.07 | — | +| Best DIoU | 86.53 | **+13.46** | + +Left: IoU. Right: DIoU. Camera movements confuses IoU by introducing an external movement, producing ID switches when this prediction runs into another object. Watch how the centre-distance term keeps track IDs stable +when camera accelerates quickly, so that objects geometrically close would lose the track with IoU. E.g: tracks 3, 4 and 5. + + + +--- + +## CIoU + +**Complete IoU** (Zheng et al., 2019) — extends DIoU with a penalty for aspect-ratio +mismatch between the two boxes. + +\[ +\mathrm{CIoU}(A, B) = \mathrm{DIoU} - \alpha v +\] + +\[ +v = \frac{4}{\pi^2}\!\left(\arctan\frac{w_A}{h_A} - \arctan\frac{w_B}{h_B}\right)^2, \quad +\alpha = \frac{v}{1 - \mathrm{IoU} + v + \epsilon} +\] + +`v` measures aspect-ratio divergence; `α` scales it so the penalty is low when IoU +is already high. On tracking benchmarks CIoU and DIoU behave nearly identically — +the aspect term rarely changes which assignment wins. + +```python +from trackers import OCSORTTracker +from trackers.utils.iou import CIoU + +tracker = OCSORTTracker(iou=CIoU(), minimum_iou_threshold=-0.3) +``` + +**Example — SportsMOT `v_0kUtTtmLaJA_c006`** + +| | HOTA (%) | Δ (pts) | +| :- | ------: | ------: | +| Best IoU | 73.07 | — | +| Best CIoU | 86.53 | **+13.46** | + +Left: IoU. Right: CIoU. The gain here mirrors DIoU. The aspect-ratio term adds +a small tweak for boxes that differ in shape. + + + +--- + +## BIoU + +**Buffered IoU** (Yang et al., 2022) — dilates each box by a relative margin `r` +before computing standard IoU. 
Let `w = x2 − x1`, `h = y2 − y1`: + +\[ +A^r = (x_1 - rw,\; y_1 - rh,\; x_2 + rw,\; y_2 + rh) +\] + +\[ +\mathrm{BIoU}_r(A, B) = \mathrm{IoU}(A^r, B^r) +\] + +`r = 0` recovers plain IoU exactly. Enlarging boxes creates artificial overlap for +objects that are geometrically close, which is useful when detections are very small +or objects move fast enough so that consecutive boxes miss each other entirely. + +```python +from trackers import SORTTracker +from trackers.utils.iou import BIoU + +tracker = SORTTracker(iou=BIoU(buffer_ratio=0.15), minimum_iou_threshold=0.3) +``` + +**Example — SportsMOT `v_9MHDmAMxO5I_c004`** + +| | HOTA (%) | Δ (pts) | +| :- | ------: | ------: | +| Best IoU | 80.54 | — | +| Best BIoU | 88.00 | **+7.46** | + +Left: IoU. Right: BIoU. Notice how ID switches happen when fast players +temporarily produce non-overlapping boxes between frames. The buffer closes +that gap and keeps the same ID. E.g: tracks 7 and 8. + + + +--- + +## Empirical HOTA deltas + +The following numbers come from running **OC-SORT** on **MOT17 train (FRCNN)** and +**SportsMOT val**: for each sequence the best HOTA over the IoU threshold grid is +taken separately for each variant, and **Δ = HOTA(variant) − HOTA(IoU)**. On +SportsMOT detections are derived from GT boxes (oracle feed); on MOT17 the FRCNN +public detections are used. + +HOTA is shown as a **percentage** (0–100 scale); **Δ** is percentage points. 
+ +| Dataset | Sequences | GIoU mean Δ | DIoU mean Δ | CIoU mean Δ | BIoU mean Δ | +| :------ | --------: | ----------: | ----------: | ----------: | ----------: | +| MOT17 train (FRCNN) | 7 | +0.35 | −0.10 | −0.10 | +0.66 | +| SportsMOT val | 45 | +1.15 | +0.89 | +0.89 | +0.74 | + +--- + +## API Reference + +### BaseIoU + +::: trackers.utils.iou.BaseIoU + +### IoU + +::: trackers.utils.iou.IoU + +### GIoU + +::: trackers.utils.iou.GIoU + +### DIoU + +::: trackers.utils.iou.DIoU + +### CIoU + +::: trackers.utils.iou.CIoU + +### BIoU + +::: trackers.utils.iou.BIoU diff --git a/docs/learn/state-estimators.md b/docs/learn/state-estimators.md new file mode 100644 index 00000000..92d120aa --- /dev/null +++ b/docs/learn/state-estimators.md @@ -0,0 +1,255 @@ +# State Estimators + +Every tracker in `trackers` uses a Kalman filter to predict where objects will appear in the next frame. The **state estimator** controls how bounding boxes are represented inside that filter. Different representations make different assumptions about object motion, and picking the right one can improve tracking quality without changing anything else. + +**What you'll learn:** + +- What state estimators are and why they matter +- How `XYXYStateEstimator` and `XCYCSRStateEstimator` represent bounding boxes +- When to use each representation +- How to swap the state estimator in any tracker + +--- + +## Install + +Get started by installing the package. + +```text +pip install trackers +``` + +For more options, see the [install guide](install.md). + +--- + +## What Is a State Estimator? + +A state estimator wraps a Kalman filter and defines how bounding boxes are encoded into the filter's state vector. The Kalman filter then predicts the next position of each tracked object and corrects that prediction when a new detection arrives. 
+ +Two representations are available: + +| Estimator | State Dimensions | Representation | Aspect Ratio | +| :--------------------: | :--------------: | :---------------------------------------------------- | :-----------: | +| `XYXYStateEstimator` | 8 | Top-left and bottom-right corners + their velocities | Can change | +| `XCYCSRStateEstimator` | 7 | Center point, area, their velocities and aspect ratio | Held constant | + +They accept `[x1, y1, x2, y2]` bounding boxes on input and produce `[x1, y1, x2, y2]` bounding boxes on output. The difference is entirely in how the filter models motion internally. + +--- + +## XYXY — Corner-Based + +`XYXYStateEstimator` tracks the four corner coordinates independently. Each corner gets its own velocity term, giving the filter 8 state variables: + +``` +State: [x1, y1, x2, y2, vx1, vy1, vx2, vy2] +Measure: [x1, y1, x2, y2] +``` + +The transition matrix $F$ defines how the state evolves from one frame to the next. + +State order: $[x_1, y_1, x_2, y_2, v_{x_1}, v_{y_1}, v_{x_2}, v_{y_2}]$ + +$$ +F = +\begin{bmatrix} +1 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ +0 & 1 & 0 & 0 & 0 & 1 & 0 & 0 \\ +0 & 0 & 1 & 0 & 0 & 0 & 1 & 0 \\ +0 & 0 & 0 & 1 & 0 & 0 & 0 & 1 \\ +0 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\ +0 & 0 & 0 & 0 & 0 & 1 & 0 & 0 \\ +0 & 0 & 0 & 0 & 0 & 0 & 1 & 0 \\ +0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 +\end{bmatrix} +$$ + +Equivalent update equations: + +```text +x1' = x1 + vx1 +y1' = y1 + vy1 +x2' = x2 + vx2 +y2' = y2 + vy2 +vx1' = vx1 +vy1' = vy1 +vx2' = vx2 +vy2' = vy2 +``` + +| Row | Meaning | +| :-- | :------------------------------------------------------- | +| 1-4 | Each corner coordinate is updated by adding its velocity | +| 5-8 | Velocities persist unchanged from frame to frame | + +Because each corner moves freely, the box width and height can change between frames. This makes XYXY a natural fit when objects change shape — due to camera perspective, non-rigid motion, or inconsistent detections. 
+ +**In Trackers, this is the default** for `ByteTrackTracker` and `SORTTracker`. + +--- + +## XCYCSR — Center-Based + +`XCYCSRStateEstimator` tracks the box center, area (scale), and aspect ratio. Only the center and scale get velocity terms; aspect ratio is treated as constant. This gives 7 state variables: + +``` +State: [x_center, y_center, scale, aspect_ratio, vx, vy, vs] +Measure: [x_center, y_center, scale, aspect_ratio] +``` + +The transition matrix $F$ shows the key difference: the aspect ratio is propagated without a velocity term. + +State order: $[x_c, y_c, s, r, v_x, v_y, v_s]$ + +$$ +F = +\begin{bmatrix} +1 & 0 & 0 & 0 & 1 & 0 & 0 \\ +0 & 1 & 0 & 0 & 0 & 1 & 0 \\ +0 & 0 & 1 & 0 & 0 & 0 & 1 \\ +0 & 0 & 0 & 1 & 0 & 0 & 0 \\ +0 & 0 & 0 & 0 & 1 & 0 & 0 \\ +0 & 0 & 0 & 0 & 0 & 1 & 0 \\ +0 & 0 & 0 & 0 & 0 & 0 & 1 +\end{bmatrix} +$$ + +Equivalent update equations: + +```text +x_center' = x_center + vx +y_center' = y_center + vy +scale' = scale + vs +aspect_ratio' = aspect_ratio +vx' = vx +vy' = vy +vs' = vs +``` + +| Row | Meaning | +| :-- | :-------------------------------------------------------- | +| 1-3 | Center position and scale follow constant-velocity motion | +| 4 | Aspect ratio is copied forward unchanged | +| 5-7 | Velocities persist unchanged from frame to frame | + +The aspect ratio `r = w / h` is carried forward unchanged. This acts as a regularizer — the filter resists sudden shape changes. It works well for rigid objects whose proportions stay consistent, like pedestrians walking or cars on a highway. + +**This is the default** for `OCSORTTracker`, matching the original OC-SORT paper. 
+
+---
+
+## When to Use Each
+
+| Scenario | Recommended | Why |
+| :------------------------------------------- | :--------------------: | :--------------------------------------------------------- |
+| Pedestrians, vehicles, rigid objects | `XCYCSRStateEstimator` | Constant aspect ratio stabilizes predictions |
+| Non-rigid or deformable objects | `XYXYStateEstimator` | Corners move independently to track shape changes |
+| Noisy detections with fluctuating box sizes | `XCYCSRStateEstimator` | Aspect ratio constraint absorbs size noise |
+| Strong perspective changes (camera pan/zoom) | `XYXYStateEstimator` | Box proportions shift with viewpoint; corners adapt freely |
+| Default choice when unsure | `XYXYStateEstimator` | More general, fewer assumptions |
+
+We can also benchmark the trackers using the different state estimators, and we find:
+
+- In **DanceTrack**, with default parameters all trackers perform better with XYXYStateEstimator, but with tuned parameters the SORT tracker with XCYCSRStateEstimator gains +0.8% HOTA.
+- In the **SoccerNet** dataset, with default parameters the SORT tracker with XYXYStateEstimator has ~5% more HOTA than with XCYCSR; when tuning parameters with grid search, this difference is reduced to 2%. For the other trackers we don't find a significant advantage in using a different state estimator — at most 0.2% better HOTA.
+- In **SportsMOT**, for OC-SORT and ByteTrack the state estimator doesn't affect performance, while for SORT, XYXYStateEstimator gives a small advantage of ~2% HOTA with default parameters and 0.4% when both are tuned.
+- In **MOT17**, with default parameters XYXYStateEstimator performs slightly better than XCYCSRStateEstimator with SORT and ByteTrack (up to 0.7% better results), but for OC-SORT, XCYCSRStateEstimator gives 0.2% better HOTA. When tuning parameters, XCYCSRStateEstimator performs best with all trackers by a small margin, in the 0.2-0.4% HOTA range.
+
+But let's visualize where these differences are. Here is an example where using the XCYCSR state estimator associates an occluded track correctly, while using XYXY changes the ID:
+
+ +
+ +--- + +## Swapping the Estimator + +All trackers accept a `state_estimator_class` parameter. Import the class you want and pass it to the constructor. + +=== "ByteTrack with XCYCSR" + + ```python + from trackers import ByteTrackTracker + from trackers.utils.state_representations import XCYCSRStateEstimator + + tracker = ByteTrackTracker( + state_estimator_class=XCYCSRStateEstimator, + ) + ``` + +=== "OC-SORT with XYXY" + + ```python + from trackers import OCSORTTracker + from trackers.utils.state_representations import XYXYStateEstimator + + tracker = OCSORTTracker( + state_estimator_class=XYXYStateEstimator, + ) + ``` + +=== "SORT with XCYCSR" + + ```python + from trackers import SORTTracker + from trackers.utils.state_representations import XCYCSRStateEstimator + + tracker = SORTTracker( + state_estimator_class=XCYCSRStateEstimator, + ) + ``` + +Everything else stays the same — detection, association, and visualization work identically regardless of which estimator you choose. + +--- + +## Full Example + +Run ByteTrack with both estimators on the same video and compare the results side by side. + +```python +import cv2 + +import supervision as sv +from inference import get_model +from trackers import ByteTrackTracker +from trackers.utils.state_representations import ( + XCYCSRStateEstimator, + XYXYStateEstimator, +) + +model = get_model("rfdetr-nano") + +tracker_xyxy = ByteTrackTracker( + state_estimator_class=XYXYStateEstimator, +) +tracker_xcycsr = ByteTrackTracker( + state_estimator_class=XCYCSRStateEstimator, +) + +cap = cv2.VideoCapture("source.mp4") +while True: + ret, frame = cap.read() + if not ret: + break + + result = model.infer(frame)[0] + detections = sv.Detections.from_inference(result) + + tracked_xyxy = tracker_xyxy.update(detections) + tracked_xcycsr = tracker_xcycsr.update(detections) + + # Compare tracker_id assignments, box smoothness, etc. 
+ print(f"XYXY IDs: {tracked_xyxy.tracker_id}") + print(f"XCYCSR IDs: {tracked_xcycsr.tracker_id}") +``` + +--- + +## Takeaway + +The state estimator is a single-line change that controls how the Kalman filter models bounding box motion. Use `XCYCSRStateEstimator` when objects keep a consistent shape, and `XYXYStateEstimator` when shape varies or you want fewer assumptions. Try it on your case, the best choice depends on the scene. diff --git a/docs/trackers/comparison.md b/docs/trackers/comparison.md index f877f68b..b0057efe 100644 --- a/docs/trackers/comparison.md +++ b/docs/trackers/comparison.md @@ -81,7 +81,7 @@ Sports broadcast tracking with fast motion, camera pans, and similar-looking tar | Tracker | HOTA | IDF1 | MOTA | | :-------: | :------: | :------: | :------: | - | SORT | 70.9 | 68.9 | 95.7 | + | SORT | 70.8 | 68.9 | 95.5 | | ByteTrack | **73.0** | **72.5** | **96.4** | | OC-SORT | 71.7 | 71.4 | 95.0 | @@ -152,7 +152,7 @@ Long sequences with dense interactions and partial occlusions. Tests long-term I | Tracker | HOTA | IDF1 | MOTA | | :-------: | :------: | :------: | :------: | | SORT | **84.2** | **78.2** | **98.2** | - | ByteTrack | 84.0 | 78.1 | 97.8 | + | ByteTrack | 84.0 | 78.1 | **98.2** | | OC-SORT | 82.9 | 77.9 | 96.8 | Tuned configuration for each tracker. @@ -166,9 +166,9 @@ Long sequences with dense interactions and partial occlusions. 
Tests long-term I ByteTrack: lost_track_buffer: 30 - track_activation_threshold: 0.5 - minimum_consecutive_frames: 2 - minimum_iou_threshold: 0.1 + track_activation_threshold: 0.2 + minimum_consecutive_frames: 1 + minimum_iou_threshold: 0.05 high_conf_det_threshold: 0.5 OC-SORT: diff --git a/mkdocs.yml b/mkdocs.yml index dfa759b9..76f883ab 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -54,6 +54,8 @@ extra_css: extra_javascript: - javascripts/pycon_copy.js + - javascripts/mathjax.js + - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js - javascripts/cli_builder_framework.js - javascripts/command_builder.js @@ -75,6 +77,9 @@ markdown_extensions: pygments_lang_class: true # Enables inline code highlighting - pymdownx.inlinehilite + # Enables LaTeX-style math in Markdown + - pymdownx.arithmatex: + generic: true # Allows including content from other files - pymdownx.snippets # Enables nested code blocks and custom fences @@ -120,6 +125,8 @@ nav: - Download Datasets: learn/download.md - Evaluate Trackers: learn/evaluate.md - Detection Quality Matters: learn/detection-quality.md + - State Estimators: learn/state-estimators.md + - IoU Variants: learn/iou.md - Trackers: - Comparison: trackers/comparison.md - SORT: trackers/sort.md diff --git a/pyproject.toml b/pyproject.toml index c662c6ee..a31bb421 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,8 @@ dev = [ "uv>=0.4.20", "pytest>=8.3.3", "pre-commit>=4.2.0", + "torch", + "torchvision", ] docs = [ "mkdocs>=1.6.1", diff --git a/test/utils/test_iou.py b/test/utils/test_iou.py new file mode 100644 index 00000000..1af97d2b --- /dev/null +++ b/test/utils/test_iou.py @@ -0,0 +1,547 @@ +# ------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import torchvision + +from trackers.utils.iou import BIoU, CIoU, DIoU, GIoU, IoU + + +def _torchvision_giou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Reference GIoU from torchvision.""" + t1 = torch.tensor(boxes_1, dtype=torch.float64) + t2 = torch.tensor(boxes_2, dtype=torch.float64) + return torchvision.ops.generalized_box_iou(t1, t2).numpy() + + +def _torchvision_diou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Reference DIoU from torchvision.""" + t1 = torch.tensor(boxes_1, dtype=torch.float64) + t2 = torch.tensor(boxes_2, dtype=torch.float64) + return torchvision.ops.distance_box_iou(t1, t2).numpy() + + +def _torchvision_ciou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Reference CIoU from torchvision.""" + t1 = torch.tensor(boxes_1, dtype=torch.float64) + t2 = torch.tensor(boxes_2, dtype=torch.float64) + return torchvision.ops.complete_box_iou(t1, t2).numpy() + + +_iou = IoU() +_biou = BIoU() +_giou = GIoU() +_diou = DIoU() +_ciou = CIoU() + + +class TestGIoUAgainstTorchvision: + """Compare our GIoU against torchvision.ops.generalized_box_iou.""" + + def test_identical_boxes(self) -> None: + boxes = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 40.0, 50.0]]) + result = _giou.compute(boxes, boxes) + expected = _torchvision_giou(boxes, boxes) + np.testing.assert_allclose(result, expected, atol=1e-6) + np.testing.assert_allclose(np.diag(result), 1.0, atol=1e-6) + + def test_partial_overlap(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def 
test_no_overlap_nearby(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[12.0, 0.0, 22.0, 10.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < 0, "GIoU should be negative for non-overlapping boxes" + + def test_no_overlap_far_apart(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 1.0, 1.0]]) + boxes_2 = np.array([[100.0, 100.0, 101.0, 101.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < -0.5, "GIoU should be very negative for distant boxes" + + def test_one_box_enclosing_other(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 100.0, 100.0]]) + boxes_2 = np.array([[25.0, 25.0, 75.0, 75.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_touching_boxes(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[10.0, 0.0, 20.0, 10.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_batch_n_by_m(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [20.0, 20.0, 30.0, 30.0], + [50.0, 50.0, 80.0, 80.0], + ] + ) + boxes_2 = np.array( + [ + [5.0, 5.0, 15.0, 15.0], + [100.0, 100.0, 110.0, 110.0], + ] + ) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + assert result.shape == (3, 2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_negative_coordinates(self) -> None: + boxes_1 = np.array([[-10.0, -10.0, 5.0, 5.0]]) + boxes_2 = np.array([[-3.0, -3.0, 12.0, 12.0]]) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + 
np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_various_aspect_ratios(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 100.0, 10.0], # wide + [0.0, 0.0, 10.0, 100.0], # tall + [0.0, 0.0, 50.0, 50.0], # square + ] + ) + boxes_2 = np.array( + [ + [10.0, 0.0, 60.0, 8.0], # wide, offset + [2.0, 10.0, 12.0, 80.0], # tall, offset + ] + ) + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_large_random_batch(self) -> None: + rng = np.random.default_rng(42) + xy = rng.uniform(0, 500, size=(50, 2)) + wh = rng.uniform(5, 100, size=(50, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(30, 2)) + wh2 = rng.uniform(5, 100, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _giou.compute(boxes_1, boxes_2) + expected = _torchvision_giou(boxes_1, boxes_2) + assert result.shape == (50, 30) + np.testing.assert_allclose(result, expected, atol=1e-6) + + +class TestBIoUProperties: + """Verify behavior of Buffered IoU.""" + + def test_buffer_zero_matches_iou(self) -> None: + boxes_1 = np.array( + [[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 35.0, 40.0]], dtype=np.float64 + ) + boxes_2 = np.array( + [[5.0, 5.0, 15.0, 15.0], [50.0, 50.0, 60.0, 60.0]], dtype=np.float64 + ) + biou0 = BIoU(buffer_ratio=0.0).compute(boxes_1, boxes_2) + iou = _iou.compute(boxes_1, boxes_2).astype(np.float64) + np.testing.assert_allclose(biou0, iou, atol=1e-10) + + def test_nearby_non_overlap_gets_positive_signal(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[11.0, 0.0, 21.0, 10.0]]) + iou = _iou.compute(boxes_1, boxes_2)[0, 0] + biou = BIoU(buffer_ratio=0.1).compute(boxes_1, boxes_2)[0, 0] + assert iou == 0.0 + assert biou > 0.0 + + def test_invalid_negative_buffer_ratio(self) -> None: + with pytest.raises(ValueError, match="buffer_ratio must be non-negative"): + BIoU(buffer_ratio=-0.01) + + +class 
TestDIoUAgainstTorchvision: + """Compare our DIoU against torchvision.ops.distance_box_iou.""" + + def test_identical_boxes(self) -> None: + boxes = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 40.0, 50.0]]) + result = _diou.compute(boxes, boxes) + expected = _torchvision_diou(boxes, boxes) + np.testing.assert_allclose(result, expected, atol=1e-6) + np.testing.assert_allclose(np.diag(result), 1.0, atol=1e-6) + + def test_partial_overlap(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_no_overlap_nearby(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[12.0, 0.0, 22.0, 10.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < 0, "DIoU should be negative for this non-overlap" + + def test_no_overlap_far_apart(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 1.0, 1.0]]) + boxes_2 = np.array([[100.0, 100.0, 101.0, 101.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < -0.5, "DIoU should be very negative for distant boxes" + + def test_one_box_enclosing_other(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 100.0, 100.0]]) + boxes_2 = np.array([[25.0, 25.0, 75.0, 75.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_touching_boxes(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[10.0, 0.0, 20.0, 10.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + 
np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_batch_n_by_m(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [20.0, 20.0, 30.0, 30.0], + [50.0, 50.0, 80.0, 80.0], + ] + ) + boxes_2 = np.array( + [ + [5.0, 5.0, 15.0, 15.0], + [100.0, 100.0, 110.0, 110.0], + ] + ) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + assert result.shape == (3, 2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_negative_coordinates(self) -> None: + boxes_1 = np.array([[-10.0, -10.0, 5.0, 5.0]]) + boxes_2 = np.array([[-3.0, -3.0, 12.0, 12.0]]) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_various_aspect_ratios(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 100.0, 10.0], # wide + [0.0, 0.0, 10.0, 100.0], # tall + [0.0, 0.0, 50.0, 50.0], # square + ] + ) + boxes_2 = np.array( + [ + [10.0, 0.0, 60.0, 8.0], # wide, offset + [2.0, 10.0, 12.0, 80.0], # tall, offset + ] + ) + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_large_random_batch(self) -> None: + rng = np.random.default_rng(42) + xy = rng.uniform(0, 500, size=(50, 2)) + wh = rng.uniform(5, 100, size=(50, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(30, 2)) + wh2 = rng.uniform(5, 100, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _diou.compute(boxes_1, boxes_2) + expected = _torchvision_diou(boxes_1, boxes_2) + assert result.shape == (50, 30) + np.testing.assert_allclose(result, expected, atol=1e-6) + + +class TestCIoUAgainstTorchvision: + """Compare our CIoU against torchvision.ops.complete_box_iou.""" + + def test_identical_boxes(self) -> None: + boxes = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 40.0, 50.0]]) + result = 
_ciou.compute(boxes, boxes) + expected = _torchvision_ciou(boxes, boxes) + np.testing.assert_allclose(result, expected, atol=1e-6) + np.testing.assert_allclose(np.diag(result), 1.0, atol=1e-6) + + def test_partial_overlap(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_no_overlap_nearby(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[12.0, 0.0, 22.0, 10.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < 0, "CIoU should be negative for this non-overlap" + + def test_no_overlap_far_apart(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 1.0, 1.0]]) + boxes_2 = np.array([[100.0, 100.0, 101.0, 101.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + assert result[0, 0] < -0.5, "CIoU should be very negative for distant boxes" + + def test_one_box_enclosing_other(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 100.0, 100.0]]) + boxes_2 = np.array([[25.0, 25.0, 75.0, 75.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_touching_boxes(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0]]) + boxes_2 = np.array([[10.0, 0.0, 20.0, 10.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_batch_n_by_m(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [20.0, 20.0, 30.0, 30.0], + [50.0, 50.0, 80.0, 80.0], + ] + ) + boxes_2 = np.array( + [ + 
[5.0, 5.0, 15.0, 15.0], + [100.0, 100.0, 110.0, 110.0], + ] + ) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + assert result.shape == (3, 2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_negative_coordinates(self) -> None: + boxes_1 = np.array([[-10.0, -10.0, 5.0, 5.0]]) + boxes_2 = np.array([[-3.0, -3.0, 12.0, 12.0]]) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_various_aspect_ratios(self) -> None: + boxes_1 = np.array( + [ + [0.0, 0.0, 100.0, 10.0], # wide + [0.0, 0.0, 10.0, 100.0], # tall + [0.0, 0.0, 50.0, 50.0], # square + ] + ) + boxes_2 = np.array( + [ + [10.0, 0.0, 60.0, 8.0], # wide, offset + [2.0, 10.0, 12.0, 80.0], # tall, offset + ] + ) + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + np.testing.assert_allclose(result, expected, atol=1e-6) + + def test_large_random_batch(self) -> None: + rng = np.random.default_rng(42) + xy = rng.uniform(0, 500, size=(50, 2)) + wh = rng.uniform(5, 100, size=(50, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(30, 2)) + wh2 = rng.uniform(5, 100, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _ciou.compute(boxes_1, boxes_2) + expected = _torchvision_ciou(boxes_1, boxes_2) + assert result.shape == (50, 30) + np.testing.assert_allclose(result, expected, atol=1e-6) + + +class TestGIoUProperties: + """Verify mathematical properties of GIoU.""" + + def test_range_is_minus_one_to_one(self) -> None: + rng = np.random.default_rng(99) + xy = rng.uniform(0, 500, size=(100, 2)) + wh = rng.uniform(1, 200, size=(100, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(80, 2)) + wh2 = rng.uniform(1, 200, size=(80, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _giou.compute(boxes_1, boxes_2) + assert np.all(result >= -1.0 
- 1e-9) + assert np.all(result <= 1.0 + 1e-9) + + def test_symmetry(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 35.0, 40.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0], [50.0, 50.0, 60.0, 60.0]]) + result_ab = _giou.compute(boxes_1, boxes_2) + result_ba = _giou.compute(boxes_2, boxes_1) + np.testing.assert_allclose(result_ab, result_ba.T, atol=1e-10) + + def test_giou_leq_iou(self) -> None: + """GIoU <= IoU always holds.""" + rng = np.random.default_rng(7) + xy = rng.uniform(0, 100, size=(40, 2)) + wh = rng.uniform(5, 50, size=(40, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 100, size=(30, 2)) + wh2 = rng.uniform(5, 50, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + iou_result = _iou.compute(boxes_1, boxes_2).astype(np.float64) + giou_result = _giou.compute(boxes_1, boxes_2) + assert np.all(giou_result <= iou_result + 1e-6) + + +class TestDIoUProperties: + """Verify mathematical properties of DIoU.""" + + def test_range_is_minus_one_to_one(self) -> None: + rng = np.random.default_rng(101) + xy = rng.uniform(0, 500, size=(100, 2)) + wh = rng.uniform(1, 200, size=(100, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(80, 2)) + wh2 = rng.uniform(1, 200, size=(80, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _diou.compute(boxes_1, boxes_2) + assert np.all(result >= -1.0 - 1e-9) + assert np.all(result <= 1.0 + 1e-9) + + def test_symmetry(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 35.0, 40.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0], [50.0, 50.0, 60.0, 60.0]]) + result_ab = _diou.compute(boxes_1, boxes_2) + result_ba = _diou.compute(boxes_2, boxes_1) + np.testing.assert_allclose(result_ab, result_ba.T, atol=1e-10) + + def test_diou_leq_iou(self) -> None: + """DIoU <= IoU: center-distance penalty is nonnegative.""" + rng = np.random.default_rng(11) + xy = rng.uniform(0, 100, size=(40, 2)) + wh = rng.uniform(5, 50, size=(40, 2)) + 
boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 100, size=(30, 2)) + wh2 = rng.uniform(5, 50, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + iou_result = _iou.compute(boxes_1, boxes_2).astype(np.float64) + diou_result = _diou.compute(boxes_1, boxes_2) + assert np.all(diou_result <= iou_result + 1e-6) + + +class TestCIoUProperties: + """Verify mathematical properties of CIoU.""" + + def test_at_most_one(self) -> None: + """Pairwise CIoU is at most 1; unlike IoU/DIoU/GIoU it can be < -1.""" + rng = np.random.default_rng(103) + xy = rng.uniform(0, 500, size=(100, 2)) + wh = rng.uniform(1, 200, size=(100, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 500, size=(80, 2)) + wh2 = rng.uniform(1, 200, size=(80, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + result = _ciou.compute(boxes_1, boxes_2) + assert np.all(result <= 1.0 + 1e-9) + + def test_symmetry(self) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 35.0, 40.0]]) + boxes_2 = np.array([[5.0, 5.0, 15.0, 15.0], [50.0, 50.0, 60.0, 60.0]]) + result_ab = _ciou.compute(boxes_1, boxes_2) + result_ba = _ciou.compute(boxes_2, boxes_1) + np.testing.assert_allclose(result_ab, result_ba.T, atol=1e-10) + + def test_ciou_leq_diou(self) -> None: + """CIoU <= DIoU: aspect-ratio term is nonnegative after scaling by alpha.""" + rng = np.random.default_rng(13) + xy = rng.uniform(0, 100, size=(40, 2)) + wh = rng.uniform(5, 50, size=(40, 2)) + boxes_1 = np.hstack([xy, xy + wh]) + + xy2 = rng.uniform(0, 100, size=(30, 2)) + wh2 = rng.uniform(5, 50, size=(30, 2)) + boxes_2 = np.hstack([xy2, xy2 + wh2]) + + diou_result = _diou.compute(boxes_1, boxes_2) + ciou_result = _ciou.compute(boxes_1, boxes_2) + assert np.all(ciou_result <= diou_result + 1e-6) + + +class TestEmptyArrayHandling: + """Verify BaseIoU.compute handles empty inputs for all subclasses.""" + + @pytest.mark.parametrize( + "iou_instance", + [_iou, _biou, _giou, _diou, _ciou], + ids=["IoU", "BIoU", "GIoU", "DIoU", 
"CIoU"], + ) + def test_empty_boxes_1(self, iou_instance) -> None: + boxes_1 = np.empty((0, 4)) + boxes_2 = np.array([[0.0, 0.0, 10.0, 10.0]]) + result = iou_instance.compute(boxes_1, boxes_2) + assert result.shape == (0, 1) + + @pytest.mark.parametrize( + "iou_instance", + [_iou, _biou, _giou, _diou, _ciou], + ids=["IoU", "BIoU", "GIoU", "DIoU", "CIoU"], + ) + def test_empty_boxes_2(self, iou_instance) -> None: + boxes_1 = np.array([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]]) + boxes_2 = np.empty((0, 4)) + result = iou_instance.compute(boxes_1, boxes_2) + assert result.shape == (2, 0) + + @pytest.mark.parametrize( + "iou_instance", + [_iou, _biou, _giou, _diou, _ciou], + ids=["IoU", "BIoU", "GIoU", "DIoU", "CIoU"], + ) + def test_both_empty(self, iou_instance) -> None: + boxes_1 = np.empty((0, 4)) + boxes_2 = np.empty((0, 4)) + result = iou_instance.compute(boxes_1, boxes_2) + assert result.shape == (0, 0) diff --git a/trackers/__init__.py b/trackers/__init__.py index 31c646df..1a322dc8 100644 --- a/trackers/__init__.py +++ b/trackers/__init__.py @@ -20,15 +20,22 @@ IdentityTransformation, ) from trackers.utils.converters import xcycsr_to_xyxy, xyxy_to_xcycsr +from trackers.utils.iou import BaseIoU, BIoU, CIoU, DIoU, GIoU, IoU __all__ = [ + "BIoU", + "BaseIoU", "ByteTrackTracker", + "CIoU", "CoordinatesTransformation", + "DIoU", "Dataset", "DatasetAsset", "DatasetSplit", + "GIoU", "HomographyTransformation", "IdentityTransformation", + "IoU", "MotionAwareTraceAnnotator", "MotionEstimator", "OCSORTTracker", diff --git a/trackers/core/bytetrack/tracker.py b/trackers/core/bytetrack/tracker.py index 3792266c..ecb2a1cc 100644 --- a/trackers/core/bytetrack/tracker.py +++ b/trackers/core/bytetrack/tracker.py @@ -9,10 +9,12 @@ from scipy.optimize import linear_sum_assignment from trackers.core.base import BaseTracker -from trackers.core.bytetrack.kalman import ByteTrackKalmanBoxTracker -from trackers.core.sort.utils import ( - get_alive_trackers, - get_iou_matrix, 
+from trackers.core.bytetrack.tracklet import ByteTrackTracklet +from trackers.core.bytetrack.utils import _get_alive_tracklets +from trackers.utils.iou import BaseIoU, IoU +from trackers.utils.state_representations import ( + BaseStateEstimator, + XYXYStateEstimator, ) @@ -53,6 +55,13 @@ class ByteTrackTracker(BaseTracker): detections to existing tracks. Higher values require more overlap. high_conf_det_threshold: `float` specifying threshold for separating high and low confidence detections in the two-stage association. + state_estimator_class: State estimator class to use for Kalman filter. + Defaults to `XYXYStateEstimator`. Can also use + `XCYCSRStateEstimator` for center-based representation. + iou: IoU similarity metric instance to use for data association. + Defaults to standard `IoU`. Can be replaced with any `BaseIoU` + subclass (e.g. GIoU, DIoU, CIoU) to change how bounding-box + similarity is computed during the association step. """ tracker_id = "bytetrack" @@ -65,6 +74,8 @@ def __init__( minimum_consecutive_frames: int = 2, minimum_iou_threshold: float = 0.1, high_conf_det_threshold: float = 0.6, + state_estimator_class: type[BaseStateEstimator] = XYXYStateEstimator, + iou: BaseIoU = IoU(), ) -> None: # Calculate maximum frames without update based on lost_track_buffer and # frame_rate. This scales the buffer based on the frame rate to ensure @@ -74,13 +85,15 @@ def __init__( self.minimum_iou_threshold = minimum_iou_threshold self.track_activation_threshold = track_activation_threshold self.high_conf_det_threshold = high_conf_det_threshold - self.tracks: list[ByteTrackKalmanBoxTracker] = [] + self.tracks: list[ByteTrackTracklet] = [] + self.state_estimator_class = state_estimator_class + self.iou = iou def update( self, detections: sv.Detections, ) -> sv.Detections: - """Update tracker state with new detections and return tracked objects. + """Update tracks state with new detections and return tracked objects. 
Performs Kalman filter prediction, two-stage association (high then low confidence), and initializes new tracks for unmatched detections. @@ -120,7 +133,12 @@ def update( low_boxes = detection_boxes[low_indices] # Step 1: associate high-confidence detections to all tracks - iou_matrix = get_iou_matrix(self.tracks, high_boxes) + predicted_boxes = ( + np.array([t.get_state_bbox() for t in self.tracks]) + if self.tracks + else np.empty((0, 4)) + ) + iou_matrix = self.iou.compute(predicted_boxes, high_boxes) matched, unmatched_tracks, unmatched_high = self._get_associated_indices( iou_matrix, self.minimum_iou_threshold ) @@ -129,17 +147,23 @@ def update( track = self.tracks[row] track.update(high_boxes[col]) if ( - track.number_of_successful_updates >= self.minimum_consecutive_frames + track.number_of_successful_consecutive_updates + >= self.minimum_consecutive_frames and track.tracker_id == -1 ): - track.tracker_id = ByteTrackKalmanBoxTracker.get_next_tracker_id() + track.tracker_id = ByteTrackTracklet.get_next_tracker_id() out_det_indices.append(int(high_indices[col])) out_tracker_ids.append(track.tracker_id) remaining_tracks = [self.tracks[i] for i in unmatched_tracks] # Step 2: associate low-confidence detections to remaining tracks - iou_matrix = get_iou_matrix(remaining_tracks, low_boxes) + remaining_boxes = ( + np.array([t.get_state_bbox() for t in remaining_tracks]) + if remaining_tracks + else np.empty((0, 4)) + ) + iou_matrix = self.iou.compute(remaining_boxes, low_boxes) matched, _, unmatched_low = self._get_associated_indices( iou_matrix, self.minimum_iou_threshold ) @@ -148,10 +172,11 @@ def update( track = remaining_tracks[row] track.update(low_boxes[col]) if ( - track.number_of_successful_updates >= self.minimum_consecutive_frames + track.number_of_successful_consecutive_updates + >= self.minimum_consecutive_frames and track.tracker_id == -1 ): - track.tracker_id = ByteTrackKalmanBoxTracker.get_next_tracker_id() + track.tracker_id = 
ByteTrackTracklet.get_next_tracker_id() out_det_indices.append(int(low_indices[col])) out_tracker_ids.append(track.tracker_id) @@ -161,7 +186,7 @@ def update( out_tracker_ids.append(-1) # Spawn new tracks from unmatched high-confidence detections - self._spawn_new_trackers( + self._spawn_new_tracks( detection_boxes, confidences, unmatched_high, @@ -170,10 +195,10 @@ def update( out_tracker_ids, ) - self.tracks = get_alive_trackers( - trackers=self.tracks, - maximum_frames_without_update=self.maximum_frames_without_update, + self.tracks = _get_alive_tracklets( # type: ignore[assignment] + tracklets=self.tracks, minimum_consecutive_frames=self.minimum_consecutive_frames, + maximum_frames_without_update=self.maximum_frames_without_update, ) # Build final sv.Detections from original by indexing @@ -223,7 +248,7 @@ def _get_associated_indices( return matched_indices, unmatched_tracks, unmatched_detections - def _spawn_new_trackers( + def _spawn_new_tracks( self, detection_boxes: np.ndarray, confidences: np.ndarray, @@ -237,7 +262,10 @@ def _spawn_new_trackers( conf = float(confidences[global_idx]) if conf >= self.track_activation_threshold: self.tracks.append( - ByteTrackKalmanBoxTracker(bbox=detection_boxes[global_idx]) + ByteTrackTracklet( + initial_bbox=detection_boxes[global_idx], + state_estimator_class=self.state_estimator_class, + ) ) out_det_indices.append(global_idx) out_tracker_ids.append(-1) @@ -247,4 +275,4 @@ def reset(self) -> None: Call this method when switching to a new video or scene. """ self.tracks = [] - ByteTrackKalmanBoxTracker.count_id = 0 + ByteTrackTracklet.count_id = 0 diff --git a/trackers/core/bytetrack/tracklet.py b/trackers/core/bytetrack/tracklet.py new file mode 100644 index 00000000..dd63fa96 --- /dev/null +++ b/trackers/core/bytetrack/tracklet.py @@ -0,0 +1,57 @@ +# ------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +import numpy as np + +from trackers.utils.base_tracklet import BaseTracklet +from trackers.utils.state_representations import ( + BaseStateEstimator, + XYXYStateEstimator, +) + + +class ByteTrackTracklet(BaseTracklet): + count_id: int = 0 + + def __init__( + self, + initial_bbox: np.ndarray, + state_estimator_class: type[BaseStateEstimator] = XYXYStateEstimator, + ) -> None: + super().__init__(initial_bbox, state_estimator_class) + self._configure_noise() + # Count initial bbox as first successful update (matches original + # ByteTrackKalmanBoxTracker behavior where hits started at 1) + self.number_of_successful_consecutive_updates = 1 + + def update(self, bbox: np.ndarray | None) -> None: + """Update tracklet with new observation or None if missed.""" + if bbox is not None: + self.state_estimator.update(bbox) + self.time_since_update = 0 + self.number_of_successful_consecutive_updates += 1 + else: + self.state_estimator.update(None) + self.time_since_update += 1 + self.number_of_successful_consecutive_updates = 0 + + def predict(self) -> np.ndarray: + """Predict next bounding box position.""" + self.state_estimator.predict() + self.age += 1 + return self.state_estimator.state_to_bbox() + + def get_state_bbox(self) -> np.ndarray: + """Get current bounding box estimate from the filter/state.""" + return self.state_estimator.state_to_bbox() + + def _configure_noise(self) -> None: + """Configure Kalman filter noise (original ByteTrack tuning).""" + kf = self.state_estimator.kf + self.state_estimator.set_kf_covariances( + R=kf.R * 0.1, + Q=kf.Q * 0.01, + ) diff --git a/trackers/core/bytetrack/utils.py b/trackers/core/bytetrack/utils.py new file mode 100644 index 00000000..7bb80ddd --- /dev/null +++ b/trackers/core/bytetrack/utils.py @@ -0,0 +1,43 @@ +# 
------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +from collections.abc import Sequence + +from trackers.utils.base_tracklet import BaseTracklet + + +def _get_alive_tracklets( + tracklets: Sequence[BaseTracklet], + minimum_consecutive_frames: int, + maximum_frames_without_update: int, +) -> list[BaseTracklet]: + """
+ Remove dead or immature lost tracklets and get alive tracklets
+ that are within `maximum_frames_without_update` AND (it's mature OR
+ it was just updated).
+
+ Args:
+ tracklets: List of BaseTracklet objects.
+ minimum_consecutive_frames: Number of consecutive frames that an object
+ must be tracked before it is considered a 'valid' track.
+ maximum_frames_without_update: Maximum number of frames without update
+ before a track is considered dead.
+
+ Returns:
+ List of alive tracklets. 
+ """ + alive_tracklets = [] + for tracklet in tracklets: + is_mature = ( + tracklet.number_of_successful_consecutive_updates + >= minimum_consecutive_frames + ) + is_active = tracklet.time_since_update == 0 + if tracklet.time_since_update < maximum_frames_without_update and ( + is_mature or is_active + ): + alive_tracklets.append(tracklet) + return alive_tracklets diff --git a/trackers/core/ocsort/tracker.py b/trackers/core/ocsort/tracker.py index f84fa7b6..16720893 100644 --- a/trackers/core/ocsort/tracker.py +++ b/trackers/core/ocsort/tracker.py @@ -4,17 +4,20 @@ # Licensed under the Apache License, Version 2.0 [see LICENSE for details] # ------------------------------------------------------------------------ +from copy import deepcopy + import numpy as np import supervision as sv from scipy.optimize import linear_sum_assignment from trackers.core.base import BaseTracker from trackers.core.ocsort.tracklet import OCSORTTracklet -from trackers.core.ocsort.utils import ( - _build_direction_consistency_matrix_batch, - _get_iou_matrix, +from trackers.core.ocsort.utils import _build_direction_consistency_matrix_batch +from trackers.utils.iou import BaseIoU, IoU +from trackers.utils.state_representations import ( + BaseStateEstimator, + XCYCSRStateEstimator, ) -from trackers.utils.state_representations import XCYCSRStateEstimator class OCSORTTracker(BaseTracker): @@ -57,6 +60,13 @@ class OCSORTTracker(BaseTracker): delta_t: `int` specifying number of past frames to use for velocity estimation. Higher values provide more stable direction estimates during occlusion. + state_estimator_class: State estimator class to use for Kalman filter. + Defaults to `XCYCSRStateEstimator`. Can also use + `XYXYStateEstimator` for corner-based representation. + iou: IoU similarity metric instance to use for data association. + Defaults to standard `IoU`. Can be replaced with any `BaseIoU` + subclass (e.g. 
GIoU, DIoU, CIoU) to change how bounding-box + similarity is computed during the association step. """ tracker_id = "ocsort" @@ -70,6 +80,8 @@ def __init__( direction_consistency_weight: float = 0.2, high_conf_det_threshold: float = 0.6, delta_t: int = 3, + state_estimator_class: type[BaseStateEstimator] = XCYCSRStateEstimator, + iou: BaseIoU = IoU(), ) -> None: # Calculate maximum frames without update based on lost_track_buffer and # frame_rate. This scales the buffer based on the frame rate to ensure @@ -83,7 +95,8 @@ def __init__( self.tracks: list[OCSORTTracklet] = [] self.frame_count = 0 - self.state_estimator_class = XCYCSRStateEstimator + self.state_estimator_class = state_estimator_class + self.iou = iou def _get_associated_indices( self, @@ -181,7 +194,7 @@ def update(self, detections: sv.Detections) -> sv.Detections: tracker.predict() predicted_boxes = np.array([t.get_state_bbox() for t in self.tracks]) - iou_matrix = _get_iou_matrix(predicted_boxes, detection_boxes) + iou_matrix = self.iou.compute(predicted_boxes, detection_boxes) direction_consistency_matrix = self._compute_direction_consistency_matrix( detection_boxes, confidences @@ -205,7 +218,7 @@ def update(self, detections: sv.Detections) -> sv.Detections: last_observation_of_tracks = np.array( [self.tracks[t].last_observation for t in unmatched_tracks] ) - ocr_iou_matrix = sv.box_iou_batch( + ocr_iou_matrix = self.iou.compute( last_observation_of_tracks, detection_boxes[unmatched_detections], ) @@ -248,7 +261,8 @@ def update(self, detections: sv.Detections) -> sv.Detections: # Build output — single index into the filtered detections preserves # all metadata (confidence, class_id, mask, data dict). 
if out_det_indices: - result = detections[out_det_indices] + copied_detections = deepcopy(detections) + result = copied_detections[out_det_indices] result.tracker_id = np.array(out_tracker_ids, dtype=int) else: result = sv.Detections.empty() diff --git a/trackers/core/ocsort/tracklet.py b/trackers/core/ocsort/tracklet.py index 89aa2b64..387935b3 100644 --- a/trackers/core/ocsort/tracklet.py +++ b/trackers/core/ocsort/tracklet.py @@ -8,6 +8,7 @@ import numpy as np +from trackers.utils.base_tracklet import BaseTracklet from trackers.utils.converters import ( xyxy_to_xcycsr, ) @@ -17,7 +18,7 @@ ) -class OCSORTTracklet: +class OCSORTTracklet(BaseTracklet): """Tracklet for OC-SORT tracker with ORU (Observation-centric Re-Update). Manages a single tracked object with Kalman filter state estimation. @@ -50,18 +51,17 @@ def __init__( Args: initial_bbox: Initial bounding box `[x1, y1, x2, y2]`. - kalman_filter_class: Kalman filter class to use. Instantiated + state_estimator_class: State estimator class to use. Instantiated with *initial_bbox*. Defaults to `XCYCSRKalmanFilter`. delta_t: Number of timesteps back to look for velocity estimation. Higher values use observations further in the past to estimate motion direction, providing more stable velocity estimates. 
""" - self.age = 0 # Initialize state estimator (wraps KalmanFilter + state repr) - self.kalman_filter: BaseStateEstimator = state_estimator_class(initial_bbox) - + super().__init__(initial_bbox, state_estimator_class) + self._configure_noise() # Observation history for ORU and delta_t self.delta_t = delta_t self.last_observation = initial_bbox @@ -69,28 +69,13 @@ def __init__( self.observations: dict[int, np.ndarray] = {} self.velocity: np.ndarray | None = None - # Track ID can be initialized before mature in oc-sort - # it is assigned if the frame number is less than minimum_consecutive_frames - self.tracker_id = -1 - - # Tracking counters - self.number_of_successful_consecutive_updates = 0 - self.time_since_update = 0 - # ORU: saved state for freeze/unfreeze self._frozen_state: dict | None = None self._observed = True - @classmethod - def get_next_tracker_id(cls) -> int: - """Get next available tracker ID.""" - next_id = cls.count_id - cls.count_id += 1 - return next_id - def _freeze(self) -> None: """Save Kalman filter state before track is lost (ORU mechanism).""" - self._frozen_state = self.kalman_filter.get_state() + self._frozen_state = self.state_estimator.get_state() def _unfreeze(self, new_bbox: np.ndarray) -> None: """Restore state and apply virtual trajectory (ORU mechanism). 
@@ -106,11 +91,11 @@ def _unfreeze(self, new_bbox: np.ndarray) -> None: return # Restore to frozen state - self.kalman_filter.set_state(self._frozen_state) + self.state_estimator.set_state(self._frozen_state) time_gap = self.time_since_update # this is oc-sort specific - if isinstance(self.kalman_filter, XCYCSRStateEstimator): + if isinstance(self.state_estimator, XCYCSRStateEstimator): self._unfreeze_xcycsr(new_bbox, time_gap) else: self._unfreeze_xyxy(new_bbox, time_gap) @@ -155,9 +140,9 @@ def _unfreeze_xcycsr(self, new_bbox: np.ndarray, time_gap: int) -> None: r = w / h virtual_obs = np.array([x, y, s, r]).reshape((4, 1)) - self.kalman_filter.kf.update(virtual_obs) + self.state_estimator.kf.update(virtual_obs) if i < time_gap - 1: - self.kalman_filter.kf.predict() + self.state_estimator.kf.predict() def _unfreeze_xyxy(self, new_bbox: np.ndarray, time_gap: int) -> None: """ORU interpolation for XYXY representation. @@ -174,9 +159,9 @@ def _unfreeze_xyxy(self, new_bbox: np.ndarray, time_gap: int) -> None: for i in range(time_gap): virtual_obs = (last_xyxy + (i + 1) * delta).reshape((4, 1)) - self.kalman_filter.kf.update(virtual_obs) + self.state_estimator.kf.update(virtual_obs) if i < time_gap - 1: - self.kalman_filter.kf.predict() + self.state_estimator.kf.predict() def get_k_previous_obs(self) -> np.ndarray | None: """Get observation from delta_t steps ago. @@ -239,7 +224,7 @@ def update(self, bbox: np.ndarray | None) -> None: # Update KF with the real observation # (after ORU this is the final update at the correct time step; # without ORU this is the normal measurement update) - self.kalman_filter.update(bbox) + self.state_estimator.update(bbox) self._observed = True self.time_since_update = 0 @@ -252,7 +237,7 @@ def update(self, bbox: np.ndarray | None) -> None: if self._observed: self._freeze() self._observed = False - self.kalman_filter.update(None) + self.state_estimator.update(None) def predict(self) -> np.ndarray: """Predict next bounding box position. 
@@ -260,14 +245,14 @@ def predict(self) -> np.ndarray: Returns: Predicted bounding box `[x1, y1, x2, y2]`. """ - self.kalman_filter.predict() + self.state_estimator.predict() self.age += 1 if self.time_since_update > 0: self.number_of_successful_consecutive_updates = 0 self.time_since_update += 1 - return self.kalman_filter.state_to_bbox() + return self.state_estimator.state_to_bbox() def get_state_bbox(self) -> np.ndarray: """Get current bounding box estimate from Kalman filter. @@ -275,7 +260,26 @@ def get_state_bbox(self) -> np.ndarray: Returns: Current bounding box estimate `[x1, y1, x2, y2]`. """ - return self.kalman_filter.state_to_bbox() + return self.state_estimator.state_to_bbox() + + def _configure_noise(self) -> None: + """Configure Kalman filter noise matrices (OC-SORT paper tuning).""" + kf = self.state_estimator.kf + R = kf.R + P = kf.P + Q = kf.Q + if isinstance(self.state_estimator, XCYCSRStateEstimator): + R[2:, 2:] *= 10.0 + P[4:, 4:] *= 1000.0 + P *= 10.0 + Q[-1, -1] *= 0.01 + Q[4:, 4:] *= 0.01 + else: + # XYXY: same velocity uncertainty scaling + P[4:, 4:] *= 1000.0 + P *= 10.0 + Q[4:, 4:] *= 0.01 + self.state_estimator.set_kf_covariances(R=R, Q=Q, P=P) def resolve_tracker_id( self, diff --git a/trackers/core/ocsort/utils.py b/trackers/core/ocsort/utils.py index 6f612a7d..41fcd91d 100644 --- a/trackers/core/ocsort/utils.py +++ b/trackers/core/ocsort/utils.py @@ -10,7 +10,6 @@ from __future__ import annotations import numpy as np -import supervision as sv def _speed_direction_batch( @@ -103,31 +102,3 @@ def _build_direction_consistency_matrix_batch( angle_diff_cost = velocity_mask * angle_diff_cost return angle_diff_cost.astype(np.float32) - - -def _get_iou_matrix(track_boxes: np.ndarray, detection_boxes: np.ndarray) -> np.ndarray: - """Build IoU matrix between track and detection bounding boxes. 
- - Computes pairwise Intersection over Union (IoU) scores used as the primary - cost metric for Hungarian algorithm association in SORT-family trackers. - - Args: - track_boxes: `np.ndarray` of shape `(n_tracks, 4)` containing track - bounding boxes in `[x1, y1, x2, y2]` format. Typically predicted - positions from Kalman filter or last observations. - detection_boxes: `np.ndarray` of shape `(n_detections, 4)` containing - detection bounding boxes in `[x1, y1, x2, y2]` format from the - current frame. - - Returns: - `np.ndarray` of shape `(n_tracks, n_detections)` containing IoU scores - in range `[0, 1]`. Higher values indicate greater overlap between - track and detection boxes. - """ - n_tracks = track_boxes.shape[0] - n_detections = detection_boxes.shape[0] - if n_tracks > 0 and n_detections > 0: - iou_matrix = sv.box_iou_batch(track_boxes, detection_boxes) - else: - iou_matrix = np.zeros((n_tracks, n_detections), dtype=np.float32) - return iou_matrix diff --git a/trackers/core/sort/kalman.py b/trackers/core/sort/kalman.py deleted file mode 100644 index 144fde58..00000000 --- a/trackers/core/sort/kalman.py +++ /dev/null @@ -1,147 +0,0 @@ -# ------------------------------------------------------------------------ -# Trackers -# Copyright (c) 2026 Roboflow. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 [see LICENSE for details] -# ------------------------------------------------------------------------ - -import numpy as np -from numpy.typing import NDArray - - -class SORTKalmanBoxTracker: - """ - The `SORTKalmanBoxTracker` class represents the internals of a single - tracked object (bounding box), with a Kalman filter to predict and update - its position. - - Attributes: - tracker_id: Unique identifier for the tracker. - number_of_successful_updates: Number of times the object has been - updated successfully. - time_since_update: Number of frames since the last update. - state: State vector of the bounding box. 
- F: State transition matrix. - H: Measurement matrix. - Q: Process noise covariance matrix. - R: Measurement noise covariance matrix. - P: Error covariance matrix. - count_id: Class variable to assign unique IDs to each tracker. - - Args: - bbox: Initial bounding box in the form [x1, y1, x2, y2]. - """ - - count_id: int = 0 - state: NDArray[np.float32] - F: NDArray[np.float32] - H: NDArray[np.float32] - Q: NDArray[np.float32] - R: NDArray[np.float32] - P: NDArray[np.float32] - - @classmethod - def get_next_tracker_id(cls) -> int: - next_id = cls.count_id - cls.count_id += 1 - return next_id - - def __init__(self, bbox: NDArray[np.float64]) -> None: - # Initialize with a temporary ID of -1 - # Will be assigned a real ID when the track is considered mature - self.tracker_id = -1 - - # Number of hits indicates how many times the object has been - # updated successfully - self.number_of_successful_updates = 1 - # Number of frames since the last update - self.time_since_update = 0 - - # For simplicity, we keep a small state vector: - # (x, y, x2, y2, vx, vy, vx2, vy2). - # We'll store the bounding box in "self.state" - self.state = np.zeros((8, 1), dtype=np.float32) - - # Initialize state directly from the first detection - bbox_float: NDArray[np.float32] = bbox.astype(np.float32) - self.state[0, 0] = bbox_float[0] - self.state[1, 0] = bbox_float[1] - self.state[2, 0] = bbox_float[2] - self.state[3, 0] = bbox_float[3] - - # Basic constant velocity model - self._initialize_kalman_filter() - - def _initialize_kalman_filter(self) -> None: - """ - Sets up the matrices for the Kalman filter. - """ - # State transition matrix (F): 8x8 - # We assume a constant velocity model. Positions are incremented by - # velocity each step. 
- self.F = np.eye(8, dtype=np.float32) - for i in range(4): - self.F[i, i + 4] = 1.0 - - # Measurement matrix (H): we directly measure x1, y1, x2, y2 - self.H = np.eye(4, 8, dtype=np.float32) # 4x8 - - # Process covariance matrix (Q) - self.Q = np.eye(8, dtype=np.float32) * 0.01 - - # Measurement covariance (R): noise in detection - self.R = np.eye(4, dtype=np.float32) * 0.1 - - # Error covariance matrix (P) - self.P = np.eye(8, dtype=np.float32) - - def predict(self) -> None: - """ - Predict the next state of the bounding box (applies the state transition). - """ - # Predict state - self.state = (self.F @ self.state).astype(np.float32) - # Predict error covariance - self.P = (self.F @ self.P @ self.F.T + self.Q).astype(np.float32) - - # Increase time since update - self.time_since_update += 1 - - def update(self, bbox: NDArray[np.float64]) -> None: - """ - Updates the state with a new detected bounding box. - - Args: - bbox: Detected bounding box in the form [x1, y1, x2, y2]. - """ - self.time_since_update = 0 - self.number_of_successful_updates += 1 - - # Kalman Gain - S: NDArray[np.float32] = (self.H @ self.P @ self.H.T + self.R).astype( - np.float32 - ) - K: NDArray[np.float32] = (self.P @ self.H.T @ np.linalg.inv(S)).astype( - np.float32 - ) - - # Residual - measurement: NDArray[np.float32] = bbox.reshape((4, 1)).astype(np.float32) - y: NDArray[np.float32] = ( - measurement - self.H @ self.state - ) # y should be float32 (4,1) - - # Update state - self.state = (self.state + K @ y).astype(np.float32) - - # Update covariance - identity_matrix: NDArray[np.float32] = np.eye(8, dtype=np.float32) - self.P = ((identity_matrix - K @ self.H) @ self.P).astype(np.float32) - - def get_state_bbox(self) -> NDArray[np.float32]: - """ - Returns the current bounding box estimate from the state vector. - - Returns: - The bounding box [x1, y1, x2, y2]. 
- """ - return self.state[:4, 0].flatten().astype(np.float32) diff --git a/trackers/core/sort/tracker.py b/trackers/core/sort/tracker.py index 5e0f2a5f..12d1c94a 100644 --- a/trackers/core/sort/tracker.py +++ b/trackers/core/sort/tracker.py @@ -9,10 +9,12 @@ from scipy.optimize import linear_sum_assignment from trackers.core.base import BaseTracker -from trackers.core.sort.kalman import SORTKalmanBoxTracker -from trackers.core.sort.utils import ( - get_alive_trackers, - get_iou_matrix, +from trackers.core.sort.tracklet import SORTTracklet +from trackers.core.sort.utils import _get_alive_tracklets +from trackers.utils.iou import BaseIoU, IoU +from trackers.utils.state_representations import ( + BaseStateEstimator, + XYXYStateEstimator, ) @@ -51,6 +53,13 @@ class SORTTracker(BaseTracker): threshold, tracks are assigned `tracker_id` of `-1`. minimum_iou_threshold: `float` specifying IoU threshold for associating detections to existing tracks. Higher values require more overlap. + state_estimator_class: State estimator class to use for Kalman filter. + Defaults to `XYXYStateEstimator`. Can also use + `XYXYStateEstimator` for corner-based representation. + iou: IoU similarity metric instance to use for data association. + Defaults to standard `IoU`. Can be replaced with any `BaseIoU` + subclass (e.g. GIoU, DIoU, CIoU) to change how bounding-box + similarity is computed during the association step. """ tracker_id = "sort" @@ -62,6 +71,8 @@ def __init__( track_activation_threshold: float = 0.25, minimum_consecutive_frames: int = 3, minimum_iou_threshold: float = 0.3, + state_estimator_class: type[BaseStateEstimator] = XYXYStateEstimator, + iou: BaseIoU = IoU(), ) -> None: # Calculate maximum frames without update based on lost_track_buffer and # frame_rate. 
This scales the buffer based on the frame rate to ensure @@ -70,28 +81,30 @@ def __init__( self.minimum_consecutive_frames = minimum_consecutive_frames self.minimum_iou_threshold = minimum_iou_threshold self.track_activation_threshold = track_activation_threshold + self.state_estimator_class = state_estimator_class + self.iou = iou - # Active trackers - self.trackers: list[SORTKalmanBoxTracker] = [] + # Active tracklets + self.tracklets: list[SORTTracklet] = [] def _get_associated_indices( self, iou_matrix: np.ndarray, detection_boxes: np.ndarray ) -> tuple[list[tuple[int, int]], set[int], set[int]]: """ - Associate detections to trackers based on IOU + Associate detections to tracklets based on IOU Args: iou_matrix: IOU cost matrix. detection_boxes: Detected bounding boxes in the form [x1, y1, x2, y2]. Returns: - Matched indices, unmatched trackers, unmatched detections. + Matched indices, unmatched tracklets, unmatched detections. """ matched_indices = [] - unmatched_trackers = set(range(len(self.trackers))) + unmatched_tracklets = set(range(len(self.tracklets))) unmatched_detections = set(range(len(detection_boxes))) - if len(self.trackers) > 0 and len(detection_boxes) > 0: + if len(self.tracklets) > 0 and len(detection_boxes) > 0: # Find optimal assignment using scipy.optimize.linear_sum_assignment. 
# Note that it uses a a modified Jonker-Volgenant algorithm with no # initialization instead of the Hungarian algorithm as mentioned in the @@ -100,26 +113,24 @@ def _get_associated_indices( for row, col in zip(row_indices, col_indices): if iou_matrix[row, col] >= self.minimum_iou_threshold: matched_indices.append((row, col)) - unmatched_trackers.remove(row) + unmatched_tracklets.remove(row) unmatched_detections.remove(col) - return matched_indices, unmatched_trackers, unmatched_detections + return matched_indices, unmatched_tracklets, unmatched_detections - def _spawn_new_trackers( + def _spawn_new_tracklets( self, - confidences: np.ndarray | None, + confidences: np.ndarray, detection_boxes: np.ndarray, unmatched_detections: set[int], ) -> None: for detection_idx in unmatched_detections: - if ( - confidences is None - or detection_idx >= len(confidences) - or confidences[detection_idx] >= self.track_activation_threshold - ): - self.trackers.append( - SORTKalmanBoxTracker(detection_boxes[detection_idx]) + if confidences[detection_idx] >= self.track_activation_threshold: + new_tracker = SORTTracklet( + detection_boxes[detection_idx], + state_estimator_class=self.state_estimator_class, ) + self.tracklets.append(new_tracker) def update(self, detections: sv.Detections) -> sv.Detections: """Update tracker state with new detections and return tracked objects. @@ -135,7 +146,7 @@ def update(self, detections: sv.Detections) -> sv.Detections: `sv.Detections` with `tracker_id` assigned for each detection. Unmatched or immature tracks have `tracker_id` of `-1`. 
""" - if len(self.trackers) == 0 and len(detections) == 0: + if len(self.tracklets) == 0 and len(detections) == 0: detections.tracker_id = np.array([], dtype=int) return detections @@ -143,37 +154,48 @@ def update(self, detections: sv.Detections) -> sv.Detections: detections.xyxy if len(detections) > 0 else np.array([]).reshape(0, 4) ) - for tracker in self.trackers: - tracker.predict() + for tracklet in self.tracklets: + tracklet.predict() - iou_matrix = get_iou_matrix(self.trackers, detection_boxes) - matched_indices, _, unmatched_detections = self._get_associated_indices( - iou_matrix, detection_boxes + predicted_boxes = ( + np.array([t.get_state_bbox() for t in self.tracklets]) + if self.tracklets + else np.empty((0, 4)) ) + iou_matrix = self.iou.compute(predicted_boxes, detection_boxes) - # Update matched trackers and record the det_idx -> tracker mapping - matched_tracker_for_det: dict[int, SORTKalmanBoxTracker] = {} + # Associate detections to tracklets based on IOU + matched_indices, unmatched_tracklets, unmatched_detections = ( + self._get_associated_indices(iou_matrix, detection_boxes) + ) + + # Update matched tracklets and record the det_idx -> tracklet mapping + matched_tracklet_for_det: dict[int, SORTTracklet] = {} for row, col in matched_indices: - self.trackers[row].update(detection_boxes[col]) - matched_tracker_for_det[col] = self.trackers[row] + self.tracklets[row].update(detection_boxes[col]) + matched_tracklet_for_det[col] = self.tracklets[row] - self._spawn_new_trackers( + # Update non matched for increasing time_since_update + for index in unmatched_tracklets: + self.tracklets[index].update(None) + self._spawn_new_tracklets( detections.confidence, detection_boxes, unmatched_detections ) - self.trackers = get_alive_trackers( - self.trackers, + # Remove dead tracklets + self.tracklets = _get_alive_tracklets( # type: ignore[assignment] + self.tracklets, self.minimum_consecutive_frames, self.maximum_frames_without_update, ) # Build tracker_ids from 
the recorded mapping (no deepcopy, no re-IoU) tracker_ids = np.full(len(detection_boxes), -1, dtype=int) - for det_idx, tracker in matched_tracker_for_det.items(): - if tracker.number_of_successful_updates >= self.minimum_consecutive_frames: - if tracker.tracker_id == -1: - tracker.tracker_id = SORTKalmanBoxTracker.get_next_tracker_id() - tracker_ids[det_idx] = tracker.tracker_id + for det_idx, tracklet in matched_tracklet_for_det.items(): + if tracklet.number_of_successful_updates >= self.minimum_consecutive_frames: + if tracklet.tracker_id == -1: + tracklet.tracker_id = SORTTracklet.get_next_tracker_id() + tracker_ids[det_idx] = tracklet.tracker_id detections.tracker_id = tracker_ids return detections @@ -182,5 +204,5 @@ def reset(self) -> None: """Reset tracker state by clearing all tracks and resetting ID counter. Call this method when switching to a new video or scene. """ - self.trackers = [] - SORTKalmanBoxTracker.count_id = 0 + self.tracklets = [] + SORTTracklet.count_id = 0 diff --git a/trackers/core/sort/tracklet.py b/trackers/core/sort/tracklet.py new file mode 100644 index 00000000..486a25da --- /dev/null +++ b/trackers/core/sort/tracklet.py @@ -0,0 +1,75 @@ +# ------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +import numpy as np + +from trackers.utils.base_tracklet import BaseTracklet +from trackers.utils.state_representations import ( + BaseStateEstimator, + XCYCSRStateEstimator, + XYXYStateEstimator, +) + + +class SORTTracklet(BaseTracklet): + count_id: int = 0 + + def __init__( + self, + initial_bbox: np.ndarray, + state_estimator_class: type[BaseStateEstimator] = XYXYStateEstimator, + ) -> None: + super().__init__(initial_bbox, state_estimator_class) + self._configure_noise() + # SORTKalmanBoxTracker behavior where hits started at 1) + self.number_of_successful_updates = ( + 1 # SORT doesn't use number_of_successful_consecutive_updates + ) + + def update(self, bbox: np.ndarray | None) -> None: + """Update tracklet with new observation or None if missed.""" + if bbox is not None: + self.state_estimator.update(bbox) + self.time_since_update = 0 + self.number_of_successful_updates += 1 + else: + self.state_estimator.update(None) + self.time_since_update += 1 + + def predict(self) -> np.ndarray: + """Predict next bounding box position.""" + self.state_estimator.predict() + self.age += 1 + return self.state_estimator.state_to_bbox() + + def get_state_bbox(self) -> np.ndarray: + """Get current bounding box estimate from the filter/state.""" + return self.state_estimator.state_to_bbox() + + def _configure_noise(self) -> None: + """Configure Kalman filter noise matrices (OC-SORT paper behaviour) and SORT + behaviour for XYXY coordinates.""" + kf = self.state_estimator.kf + R = kf.R + P = kf.P + Q = kf.Q + if isinstance(self.state_estimator, XCYCSRStateEstimator): + R[2:, 2:] *= 10.0 + P[4:, 4:] *= 1000.0 + P *= 10.0 + Q[-1, -1] *= 0.01 + Q[4:, 4:] *= 0.01 + else: + # Process covariance matrix (Q) + Q = np.eye(8, dtype=np.float64) * 0.01 + + # Measurement covariance (R): noise in detection + R = np.eye(4, dtype=np.float64) 
* 0.1 + + # Error covariance matrix (P) + P = np.eye(8, dtype=np.float64) + + self.state_estimator.set_kf_covariances(R=R, Q=Q, P=P) diff --git a/trackers/core/sort/utils.py b/trackers/core/sort/utils.py index 987f90f4..ba1f9867 100644 --- a/trackers/core/sort/utils.py +++ b/trackers/core/sort/utils.py @@ -5,148 +5,36 @@ # ------------------------------------------------------------------------ from collections.abc import Sequence -from copy import deepcopy -from typing import TypeVar -import numpy as np -import supervision as sv +from trackers.core.sort.tracklet import SORTTracklet -from trackers.core.bytetrack.kalman import ByteTrackKalmanBoxTracker -from trackers.core.sort.kalman import SORTKalmanBoxTracker -KalmanBoxTrackerType = TypeVar( - "KalmanBoxTrackerType", bound=SORTKalmanBoxTracker | ByteTrackKalmanBoxTracker -) - - -def get_alive_trackers( - trackers: Sequence[KalmanBoxTrackerType], +def _get_alive_tracklets( + tracklets: Sequence[SORTTracklet], minimum_consecutive_frames: int, maximum_frames_without_update: int, -) -> list[KalmanBoxTrackerType]: +) -> list[SORTTracklet]: """ Remove dead or immature lost tracklets and get alive trackers that are within `maximum_frames_without_update` AND (it's mature OR it was just updated). Args: - trackers: List of KalmanBoxTracker objects. + tracklets: List of SORTTracklet objects. minimum_consecutive_frames: Number of consecutive frames that an object must be tracked before it is considered a 'valid' track. maximum_frames_without_update: Maximum number of frames without update before a track is considered dead. Returns: - List of alive trackers. + List of alive tracklets. 
""" - alive_trackers = [] - for tracker in trackers: - is_mature = tracker.number_of_successful_updates >= minimum_consecutive_frames - is_active = tracker.time_since_update == 0 - if tracker.time_since_update < maximum_frames_without_update and ( + alive_tracklets = [] + for tracklet in tracklets: + is_mature = tracklet.number_of_successful_updates >= minimum_consecutive_frames + is_active = tracklet.time_since_update == 0 + if tracklet.time_since_update < maximum_frames_without_update and ( is_mature or is_active ): - alive_trackers.append(tracker) - return alive_trackers - - -def get_iou_matrix( - trackers: Sequence[KalmanBoxTrackerType], detection_boxes: np.ndarray -) -> np.ndarray: - """ - Build IOU cost matrix between detections and predicted bounding boxes - - Args: - trackers: List of KalmanBoxTracker objects. - detection_boxes: Detected bounding boxes in the - form [x1, y1, x2, y2]. - - Returns: - IOU cost matrix. - """ - predicted_boxes = np.array([t.get_state_bbox() for t in trackers]) - if len(predicted_boxes) == 0 and len(trackers) > 0: - # Handle case where get_state_bbox might return empty array - predicted_boxes = np.zeros((len(trackers), 4), dtype=np.float32) - - if len(trackers) > 0 and len(detection_boxes) > 0: - iou_matrix = sv.box_iou_batch(predicted_boxes, detection_boxes) - else: - iou_matrix = np.zeros((len(trackers), len(detection_boxes)), dtype=np.float32) - - return iou_matrix - - -def update_detections_with_track_ids( - trackers: Sequence[KalmanBoxTrackerType], - detections: sv.Detections, - detection_boxes: np.ndarray, - minimum_iou_threshold: float, - minimum_consecutive_frames: int, -) -> sv.Detections: - """ - The function prepares the updated Detections with track IDs. - If a tracker is "mature" (>= `minimum_consecutive_frames`) or recently updated, - it is assigned an ID to the detection that just updated it. - - Args: - trackers: List of SORTKalmanBoxTracker objects. - detections: The latest set of object detections. 
- detection_boxes: Detected bounding boxes in the - form [x1, y1, x2, y2]. - minimum_iou_threshold: IOU threshold for associating detections to - existing tracks. - minimum_consecutive_frames: Number of consecutive frames that an object - must be tracked before it is considered a 'valid' track. - - Returns: - A copy of the detections with `tracker_id` set - for each detection that is tracked. - """ - # Re-run association in the same way (could also store direct mapping) - final_tracker_ids = [-1] * len(detection_boxes) - - # Recalculate predicted_boxes based on current trackers after some may have - # been removed - predicted_boxes = np.array([t.get_state_bbox() for t in trackers]) - iou_matrix_final = np.zeros((len(trackers), len(detection_boxes)), dtype=np.float32) - - # Ensure predicted_boxes is properly shaped before the second iou calculation - if len(predicted_boxes) == 0 and len(trackers) > 0: - predicted_boxes = np.zeros((len(trackers), 4), dtype=np.float32) - - if len(trackers) > 0 and len(detection_boxes) > 0: - iou_matrix_final = sv.box_iou_batch(predicted_boxes, detection_boxes) - - row_indices, col_indices = np.where(iou_matrix_final > minimum_iou_threshold) - sorted_pairs = sorted( - zip(row_indices, col_indices), - key=lambda x: iou_matrix_final[x[0], x[1]], - reverse=True, - ) - used_rows: set[int] = set() - used_cols: set[int] = set() - for row, col in sorted_pairs: - # Double check index is in range - if row < len(trackers): - tracker_obj = trackers[int(row)] - # Only assign if the track is "mature" or is new but has enough hits - if (int(row) not in used_rows) and (int(col) not in used_cols): - if ( - tracker_obj.number_of_successful_updates - >= minimum_consecutive_frames - ): - # If tracker is mature but still has ID -1, assign a new ID - if tracker_obj.tracker_id == -1: - tracker_obj.tracker_id = ( - SORTKalmanBoxTracker.get_next_tracker_id() - ) - final_tracker_ids[int(col)] = tracker_obj.tracker_id - used_rows.add(int(row)) - 
used_cols.add(int(col)) - - # Assign tracker IDs to the returned Detections - updated_detections = deepcopy(detections) - updated_detections.tracker_id = np.array(final_tracker_ids) - - return updated_detections + alive_tracklets.append(tracklet) + return alive_tracklets diff --git a/trackers/utils/base_tracklet.py b/trackers/utils/base_tracklet.py new file mode 100644 index 00000000..77477885 --- /dev/null +++ b/trackers/utils/base_tracklet.py @@ -0,0 +1,51 @@ +# ------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +from abc import ABC, abstractmethod + +import numpy as np + +from trackers.utils.state_representations import BaseStateEstimator + + +class BaseTracklet(ABC): + """ + Abstract base class for all tracker-specific tracklets. + Provides common interface and attributes for tracklet management. 
+ """ + + count_id: int = 0 + + def __init__( + self, bbox: np.ndarray, state_estimator_class: type[BaseStateEstimator] + ) -> None: + self.age = 0 + self.state_estimator: BaseStateEstimator = state_estimator_class(bbox) + + self.tracker_id = -1 + self.time_since_update = 0 + self.number_of_successful_consecutive_updates = 0 + + @classmethod + def get_next_tracker_id(cls) -> int: + next_id = cls.count_id + cls.count_id += 1 + return next_id + + @abstractmethod + def update(self, bbox: np.ndarray | None) -> None: + """Update tracklet with new observation or None if missed.""" + pass + + @abstractmethod + def predict(self) -> np.ndarray: + """Predict next bounding box position.""" + pass + + @abstractmethod + def get_state_bbox(self) -> np.ndarray: + """Get current bounding box estimate from the filter/state.""" + pass diff --git a/trackers/utils/iou.py b/trackers/utils/iou.py new file mode 100644 index 00000000..f4f92c26 --- /dev/null +++ b/trackers/utils/iou.py @@ -0,0 +1,283 @@ +# ------------------------------------------------------------------------ +# Trackers +# Copyright (c) 2026 Roboflow. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ + +from __future__ import annotations + +from abc import ABC, abstractmethod + +import numpy as np +import supervision as sv + + +class BaseIoU(ABC): + """Abstract base for IoU similarity metrics used in tracker association. + + Subclasses implement a specific Intersection over Union variant + (e.g. standard IoU, GIoU, DIoU, CIoU, BIoU) that computes a pairwise + similarity matrix between two sets of bounding boxes. + + The resulting matrix is used as a cost/similarity signal in the + Hungarian algorithm during the data association step. + """ + + def compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Compute pairwise similarity between two sets of bounding boxes. 
+ + Handles the empty-input edge case (returns a correctly-shaped zero + matrix) and delegates to subclass `_compute` method for the actual math. + + Args: + boxes_1: ``(N, 4)`` array of boxes in ``[x1, y1, x2, y2]`` format. + boxes_2: ``(M, 4)`` array of boxes in ``[x1, y1, x2, y2]`` format. + + Returns: + ``(N, M)`` similarity matrix where entry ``(i, j)`` is the + similarity between ``boxes_1[i]`` and ``boxes_2[j]``. + """ + if len(boxes_1) == 0 or len(boxes_2) == 0: + return np.zeros((len(boxes_1), len(boxes_2)), dtype=np.float64) + return self._compute(boxes_1, boxes_2) + + @abstractmethod + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Subclass hook — compute similarity for non-empty inputs. + + Args: + boxes_1: ``(N, 4)`` array of boxes in ``[x1, y1, x2, y2]`` format. + Guaranteed ``N > 0``. + boxes_2: ``(M, 4)`` array of boxes in ``[x1, y1, x2, y2]`` format. + Guaranteed ``M > 0``. + + Returns: + ``(N, M)`` similarity matrix. + """ + + +class IoU(BaseIoU): + """Standard Intersection over Union. + + Computes the ratio of the intersection area to the union area for + every pair of boxes. Values range from 0 (no overlap) to 1 (perfect + overlap). This is the classic metric used in SORT. + """ + + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + return sv.box_iou_batch(boxes_1, boxes_2) + + +class BIoU(BaseIoU): + """Buffered Intersection over Union. + + Computes IoU after expanding each box by a configurable relative margin + around its center: + + - ``x1' = x1 - r * w`` + - ``y1' = y1 - r * h`` + - ``x2' = x2 + r * w`` + - ``y2' = y2 + r * h`` + + where ``w = x2 - x1``, ``h = y2 - y1``, and ``r`` is ``buffer_ratio``. + + In practice, this makes association more tolerant to small localization + gaps while preserving familiar IoU behavior. Setting + ``buffer_ratio=0`` recovers standard IoU exactly. 
+ + Reference: https://arxiv.org/pdf/2211.14317 + """ + + def __init__(self, buffer_ratio: float = 0.1) -> None: + if buffer_ratio < 0: + raise ValueError(f"buffer_ratio must be non-negative, got {buffer_ratio}") + self.buffer_ratio = buffer_ratio + + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + if self.buffer_ratio == 0: + return sv.box_iou_batch(boxes_1, boxes_2) + + boxes_1_b = boxes_1.astype(np.float64, copy=True) + boxes_2_b = boxes_2.astype(np.float64, copy=True) + + w1 = boxes_1_b[:, 2] - boxes_1_b[:, 0] + h1 = boxes_1_b[:, 3] - boxes_1_b[:, 1] + w2 = boxes_2_b[:, 2] - boxes_2_b[:, 0] + h2 = boxes_2_b[:, 3] - boxes_2_b[:, 1] + + r = self.buffer_ratio + boxes_1_b[:, 0] -= r * w1 + boxes_1_b[:, 1] -= r * h1 + boxes_1_b[:, 2] += r * w1 + boxes_1_b[:, 3] += r * h1 + + boxes_2_b[:, 0] -= r * w2 + boxes_2_b[:, 1] -= r * h2 + boxes_2_b[:, 2] += r * w2 + boxes_2_b[:, 3] += r * h2 + + return sv.box_iou_batch(boxes_1_b, boxes_2_b) + + +def _compute_iou_and_enclosing( + boxes_1: np.ndarray, boxes_2: np.ndarray +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Shared geometry used by GIoU, DIoU, CIoU and other variants. + + Args: + boxes_1: ``(N, 4)`` array in ``[x1, y1, x2, y2]`` format. + boxes_2: ``(M, 4)`` array in ``[x1, y1, x2, y2]`` format. + + Returns: + Tuple of ``(iou, intersection, union, enclosing_area, enclosing_diagonal_sq)`` + each with shape ``(N, M)``. 
+ """ + # Intersection + inter_x1 = np.maximum(boxes_1[:, np.newaxis, 0], boxes_2[np.newaxis, :, 0]) + inter_y1 = np.maximum(boxes_1[:, np.newaxis, 1], boxes_2[np.newaxis, :, 1]) + inter_x2 = np.minimum(boxes_1[:, np.newaxis, 2], boxes_2[np.newaxis, :, 2]) + inter_y2 = np.minimum(boxes_1[:, np.newaxis, 3], boxes_2[np.newaxis, :, 3]) + intersection = np.maximum(inter_x2 - inter_x1, 0) * np.maximum( + inter_y2 - inter_y1, 0 + ) + + # Areas and union + area_1 = (boxes_1[:, 2] - boxes_1[:, 0]) * (boxes_1[:, 3] - boxes_1[:, 1]) + area_2 = (boxes_2[:, 2] - boxes_2[:, 0]) * (boxes_2[:, 3] - boxes_2[:, 1]) + union = area_1[:, np.newaxis] + area_2[np.newaxis, :] - intersection + + iou = np.where(union > 0, intersection / union, 0.0) + + # Smallest enclosing box C + enc_x1 = np.minimum(boxes_1[:, np.newaxis, 0], boxes_2[np.newaxis, :, 0]) + enc_y1 = np.minimum(boxes_1[:, np.newaxis, 1], boxes_2[np.newaxis, :, 1]) + enc_x2 = np.maximum(boxes_1[:, np.newaxis, 2], boxes_2[np.newaxis, :, 2]) + enc_y2 = np.maximum(boxes_1[:, np.newaxis, 3], boxes_2[np.newaxis, :, 3]) + + enc_w = enc_x2 - enc_x1 + enc_h = enc_y2 - enc_y1 + enclosing_area = enc_w * enc_h + enclosing_diagonal_sq = enc_w**2 + enc_h**2 + + return iou, intersection, union, enclosing_area, enclosing_diagonal_sq + + +class GIoU(BaseIoU): + """Generalized Intersection over Union (Rezatofighi et al., 2019). + + Extends standard IoU by penalizing the empty area within the smallest + enclosing box that is not covered by either box. This provides a + meaningful gradient even when the two boxes do not overlap. + + ``GIoU = IoU - |C \\ (A U B)| / |C|`` + + Values are in ``[-1, 1]``: near -1 for far-apart boxes, 1 for perfect overlap. 
+ + Reference: https://arxiv.org/abs/1902.09630 + """ + + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + iou, _, union, enclosing_area, _ = _compute_iou_and_enclosing(boxes_1, boxes_2) + + giou = iou - np.where( + enclosing_area > 0, + (enclosing_area - union) / enclosing_area, + 0.0, + ) + + return giou + + +class DIoU(BaseIoU): + """Distance Intersection over Union (Zheng et al., 2019). + + Extends IoU by penalizing the normalized Euclidean distance between + bounding-box centers, using the diagonal length of the smallest + enclosing rectangle as the scale. This yields a smooth signal when + boxes overlap or are separated and aligns with how many detectors + localize objects (center-based error). + + ``DIoU = IoU - d^2 / (c^2 + epsilon)`` + + where `d` is the center-to-center distance, `c` is the enclosing + diagonal, and ``\\epsilon`` avoids division by zero (same convention as + :func:`torchvision.ops.distance_box_iou`). + + Because the penalty is nonnegative, ``DIoU ≤ IoU`` for every pair. + Values typically lie in ``[-1, 1]`` for well-formed boxes. + + Reference: https://arxiv.org/abs/1911.08287 + """ + + _EPS = 1e-7 + + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + iou, _, _, _, enclosing_diagonal_sq = _compute_iou_and_enclosing( + boxes_1, boxes_2 + ) + + cx1 = (boxes_1[:, 0] + boxes_1[:, 2]) / 2 + cy1 = (boxes_1[:, 1] + boxes_1[:, 3]) / 2 + cx2 = (boxes_2[:, 0] + boxes_2[:, 2]) / 2 + cy2 = (boxes_2[:, 1] + boxes_2[:, 3]) / 2 + + dx = cx1[:, np.newaxis] - cx2[np.newaxis, :] + dy = cy1[:, np.newaxis] - cy2[np.newaxis, :] + center_dist_sq = dx * dx + dy * dy + + denom = enclosing_diagonal_sq + self._EPS + return iou - center_dist_sq / denom + + +class CIoU(BaseIoU): + """Complete Intersection over Union (Zheng et al., 2019). + + Builds on **DIoU** by adding a penalty for mismatched aspect ratio between + boxes (via a term ``v`` on the difference of box arctan aspect ratios). 
+ The trade-off is weighted by ``\\alpha`` that depends on IoU and ``v``, + matching :func:`torchvision.ops.complete_box_iou`. + + ``CIoU = DIoU - alpha * v``, with + ``alpha = v / (1 - IoU + v + epsilon)``. + + So **CIoU ≤ DIoU ≤ IoU** when widths and heights are positive. + Scores are at most 1; unlike plain IoU they can fall **below** -1 when the + aspect-ratio penalty is large. + + Reference: https://arxiv.org/abs/1911.08287 + """ + + _EPS = 1e-7 + + def _compute(self, boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + iou, _, _, _, enclosing_diagonal_sq = _compute_iou_and_enclosing( + boxes_1, boxes_2 + ) + + cx1 = (boxes_1[:, 0] + boxes_1[:, 2]) / 2 + cy1 = (boxes_1[:, 1] + boxes_1[:, 3]) / 2 + cx2 = (boxes_2[:, 0] + boxes_2[:, 2]) / 2 + cy2 = (boxes_2[:, 1] + boxes_2[:, 3]) / 2 + + dx = cx1[:, np.newaxis] - cx2[np.newaxis, :] + dy = cy1[:, np.newaxis] - cy2[np.newaxis, :] + center_dist_sq = dx * dx + dy * dy + + denom = enclosing_diagonal_sq + self._EPS + diou = iou - center_dist_sq / denom + + w1 = boxes_1[:, 2] - boxes_1[:, 0] + h1 = boxes_1[:, 3] - boxes_1[:, 1] + w2 = boxes_2[:, 2] - boxes_2[:, 0] + h2 = boxes_2[:, 3] - boxes_2[:, 1] + + w_pred = w1[:, np.newaxis] + h_pred = h1[:, np.newaxis] + w_gt = w2[np.newaxis, :] + h_gt = h2[np.newaxis, :] + + v = (4.0 / (np.pi**2)) * ( + np.arctan(w_pred / h_pred) - np.arctan(w_gt / h_gt) + ) ** 2 + alpha = v / (1.0 - iou + v + self._EPS) + return diou - alpha * v diff --git a/trackers/utils/state_representations.py b/trackers/utils/state_representations.py index f1e93a22..17b878b5 100644 --- a/trackers/utils/state_representations.py +++ b/trackers/utils/state_representations.py @@ -126,6 +126,26 @@ def set_state(self, state: dict) -> None: """ self.kf.set_state(state) + def set_kf_covariances( + self, + R: np.ndarray | None = None, + Q: np.ndarray | None = None, + P: np.ndarray | None = None, + ) -> None: + """Set Kalman filter parameters. + + Args: + R: Measurement noise covariance matrix. 
+ Q: Process noise covariance matrix. + P: Error covariance matrix. + """ + if R is not None: + self.kf.R = R + if Q is not None: + self.kf.Q = Q + if P is not None: + self.kf.P = P + class XCYCSRStateEstimator(BaseStateEstimator): """Center-based Kalman filter with 7 state dimensions and 4 measurements. @@ -157,13 +177,6 @@ def _create_filter(self, bbox: np.ndarray) -> KalmanFilter: # Measurement function: observe (x, y, s, r) from state kf.H = np.eye(4, 7, dtype=np.float64) - # Noise tuning (from OC-SORT paper) - kf.R[2:, 2:] *= 10.0 - kf.P[4:, 4:] *= 1000.0 # high uncertainty for velocities - kf.P *= 10.0 - kf.Q[-1, -1] *= 0.01 - kf.Q[4:, 4:] *= 0.01 - # Initialise state with first observation kf.x[:4] = xyxy_to_xcycsr(bbox).reshape((4, 1)) @@ -211,12 +224,6 @@ def _create_filter(self, bbox: np.ndarray) -> KalmanFilter: # Measurement function: observe (x1, y1, x2, y2) from state kf.H = np.eye(4, 8, dtype=np.float64) - # Noise tuning (similar scaling to XCYCSR version) - kf.R *= 1.0 # measurement noise - kf.P[4:, 4:] *= 1000.0 # high uncertainty for velocities - kf.P *= 10.0 - kf.Q[4:, 4:] *= 0.01 - # Initialise state with first observation (direct XYXY) kf.x[:4] = bbox.reshape((4, 1)) diff --git a/uv.lock b/uv.lock index 96818874..60a87d93 100644 --- a/uv.lock +++ b/uv.lock @@ -440,33 +440,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = 
"2026-02-10T19:17:12.388Z" }, { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, - { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, - { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, { url = 
"https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, - { url = 
"https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, - { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, { url = 
"https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" }, { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, - { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, { url = 
"https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, - { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, { url = 
"https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, @@ -695,6 +689,7 @@ wheels = [ name = "griffelib" version = "2.0.0" source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ad/06/eccbd311c9e2b3ca45dbc063b93134c57a1ccc7607c5e545264ad092c4a9/griffelib-2.0.0.tar.gz", hash = "sha256:e504d637a089f5cab9b5daf18f7645970509bf4f53eda8d79ed71cce8bd97934", size = 166312, upload-time = "2026-03-23T21:06:55.954Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4d/51/c936033e16d12b627ea334aaaaf42229c37620d0f15593456ab69ab48161/griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f", size = 142004, upload-time = "2026-02-09T19:09:40.561Z" }, ] @@ -3708,6 +3703,8 @@ build = [ dev = [ { name = "pre-commit" }, { name = "pytest" }, + { name = "torch" }, + { name = "torchvision" }, { name = "uv" }, ] docs = [ @@ -3744,6 +3741,8 @@ build = [ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.3.3" }, + { name = "torch" }, + { name = "torchvision" }, { name = "uv", specifier = ">=0.4.20" }, ] docs = [