diff --git a/ami/base/pagination.py b/ami/base/pagination.py
index 9ebca7b21..f7edc5fd2 100644
--- a/ami/base/pagination.py
+++ b/ami/base/pagination.py
@@ -1,17 +1,218 @@
-from rest_framework.pagination import LimitOffsetPagination
+from django.core.exceptions import ValidationError
+from django.forms import BooleanField
+from rest_framework.pagination import LimitOffsetPagination, remove_query_param, replace_query_param
+from rest_framework.response import Response
 
 from .permissions import add_collection_level_permissions
 
+# Query parameter name used to opt out of the total count in paginated list responses.
+# Pass ``?with_counts=false`` to skip the COUNT(*) query entirely on large tables.
+WITH_TOTAL_COUNT_PARAM = "with_counts"
+
 
 class LimitOffsetPaginationWithPermissions(LimitOffsetPagination):
+    """
+    LimitOffsetPagination with a precision cap on the total count.
+
+    The total ``count`` is exact for result sets up to
+    ``COUNT_PRECISION_THRESHOLD`` rows. Beyond that, counting the full set means
+    scanning a large slice of a big table on every page load, so the count is
+    capped: the response returns the threshold value with
+    ``count_is_exact: false``, which the UI renders as e.g. "10,000+". That
+    number is a lower bound, not the true total, so ``next`` / ``previous`` links
+    are computed from a one-extra-row probe rather than from ``count``.
+
+    Callers that don't need a total at all can pass ``?with_counts=false`` to
+    skip the count query entirely and receive ``count: null`` (with
+    ``count_is_exact: null``). ``next`` / ``previous`` still work via the probe.
+
+    Response fields:
+    - ``count``: the exact total, the precision cap (a lower bound), or null.
+    - ``count_is_exact``: true when ``count`` is exact, false when it is the
+      capped lower bound, null when counting was skipped.
+    """
+
+    # Sentinel returned by ``_get_capped_count`` when the result set is larger
+    # than the precision threshold, so the exact total is deliberately not run.
+    _OVER_CAP = object()
+
+    # Largest result set we count exactly. Past this the count query would scan
+    # an unbounded slice of a large table, so we cap precision instead.
+    COUNT_PRECISION_THRESHOLD = 10_000
+
+    # Per-request flag; the default is overwritten in ``paginate_queryset``.
+    count_is_exact = True
+
+    # Per-request flag set by ``_paginate_with_probe``. The class-level default
+    # keeps ``get_next_link`` from raising AttributeError if no probe has run.
+    _has_next = False
+
+    def paginate_queryset(self, queryset, request, view=None):
+        """
+        Return one page of ``queryset`` as a list, or None when ``get_limit``
+        returns None. Sets ``count`` and ``count_is_exact`` for the response.
+        """
+        self.request = request
+        self.limit = self.get_limit(request)
+        if self.limit is None:
+            return None
+        self.offset = self.get_offset(request)
+
+        if self._should_skip_count(request):
+            # Opt-out: no count at all. Probe one extra row for the next link.
+            self.count = None
+            self.count_is_exact = None
+            return self._paginate_with_probe(queryset)
+
+        capped = self._get_capped_count(queryset)
+        if capped is self._OVER_CAP:
+            # Over the precision cap: report the threshold as an approximate
+            # lower bound. It must not drive next/previous (the true total is
+            # higher), so fall back to the probe-based links.
+            self.count = self.COUNT_PRECISION_THRESHOLD
+            self.count_is_exact = False
+            return self._paginate_with_probe(queryset)
+
+        # Exact count.
+        self.count = capped
+        self.count_is_exact = True
+        if self.count > self.limit and self.template is not None:
+            self.display_page_controls = True
+        if self.count == 0 or self.offset > self.count:
+            return []
+        return list(queryset[self.offset : self.offset + self.limit])
+
+    def _paginate_with_probe(self, queryset):
+        """
+        Fetch the current page plus one extra row, record in ``_has_next``
+        whether that extra row exists, and return the page without it.
+        """
+        page = list(queryset[self.offset : self.offset + self.limit + 1])
+        self._has_next = len(page) > self.limit
+        return page[: self.limit]
+
+    def get_next_link(self):
+        # When the count is not exact (opt-out or over the cap) the total can't
+        # tell us whether a next page exists, so use the one-extra-row probe.
+        if not self.count_is_exact:
+            if not self._has_next:
+                return None
+            url = self.request.build_absolute_uri()
+            url = replace_query_param(url, self.limit_query_param, self.limit)
+            return replace_query_param(url, self.offset_query_param, self.offset + self.limit)
+        return super().get_next_link()
+
+    def get_previous_link(self):
+        # Previous link logic does not depend on the total count.
+        if not self.count_is_exact:
+            if self.offset <= 0:
+                return None
+            url = self.request.build_absolute_uri()
+            url = replace_query_param(url, self.limit_query_param, self.limit)
+            offset = max(0, self.offset - self.limit)
+            if offset == 0:
+                return remove_query_param(url, self.offset_query_param)
+            return replace_query_param(url, self.offset_query_param, offset)
+        return super().get_previous_link()
+
     def get_paginated_response(self, data):
         model = self._get_current_model()
         project = self._get_project()
-        paginated_response = super().get_paginated_response(data=data)
-        paginated_response.data = add_collection_level_permissions(
-            user=self.request.user, response_data=paginated_response.data, model=model, project=project
+        response = Response(
+            {
+                "count": self.count,
+                "count_is_exact": self.count_is_exact,
+                "next": self.get_next_link(),
+                "previous": self.get_previous_link(),
+                "results": data,
+            }
         )
-        return paginated_response
+        response.data = add_collection_level_permissions(
+            user=self.request.user, response_data=response.data, model=model, project=project
+        )
+        return response
+
+    def get_paginated_response_schema(self, schema):
+        paginated_schema = super().get_paginated_response_schema(schema)
+        # count is the exact total, the precision cap (a lower bound), or null
+        # when the caller passed with_counts=false.
+        paginated_schema["properties"]["count"]["nullable"] = True
+        paginated_schema["properties"]["count_is_exact"] = {
+            "type": "boolean",
+            "nullable": True,
+            "description": (
+                "True when `count` is exact; false when it is the precision cap "
+                '(a lower bound, render as e.g. "10,000+"); null when the count '
+                "was skipped via with_counts=false."
+            ),
+        }
+        return paginated_schema
+
+    def _count_queryset(self, queryset):
+        """
+        Return the queryset reduced to the cheapest form that still counts the
+        same rows: ordering removed and projection narrowed to the primary key.
+
+        Both reductions matter once the count is wrapped in a ``LIMIT`` for the
+        precision cap. An ``ORDER BY`` not served by an index forces a top-N
+        sort of the whole filtered set before the ``LIMIT`` can stop it, undoing
+        the early exit. And the list orderings annotate correlated subqueries
+        (e.g. ``last_processed`` on captures); an unsliced ``COUNT(*)`` drops
+        those automatically, but the slice would otherwise re-project them and
+        run the subquery for every scanned row. Counting ``values("pk")`` keeps
+        the COUNT over a bare primary-key scan. Neither reduction changes the
+        count, only its cost.
+
+        This is also the single seam a subclass overrides to count a different
+        way; the previous per-view ``get_count`` override is folded in here.
+        """
+        return queryset.order_by().values("pk")
+
+    def _get_capped_count(self, queryset):
+        """
+        Run a bounded COUNT that stops scanning after
+        ``COUNT_PRECISION_THRESHOLD`` rows. Returns the exact count when the
+        result set is within the cap, or the ``_OVER_CAP`` sentinel when it is
+        larger so the caller reports an approximate lower bound instead.
+
+        Django translates ``queryset...[:N].count()`` into::
+
+            SELECT COUNT(*) FROM (SELECT pk … LIMIT N) sub
+
+        so the scan stops after at most N matching rows and the cost is O(N)
+        regardless of total table size. See ``_count_queryset`` for why the
+        ordering and annotations are stripped first.
+
+        Objects without the queryset API (e.g. a plain list) are counted with
+        ``len()``, mirroring the fallback in DRF's ``get_count``.
+        """
+        threshold = self.COUNT_PRECISION_THRESHOLD
+        try:
+            # Fetch one extra row beyond the threshold so we can distinguish
+            # "exactly N rows" (exact count) from "more than N rows" (over the cap).
+            capped = self._count_queryset(queryset)[: threshold + 1].count()
+        except (AttributeError, TypeError):
+            # Not a queryset: report the exact length, no cap is applied.
+            return len(queryset)
+        if capped <= threshold:
+            return capped
+        return self._OVER_CAP
+
+    def _should_skip_count(self, request) -> bool:
+        """
+        Return True when the caller has explicitly opted out of the total count.
+
+        A missing or blank ``with_counts`` value keeps the default (count
+        included). Without the blank check ``BooleanField`` would clean ``""``
+        to False and ``?with_counts=`` would silently drop the count.
+        """
+        raw = request.query_params.get(WITH_TOTAL_COUNT_PARAM, None)
+        if raw is None or not raw.strip():
+            return False
+        try:
+            return not BooleanField(required=False).clean(raw.strip())
+        except ValidationError:
+            return False
 
     def _get_current_model(self):
         """
diff --git a/ami/main/api/views.py b/ami/main/api/views.py
index 6ad39a2e5..34eba250c 100644
--- a/ami/main/api/views.py
+++ b/ami/main/api/views.py
@@ -147,12 +147,9 @@ class DefaultReadOnlyViewSet(DefaultViewSetMixin, viewsets.ReadOnlyModelViewSet)
 
 class ProjectPagination(LimitOffsetPaginationWithPermissions):
     default_limit = 40
-
-    def get_count(self, queryset):
-        # The recent-activity orderings annotate correlated subqueries onto the
-        # queryset. They don't change the row count, so strip them (and ordering)
-        # before counting to keep the pagination COUNT query cheap.
-        return super().get_count(queryset.order_by().values("pk"))
+    # The recent-activity orderings annotate correlated subqueries onto the
+    # queryset; the base paginator's _count_queryset strips them (and ordering)
+    # before counting, so no get_count override is needed here.
 
 
 class ProjectViewSet(DefaultViewSet, ProjectMixin):
diff --git a/ami/main/tests.py b/ami/main/tests.py
index c9e3b0d8f..8b59b069a 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -6667,3 +6667,133 @@ def test_scores_and_logits_counted_in_sql_including_empty(self):
         row = next(c for c in self.admin.get_queryset(self._request()) if c.pk == clf.pk)
         self.assertEqual(row.scores_count, 3)
         self.assertEqual(row.logits_count, 0)
+
+
+class TestPaginationWithCounts(APITestCase):
+    """
+    Verify the precision cap and the ``with_counts`` opt-out on list endpoints.
+
+    By default ``count`` is exact with ``count_is_exact: true``. Once a result
+    set exceeds ``COUNT_PRECISION_THRESHOLD`` the count is capped to the
+    threshold (a lower bound) with ``count_is_exact: false`` so the UI can
+    render "N+". Callers that want no count at all pass ``with_counts=false``
+    and receive ``count: null`` (``count_is_exact: null``). In both non-exact
+    modes ``next`` / ``previous`` are computed from a one-extra-row probe.
+    """
+
+    def setUp(self) -> None:
+        """Create a test project with captures (2 nights, 5 images per night) and authenticate a superuser."""
+        project, deployment = setup_test_project()
+        create_captures(deployment=deployment, num_nights=2, images_per_night=5)
+        self.project = project
+        self.user = User.objects.create_user(  # type: ignore
+            email="pagination_test@insectai.org",
+            is_staff=True,
+            is_superuser=True,
+        )
+        self.client.force_authenticate(user=self.user)
+        return super().setUp()
+
+    def _captures_url(self, **params):
+        """Return the captures list URL scoped to the test project, with ``params`` appended as query string."""
+        from urllib.parse import urlencode
+
+        base = f"/api/v2/captures/?project_id={self.project.pk}"
+        if params:
+            base += "&" + urlencode(params)
+        return base
+
+    def test_default_response_includes_exact_integer_count(self):
+        """By default a small result set returns an exact integer count."""
+        response = self.client.get(self._captures_url(limit=5))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIsInstance(data["count"], int)
+        self.assertGreater(data["count"], 0)
+        self.assertTrue(data["count_is_exact"])
+
+    def test_with_counts_true_returns_exact_integer_count(self):
+        """Explicit with_counts=true on a small result set is also exact."""
+        response = self.client.get(self._captures_url(with_counts="true", limit=5))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIsInstance(data["count"], int)
+        self.assertGreater(data["count"], 0)
+        self.assertTrue(data["count_is_exact"])
+
+    def test_with_counts_false_returns_null_count(self):
+        """with_counts=false skips the count and returns count/count_is_exact null."""
+        response = self.client.get(self._captures_url(with_counts="false", limit=5))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIn("count", data)
+        self.assertIsNone(data["count"])
+        self.assertIsNone(data["count_is_exact"])
+        self.assertIn("results", data)
+
+    def test_with_counts_false_next_link_present_when_more_results(self):
+        """next link is returned even without count when more results exist."""
+        total = SourceImage.objects.filter(deployment__project=self.project).count()
+        limit = max(1, total - 1)
+        response = self.client.get(self._captures_url(with_counts="false", limit=limit))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIsNone(data["count"])
+        self.assertIsNotNone(data["next"])
+
+    def test_with_counts_false_next_link_absent_on_last_page(self):
+        """next is None when the current page is the last page."""
+        total = SourceImage.objects.filter(deployment__project=self.project).count()
+        response = self.client.get(self._captures_url(with_counts="false", limit=total))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIsNone(data["count"])
+        self.assertIsNone(data["next"])
+
+    def test_with_counts_false_previous_link_present_with_nonzero_offset(self):
+        """previous link is returned correctly without count."""
+        response = self.client.get(self._captures_url(with_counts="false", limit=2, offset=2))
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        self.assertIsNone(data["count"])
+        self.assertIsNotNone(data["previous"])
+
+    def test_count_is_capped_and_marked_inexact_over_threshold(self):
+        """
+        When the result set exceeds COUNT_PRECISION_THRESHOLD the count is
+        capped to the threshold (a lower bound) and flagged inexact, while
+        next/previous still work via the probe-based path.
+        """
+        from unittest.mock import patch
+
+        from ami.base.pagination import LimitOffsetPaginationWithPermissions
+
+        # Patch the threshold to 1 so even a second row trips the precision cap.
+        with patch.object(LimitOffsetPaginationWithPermissions, "COUNT_PRECISION_THRESHOLD", 1):
+            total = SourceImage.objects.filter(deployment__project=self.project).count()
+            self.assertGreater(total, 1, "Need at least 2 captures for this test")
+
+            response = self.client.get(self._captures_url(limit=1))
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+            data = response.json()
+            self.assertEqual(data["count"], 1, "count is capped to the threshold as a lower bound")
+            self.assertFalse(data["count_is_exact"], "count_is_exact must be false above the cap")
+            self.assertIsNotNone(data["next"], "next link must still be present")
+            self.assertIsNone(data["previous"])
+
+    def test_count_exact_at_threshold_boundary(self):
+        """A result set exactly at the threshold is still reported exactly."""
+        from unittest.mock import patch
+
+        from ami.base.pagination import LimitOffsetPaginationWithPermissions
+
+        total = SourceImage.objects.filter(deployment__project=self.project).count()
+        self.assertGreater(total, 1, "Need at least 2 captures for this test")
+
+        # Threshold == total: the count is within the cap, so it stays exact.
+        with patch.object(LimitOffsetPaginationWithPermissions, "COUNT_PRECISION_THRESHOLD", total):
+            response = self.client.get(self._captures_url(limit=1))
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+            data = response.json()
+            self.assertEqual(data["count"], total)
+            self.assertTrue(data["count_is_exact"])
diff --git a/ui/src/data-services/hooks/captures/useCaptures.ts b/ui/src/data-services/hooks/captures/useCaptures.ts
index 5fe5674c9..013ef9c56 100644
--- a/ui/src/data-services/hooks/captures/useCaptures.ts
+++ b/ui/src/data-services/hooks/captures/useCaptures.ts
@@ -14,6 +14,7 @@ export const useCaptures = (
   captures?: Capture[]
   userPermissions?: UserPermission[]
   total: number
+  totalIsExact: boolean
   isLoading: boolean
   isFetching: boolean
   error?: unknown
@@ -24,6 +25,7 @@ export const useCaptures = (
     results: ServerCapture[]
     user_permissions?: UserPermission[]
     count: number
+    count_is_exact?: boolean
   }>({
     queryKey: [API_ROUTES.CAPTURES, params],
     url: fetchUrl,
@@ -36,6 +38,7 @@ export const useCaptures = (
     captures,
     userPermissions: data?.user_permissions,
     total: data?.count ?? 0,
+    totalIsExact: data?.count_is_exact ??
true, isLoading, isFetching, error, diff --git a/ui/src/data-services/hooks/occurrences/useOccurrences.ts b/ui/src/data-services/hooks/occurrences/useOccurrences.ts index 0e32e4dbc..b13222d89 100644 --- a/ui/src/data-services/hooks/occurrences/useOccurrences.ts +++ b/ui/src/data-services/hooks/occurrences/useOccurrences.ts @@ -12,6 +12,7 @@ export const useOccurrences = ( ): { occurrences?: Occurrence[] total: number + totalIsExact: boolean isLoading: boolean isFetching: boolean error?: unknown @@ -21,6 +22,7 @@ export const useOccurrences = ( const { data, isLoading, isFetching, error } = useAuthorizedQuery<{ results: ServerOccurrence[] count: number + count_is_exact?: boolean }>({ queryKey: [API_ROUTES.OCCURRENCES, params], url: fetchUrl, @@ -34,6 +36,7 @@ export const useOccurrences = ( return { occurrences, total: data?.count ?? 0, + totalIsExact: data?.count_is_exact ?? true, isLoading, isFetching, error, diff --git a/ui/src/data-services/hooks/sessions/useSessions.ts b/ui/src/data-services/hooks/sessions/useSessions.ts index 4aa814c57..509ffdce5 100644 --- a/ui/src/data-services/hooks/sessions/useSessions.ts +++ b/ui/src/data-services/hooks/sessions/useSessions.ts @@ -12,6 +12,7 @@ export const useSessions = ( ): { sessions?: Session[] total: number + totalIsExact: boolean isLoading: boolean isFetching: boolean error?: unknown @@ -21,6 +22,7 @@ export const useSessions = ( const { data, isLoading, isFetching, error } = useAuthorizedQuery<{ results: ServerEvent[] count: number + count_is_exact?: boolean }>({ queryKey: [API_ROUTES.SESSIONS, params], url: fetchUrl, @@ -31,6 +33,7 @@ export const useSessions = ( return { sessions, total: data?.count ?? 0, + totalIsExact: data?.count_is_exact ?? 
true, isLoading, isFetching, error, diff --git a/ui/src/data-services/hooks/species/useSpecies.ts b/ui/src/data-services/hooks/species/useSpecies.ts index 27f5fbd5f..4c9bf297a 100644 --- a/ui/src/data-services/hooks/species/useSpecies.ts +++ b/ui/src/data-services/hooks/species/useSpecies.ts @@ -12,6 +12,7 @@ export const useSpecies = ( ): { species?: Species[] total: number + totalIsExact: boolean isLoading: boolean isFetching: boolean error?: unknown @@ -21,6 +22,7 @@ export const useSpecies = ( const { data, isLoading, isFetching, error } = useAuthorizedQuery<{ results: ServerSpecies[] count: number + count_is_exact?: boolean }>({ queryKey: [API_ROUTES.SPECIES, params], url: fetchUrl, @@ -31,6 +33,7 @@ export const useSpecies = ( return { species, total: data?.count ?? 0, + totalIsExact: data?.count_is_exact ?? true, isLoading, isFetching, error, diff --git a/ui/src/nova-ui-kit/components/pagination-bar/info-label/info-label.tsx b/ui/src/nova-ui-kit/components/pagination-bar/info-label/info-label.tsx index 82063b45c..18cfec25e 100644 --- a/ui/src/nova-ui-kit/components/pagination-bar/info-label/info-label.tsx +++ b/ui/src/nova-ui-kit/components/pagination-bar/info-label/info-label.tsx @@ -8,9 +8,16 @@ interface InfoLabelProps { perPage: number } total: number + // False when `total` is a capped lower bound rather than the exact count, so + // it renders as e.g. "10000+". Defaults to true (exact). + totalIsExact?: boolean } -export const InfoLabel = ({ pagination, total }: InfoLabelProps) => { +export const InfoLabel = ({ + pagination, + total, + totalIsExact = true, +}: InfoLabelProps) => { const minIndex = 0 const maxIndex = total - 1 const startIndex = getValueInRange({ @@ -29,7 +36,7 @@ export const InfoLabel = ({ pagination, total }: InfoLabelProps) => { {translate(STRING.MESSAGE_RESULT_RANGE, { start: startIndex + 1, end: endIndex + 1, - total, + total: totalIsExact ? 
total : `${total}+`, })} ) diff --git a/ui/src/nova-ui-kit/components/pagination-bar/pagination-bar.tsx b/ui/src/nova-ui-kit/components/pagination-bar/pagination-bar.tsx index e86a81d08..50235cb14 100644 --- a/ui/src/nova-ui-kit/components/pagination-bar/pagination-bar.tsx +++ b/ui/src/nova-ui-kit/components/pagination-bar/pagination-bar.tsx @@ -14,6 +14,11 @@ interface PaginationBarProps { } setPage: (page: number) => void total: number + // False when `total` is a capped lower bound (the server's precision cap). + // The info label then renders "N+"; the numbered pages still derive from the + // capped total, so pages beyond the cap are not reachable until the list moves + // to cursor pagination. Defaults to true (exact). + totalIsExact?: boolean } export const PaginationBar = ({ @@ -21,6 +26,7 @@ export const PaginationBar = ({ pagination, setPage, total, + totalIsExact = true, }: PaginationBarProps) => { const { page: currentPage, perPage } = pagination const numPages = Math.ceil(total / perPage) @@ -37,7 +43,11 @@ export const PaginationBar = ({ return (
- +