openedx · tbain · Mar 18, 2026 · Mar 23, 2026 · Mar 25, 2026 · Mar 26, 2026
diff --git a/src/openedx_tagging/models/base.py b/src/openedx_tagging/models/base.py
@@ -479,7 +479,7 @@ def get_filtered_tags(  # pylint: disable=too-many-positional-arguments
     def _get_filtered_tags_free_text(
         self,
         search_term: str | None,
-        include_counts: bool,
+        include_counts: bool,  # pylint: disable=unused-argument
     ) -> TagDataQuerySet:
         """
         Implementation of get_filtered_tags() for free text taxonomies.
@@ -499,16 +499,14 @@ def _get_filtered_tags_free_text(
             _id=Value(None, output_field=models.CharField()),
         )
         qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
-        if include_counts:
-            return qs.annotate(usage_count=models.Count("value"))
-        else:
-            return qs.distinct()  # type: ignore[return-value]
+
+        return qs.distinct()  # type: ignore[return-value]
 
     def _get_filtered_tags_one_level(
         self,
         parent_tag_value: str | None,
         search_term: str | None,
-        include_counts: bool,
+        include_counts: bool,  # pylint: disable=unused-argument
     ) -> TagDataQuerySet:
         """
         Implementation of get_filtered_tags() for closed taxonomies, where
@@ -531,24 +529,14 @@ def _get_filtered_tags_one_level(
         qs = qs.annotate(_id=F("id"))  # ID has an underscore to encourage use of 'value' rather than this internal ID
         qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
         qs = qs.order_by("value")
-        if include_counts:
-            # We need to include the count of how many times this tag is used to tag objects.
-            # You'd think we could just use:
-            #     qs = qs.annotate(usage_count=models.Count("objecttag__pk"))
-            # but that adds another join which starts creating a cross product and the children and usage_count become
-            # intertwined and multiplied with each other. So we use a subquery.
-            obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
-                # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
-                count=models.Func(F('id'), function='Count')
-            )
-            qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))
+
         return qs  # type: ignore[return-value]
 
     def _get_filtered_tags_deep(
         self,
         parent_tag_value: str | None,
         search_term: str | None,
-        include_counts: bool,
+        include_counts: bool,  # pylint: disable=unused-argument
         excluded_values: list[str] | None,
     ) -> TagDataQuerySet:
         """
@@ -615,17 +603,7 @@ def _get_filtered_tags_deep(
         # lineage is a case-insensitive column storing "Root\tParent\t...\tThisValue\t", so
         # ordering by it gives the tree sort order that we want.
         qs = qs.order_by("lineage")
-        if include_counts:
-            # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
-            obj_tags = (
-                ObjectTag.objects.filter(tag_id=models.OuterRef("pk"))
-                .order_by()
-                .annotate(
-                    # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
-                    count=models.Func(F("id"), function="Count")
-                )
-            )
-            qs = qs.annotate(usage_count=models.Subquery(obj_tags.values("count")))
+
         return qs  # type: ignore[return-value]
 
     def add_tag(

diff --git a/src/openedx_tagging/rest_api/v1/views.py b/src/openedx_tagging/rest_api/v1/views.py
@@ -3,6 +3,8 @@
 """
 from __future__ import annotations
 
+from collections import Counter, defaultdict
+
 from django.core import exceptions
 from django.db import models
 from django.http import Http404, HttpResponse
@@ -847,17 +849,54 @@ def get_queryset(self) -> TagDataQuerySet:
         )
         if depth == 1:
             # We're already returning just a single level. It will be paginated normally.
+            if include_counts:
+                return self._add_counts(results)
-                return self._add_counts(results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
-                return self._add_counts(results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
             return results
         elif full_depth_threshold and len(results) < full_depth_threshold:
             # We can load and display all the tags in this (sub)tree at once:
             self.pagination_class = DisabledTagsPagination
+            if include_counts:
+                return self._add_counts(results)
-                return self._add_counts(results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
-                return self._add_counts(results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
             return results
         else:
             # We had to do a deep query, but we will only return one level of results.
             # This is because the user did not request a deep response (via full_depth_threshold) or the result was too
             # large (larger than the threshold).
             # It will be paginated normally.
-            return results.filter(parent_value=parent_tag_value)
+            filtered_results = results.filter(parent_value=parent_tag_value)
+            if include_counts:
+                return self._add_counts(filtered_results)
-                return self._add_counts(filtered_results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
-                return self._add_counts(filtered_results)
+                results_with_counts = self._add_counts(results)
+                return results_with_counts
+
+            return filtered_results
+
+    def _add_counts(self, tag_data: TagDataQuerySet) -> TagDataQuerySet:
+        """
+        Add usage counts to a list of tag data dictionaries. For performance
+        reasons, we call this function with the list result of the
+        QuerySet so we can then add the counts in-memory rather than to a
+        QuerySet which would require a very expensive annotation to join the
+        in-memory data to the original QuerySet.
+        """
+
+        taxonomy = self.get_taxonomy()
+        object_tags = taxonomy.objecttag_set.values_list("object_id", "tag__lineage")
+        tag_counts: Counter[str] = Counter()
+        object_tag_lineage_seen: defaultdict[str, set] = defaultdict(set)
+
+        for object_id, tag_lineage in object_tags:
+            # split the lineages to get a dict of {tag.value: [lineages]}
+            lineage_tags = list(tag_lineage.split('\t')) if tag_lineage else []
+            # de-duplicate based on if the lineage is already 'seen' per object
+            unseen_tags = [t for t in lineage_tags if t not in object_tag_lineage_seen[object_id]]
+
+            tag_counts.update(unseen_tags)
+            object_tag_lineage_seen[object_id].update(unseen_tags)
+
+        # In-memory 'annotation'; this is faster than using annotate() on the QuerySet.
+        for row in tag_data:
+            row["usage_count"] = tag_counts.get(row["value"], 0)
+
+        return tag_data
 
     def post(self, request, *args, **kwargs):
         """

diff --git a/tests/openedx_tagging/test_api.py b/tests/openedx_tagging/test_api.py
@@ -752,53 +752,53 @@ def get_object_tags():
 
     @ddt.data(
         ("ChA", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "cha" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "cha" but a child does
+            "  Archaebacteria (children: 0)",
         ]),
         ("ar", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "ar" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
-            "Eukaryota (used: 0, children: 1)",
-            "  Animalia (used: 1, children: 2)",  # does not contain "ar" but a child does
-            "    Arthropoda (used: 1, children: 0)",
-            "    Cnidaria (used: 0, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "ar" but a child does
+            "  Archaebacteria (children: 0)",
+            "Eukaryota (children: 1)",
+            "  Animalia (children: 2)",  # does not contain "ar" but a child does
+            "    Arthropoda (children: 0)",
+            "    Cnidaria (children: 0)",
         ]),
         ("aE", [
-            "Archaea (used: 1, children: 2)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 1)",  # does not contain "ae" but a child does
-            "  Archaebacteria (used: 1, children: 0)",
-            "Eukaryota (used: 0, children: 1)",  # does not contain "ae" but a child does
-            "  Plantae (used: 1, children: 0)",
+            "Archaea (children: 2)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 1)",  # does not contain "ae" but a child does
+            "  Archaebacteria (children: 0)",
+            "Eukaryota (children: 1)",  # does not contain "ae" but a child does
+            "  Plantae (children: 0)",
         ]),
         ("a", [
-            "Archaea (used: 1, children: 3)",
-            "  DPANN (used: 0, children: 0)",
-            "  Euryarchaeida (used: 0, children: 0)",
-            "  Proteoarchaeota (used: 0, children: 0)",
-            "Bacteria (used: 0, children: 2)",
-            "  Archaebacteria (used: 1, children: 0)",
-            "  Eubacteria (used: 0, children: 0)",
-            "Eukaryota (used: 0, children: 4)",
-            "  Animalia (used: 1, children: 7)",
-            "    Arthropoda (used: 1, children: 0)",
-            "    Chordata (used: 0, children: 1)",
-            "      Mammalia (used: 0, children: 0)",
-            "    Cnidaria (used: 0, children: 0)",
-            "    Ctenophora (used: 0, children: 0)",
-            "    Gastrotrich (used: 1, children: 0)",
-            "    Placozoa (used: 1, children: 0)",
-            "    Porifera (used: 0, children: 0)",
-            "  Monera (used: 1, children: 0)",
-            "  Plantae (used: 1, children: 0)",
-            "  Protista (used: 0, children: 0)",
+            "Archaea (children: 3)",
+            "  DPANN (children: 0)",
+            "  Euryarchaeida (children: 0)",
+            "  Proteoarchaeota (children: 0)",
+            "Bacteria (children: 2)",
+            "  Archaebacteria (children: 0)",
+            "  Eubacteria (children: 0)",
+            "Eukaryota (children: 4)",
+            "  Animalia (children: 7)",
+            "    Arthropoda (children: 0)",
+            "    Chordata (children: 1)",
+            "      Mammalia (children: 0)",
+            "    Cnidaria (children: 0)",
+            "    Ctenophora (children: 0)",
+            "    Gastrotrich (children: 0)",
+            "    Placozoa (children: 0)",
+            "    Porifera (children: 0)",
+            "  Monera (children: 0)",
+            "  Plantae (children: 0)",
+            "  Protista (children: 0)",
         ]),
     )
     @ddt.unpack

diff --git a/tests/openedx_tagging/test_models.py b/tests/openedx_tagging/test_models.py
@@ -339,8 +339,8 @@ def test_get_child_tags_one_level(self) -> None:
         Test basic retrieval of tags one level below the "Eukaryota" root tag in
         the closed taxonomy, using get_filtered_tags(). With counts included.
         """
-        result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota", include_counts=True))
-        common_fields = {"depth": 1, "parent_value": "Eukaryota", "usage_count": 0, "external_id": None}
+        result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota"))
+        common_fields = {"depth": 1, "parent_value": "Eukaryota", "external_id": None}
         for r in result:
             del r["_id"]  # Remove the internal database IDs; they aren't interesting here and a other tests check them
         assert result == [
@@ -376,13 +376,12 @@ def test_get_depth_1_search_term(self) -> None:
         """
         Filter the root tags to only those that match a search term
         """
-        result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH", include_counts=True))
+        result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH"))
         assert result == [
             {
                 "value": "Archaea",
                 "child_count": 3,
                 "depth": 0,
-                "usage_count": 0,
                 "parent_value": None,
                 "external_id": None,
                 "_id": 2,  # These IDs are hard-coded in the test fixture file
@@ -501,13 +500,12 @@ def test_tags_deep(self) -> None:
         """
         Test getting a deep tag in the taxonomy
         """
-        result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata", include_counts=True))
+        result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata"))
         assert result == [
             {
                 "value": "Mammalia",
                 "parent_value": "Chordata",
                 "depth": 3,
-                "usage_count": 0,
                 "child_count": 0,
                 "external_id": None,
                 "_id": 21,  # These IDs are hard-coded in the test fixture file
@@ -540,29 +538,6 @@ def test_get_external_id(self) -> None:
         assert result[0]["value"] == "Bacteria"
         assert result[0]["external_id"] == "bct001"
 
-    def test_usage_count(self) -> None:
-        """
-        Test that the usage count in the results is right
-        """
-        api.tag_object(object_id="obj01", taxonomy=self.taxonomy, tags=["Bacteria"])
-        api.tag_object(object_id="obj02", taxonomy=self.taxonomy, tags=["Bacteria"])
-        api.tag_object(object_id="obj03", taxonomy=self.taxonomy, tags=["Bacteria"])
-        api.tag_object(object_id="obj04", taxonomy=self.taxonomy, tags=["Eubacteria"])
-        # Now the API should reflect these usage counts:
-        result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True))
-        assert result == [
-            "Bacteria (None) (used: 3, children: 2)",
-            "  Archaebacteria (Bacteria) (used: 0, children: 0)",
-            "  Eubacteria (Bacteria) (used: 1, children: 0)",
-        ]
-        # Same with depth=1, which uses a different query internally:
-        result1 = pretty_format_tags(
-            self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True, depth=1)
-        )
-        assert result1 == [
-            "Bacteria (None) (used: 3, children: 2)",
-        ]
-
     def test_tree_sort(self) -> None:
         """
         Verify that taxonomies can be sorted correctly in tree orer (case insensitive).
@@ -623,29 +598,13 @@ def test_get_filtered_tags(self):
             {"value": "triple", **common_fields},
         ]
 
-    def test_get_filtered_tags_with_count(self):
-        """
-        Test basic retrieval of all tags in the taxonomy.
-        Without counts included.
-        """
-        result = list(self.taxonomy.get_filtered_tags(include_counts=True))
-        common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None}
-        assert result == [
-            # These should appear in alphabetical order:
-            {"value": "double", "usage_count": 2, **common_fields},
-            {"value": "solo", "usage_count": 1, **common_fields},
-            {"value": "triple", "usage_count": 3, **common_fields},
-        ]
-
     def test_get_filtered_tags_num_queries(self):
         """
         Test that the number of queries used by get_filtered_tags() is fixed
         and not O(n) or worse.
         """
         with self.assertNumQueries(1):
             self.test_get_filtered_tags()
-        with self.assertNumQueries(1):
-            self.test_get_filtered_tags_with_count()
 
     def test_get_filtered_tags_with_search(self) -> None:
         """
@@ -655,8 +614,8 @@ def test_get_filtered_tags_with_search(self) -> None:
         common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None}
         assert result1 == [
             # These should appear in alphabetical order:
-            {"value": "double", "usage_count": 2, **common_fields},
-            {"value": "triple", "usage_count": 3, **common_fields},
+            {"value": "double", **common_fields},
+            {"value": "triple", **common_fields},
         ]
         # And it should be case insensitive:
         result2 = list(self.taxonomy.get_filtered_tags(search_term="LE", include_counts=True))