Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0473c0e
feat: #253 new branch to reduce noise; adding BE logic to implement c…
tbain Mar 18, 2026
2c0b959
feat: #253 Fixing API tests with regard to count logic changes
tbain Mar 23, 2026
8846805
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Mar 25, 2026
b01964b
feat: #253 Resolving merge conflict with upstream main branch
tbain Mar 26, 2026
a23afe8
feat: #253 Fixing pylint issues
tbain Mar 26, 2026
3df68ab
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
435808c
feat: #253 Fixing pycodestyle issue
tbain Mar 26, 2026
457313b
feat: #253 Addressing first round Code review comments
tbain Mar 27, 2026
a14c56e
feat: #253 fixing count depth issue and updating appropriate unit tests
tbain Mar 27, 2026
2055a07
feat: #253 fixing spelling errors in comments
tbain Mar 27, 2026
939f18c
feat: #253 Fixing code review comments; fix incorrect unit test & fil…
tbain Mar 30, 2026
5762c33
feat: #253 adjusting comments per code review feedback
tbain Apr 1, 2026
79be83a
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 1, 2026
c2f79d2
feat: #253 fixing unit tests to work with upstream updates
tbain Apr 1, 2026
c017e8a
feat: #253 Changing usage_count to being in-mem/python based instead …
tbain Apr 3, 2026
7d42793
feat: #253 Fixing code quality pipeline issues
tbain Apr 3, 2026
1e47967
feat: #253 Moving usage_count logic out to API level, cleaning up/add…
tbain Apr 8, 2026
a87c877
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 8, 2026
bbc3638
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
155d03b
feat: #253 Addressing code review comments
tbain Apr 13, 2026
9e03f6b
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
184a469
Merge branch 'main' of https://github.com/openedx/openedx-core into t…
tbain Apr 13, 2026
0880c20
feat: #253 Addressing code review comments
tbain Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 35 additions & 21 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import logging
import re
from typing import List, Self
from collections import Counter, defaultdict
from typing import List, Self, cast

from django.core.exceptions import ValidationError
from django.db import models
Expand Down Expand Up @@ -532,16 +533,8 @@ def _get_filtered_tags_one_level(
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
# You'd think we could just use:
# qs = qs.annotate(usage_count=models.Count("objecttag__pk"))
# but that adds another join which starts creating a cross product and the children and usage_count become
# intertwined and multiplied with each other. So we use a subquery.
obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F('id'), function='Count')
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count')))
return self._add_counts(list(cast(list, qs))) # type: ignore[return-value]

return qs # type: ignore[return-value]

def _get_filtered_tags_deep(
Expand Down Expand Up @@ -616,18 +609,39 @@ def _get_filtered_tags_deep(
# ordering by it gives the tree sort order that we want.
qs = qs.order_by("lineage")
if include_counts:
# Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
obj_tags = (
ObjectTag.objects.filter(tag_id=models.OuterRef("pk"))
.order_by()
.annotate(
# We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027
count=models.Func(F("id"), function="Count")
)
)
qs = qs.annotate(usage_count=models.Subquery(obj_tags.values("count")))
return self._add_counts(list(cast(list, qs))) # type: ignore[return-value]

return qs # type: ignore[return-value]

def _add_counts(self, tag_data: list[dict]) -> list[dict]:
    """
    Attach a ``usage_count`` entry to every tag dictionary in ``tag_data``.

    The counts are computed in Python rather than in SQL: the caller hands
    over the already-evaluated rows of its QuerySet, and this method runs
    two flat queries for this taxonomy (each tag's ``value``/``lineage``
    pair, and each object tag's ``_value``/``object_id`` pair) and tallies
    them in memory. This avoids adding a very expensive annotation to the
    original QuerySet.

    Every applied tag contributes one use to each non-empty, tab-separated
    entry of its tag's lineage (presumably the tag itself plus its
    ancestors -- confirm against the ``lineage`` field definition), but a
    given object is counted at most once per lineage entry.

    The dictionaries in ``tag_data`` are modified in place, and the same
    list is returned. Rows whose ``value`` was never counted get ``0``.
    """
    taxonomy_tags = self.tag_set.all().filter(taxonomy_id=self.id)
    lineage_by_value = dict(taxonomy_tags.values_list("value", "lineage"))
    applied_tags = self.objecttag_set.all().filter(taxonomy_id=self.id).values_list("_value", "object_id")

    usage: Counter[str] = Counter()
    counted_per_object: defaultdict[str, set] = defaultdict(set)

    # NOTE(review): the lookup is keyed on the object tag's stored ``_value``;
    # an applied value with no matching tag row contributes nothing.
    for applied_value, tagged_object in applied_tags:
        already_counted = counted_per_object[tagged_object]
        lineage = lineage_by_value.get(applied_value, "")
        # Keep only the lineage entries this object has not been counted for yet.
        newly_counted = [
            entry
            for entry in lineage.split('\t')
            if entry and entry not in already_counted
        ]
        usage.update(newly_counted)
        already_counted.update(newly_counted)

    # In-memory replacement for annotate(): write the tally onto each row.
    for row in tag_data:
        row["usage_count"] = usage.get(row["value"], 0)

    return tag_data

def add_tag(
self,
tag_value: str,
Expand Down
18 changes: 9 additions & 9 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,39 +755,39 @@ def get_object_tags():
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "cha" but a child does
"Bacteria (used: 1, children: 1)", # does not contain "cha" but a child does
" Archaebacteria (used: 1, children: 0)",
]),
("ar", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
"Bacteria (used: 1, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
"Eukaryota (used: 6, children: 1)",
" Animalia (used: 4, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
]),
("aE", [
"Archaea (used: 1, children: 2)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ae" but a child does
"Bacteria (used: 1, children: 1)", # does not contain "ae" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)", # does not contain "ae" but a child does
"Eukaryota (used: 6, children: 1)", # does not contain "ae" but a child does
" Plantae (used: 1, children: 0)",
]),
("a", [
"Archaea (used: 1, children: 3)",
" DPANN (used: 0, children: 0)",
" Euryarchaeida (used: 0, children: 0)",
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 2)",
"Bacteria (used: 1, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4)",
" Animalia (used: 1, children: 7)",
"Eukaryota (used: 6, children: 4)",
" Animalia (used: 4, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 1)",
" Mammalia (used: 0, children: 0)",
Expand Down
Loading