Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions integration_tests/tests/adapter_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any:
* Everything else is returned unchanged.
"""
if isinstance(val, Decimal):
# Match the Jinja macro: normalize, then int or float
# Match the Jinja macro: normalize, then int or float.
# Note: for special values (Infinity, NaN), as_tuple().exponent is a
# string ('F' or 'n'), not an int — convert those directly to float.
normalized = val.normalize()
if normalized.as_tuple().exponent >= 0:
exponent = normalized.as_tuple().exponent
if isinstance(exponent, str):
return float(normalized)
if exponent >= 0:
return int(normalized)
return float(normalized)
if isinstance(val, (datetime, date, time)):
Expand Down
80 changes: 80 additions & 0 deletions integration_tests/tests/test_dimension_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,83 @@ def test_anomaly_in_detection_period(
)

assert test_result["status"] == expected_status


def test_dimension_anomalies_alert_description_few_failures(
    test_id: str, dbt_project: DbtProject
):
    """Verify the per-value description when only a few dimension values fail.

    Three superheroes each get a single row on every training date and ten
    rows on the test date, so each of them spikes. With no more than five
    failing values, the description is expected to mention every failing
    value and to omit the aggregated "dimension values are anomalous"
    summary.
    """
    yesterday = datetime.utcnow().date() - timedelta(1)
    test_date, *training_dates = generate_dates(base_date=yesterday)
    test_day = test_date.strftime(DATE_FORMAT)

    spiking_heroes = ["Batman", "Superman", "Spiderman"]

    # Test-date rows first (10 per hero), then one row per hero for each
    # training date.
    rows: List[Dict[str, Any]] = []
    for hero in spiking_heroes:
        rows.extend(
            {TIMESTAMP_COLUMN: test_day, "superhero": hero} for _ in range(10)
        )
    for training_date in training_dates:
        training_day = training_date.strftime(DATE_FORMAT)
        for hero in spiking_heroes:
            rows.append({TIMESTAMP_COLUMN: training_day, "superhero": hero})

    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        {
            "timestamp_column": TIMESTAMP_COLUMN,
            "dimensions": ["superhero"],
            "sensitivity": 2,
        },
        data=rows,
    )
    assert result["status"] == "fail"

    description = result["test_results_description"]
    # Every failing dimension value must be named individually.
    for hero in spiking_heroes:
        assert hero in description, f"Expected '{hero}' in description: {description}"
    # The many-failures summary must not be used for this small set.
    assert "dimension values are anomalous" not in description


def test_dimension_anomalies_alert_description_many_failures(
    test_id: str, dbt_project: DbtProject
):
    """Verify the summarized description when many dimension values fail.

    Six superheroes each get a single row on every training date and ten
    rows on the test date, so each of them spikes. With more than five
    failing values, the description is expected to contain the aggregated
    "dimension values are anomalous" summary.
    """
    yesterday = datetime.utcnow().date() - timedelta(1)
    test_date, *training_dates = generate_dates(base_date=yesterday)
    test_day = test_date.strftime(DATE_FORMAT)

    # Six values, one more than the five-value threshold.
    spiking_heroes = ["Batman", "Superman", "Spiderman", "IronMan", "Thor", "Hulk"]

    # Test-date rows first (10 per hero), then one row per hero for each
    # training date.
    rows: List[Dict[str, Any]] = []
    for hero in spiking_heroes:
        rows.extend(
            {TIMESTAMP_COLUMN: test_day, "superhero": hero} for _ in range(10)
        )
    for training_date in training_dates:
        training_day = training_date.strftime(DATE_FORMAT)
        for hero in spiking_heroes:
            rows.append({TIMESTAMP_COLUMN: training_day, "superhero": hero})

    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        {
            "timestamp_column": TIMESTAMP_COLUMN,
            "dimensions": ["superhero"],
            "sensitivity": 2,
        },
        data=rows,
    )
    assert result["status"] == "fail"

    description = result["test_results_description"]
    # The count summary is expected instead of per-value details.
    assert (
        "dimension values are anomalous" in description
    ), f"Expected summary message in description: {description}"
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,37 @@
and upper(column_name) = upper({{ elementary.const_as_string(column_name) }})
{%- endif %}
{%- endset -%}
{% set test_results_description %}
{% if rows_with_score %}
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }}
{% else %}
Not enough data to calculate anomaly score.
{% endif %}
{% endset %}
{% set failures = namespace(data=0) %}
{% set filtered_anomaly_scores_rows = [] %}
{% set anomalous_rows = [] %}
{% for row in anomaly_scores_rows %}
{% if row.anomaly_score is not none %}
{% do filtered_anomaly_scores_rows.append(row) %}
{% if row.is_anomalous %}
{% set failures.data = failures.data + 1 %}
{% do anomalous_rows.append(row) %}
{% endif %}
{% endif %}
{% endfor %}
{%- set max_dimension_alerts = 5 -%}
{% set test_results_description %}
{%- if rows_with_score -%}
{%- set sample_row = rows_with_score[-1] -%}
{%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%}
{%- if row_dimension is not none and anomalous_rows | length > 0 -%}
{%- if anomalous_rows | length > max_dimension_alerts -%}
{%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%}
{{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more.
{%- else -%}
{% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %}
{%- endif -%}
{%- else -%}
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }}
{%- endif -%}
{%- else -%}
Not enough data to calculate anomaly score.
{%- endif -%}
{% endset %}
{% set test_result_dict = {
"id": elementary.insensitive_get_dict_value(latest_row, "id"),
"data_issue_id": elementary.insensitive_get_dict_value(
Expand Down
Loading