Skip to content

Commit 76c3671

Browse files
authored
Merge pull request #178 from oss-slu/feature/dynamic-health-score
Added dynamic health score calculation
2 parents 4412bc2 + 0f3ae7d commit 76c3671

3 files changed

Lines changed: 222 additions & 7 deletions

File tree

Backend/calculateHealth.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
generated by collectData.py. The health score is derived from multiple
66
signals related to repository activity and efficiency and is aligned
77
with the Actionable Steps and Healthy & Efficient Repository documents.
8+
9+
UPDATED VERSION:
10+
- Supports dynamic metric selection per repository
11+
- Uses only selected metrics for calculation (as required by Issue #173)
12+
- Handles edge cases like missing metrics or empty selections
813
"""
914

1015
import json
@@ -13,6 +18,19 @@
1318
# Configuration
1419
DATA_PATH = "Backend/test_data.json"
1520

21+
# Fake Database (simulates future DB / Okta integration)
22+
# Each repository selects its own metrics
23+
REPO_METRIC_SELECTION = {
24+
"lrda_mobile": ["issue_resolution", "commit_volume"],
25+
"oss_dev_analytics": [
26+
"issue_resolution",
27+
"issue_responsiveness",
28+
"pr_responsiveness",
29+
"contributor_activity",
30+
"commit_volume"
31+
]
32+
}
33+
1634
# Metric weights derived from Actionable Steps research
1735
# Weights are kept configurable so they can evolve as research changes
1836
WEIGHTS = {
@@ -23,7 +41,7 @@
2341
"commit_volume": 0.20
2442
}
2543

26-
# Scoring helper functions
44+
# Scoring helper functions (convert raw values -> 0-100 scale)
2745
def score_issue_resolution(rate):
2846
"""
2947
Convert issue resolution rate into a normalized score
@@ -225,23 +243,49 @@ def calculate_health_scores(data):
225243
# Computing raw metrics
226244
issue_rate, issue_time = calculate_issue_metrics(issues)
227245
pr_time = calculate_pr_metrics(prs)
228-
total_commmits, contributors = calculate_commit_metrics(commits)
246+
total_commits, contributors = calculate_commit_metrics(commits)
229247

230248
# Converting raw metrics into normalized scores
231249
metric_scores = {
232250
"issue_resolution": score_issue_resolution(issue_rate),
233251
"issue_responsiveness": score_time_hours(issue_time),
234252
"pr_responsiveness": score_time_hours(pr_time),
235253
"contributor_activity": score_contributors(contributors),
236-
"commit_volume": score_commits(total_commmits),
254+
"commit_volume": score_commits(total_commits),
237255
}
238256

239-
# Final weighted health score
240-
final_score = round(
241-
sum(metric_scores[m] * WEIGHTS[m] for m in WEIGHTS), 2
242-
)
257+
# Get selected metrics for this repo (dynamic behavior)
258+
selected_metrics = REPO_METRIC_SELECTION.get(repo_name, [])
259+
260+
# Edge Case 1: No metrics selected
261+
if len(selected_metrics) == 0:
262+
raise ValueError(f"No metrics selected for {repo_name}")
263+
264+
total_weighted = 0
265+
total_weights = 0
266+
267+
for metric in selected_metrics:
268+
value = metric_scores.get(metric)
269+
270+
# Skip missing values safely
271+
if value is None:
272+
continue
273+
274+
weight = WEIGHTS.get(metric, 1)
275+
276+
total_weighted += value * weight
277+
total_weights += weight
278+
279+
# Edge Case 2: All selected metrics had no valid data
280+
if total_weights == 0:
281+
raise ValueError(f"No valid metric data for {repo_name}")
282+
283+
# Final dynamic score
284+
final_score = round(total_weighted / total_weights, 2)
243285

286+
# Storing results
244287
results[repo_name] = {
288+
"selected_metrics": selected_metrics,
245289
"metrics": metric_scores,
246290
"final_score": final_score,
247291
"status": health_label(final_score)

Backend/test_calculateHealth.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# test_calculateHealth.py
2+
3+
import pytest
4+
from Backend.calculateHealth import calculate_health_scores
5+
6+
# Mock Data Functions
7+
# These simulate different repository scenarios
8+
9+
def sample_data_single_metric():
10+
"""
11+
Simulates a repo with basic data
12+
Used to test when only ONE metric is selected
13+
"""
14+
return {
15+
"lrda_mobile": {
16+
"issues": {
17+
"user1": {
18+
"total_issues_opened": 10,
19+
"total_issues_closed": "10",
20+
"average_time_to_close": 10
21+
}
22+
},
23+
"pull_requests": {},
24+
"commits": {
25+
"user1": {"total_commits": 100}
26+
}
27+
}
28+
}
29+
30+
def sample_data_multiple_metrics():
31+
"""
32+
Simulates a repo with multiple contributors and metrics
33+
Used to test weighted calculation across multiple metrics
34+
"""
35+
return {
36+
"oss_dev_analytics": {
37+
"issues": {
38+
"user1": {
39+
"total_issues_opened": 20,
40+
"total_issues_closed": "15",
41+
"average_time_to_close": 20
42+
}
43+
},
44+
"pull_requests": {
45+
"user1": {"average_time_to_merge": 30}
46+
},
47+
"commits": {
48+
"user1": {"total_commits": 200},
49+
"user2": {"total_commits": 50}
50+
}
51+
}
52+
}
53+
54+
def sample_data_missing_values():
55+
"""
56+
Simulates a repo with missing data
57+
Used to test how system handles empty inputs
58+
"""
59+
return {
60+
"lrda_mobile": {
61+
"issues": {},
62+
"pull_requests": {},
63+
"commits": {}
64+
}
65+
}
66+
67+
# Test Cases
68+
def test_single_metric():
69+
"""
70+
Test Case 1:
71+
If repo has only one selected metric,
72+
system should still return a valid score
73+
"""
74+
data = sample_data_single_metric()
75+
result = calculate_health_scores(data)
76+
77+
# Checking repo exists in result
78+
assert "lrda_mobile" in result
79+
80+
# Score should be valid (non-negative)
81+
assert result["lrda_mobile"]["final_score"] >= 0
82+
83+
def test_multiple_metrics():
84+
"""
85+
Test Case 2:
86+
Repo with multiple metrics should compute correctly
87+
"""
88+
data = sample_data_multiple_metrics()
89+
result = calculate_health_scores(data)
90+
91+
# Checking repo exists
92+
assert "oss_dev_analytics" in result
93+
94+
# Score should be valid
95+
assert result["oss_dev_analytics"]["final_score"] >= 0
96+
97+
def test_all_metrics():
98+
"""
99+
Test Case 3:
100+
Ensure score stays within valid range (0-100)
101+
"""
102+
data = sample_data_multiple_metrics()
103+
result = calculate_health_scores(data)
104+
105+
score = result["oss_dev_analytics"]["final_score"]
106+
107+
# Score should not exceed 100
108+
assert score <= 100
109+
110+
def test_no_metrics_selected():
111+
"""
112+
Test Case 4:
113+
If no metrics are selected for a repo,
114+
system should raise an error
115+
"""
116+
data = {
117+
"unknown_repo": {
118+
"issues": {},
119+
"pull_requests": {},
120+
"commits": {}
121+
}
122+
}
123+
124+
# Expect ValueError
125+
with pytest.raises(ValueError):
126+
calculate_health_scores(data)
127+
128+
def test_missing_values_handled():
129+
"""
130+
Test Case 5:
131+
If data is missing, system should not crash
132+
"""
133+
data = sample_data_missing_values()
134+
135+
try:
136+
result = calculate_health_scores(data)
137+
138+
# Should return a dictionary if handled correctly
139+
assert isinstance(result, dict)
140+
141+
except ValueError:
142+
# Acceptable behavior if no valid data exists
143+
assert True

docs/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,34 @@ OSS_Dev_Analytics provides a centralized dashboard for the **Open Source with SL
1616
- **Preprocessing:** Pandas is used to calculate "Time to Merge," "Lead Time," and "Velocity."
1717
- **Visualization:** A React + Vite frontend consumes the processed JSON to render interactive charts.
1818

19+
## Dynamic Health Score Calculation
20+
We implemented a dynamic health scoring system that adapts to each repository's selected metrics instead of using a fixed formula.
21+
22+
### How it works
23+
- Each repository defines a set of metrics to track
24+
- Only those selected metrics are used in the calculation
25+
- Metric values are normalized (0-100 scale)
26+
- A weighted average is computed based on selected metrics
27+
28+
### Formula
29+
Health Score = Σ(metric × weight) / Σ(weights)
30+
31+
### Example
32+
If a repository selects:
33+
- issue_resolution = 80
34+
- commit_volume = 60
35+
Then:
36+
Health Score = (80×0.25 + 60×0.20) / (0.25 + 0.20)
37+
38+
### Edge Cases
39+
- If no metrics are selected -> an error is returned
40+
- If some metric values are missing -> they are ignored
41+
- If no valid data is available -> an error is returned
42+
43+
### Note
44+
Currently, metric selection is simulated using a temporary in-memory configuration (fake database).
45+
This will later be replaced with database/Okta-based user configurations.
46+
1947
## 🤝 Getting Started
2048
Are you interested in contributing to our organization-wide analytics?
2149
1. Check out our [Onboarding Document](./Onboarding.md).

0 commit comments

Comments
 (0)