Skip to content

Commit 76c3671

Browse files
authored
Merge pull request #178 from oss-slu/feature/dynamic-health-score
Added dynamic health score calculation
2 parents 4412bc2 + 0f3ae7d commit 76c3671

3 files changed

Lines changed: 222 additions & 7 deletions

File tree

Backend/calculateHealth.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
generated by collectData.py. The health score is derived from multiple
66
signals related to repository activity and efficiency and is aligned
77
with the Actionable Steps and Healthy & Efficient Repository documents.
8+
9+
UPDATED VERSION:
10+
- Supports dynamic metric selection per repository
11+
- Uses only selected metrics for calculation (as required by Issue #173)
12+
- Handles edge cases like missing metrics or empty selections
813
"""
914

1015
import json
@@ -13,6 +18,19 @@
1318
# Configuration
1419
DATA_PATH = "Backend/test_data.json"
1520

21+
# Fake Database (simulates future DB / Okta integration)
22+
# Each repository selects its own metrics
23+
REPO_METRIC_SELECTION = {
24+
"lrda_mobile": ["issue_resolution", "commit_volume"],
25+
"oss_dev_analytics": [
26+
"issue_resolution",
27+
"issue_responsiveness",
28+
"pr_responsiveness",
29+
"contributor_activity",
30+
"commit_volume"
31+
]
32+
}
33+
1634
# Metric weights derived from Actionable Steps research
1735
# Weights are kept configurable so they can evolve as research changes
1836
WEIGHTS = {
@@ -23,7 +41,7 @@
2341
"commit_volume": 0.20
2442
}
2543

26-
# Scoring helper functions
44+
# Scoring helper functions (convert raw values -> 0-100 scale)
2745
def score_issue_resolution(rate):
2846
"""
2947
Convert issue resolution rate into a normalized score
@@ -225,23 +243,49 @@ def calculate_health_scores(data):
225243
# Computing raw metrics
226244
issue_rate, issue_time = calculate_issue_metrics(issues)
227245
pr_time = calculate_pr_metrics(prs)
228-
total_commmits, contributors = calculate_commit_metrics(commits)
246+
total_commits, contributors = calculate_commit_metrics(commits)
229247

230248
# Converting raw metrics into normalized scores
231249
metric_scores = {
232250
"issue_resolution": score_issue_resolution(issue_rate),
233251
"issue_responsiveness": score_time_hours(issue_time),
234252
"pr_responsiveness": score_time_hours(pr_time),
235253
"contributor_activity": score_contributors(contributors),
236-
"commit_volume": score_commits(total_commmits),
254+
"commit_volume": score_commits(total_commits),
237255
}
238256

239-
# Final weighted health score
240-
final_score = round(
241-
sum(metric_scores[m] * WEIGHTS[m] for m in WEIGHTS), 2
242-
)
257+
# Get selected metrics for this repo (dynamic behavior)
258+
selected_metrics = REPO_METRIC_SELECTION.get(repo_name, [])
259+
260+
# Edge Case 1: No metrics selected
261+
if len(selected_metrics) == 0:
262+
raise ValueError(f"No metrics selected for {repo_name}")
263+
264+
total_weighted = 0
265+
total_weights = 0
266+
267+
for metric in selected_metrics:
268+
value = metric_scores.get(metric)
269+
270+
# Skip missing values safely
271+
if value is None:
272+
continue
273+
274+
weight = WEIGHTS.get(metric, 1)
275+
276+
total_weighted += value * weight
277+
total_weights += weight
278+
279+
# Edge Case 2: All selected metrics had no valid data
280+
if total_weights == 0:
281+
raise ValueError(f"No valid metric data for {repo_name}")
282+
283+
# Final dynamic score
284+
final_score = round(total_weighted / total_weights, 2)
243285

286+
# Storing results
244287
results[repo_name] = {
288+
"selected_metrics": selected_metrics,
245289
"metrics": metric_scores,
246290
"final_score": final_score,
247291
"status": health_label(final_score)

Backend/test_calculateHealth.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# test_calculateHealth.py
2+
3+
import pytest
4+
from Backend.calculateHealth import calculate_health_scores
5+
6+
# Mock Data Functions
7+
# These simulate different repository scenarios
8+
9+
def sample_data_single_metric():
10+
"""
11+
Simulates a repo with basic data
12+
Used to test when only ONE metric is selected
13+
"""
14+
return {
15+
"lrda_mobile": {
16+
"issues": {
17+
"user1": {
18+
"total_issues_opened": 10,
19+
"total_issues_closed": "10",
20+
"average_time_to_close": 10
21+
}
22+
},
23+
"pull_requests": {},
24+
"commits": {
25+
"user1": {"total_commits": 100}
26+
}
27+
}
28+
}
29+
30+
def sample_data_multiple_metrics():
31+
"""
32+
Simulates a repo with multiple contributors and metrics
33+
Used to test weighted calculation across multiple metrics
34+
"""
35+
return {
36+
"oss_dev_analytics": {
37+
"issues": {
38+
"user1": {
39+
"total_issues_opened": 20,
40+
"total_issues_closed": "15",
41+
"average_time_to_close": 20
42+
}
43+
},
44+
"pull_requests": {
45+
"user1": {"average_time_to_merge": 30}
46+
},
47+
"commits": {
48+
"user1": {"total_commits": 200},
49+
"user2": {"total_commits": 50}
50+
}
51+
}
52+
}
53+
54+
def sample_data_missing_values():
55+
"""
56+
Simulates a repo with missing data
57+
Used to test how system handles empty inputs
58+
"""
59+
return {
60+
"lrda_mobile": {
61+
"issues": {},
62+
"pull_requests": {},
63+
"commits": {}
64+
}
65+
}
66+
67+
# Test Cases
68+
def test_single_metric():
69+
"""
70+
Test Case 1:
71+
If repo has only one selected metric,
72+
system should still return a valid score
73+
"""
74+
data = sample_data_single_metric()
75+
result = calculate_health_scores(data)
76+
77+
# Checking repo exists in result
78+
assert "lrda_mobile" in result
79+
80+
# Score should be valid (non-negative)
81+
assert result["lrda_mobile"]["final_score"] >= 0
82+
83+
def test_multiple_metrics():
84+
"""
85+
Test Case 2:
86+
Repo with multiple metrics should compute correctly
87+
"""
88+
data = sample_data_multiple_metrics()
89+
result = calculate_health_scores(data)
90+
91+
# Checking repo exists
92+
assert "oss_dev_analytics" in result
93+
94+
# Score should be valid
95+
assert result["oss_dev_analytics"]["final_score"] >= 0
96+
97+
def test_all_metrics():
98+
"""
99+
Test Case 3:
100+
Ensure score stays within valid range (0-100)
101+
"""
102+
data = sample_data_multiple_metrics()
103+
result = calculate_health_scores(data)
104+
105+
score = result["oss_dev_analytics"]["final_score"]
106+
107+
# Score should not exceed 100
108+
assert score <= 100
109+
110+
def test_no_metrics_selected():
111+
"""
112+
Test Case 4:
113+
If no metrics are selected for a repo,
114+
system should raise an error
115+
"""
116+
data = {
117+
"unknown_repo": {
118+
"issues": {},
119+
"pull_requests": {},
120+
"commits": {}
121+
}
122+
}
123+
124+
# Expect ValueError
125+
with pytest.raises(ValueError):
126+
calculate_health_scores(data)
127+
128+
def test_missing_values_handled():
129+
"""
130+
Test Case 5:
131+
If data is missing, system should not crash
132+
"""
133+
data = sample_data_missing_values()
134+
135+
try:
136+
result = calculate_health_scores(data)
137+
138+
# Should return a dictionary if handled correctly
139+
assert isinstance(result, dict)
140+
141+
except ValueError:
142+
# Acceptable behavior if no valid data exists
143+
assert True

docs/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,34 @@ OSS_Dev_Analytics provides a centralized dashboard for the **Open Source with SL
1616
- **Preprocessing:** Pandas is used to calculate "Time to Merge," "Lead Time," and "Velocity."
1717
- **Visualization:** A React + Vite frontend consumes the processed JSON to render interactive charts.
1818

19+
## Dynamic Health Score Calculation
20+
We implemented a dynamic health scoring system that adapts to each repository's selected metrics instead of using a fixed formula.
21+
22+
### How it works
23+
- Each repository defines a set of metrics to track
24+
- Only those selected metrics are used in the calculation
25+
- Metric values are normalized (0-100 scale)
26+
- A weighted average is computed based on selected metrics
27+
28+
### Formula
29+
Health Score = Σ(metric × weight) / Σ(weights)
30+
31+
### Example
32+
If a repository selects:
33+
- issue_resolution = 80
34+
- commit_volume = 60
35+
Then:
36+
Health Score = (80×0.25 + 60×0.20) / (0.25 + 0.20)
37+
38+
### Edge Cases
39+
- If no metrics are selected -> an error is returned
40+
- If some metric values are missing -> they are ignored
41+
- If no valid data is available -> an error is returned
42+
43+
### Note
44+
Currently, metric selection is simulated using a temporary in-memory configuration (fake database).
45+
This will later be replaced with database/Okta-based user configurations.
46+
1947
## 🤝 Getting Started
2048
Are you interested in contributing to our organization-wide analytics?
2149
1. Check out our [Onboarding Document](./Onboarding.md).

0 commit comments

Comments
 (0)