diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f5e019c..2ec486c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,3 +33,4 @@ jobs: - name: Run tests run: pytest + diff --git a/README.md b/README.md index 4767906..6c3e748 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This package is aimed to be a one-stop-shop for statistical testing in machine learning when it comes to evaluating models on a test set and comparing whether our *improved* model is really beating the baseline. That is, we cover the following very typical use-case in machine learning: ![usecase](docs/source/_static/usecase.png) -Currently, we support the cases of classification, regresson, and semantic segmentation. We do not yet support the significance of ranking, as well as grouped data. It is coming in the future releases. +Currently, we support the cases of classification, regresson, and semantic segmentation. ## In practice Install from PyPI: diff --git a/docs/source/index.rst b/docs/source/index.rst index 7536194..c87852d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,7 +11,7 @@ About Statistical Model Comparison with Bootstrap (STAMBO) focuses on statistically sound comparisons between models and samples by implementing -the one-tailed bootstrap hypothesis tests: +the two-tailed bootstrap hypothesis tests: .. 
figure:: /_static/banner.png :alt: stambo banner diff --git a/notebooks/Classification.ipynb b/notebooks/Classification.ipynb index 58c9872..0a6e571 100644 --- a/notebooks/Classification.ipynb +++ b/notebooks/Classification.ipynb @@ -32,7 +32,7 @@ { "data": { "text/plain": [ - "'0.1.5'" + "'0.1.6'" ] }, "execution_count": 1, @@ -147,7 +147,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2ac3e8d1720c49719e4b286464de2da1", + "model_id": "94d13ceeeac94889877ef71f603340a6", "version_major": 2, "version_minor": 0 }, @@ -180,16 +180,41 @@ { "data": { "text/plain": [ - "{'ROCAUC': array([0.05494505, 0.01607463, 0.00116143, 0.03622366, 0.97275219,\n", - " 0.94904937, 0.99252755, 0.98882682, 0.97478143, 0.99825874]),\n", - " 'AP': array([0.02797203, 0.02314431, 0.00502882, 0.04678529, 0.9689868 ,\n", - " 0.94193877, 0.99255722, 0.99213111, 0.9810929 , 0.99893794]),\n", - " 'QKappa': array([ 0.92007992, -0.0320894 , -0.07893126, 0.00764438, 0.90810898,\n", - " 0.85387269, 0.95589713, 0.87601958, 0.81299213, 0.93027112]),\n", - " 'BACC': array([ 0.93906094, -0.02079161, -0.04718104, 0.00252861, 0.94531991,\n", - " 0.91380672, 0.97414673, 0.9245283 , 0.88886054, 0.95691282]),\n", - " 'MCC': array([ 0.90709291, -0.02795235, -0.07123844, 0.0096027 , 0.91078326,\n", - " 0.86011542, 0.95680034, 0.88283091, 0.82759216, 0.93254094])}" + "{'ROCAUC': {'p_value': 0.10989010989010989,\n", + " 'diff': 0.016074628438916494,\n", + " 'ci_es': (0.0011614303013786215, 0.03622365794387381),\n", + " 'ci_s1': (0.9490493728705037, 0.9925275462036619),\n", + " 'ci_s2': (0.9747814271953842, 0.9982587433114521),\n", + " 'emp_s1': 0.9727521872035416,\n", + " 'emp_s2': 0.9888268156424581},\n", + " 'AP': {'p_value': 0.055944055944055944,\n", + " 'diff': 0.023144311990304867,\n", + " 'ci_es': (0.005028817149312964, 0.04678528920344301),\n", + " 'ci_s1': (0.941938772504055, 0.992557217298247),\n", + " 'ci_s2': (0.9810928996823938, 0.9989379380439234),\n", + " 'emp_s1': 
0.9689867972199498,\n", + " 'emp_s2': 0.9921311092102547},\n", + " 'QKappa': {'p_value': 0.16183816183816183,\n", + " 'diff': -0.03208940366959201,\n", + " 'ci_es': (-0.0789312628900082, 0.007644376319408607),\n", + " 'ci_s1': (0.8538726858185821, 0.9558971346535168),\n", + " 'ci_s2': (0.812992125984252, 0.9302711162834225),\n", + " 'emp_s1': 0.9081089795260358,\n", + " 'emp_s2': 0.8760195758564437},\n", + " 'BACC': {'p_value': 0.12387612387612387,\n", + " 'diff': -0.02079160957099191,\n", + " 'ci_es': (-0.047181042228212136, 0.0025286105738232976),\n", + " 'ci_s1': (0.9138067241897886, 0.9741467276565798),\n", + " 'ci_s2': (0.888860544217687, 0.9569128171763175),\n", + " 'emp_s1': 0.9453199114577844,\n", + " 'emp_s2': 0.9245283018867925},\n", + " 'MCC': {'p_value': 0.1878121878121878,\n", + " 'diff': -0.0279523458715083,\n", + " 'ci_es': (-0.07123843522834138, 0.009602702001584999),\n", + " 'ci_s1': (0.8601154159583538, 0.9568003358464806),\n", + " 'ci_s2': (0.8275921636811222, 0.932540941432413),\n", + " 'emp_s1': 0.9107832588440067,\n", + " 'emp_s2': 0.8828309129724984}}" ] }, "execution_count": 5, @@ -229,7 +254,7 @@ "\\midrule\n", "Effect size & $0.02$ [$0.00$-$0.04]$ & $0.02$ [$0.01$-$0.05]$ & $-0.03$ [$-0.08$-$0.01]$ & $-0.02$ [$-0.05$-$0.00]$ & $-0.03$ [$-0.07$-$0.01]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.05$ & $0.03$ & $0.92$ & $0.94$ & $0.91$ \\\\ \n", + "$p$-value & $0.11$ & $0.06$ & $0.16$ & $0.12$ & $0.19$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -278,14 +303,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "afd3e34e-4d18-4045-a49f-121a1b178551", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "18a9e339d56b40eaa9ce43e50cd7c5ed", + "model_id": "cc4126fdca934dc09f3f52f6c2185786", "version_major": 2, "version_minor": 0 }, @@ -304,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": 
"10c6e6ab-787f-48b7-98f5-48de84564d94", "metadata": {}, "outputs": [ @@ -322,7 +347,7 @@ "\\midrule\n", "Effect size & $0.02$ [$0.00$-$0.04]$ & $0.02$ [$0.01$-$0.05]$ & $-0.00$ [$-0.01$-$0.01]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.05$ & $0.03$ & $0.52$ \\\\ \n", + "$p$-value & $0.11$ & $0.06$ & $0.96$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -334,23 +359,37 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "16f56d4c-114f-4701-80fb-783c1b3d1207", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ROCAUC': array([0.05494505, 0.01607463, 0.00116143, 0.03622366, 0.97275219,\n", - " 0.94904937, 0.99252755, 0.98882682, 0.97478143, 0.99825874]),\n", - " 'AP': array([0.02797203, 0.02314431, 0.00502882, 0.04678529, 0.9689868 ,\n", - " 0.94193877, 0.99255722, 0.99213111, 0.9810929 , 0.99893794]),\n", - " 'F2Score': array([ 5.21478521e-01, -9.88531818e-04, -1.01705787e-02, 1.07904249e-02,\n", - " 9.83425414e-01, 9.70317291e-01, 9.93377483e-01, 9.82436883e-01,\n", - " 9.72540046e-01, 9.90712074e-01])}" + "{'ROCAUC': {'p_value': 0.10989010989010989,\n", + " 'diff': 0.016074628438916494,\n", + " 'ci_es': (0.0011614303013786215, 0.03622365794387381),\n", + " 'ci_s1': (0.9490493728705037, 0.9925275462036619),\n", + " 'ci_s2': (0.9747814271953842, 0.9982587433114521),\n", + " 'emp_s1': 0.9727521872035416,\n", + " 'emp_s2': 0.9888268156424581},\n", + " 'AP': {'p_value': 0.055944055944055944,\n", + " 'diff': 0.023144311990304867,\n", + " 'ci_es': (0.005028817149312964, 0.04678528920344301),\n", + " 'ci_s1': (0.941938772504055, 0.992557217298247),\n", + " 'ci_s2': (0.9810928996823938, 0.9989379380439234),\n", + " 'emp_s1': 0.9689867972199498,\n", + " 'emp_s2': 0.9921311092102547},\n", + " 'F2Score': {'p_value': 0.9590409590409591,\n", + " 'diff': -0.000988531817988858,\n", + " 'ci_es': (-0.01017057865486726, 0.010790424857444192),\n", + " 'ci_s1': (0.970317290652865, 0.9933774834437086),\n", + " 'ci_s2': 
(0.9725400457665904, 0.9907120743034056),\n", + " 'emp_s1': 0.9834254143646409,\n", + " 'emp_s2': 0.9824368825466521}}" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -361,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "86c223df-2833-42a2-9b50-fae76accb6d4", "metadata": {}, "outputs": [ @@ -379,7 +418,7 @@ "\\midrule\n", "Effect size & $0.02$ [$0.00$-$0.04]$ & $0.02$ [$0.01$-$0.05]$ & $-0.00$ [$-0.01$-$0.01]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.05$ & $0.03$ & $0.52$ \\\\ \n", + "$p$-value & $0.11$ & $0.06$ & $0.96$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -388,14 +427,6 @@ "source": [ "print(stambo.to_latex(testing_result, m1_name=\"kNN\", m2_name=\"LR\"))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cca7848", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/Classification_non_iid.ipynb b/notebooks/Classification_non_iid.ipynb index c5f2e1b..d5addbe 100644 --- a/notebooks/Classification_non_iid.ipynb +++ b/notebooks/Classification_non_iid.ipynb @@ -28,7 +28,7 @@ { "data": { "text/plain": [ - "'0.1.5'" + "'0.1.6'" ] }, "execution_count": 1, @@ -99,7 +99,7 @@ "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWgAAAFfCAYAAABjmlbAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAP6lJREFUeJztnQt0HdV57z/puDYv21gPsGzJNiQkNIFQEjChRDd2zGraQpeIMBBjch1IyU1iQALsNqQUo9uyTB4FOZSwCqRAF7GNbNkhKwkNmMpGKXZ5pLTAvYAN9rUsCz8Tg51YREdz1zdHI58zmsfeM3vm7D3n/1trfHyO5szZM2fOf7759veosizLIgAAANpRXe4BAAAA8AYCDQAAmgKBBgAATYFAAwCApkCgAQBAUyDQAACgKRBoAADQlHGkMcPDw7R7926aOHEiVVVVlXs4AAAQG049ef/992natGlUXV1trkCzODc1NZV7GAAAoJy+vj5qbGw0V6DZcnZ2ZNKkSeUeDgAAxOa9996zDU9H34wVaMetweIMgQYAZAkRty0mCQEAQFMg0AAAoCkQaAAA0BQINAAAaAoEGgAANAUCDQAAmqJ1mB0AAKgmnyfq7SUaGCBqaCBqbibK5UhLINAAgIph3TqitjaiXbuOvcbJfCtWELW2knbAxQEAqBhxnj+/VJyZ/v7C6/x33YBAAwAqwq3R1saFisb+zXmtvb2wnk5AoAEAmae3d6zl7Bbpvr7CejoBgQYAZJ6BAbXrpQUmCQEAmY+WaGhQu15awIIGACiHJ9xmzSKaO5fo6qsLj/y8XBNxzc2FaA2/AnL8Opee5/V0AgINAMh8tEQuVwilY9wi7Tzv7NQvHjpxge7v76drrrmGamtr6fjjj6ezzz6bXnrppaQ/FgBQBnSOlmhtJVq7lmj69NLX2bLm13WMg07UB/3rX/+aLrroIpo7dy499dRTVF9fT1u3bqUpU6Yk+bEAAAOiJebModRpbSVqadHHN15Wgf72t79tt3Z55JFHRl877bTTkvxIAEAZMSFaIpcrz8VBOxfHT37yEzrvvPPoiiuuoFNOOYXOPfdceuihh3zXHxwctPt1FS8AAHMwNVqiIgX6nXfeoQceeIDOOOMM+sUvfkFf//rX6aabbqLHHnvMc/3ly5fT5MmTRxd09AbALNKIlsjniTZuJFq1qvCoW/afSqosy8udr4bx48fbFvTzzz8/+hoL9IsvvkibN2/2tKB5cXe/PXToEJrGAmBYFAdTrC6OaMeZkFtnWLEjL1jX2AAV0bVELeiGhgb62Mc+VvLaH/7hH9LOnTs9158wYcJoB2908gbATJKKllgXIXzPdGs70UlCjuB48803S1576623aObMmUl+LACgzKiOlsiHhO+xdc7he/yZzmdkwdomK0FeeOEFa9y4cdZdd91lbd261frRj35knXDCCdbjjz8u9P5Dhw7x12E/AgAql54eluHwhddjurstq6pq7N/5NV747+VCRtcSdXGcf/75tH79elq1ahWdddZZ9Hd/93fU2dlJCxcuTPJjAQAVHL6X1zhZRrtiSZdeeqm9AABAGuF7vZony8iAWhwAgEyF7w0oSpbRYYIR5UYBAEaUK73nHqKrriqIsVf4nlPsSEWyjC4TjInGQacZLwgAyE7tZz+BXLCgYNEWv86WM4uzI5w8Fi5tyuF3XurGgs7b2r7de4xOOJ/7vSriuGV1DQINAIiFamszTCC7uojq6oIvBlGTZRxx9/Nhh4m7CBBoAEAqqLY2VQrkOo8Lh9vadsO+Zm4uEEZPT/QJRm0yCQEA2SWJcDaVzV1bW4l27CiI6cqVhUcW9qALhm7V+DBJCACIRBLhbKLC9+STYtuULS2qWzU+WNAAgEgkYW2KCl9nZzKts3TrXQiBBgBEIglr0xHIMKpGam+ojk3WrXchBBoAoI21WSyQQcj4ok3uXQiBBgCU3dr84IPCujfeSMTViG+6SWwMSU3WRZlgTAJ
MEgIAYlubXnHQQeFsxfzVXxWyBIvdFdXV5Z+s06F3IRJVAADqMwn/OE+558NTC1mcv/td+c+rUpAwUi5k4qBhQQMA1FqbHF7xofDUQnZrsOUcRlVI7Y0sAx80AEAdEn2pfvADsSiMSZPKP1lXLmBBAwDUINmX6u23xTa7cCHRFVeoK8RkEhBoAEBZUgs/9CGxzZ5xRvkn68oFXBwAgLKkFn7jG+GWcC5XWK9SgUADAMqSWjh+PNEttwSvessthfUqFQg0AKBsqYXf+Q7R0qVjLWl+vnRp4e+VDOKgAQDqiFgpn0PuOKqDJw7ZN81ujfHjk+nWUm5QsB8AUD6iVMrXvDegSiDQAIDyosDsXZdwb8ByAYEGABhNGr0BywVaXgEAjEZl6yuTQaIKABWAaRNtuvUGzGSY3Z133klVVVUly5lnnpnkRwIAPHy57C7gbtVXX1145OdJtIxShW69ATNrQX/84x+nDRs2HPvAcTDaAUgLv4k2p3aRrhNtTkh1f793aQ/HB51Wb8BykbhasiBPnTpVaN3BwUF7KXamAwBSqV2klZvE6dYyfz7KjSbK1q1badq0aXT66afTwoULaSf3s/Fh+fLldiFrZ2ni2EkAdIOVauNGolWrCo+qO5dqNNHm7OrNNxOxnZWmm6RVo96AmcwkfOqpp+jw4cP00Y9+lAYGBqijo4P6+/vptddeo4kTJwpZ0CzSIp0HAEgFgzIn+PrBYhoG99xbsEBsV8sRj5w3bILT2Djo3/zmNzRz5ky655576Ctf+YrSHQEgcQzLnGDLly3dMLghqrucp9+u6hCPnDdcsLWNgz755JPpIx/5CG3bti3NjwUgeYcuww5djdwdEWoXhe5queOR1xkYkRKHVAWa3R1vv/02NWQ9NgZkDwMzJ5yJNsYt0kF9/cJ2tVzxyOvEu2llhkQFesmSJbRp0ybasWMHPf/88/SFL3yBcrkcLfByeAGgM4ZmTkSZaIuyC0nbXHnzbmD0D7PbtWuXLcYHDhyg+vp6+sxnPkNbtmyx/w+AURicOcEizKF0on5bmV1IKx65V66bVmZIVKBXr16d5OYBSA/DMydYjEWFK2xX3Xi5SVQzYOYNTGzQUQWAJB26GdvVYniCMa3AlQZzb2BiAYEGQJQKypzw21X2TrKvl0PzOLQurV1ujhiRYjpoeQVApQXiararop+xLlo3Le3QNlFFFiSqAJDta45sYuY6dd20ygYEGoAKJKrIlit7PWpiZt7wGxgINAAVRlSRLVf2epZbWhmb6g0A0CfDLtHkj5CKfwYmZpYFCDQABhNHZBMTSYGCGZUa1ywLBBoAg4kjsomIpKA5X6lxzbJAoAEwmDgiq1wkJcz5So1rlgUCDYDBXVXiiKxykZQw5ysoMTMWEGgADC5KHEdklYukpDlfQYmZkYFAA2BwUeK4IqtUJCOY87z9HTsKqePceivtFHLdQSYhyD6qMhtkg3dTzKiIm2GnZKjO8Qmr+JfF4GYJkKgCgCMYd91VMDEPHvTP4BBVJ5kmf/x5KafnaZFhl5WCGbqUsLA05tChQ/wN248ASNHdbVm1tSwRY5eqqsLC6/DS2Fj6d37Or7tZudJ7e+6lvb2w/aDPzTJex7SpKfv7nYCuwcUBsodIS2q26GpqCpauaJ6zqAVdV0e0f7//55p4my9rnmthzusJXBygcgnzE4viJaQiPlYW5337xNwgpvRmKlc1pYyCWhygconSklo0BU8gZGLrBQuzlcOsWdRKpYEwO5CtpA/VwufeXkBc2pYla+mrP20xM4fZ6zsoQyttTfJ/9MHSGEwSVgAyk3Qi9PSITeSJLrw9L4aGCn/jicOeHmtocMgedjUNWTup0cpzBKvH9oZ5kpAnzPj9un8HHR3xjpGiYXRnbG5RRtcg0KB88C9PdbQDCx//qr226144ysNvPUkhLb4ufIG6bYF2izQ/H+bXdFKcoO9A9CLGF6kEh1Gl2SFLU6Dh4gDlIanbZ5GW1LW1RN3
dRA8+qCzPudgTsp5aaT6tpX4qdYPsokb6ZbtGccAh34FwL7yY7poyeFKMAQINykOSFdv9/MQszB0dRHv2FNZRmOfs1igW6Vm0g+ZQDy2glfbjabSd8i2aiLPAd+BzeVNeck70VLjvvsrzTY8r9wBAhZJ0xXYW15aW8Fhc0fUEixYVR+ANU4420ZySqD2tymcKHlvenSreAa/MQAUl50S/4ptvrrwov9Qs6Lvvvtv+ktv5XgWANCq2s3BwrPGCBYVHPyERXS/ko4wrnyl4bP9hUgdZCZaci/IV91dIlF8qmYQvvvgiXXnllXbe+dy5c6mTz1TVOevALDJaWCdu0aJUyefpaMMsGr+vn6o9PM7DVGX7zdk18+wGojm5ZDIDw04FPww9RfRKVDl8+DAtXLiQHnroIZoyZUrSHwdMwUiTMxyjymfmcvTiwhWjYlyM87ydOm1XzcDe+HcZceZ1VU9TmELiAr148WK65JJL6OKLLw5dd3Bw0L66FC8gw2S0YrsCj0l6uSgt/hEn/DpPdqaRV+N3KmQpKVO7ScLVq1fTr371K9vFIcLy5cupg2fZQeWgaJIORCuncc89RC82ttJpu1roM9RLDTRAA9RAvdRsW85pTm66T4U9e0onBk1JyjTCB93X10fnnXcePfPMM/SJT3zCfm3OnDn0R3/0R74+aLageXFgC7qpqQk+aAASKO7nuBOWLCH63vcK//cK1CjXzUw+m9MUevigX375Zdq7dy998pOfpHHjxtnLpk2b6Pvf/779/7xHIOOECRPsARcvAIBkk0BWrybq6tLP05TL5jSFHi6OefPm0auvvlry2rXXXktnnnkm/fVf/zXlsnxUAdAE0SQQrpLKk5u6eZpaR3zTXu4ZLSNjTBHoiRMn0llnnVXy2oknnki1tbVjXgdASzJQdF4mH8iZ3NSN1gqepkAmISgfOgug16wam5nXXFNQC53GWuZ8oDTI5QqH3Dld+NGQryAelsag3GiG0bm2pF9pNR3HGrO4n47VT007XWRBNTugNzp36QiaVdNtrBUy0bZO49MladA0FujVM7AcsVPFrhbR4FvD4ryMSkHX/HRJM8wOPuhKQgefr0yZ0TRmrLyUS5S0x1qBE229gqfLnXdy5JgZ+yQDBLpS0KUzc9JlRlVkcGQ011jXKA0Vh/bv/76wpHZKp2TsQKArAT8hcpx4aWYjJB1WIPrDEfU1qx5rPk/5jb305sZCSnVuTjM1z8llyupTSYPkaZDKKZ2msWNpDKI4FE7j+0UjpD2Nv2aNZeVyyYxHZqpfRXNZ2bF2d1tHakvHxw1m/7K228hohDQYkmgxmcopraB5IprGAnkhUtSZOXb4WtQOobI/HG50GlecQ8Za3Pj71Y5uu2Fs3rUdp7lsK0Gkw77aKgmRTuSUVmTsQKCBvBAp6Mwc6+TmhS3rrq50fjiiF65777Ws9nbLqq8vfZ23FyDOxcZ8NQ3ZlrJbnItF+v9RkzWzcUj7eORy0e1xc5T6Ka3I2JERaPigs06aqWRB/t+w6Xjn/fX16USGeDUR9IrfuvHGwj5wuTfBSSG3y7+ZeqmJ/MfH3UxmUB/N2tVLvb1zjJvISwL3qdTSciwK5dlnCxOCqWdHlmGCGwKddUSFKG7B37CJkyRP7ijbdjI4WElFGqK6QiBsAdk4Vq+95h65xrIIdi1mMwJCEiXsVGpuJnr00eRPaS3y5i2NwSRhwk48iYkNoe0H+WmT9IXH2bbXvbPjvih2IvPjiP8haC7SayifJbHxtdG9Vs+GyvZxiE4ldCd8SieZNw8fNJATIhUnbZj/d3AwuaIQcX84XkLso8Kbl3YHCgi7q91/O+aDDp/lGja1wIQCZKcSuhM6pQNRcGWAQANvfCzCWMhYr0maPSq37WPGDVcVIi6+QN2+AuKeS3QWfo8TsVGyzTBTsYKIciM0lMApHUrMKwMEGgSj8qyWjRJJ0uxRse0QM86JuGCr2G9XWaS9rGwWabakhY4Xj6H
CQjp0CThK+jcEgQbJ1G30Oil1M3vibltwf9iv7PdndnP4GfM5GrL+c9G9YsesoyO13dYBnUL2kwQCDdRnQfkJO2cGZqHgsKQZ90VaGSgggca8TJIMq32I4malVnJWaleHAYEGarOgwoR96VJPk5Ez59hva5RSxLCg3YfQ16qNkmbuo7gKMo+1oizRGSkDgQbq7h9ZVWprw4W9q2tMnQn21f7Pk7rtO/WyWz2iPoAQM44vOrxf7KqILCAiWZUCiqtbmRVVlCU6I0Ug0EDdDAyrq8D7NnX02KLFliXf/vNj8UQaa3xqPzC3GHP6uIAPwH7bhiHrvy7vsKMr/CIsONQutoDwyrJWdJHi8lg5Cz3KNdcEsuBT9wMCDcYSdTKvpkbofYtr/H2yxYsykfb7BcsWbWAf+sjbuKpcYJRFkQorERDBi5/XxXD6dPG3aBH1AEaBQOuELqZAlBkYCV9pUFSDW+NiHwIWVXfAMe+b4wuXEbxcztp8a5ddTa4Qp1z697xjSSfhp4ni6giZoPRaNmzQ4xQEBSDQupDU9HpU0ZedgRF0ixw9qTYwLjj2LXfx/l51VSSrM2hhEd5HtYHV5oYbR7IhVStdhFqaohdD3iS7lrIQ4ZElINA6kNT0elzRl5mBEbSg37m2Q0oTpW65o9SZjCDQQuvW1SWjdN3dhRTv0HGGJ8kUn2ZBf8tKRISJQKDLTVLT66pEX1FEg73U1lpDg0O+u1vtMXEobEGLFPgv56JQ6Xhyko/PP1C77VJxp4Q7aeJ+aebuhb8PkeAbuDvSBwKdxZSoJERfRKjDbsFHxMlLS71Sm/tzjdbQGsGsxYQtZ2UirUDpir1JXseNLWdRcf7Slwp+Z9WnIMiYQP/gBz+wzj77bGvixIn28ulPf9r6+c9/nv1yo0kUFVAt+jKuEkG3CD91rLZjxYHGxhELWZ0qegZKLmPC6mSWmErn3l2vOw/RobA4G1XXosI4pItA/+QnP7F+9rOfWW+99Zb15ptvWt/61resP/iDP7Bee+21bAt0Eha0yl9cFFeJoFuEX/7fy4asXVX+LZ6ErM64PQMVCbOwf/rxx8W/S5/jFuSSEF14G1FLpET4uoHJAu3FlClTrIcffjjbAp1EUQFVoh/FVSL7a1Ux1qgW9JIl3tb+rbdG2t7vJvnUD3UvHPYXwxetSqCdIcQ5BbNS20NXtBTooaEha9WqVdb48eOt119/3XOdo0eP2oN2lr6+PjMFOomiAqpEX1Y8o/xaVVj7IhOU7oVD8BQmsQxXV1vWqlVCyhm37khcj47XVxLlFMxabQ8d0Uqg//u//9s68cQTrVwuZ02ePNl2efixbNkye+DuxUiBTqKogArRlxHPqL9WVda+bBTH7bcHX6B4ezIpeJIL+9yP1EabMIzj0QnabZlTMKu1PXRDK4EeHBy0tm7dar300kvWN7/5Tauurq4yLOiknHlxRV9UPHmmKeqvVcT65RRy/gy/9zvHjDP43PHHgqZk8WZe7eguWLgJibM7Fdvzqx889sLQhh47tM75m2jURZTrnOgpWCn1mMuNVgLtZt68edZXv/rVbPugdRZ9UVdJ3Dgt0Qw597251wVo2jTLmjRJbDweBY2O9QRMXpyduiTu+kwc1cIhhsXr7aF663JaM3oYZH3Qqi1aRH6kg4yujaOUGR4epsHBwbQ/NlvkckRz5kR/L/evnz+/0J+ef+sO/Jzp7CTau1dsewMD3q+3thKtXUvU1ka0a5f/+/v7C2PhdRn+f/GYmN27SRg2OqiKpn23nXZTC+8wNVMvNVHAGBTz2sEGuv/KY8+/QOtoLc2nqnzpfp1C+2gNXUHfoaV0W/93xux2NeXtsTfQAA1QA/VSMw1TbsxXxV+pChoa1K4HFJDklYJdGps2bbK2b99u+6L5eVVVlfX0008LvR8WdIIEuUpU1rLkbbE1HlQVj01BHoti/7BTs4JjidOwnL1SsR3r3S+Uzylrejl1lfzJK1llD9XZmYa8XzM
bh5RP2EnNQyMOz3wXx3XXXWfNnDnTjtyor6+33Rui4sxAoBPG60cmGukgc39dhqST4qpvLGhpiLNXKrboZ79L9aPC7pfkUyLsCcW9Cc1DIw4vGwIdFwh0yohGTcjGXKWYdOJlQR/zQaubJHRbxH6p2DLW+1zaIO4vTzDuLXAeGnF4sYFAA3lkal/IhgqmbEGzuLldDcesUh/TULKMKU/wddF8q4NuHxVWr1VvJ/Gi/PupRmr9JOPePD0YiMNTAgQayCMqouyblhUEEecm/33KFGXW7bdp6Zg/efl1Ry82olb+n/7pmLA/3qZjPXMkBi+8S4WLgniND5l1yxL3hji81AW6WsVEI8gAftEYbk49VT5swIkcKQ4/cHCe89/b20kVS+m7dDmNRIaM8OOqVjqNdtBzHT1EK1cS9fQQbd9eiDgRDU34138l2r+/5KXp1G9HaXC0xsGDRAcOkG2rr6A2+++uPfaFf4yuQA61311cRD8nrfFUABBoYPPc1oRjrJywu+nTS19vbCy8zn//m78hqq0N3o7AxaFqZFlNX6TLaU3JR3V15+h/3DGHaMGCQqiis73m5sIK7guIwGdXj8hqJ7UXnChVRJdOKoT2yf7AnPWHZd6UVtwb4vBSBwINaN06os8ta6Y+aqRhH3uPX99JTbRuX/Poa/k80caNRKtWFR75eSAswjt2FCxXx4Ldto2opqawkd5eogce8H4vqx4vvB6/7/bbQ7+5cZSnNXQlPde+rsRYjmzlB+wgi/QM6rPjltnvcMJ78a3IUJHmcTU1FS4uaRB2EUt7PJWApTGI4kie4nkfv4k05zVurOrMSSmJtPLbCDd/DUtnl4kMkZlI8xoTV6pj33OKoX08WRjYYbxc1YtUFwGrQA4hzC5lDA7ad8/7iHTz4PIYsSuehYVrcefuoGMqGxki272G129vH9s9XCK073f1ktX4XMkuTtF+Tk7hqJHAC5bJRcAqjEMQ6BQxPGjfyxAN6+YRlhQYarCqCNeSbYkl2zpEspJesbCODn+NXMfuoL6D42jQaqN7rb7LbihE0nCH8XLaGQYbJeUGAp0WGQjaTypEOdBgVVmONCkLWkL8i4V1zFfvZ216uHH8kl08wwMVGwGG2xlGAYFOg4wE7cvUxed1RCuulRisbmuL20OpsnrZFZLLqf0eJK9axcLqeafvZ20Wvf6f93r3HfRN+1bYyT0DdoZRQKDTwNCgfb/yG2F34s7f2f8stdt+k24qjx3X9gwatKzCiE5A3nDDmLrOUa/HXhfK0LRv0YtPgHmcETvDKCDQaWBg8dyg29iwGknFhe6EK54FmWZBxyyKKqicuBK9+PLVSiHuC6VwJEjQhSzEPOZGBgbaGUYDgU4DwyxokdvYYuuaK4Ty4mUZCkVayU7iuRe2imWJMnHl9R7RsfM6ik3L4uuMcKElPyNAwDw+XFtas8QAO8N4INCmdu5OeKipGqyiFzC/dlbuGSpVUQPudlp+txTSvpyQz5IYs/22DUPWS9fErMkt+B04oYEG2BmZAAKdFoYE7Sdl7Afqj6gLqK3N/6rhHENVIQYita6dz+UY6DjWK09eun3tomOW6T4edGUV/A64RZcBdkZmgECrQNT6MSBovyzu8rgWdFjYiOxFUCaumdeTmch0nyu33hq8bY+mtqOnmGwnc74QxPwOuMmtAXZGZoBAx0XWYtM8aF/Ggo69K84GOJSORS7INJPM0ots3kX1h/PFI8S0HHpiTSFj0KONle92m5qs7q6hMUOqmzJk/WaSf3ss3y8tbL8FzGMD7IzMAIGOQwaDQkV/p2yMxfIkePzKPcVK1o0QRaSKrzSi/RW93C8BpuUbLUtDW1P5LXM8/L5c+F96jGG3PRJuOM3tjMwAgY5KhoNCw36nnNgW67o08gHDIkXoHdNMURrj67evLP1KZHy4Acvg5DpraIl34abNt3aJtaYKKapUXG/jMB2n5uLk9d3APNYGCHSFhM7J4vc75Yi2oAzB0Ov
SyIXN79Y8P9IU9T9uenxsJp0CIWWBG7X0ZX24AQuPmy1kFuNi03Lot4PWHZPujT1mzxRu0UXWWIB5rA0Q6ApKPpHF63caO6JM8MJ2RX3PWD1h0z3gPcMnnWQN+zR7dRcoytGQdaQ2vuB7fQb7jG26u8f4nGUXvlhdTmsiu0dMdrcBS0qgx5W7HrVWqOoYwYXdufg8t/7hdbmAuWybqITgYXAjkeKhOnXqI3cyEmxxlNs3YB+W0c/P58niAvw+baH4LD5wZALV0BGyqGq0cwnjNBZop04appy9MhfLP+HALlKJU4h/yeJeaqk6SLkr59MElskIOO+6ke6je+lm+5VIHTO4Kw1/ab7dB9Si8emcedBRRXXHCG5PMmsW0dy5RFdfXXjk5/y6hvAPj/voRb0u8Y/3lT1iF7YBaijR8ufu6qWqXbt8e/bx63XWAXqEFlE/TSv52y5qpPm0ltbTMZGaSsn1whu3r59+/40224YV6THoJ+HfoaV0oPrUSO2wjm082gUiCoadztlD51uOsnRUiZN8YmAEiKhXh33UbveE49M+VtQn3BXhuEn4vQtEU5lHOmdzlxG/GtUqupgELVyLWdwtMnZitODW6JJL4S6zi8PA09kI4IOOS5RZb0MjQKLWBXL/eP3bZRWW+dRl7z7XmecaH1z0X0ZQg4rZO4tzofDzWUdZeOx8cbmaBEukjqzPPma/pgdKLiQJn08qT2fMT5YCgVaB7FllaASISD1ot/Xs9+MNikrg1x9r6S55X5jlHWSJ+4nG9bXd1rBEFxMRgeZejDzBKbL+w/Tl0OJDsvtdjvNJZU8FNAKILtCJ+qCXL19O559/Pk2cOJFOOeUUuuyyy+jNN98kI3Bm0xYsKDyGzYoITpQJr5cSQc2sHR588Njus8/5vvuIdu/K02dpI32RVtmP4+gDOkg1tJYut/2vbi9pI/XTNU/Op/N3HXNe8uReGxU+3K+buF/nbDfO2P/0wVaqWru2MJFWDM8dLF16rDu4IJ3UTuurWumL9/0xUV1d4Lq8z9fRo7Sx/cnApuPF+y3m0U7/fFJxOrOfev58ol2uedv+/sLr8GMLYCXI5z//eeuRRx6xXnvtNeuVV16x/vzP/9yaMWOGdfjw4ex19S6XBa3o/lHEq+Os42Up/55ykS1g2XhgJ8kjaKy+x0UyiaVlco+1eanEe0bu/bkaXdiqvN9jQvY4vfzyyy3rS18y2oI21ONX2S6OvXv32gPbtGlT9gS6HOVHFd8/Bmm943P2a8EkUz/Cq7wli7boRJzzfvaLR7ouDQ1Zz35rg7WfanzjkJ2LyaYb10Ryl7z0vZ4xzXWLm/FyqvfMxiFraDCgHVYZy9nG/XhDPX6VLdBbt261B/bqq696/v3o0aP2oJ2lr6/PHIFOu/yowin2MCPc+bGGtmCKYQE7AiYSDVJfUyjuEwfeT/+JzcJrHHXxu7pGJfvodZdgJ9WIRAWVqcxcnI+vgJyvbAl0Pp+3LrnkEuuiiy7yXWfZsmWO+7JkMUag06p7oPD+UcQId6whVWFsQQXi7Uk+GqsK/Bov/7Ws0EdP1SFs9RBOpwlsnP0t3sdYjV/LXEcj6sfDgjZMoL/2ta9ZM2fOtK1iP4y3oNOKK5I4+0XcFmG64VhDceN3g6Iw2B3A4XejNZG9Gs0G1T6OgLP/nCLuFRIXdX/ZH38Frba3tYAet/ZQfbzGr2WOU4vaScyQhkOpo51AL1682GpsbLTeeecdqfcZ5YNOE8H7x/9oX+lrHcsY4SosaMdtwBarkBHJFZzcxfxj+Nf9CJozjLq/nhX8BC6mWcOQhkOVK9DDw8O2OE+bNs166623pN8PgY5nQXvVHHZ+HDIFkhwxz8WI32XL+YeXlsZB+94up5zCxpa717DH0aBtDUuLbRSBVumM1SgzBJVONRbor3/969bkyZOtjRs3WgMDA6PLb3/728oTaJU/mpD7R07U2JULTugIKi/qpRuOZrb6TKx51YHmhescO24
DoY4tZYjP8rshSTJ13PNKqAINM0M0ul5ogTYC7TXhxwvHRleUQKv+0QTVCLWL5genREfVDZk4aGeizVdT/X61ZZhd8vvI2DUzRBaVFxwUzzACbQQ6LpkQaFU/GkfQuE1UUC+/pibruXYxceaJOdlJHHYNu2N6+ZHdAV4TbZ67GXTBKkN8lt8NSeIWtEqXDTJDjAECrQuqfjSi2W9sVQ8VXAki+sCry0ziRGmAUnyjwO9/taMQRud2iThhdEPLxJzjQxt6Ep/QUhX3HXQxVeZ6UBTZA5IHAq0LKm7XRVs4FYm9TIiTzCROlBaCPAHn7MaM6cGCx37t/lyjdaRmuu8GnSa019d0KXereh2LL0/yvqAELYHr8t0PdzxXrYwKIntAOkCgdSHu7XoUk3VE7GVCnEQtKtHdce8af1ZOIpX7DrozUOScEqC8TdXCUuxJcqL82Je+jwRnVYMuoEnGlimI7IFIpwMEOisWdBSTtUjsVYc4RRnOsmWW9Ze1csWQOuh2YbGx61lsUHu/7nXTwu6Ov6UOu37HmAPKCTTFVzh+nnb2n4LInkpNHEkbCLQuxE2nimKyusQ+xeg+z8U3zVmBQHMI3xjhj3m/HnbTwlY714bOPx5yQMvh6A24bRKN7Mlgvox2QKCzkk4lY7KmZAL57Y7XIjvJVnBdNFpzySdzxG0VjrxnjBhVVdmTkUHaqFGUn1p8bptEI3sqsXhR2kCgdSOqr0HUZE3ZiSgaVBIlTI19vdwuKriqXSHuOqxUqHM77zaqNYvyU4/H1cf4C0+GgEDrSNRbXhGTNcXqZu7dueEG/2FFSfRwshTvpqW+PQ6HJSvKFV+/wsLSZVLgTQLFi/QBAp01/Kq7cahBmQNZgyyz6MWGChYw12OWmVwMqsnM4suHcLp/BF/JOlmswtbdNWRPrC4QSSYCiQGBzqJhrVl2gTOcm25KrjkqiwiLtFPXI8r7o2i7bAKPqRd5/m544rAMN2AVzSGkepuBhnVthJBp68cC4GQJyirl1fR4pEy+sO7forHbZayTrxYfv47qJghADAi0AZha10Y0sbF44YgKu72TpFKKJra4xTkfs1iU41/W7KYlkbhB52LGWZ66nnOVLNDVIp2/gVryeaK2tsIvZGyX9cJje3thvbThz9y4kWjVqsJj8RiCxh3ED3/dSpMO7KA51EMLaCV9jjZQH02nYaryXJ9f30lNtJfqpce/ixppPq2l9dTq+feaGqIq74+1X29qImpuLjzP5YjmzCFasKDwyM+No7eXaNcu3z9Xk0UzqI9O7++l+fOJ1q1LdXQghHFhK4DUfzO2APb1FdZjYXDEkZ8PDBA1NBREhAXD7/Uo8I+TBbh4bI2NRCtWELW2ho/bjx/9iChPOdpEIztDRG30fVpL820xZpEYparKlu0tV3bS4Z/XEL0fvv12upf20Kk0QA3US800TP4HgEWYj6/zWPw609lpqBD7wSeGAFNpYNQwaGnJ2DEwGUtjMlFu1APZWFs/X/XSpep82CIulyiJjRMn+v+tUFfaFVbBIRQ8S7dypTX09Abrd3XTff3XUX3NvPkw/3Im3BuMYAB08YSqaSGEpgEftObIJA3I+nyj+LBFq6L6tYaKungV/reqq0ufO61fPLp8R/U1s+iGNdM1cfI2SgC010VO6yScDACBzkjSwOCgfDG7Yl0b7ZKt6ILB24s6nsg1OpyD5OrRdbj2WMcW2SVKdVfdJ28DGdkp952I34QqLOhkwSSh5rB/j/26jHvCqtgX+vzz0Xy+zIEDRBdfTDRrVsG3HDT5J+impL17j41blmrK02dpI32RVtFcepZWUJvdAS10lpo1gw/K8ccTbdhAtHIlUU8PvfjE9pKJwOLt8yM/D5sENGnyNhY8gbB2LVHj9MAJ1bDjA8qApTFZ9UE7hMXaFvt83S2mRP2uPgZoyS27bJ0G0XToYms5Tkagl2nHdwbOPnlt30nCELGAHXfH7WJF9My1MIeGrE0dhUzCOa5
zyOg7BMOAi8MggnyhjnCKCJDsUvyDlK3TINNHIEq5UdFa10Hbd7IPORPRfeGTafEYMgwjyVQSjoFAoDMCiwgXu/cWoPgJGbzwD9VpfSWT3ixSw0l5T78R09WxnkW2z1Xv/utv14ROAsrcqRhrQVsZjFIxEAh0VhgasjPwRMtqRl3YZRHFsuLXg4oPqeqKbU9uFZnwTjSJ6Pbt6ncjOxEUFRN2p5J2oSSIaDaBQGeFCDGsUZfiztuillVYTY4o5UaHfe4U/n3JsauE4ysW3b69zaYma2hwyHe8/q6SY3cqafpoMxXqB0qAQBuKWxzttkoCAvTqt1ZaNa5WebKLqGVY7LsN22YUC5pdEsXP+Q7BsWAdcXIEWnb7/3mv94UszFXi3Kn81a3pmM6ZDPUDo0CgDcTLYppfJx5eIdOKKmAz0mMMWsLKjRaEr9DiyvH5jqNBXx+wcxFxXByyPu5/v2FlrAsJ9yJM2r0hmjQEn7G5aCPQmzZtsi699FKroaHBHtD69eul3p/1MLswi0lE4LhTM9+6O9tx/7jd4XVRoxOiVLErdR2IJUmIXETcIXb5mBa0qKuE10t6ghCtqbLPIV2q2R05coTOOeccuv/++5P8GKMJSo7goj9tVMgMcVd+c57fmO+k3udzo/kIO3bYeRxOPgft2UPU0SE2Fi62JDvGMDgJgpMh+ik4SUIUTqrhRJ8HHzy2/Supi4YCCiQ5GRhnf6PZLv7kTg7iIktCn00NoUk9QQlBQp8xoHY9YDipXDLYVIcFHdli8oouKPbNsj84CLY4w1o9Bd02yzQX91uiJtr4WdCOH7yurvA6N5r16/Jd7Lj1cgWJuWIK0TJhaeJxJ/ZgQWefQ7q4OGQF+ujRo/agnaWvry/zLg7RCnFhAhcmArJxzlHGmOTiXES6usaKIIs0izU3BhgWiBP0EtJFE4NdMa0jraH8LmKqJvbQ3DX7HDJVoJctW2av516yINB+4WsqrFNHh8ImjqJmkMmMMc4kZdh2ubxqqAi6DjT7572Ou9f3sXmp950Ki3OQyKqe2ItzMQX6Y6xAK7OgNYvwD7r1DbOYZBaRCawoh0ZmjMVWrkqxXrZMXgSjuBy48zVHzxTfqYRdxJJwSyAdO7sYK9BKojg0i/AXufUNs5guvbT8NSLCwvjYxVAs+CrC/ooX2UJGcVwOshcx2QYMhtoZQBGVK9CaRfjL3PoGWUy6TBxFSQX3Wt+rE4wqgXaK8YdtnxN7ROtlh6HL9wPMQEbXqvifpCJEDh8+TNu2bbP/f+6559I999xDc+fOpZqaGpoxY0bo+9977z2aPHkyHTp0iCZNmhS8MsczcfFjvwLKHFvFMVbbt6fWcI3DrObODV+Pw+G492BQ30Hetf7+wk+9nLsm2wPRCTvjpWo4Ty01vXTu1AGyphb6B/a/m7NrLO/f7/1+Z9+uu04sXJCPJSNy3N09F6Oi0/cD9EdK15K8UvT09HhO+i1atEi9BR3RjEnyNlLlra+pE0eOFe1ZE3rE9RS2b2vWBIcJFm+Ovz+ZqBNVx8/U7wekj5YujihICXQENUzaXS1zzXAuFI8/bln33lt45Ofc9sq5gHDVObdQ6VzH1xEtv0JEdpW6EfVS4eJxqvLJRsaoSp/GxB4QoTIFWtKCTsNdLRrT6hXb6yy50tpB9nojTa+1njhy9j2sXsZwkenrdzcje+2NGhmjwkeMiT0QRmUKtESEf5oFacJuff1ie4PGZsIts3O9FK4455i/AdsKS9gpFtgoUSSmd0oBZlCZAi3hCEx71t3r1pfbLLW1HUtVLscteZI4Vq9UTWifq45zQW31Kajvl+UnW30PURagooolla178fTSwjz2FDq/PjJVn3ZBmuIiRhyxUFdHtG9fIXrAL3ohCJaTvr5CNIWuOIWXRAsRBbXM5siHNQvW0RqaT9OpNEpnOvXbr3d9cd2YCAnnuHMz8Joa/49FN2ugK9kSaL+SbhzfVBRH5Ve1zc3WreqGxeJx8GB
0UTatohmH3/F18ZfUTH3USMMib/K76uTz9OlVbVRF1pgTtposu67fp1f7i/u8eUQPPVQQYnclO+d5Z6d3CFzc6nQAxMLSmKTqQYtOIqn09cp0whZdONFC50wzx+PELoh8HEewqE8q5ICoSLRB2ykQl8r1QUvAP740fb2qiiI54+KC9e6QuzHioUFIgSNyt1NHdEewaBiHu++Xh5qKHhLNklJBhoBAC8KBA2lNHqkq2Rlk9ZeIRwLmX1S9t9+3Ycg6UtM4pims0NUw6tUtopqi7RRIksqdJJT0GZ5xRnq+XlG/txu3X5TnP2trvddlVWL+9avryJo/f2zaO+ci8+vr1kmPg9/C6cycQn311YVHfi6yKd6HOfNydMJDK6gqiiN4xKFd8DaPxbdWgXNAfCYf/WA3uF/FAFMmaUFGsDLk4pA1GtMMtxMJ0+bQu3/5F/9MQn50Gqb6LU5iSCRL1Urhdj9iut1jLX4F9Un5l5hUdToAKtbFEUVE0u5eoaJeQ5h4CCeGFAtWgO8ikdt9SV8J1+Lgz/Kq57GPapWrKarTgSSpOIGOIyJJFbnx06C49RrCxEM4McQRrJDbDv48kX6CSSV58HHjO4viO4TiscylkFuKCANE2ymQJBUn0HEtHtVFbsJcLXGCK8LEY46MBe132+EsHR3Wczd1eWbvOQ1rnSWscW1Uwr7bsIavUW+DUJ0OJEXFCbQKn6GqiLQ0wrOCxCNHQ9aRWgG/DTu3BQKzvTplO77gYpFmKzeJKD6R79aplmdXx1N40FGdDiRBxQm0Lj7DtIswuT+LY6M5dHBojYDfRvCg+U02siByQ1W/YkWqEP1uvzxJrKO3LBqEkoOMUXECrYvPMO0LBe8PC7JXfgZ3qA702ygKzGY/sMgdSlSlE83A5JKtUFOQNYEeRxmAQ2e5xgWH+HJILf9kRUNsVSISL11Neco/K9EzKoAnnyS6887S/XXCnf/4e6209okWaq33+ayogdkuGujYTvtukoOl29pKg4sFe00Vf7fu/XRYupToiivstQu9wwDIClbG46DT7DgSZkEHtX2SxW1ZuqMb2BcdeNcQtaq9hwUdeIeiyCnvV7KVQ/AAMImKc3Ho4jMM0jy/tk9RJ7KKLwZewu9EWgS6U8KiOAIWxwfNFwLf4St2ysMfDLJARQt0ufGKsAhr+xTFSe64kP2E34m0eK49RPhlq9q7ojgC71B0mb0FQCMquhaHjj0DmqmXmmiXf/HtCMUd2N/L/uwV1GZXo/Cqk8yc/6OQOhRO/eyODuHP/qC+kf69fS3d1NPqLrVdStqdEQDIGJmYJNQNFqyWloLesvac838GiP5erVDxfF9rXS817fev6sMifdy+EeEPmjzjmbg77iA666yxk3lNTUT33FNoAzMy2XhcczM1i0xsik5EKpqwBCBrQKATwq7g5mjixgYxgZYQKt7+0msGiDoVCr/7yhIzysR+L5feO3DAfx3+O68HABgDBDrN/k8c/+YVK8axgPx3SaGa3dIgJtAyFmrJlQUAUE7gg04DJ5iXka2FHIRTJ9m9zeJts4siTPiTarzHlniQ9czw31FYGQBPINCadRyPIvxVcYQ/TiX+MDBJCID+An3//ffTrFmz6LjjjqMLLriAXnjhBapIwjqOR7Fk4wg/i7Bs5xWZMWKSEIB4JB3zt3r1amv8+PHWP//zP1uvv/66df3111snn3yytWfPnkzGQUcmbg9B2SyOKEkksmPUpUgKABqhVaLK7NmzrcWLF48+z+fz1rRp06zly5ePWffo0aP2oJ2lr6+vMgS6HC2kZZNIoo4RhZUB0DNR5YMPPqCXX36ZLr744tHXqqur7eebN28es/7y5ctp8uTJo0sTT3BlHXYRcOyxV3RHxKanSv3D7O549lmi66+PNkY/Fww/j+p7B6BCSFSg9+/fT/l8nk499dSS1/n5u+++O2b92267jQ4dOjS69HF2XdZJqYX0GNfxKYKhdzffTMQX2IMH443RLe5
eYg8A0DcOesKECfZSUaQQ6eBV7XPG9Gb6v7WNdMJBn9hsh337xD/Ia4zORKT7M3bvLrweZkXzlUVV4gwAhpGoBV1XV0e5XI727NlT8jo/nzp1apIfbQ4JRzr4BWr07c7Rlw6sKOimXxy1LO4xxnXfJBkCCEClC/T48ePpU5/6FD3LPswRhoeH7ecXXnhhkh9tXpZh3GQTD8L0cX1VK/2v2rVkuf3D9fVyH+Q3xjjumyghgABkjMRdHLfccgstWrSIzjvvPJo9ezZ1dnbSkSNH6Nprr036o6nS28GI6ONDB1rp6g0tNCdX5EZgEbzmGrEPCRpjVPdNyJWFMycHv9ZOP/5dC02dnoPXA2QXKwXuu+8+a8aMGXY8NIfdbdmyReh9FR8HHbMdTORu56IheGFjjFoPWvB9Tj/EiE1pACgLMrpWxf+Qprz33nt2uB1HdEyaNIkyj+IJMY7WYLdtGJzQWFIficfBvl6/4k5OFbonnii80W+MYdtxikRxNmXxNjjUhH3OISyglbSaFowa8YjaAyYgo2uoxaETTiW5BQuChS9p93ZYcSdeHnyQaN684DFGLRIlOCE6QA2Jh4sDUE4g0BkmVhE9VcWdomwn5MoyTFW0k5qol5pVh4sDoBVwcVSAi8QrDpotZxbnUJ1V5XaR3Y4TxcEUuUdYnJn5tJbW09jBcw0qvgEBIAsuDgh0VvBSYbZC2YRubTUz38Njn9hybqdOT3H29KcDoBkQ6ArBEd3ck+voM51sbVoj9uUIWZg9G9nJ4f4Buqq9gdbvb6Y85YTnGwEwWaC1SvUG8sbl7l152kFtZHl09rZdA6xcPHvGvQZNVK6RiVPetwXHE3VzuDgpDRcHQFswSWgCrkpH69bkR5PsmqmXmmiX/xeZodmzJJrSAKAzsKDJPD/sp3ONdJm1wvbDNlDyxZZ0QnXjcQB0BgKtMz6V4Kbm+2ktzbcjGZxY4KSKLekIGo+DSgECnQZRQigC6lFUk2WHm3VSO32ItlEfNdJ06rdf9509i1BsCQBQXuCDTpqoJTNDKh2xGM+gPrqInqc2WlESIzwKZs8AMBoIdJLEKZn55JNCH8E+aPZFs7ujnzB7BkCWQKJKUjiFgvys4KDAXRbuyy8X+pg51EObqJCZkaO8HdXx7fYBmt2C2TMAdARx0DogU6y+OPXN8T2HwN7m3bkm6s0f8y1Pa8rRjZ1zaDbCzQDIBJgkTIqoxerDhH0E9jZPXd1Jz9blEG4GQEaBQOvWa1BU2NvbKTe/dcS5Abwwsv4IAEVgkjApwooxM6wW7q7ZosLO2RrAF/SbBVkAAp1GMeYgE++qq0qjORJsIlspoN8syAoQ6KTzkru6wu+ri1uBxKqyD8I6mbsPNwA6A4FOmrq6YDXwKmaEqkCpBM8AoDuYJNQ1mgNVgVI93ADoCARa12gOBlWBUj3cAOgGXBxJg0m/VMHhBlkCAp00mPRLFRxukCUg0GmASb9UweEGWSGxYkl33XUX/exnP6NXXnmFxo8fT7/5zW8SLSpiBEhtw+EGFc97OjSN/eCDD+iKK66gCy+8kH74wx9W/Jdig0m/VMHhBqaTmEB3dHTYj48++mhSHwEAAJlGqzC7wcFBeym+FQAAgEpFq0nC5cuX274ZZ2nimhMAAFChSAn0N7/5Taqqqgpc3njjjciDue2222zHubP0cU4uAABUKFIujltvvZW+/OUvB65z+umnRx7MhAkT7AUAAICkQNfX19sLAAAAgycJd+7cSQcPHrQf8/m8HQ/NfPjDH6aTTjopqY8FAIDMkJhA33HHHfTYY4+NPj/33HPtx56eHppT3CQVAABAupmEKlCeSYhMPgBAmdEik1DLPkjcaqO4mju3luLuJVy8AQAANEOrOOjEQJM6AICBZF+g0aQOAGAo2RdoNKkDABhK9gUaTeoAAIaS/UnCpJrUISIEAJAw2begk2hSx5OOs2YRzZ1LdPXVhUd+zq8DAIAisi/QqpvUISIEAJAS2Rd
olU3qEBECAEiR7PugHViEW1oKUR08ccg+Z3ZriFrOshEhSGcHAMSkcgRaRZM6RIQAAFKkMlwcukeEAACABxDockeEAACADxDockaEAABAABDockWEAABACJU1SahTRAgAAIQAgS5XRAgAAIQAFwcAAGgKBBoAADQFAg0AAJoCgQYAAE2BQAMAgKZAoAEAQFO0DrOzuDocEb333nvlHgoAACjB0TNH34wV6Pfff99+bOL6FgAAkCFY3yZPnhy4TpUlIuNlYnh4mHbv3k0TJ06kKr8CRZrBV0e+oPT19dGkSZPIFEwct4ljZjDuyj7elmXZ4jxt2jSqrq4214LmwTdyjQsD4RNBh5OhEsZt4pgZjLtyj/fkEMvZAZOEAACgKRBoAADQFAi0YiZMmEDLli2zH03CxHGbOGYG48bxFkXrSUIAAKhkYEEDAICmQKABAEBTINAAAKApEGgAANAUCDQAAGgKBFoh999/P82aNYuOO+44uuCCC+iFF14g3XnuuefoL/7iL+y0U06n//GPf0y6s3z5cjr//PPtEgCnnHIKXXbZZfTmm2+S7jzwwAP0iU98YjSj7cILL6SnnnqKTOLuu++2z5P29nbSnTvvvNMea/Fy5plnkklAoBXxxBNP0C233GLH5f7qV7+ic845hz7/+c/T3r17SWeOHDlij5UvLqawadMmWrx4MW3ZsoWeeeYZ+v3vf09/8id/Yu+LznDZAha4l19+mV566SX63Oc+Ry0tLfT666+TCbz44ov0T//0T/ZFxhQ+/vGP08DAwOjyy1/+koyC46BBfGbPnm0tXrx49Hk+n7emTZtmLV++3JjDy6fD+vXrLdPYu3evPfZNmzZZpjFlyhTr4YcftnTn/ffft8444wzrmWeesT772c9abW1tlu4sW7bMOueccyyTgQWtgA8++MC2ii6++OKSQk/8fPPmzSo+AgRw6NAh+7GmpsaY45TP52n16tW21c+uDt3hO5ZLLrmk5Bw3ga1bt9ruu9NPP50WLlxIO3fuJJPQupqdKezfv9/+wZ166qklr/PzN954o2zjqgS4JC37Qy+66CI666yzSHdeffVVW5CPHj1KJ510Eq1fv54+9rGPkc7whYTdduziMIkLLriAHn30UfroRz9quzc6OjqoubmZXnvtNXv+wgQg0MBo2LLjH5wpvkUWi1deecW2+teuXUuLFi2yfeq6ijTXUG5ra7N9/Tz5bRJ/9md/Nvp/9puzYM+cOZO6urroK1/5CpkABFoBdXV1lMvlaM+ePSWv8/OpU6eq+AjgwQ033EA//elP7UgUU+qGjx8/nj784Q/b///Upz5lW6UrVqywJ990hF13PNH9yU9+cvQ1vlvkY/6P//iPNDg4aJ/7JnDyySfTRz7yEdq2bRuZAnzQin50/GN79tlnS269+bkJ/kXT4PlMFmd2D/zbv/0bnXbaaWQqfJ6wyOnKvHnzbLcMW/3Oct5559n+XP6/KeLMHD58mN5++21qaGggU4AFrQgOsePbVT55Z8+eTZ2dnfYE0LXXXku6n7TFFsX27dvtHx5PuM2YMYN0dWusXLmSnnzySduX+O677452qTj++ONJV2677Tb7tpuPK7c84n3YuHEj/eIXvyBd4ePr9u2feOKJVFtbq73Pf8mSJXaMP7s1uHUeh8DyBWXBggVkDOUOI8kS9913nzVjxgxr/Pjxdtjdli1bLN3p6emxQ9Tcy6JFiyxd8RovL4888oilM9ddd501c+ZM+/yor6+35s2bZz399NOWaZgSZnfVVVdZDQ0N9vGePn26/Xzbtm2WSaAeNAAAaAp80AAAoCkQaAAA0BQINAAAaAoEGgAANAUCDQAAmgKBBgAATYFAAwCApkCgAQBAUyDQAACgKRBoAADQFAg0AACQnvx/VtB/6WQIoaoAAAAASUVORK5CYII=", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWgAAAFfCAYAAABjmlbAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAP6lJREFUeJztnQt0HdV57z/puDYv21gPsGzJNiQkNIFQEjChRDd2zGraQpeIMBBjch1IyU1iQALsNqQUo9uyTB4FOZSwCqRAF7GNbNkhKwkNmMpGKXZ5pLTAvYAN9rUsCz8Tg51YREdz1zdHI58zmsfeM3vm7D3n/1trfHyO5szZM2fOf7759veosizLIgAAANpRXe4BAAAA8AYCDQAAmgKBBgAATYFAAwCApkCgAQBAUyDQAACgKRBoAADQlHGkMcPDw7R7926aOHEiVVVVlXs4AAAQG049ef/992natGlUXV1trkCzODc1NZV7GAAAoJy+vj5qbGw0V6DZcnZ2ZNKkSeUeDgAAxOa9996zDU9H34wVaMetweIMgQYAZAkRty0mCQEAQFMg0AAAoCkQaAAA0BQINAAAaAoEGgAANAUCDQAAmqJ1mB0AAKgmnyfq7SUaGCBqaCBqbibK5UhLINAAgIph3TqitjaiXbuOvcbJfCtWELW2knbAxQEAqBhxnj+/VJyZ/v7C6/x33YBAAwAqwq3R1saFisb+zXmtvb2wnk5AoAEAmae3d6zl7Bbpvr7CejoBgQYAZJ6BAbXrpQUmCQEAmY+WaGhQu15awIIGACiHJ9xmzSKaO5fo6qsLj/y8XBNxzc2FaA2/AnL8Opee5/V0AgINAMh8tEQuVwilY9wi7Tzv7NQvHjpxge7v76drrrmGamtr6fjjj6ezzz6bXnrppaQ/FgBQBnSOlmhtJVq7lmj69NLX2bLm13WMg07UB/3rX/+aLrroIpo7dy499dRTVF9fT1u3bqUpU6Yk+bEAAAOiJebModRpbSVqadHHN15Wgf72t79tt3Z55JFHRl877bTTkvxIAEAZMSFaIpcrz8VBOxfHT37yEzrvvPPoiiuuoFNOOYXOPfdceuihh3zXHxwctPt1FS8AAHMwNVqiIgX6nXfeoQceeIDOOOMM+sUvfkFf//rX6aabbqLHHnvMc/3ly5fT5MmTRxd09AbALNKIlsjniTZuJFq1qvCoW/afSqosy8udr4bx48fbFvTzzz8/+hoL9IsvvkibN2/2tKB5cXe/PXToEJrGAmBYFAdTrC6OaMeZkFtnWLEjL1jX2AAV0bVELeiGhgb62Mc+VvLaH/7hH9LOnTs9158wYcJoB2908gbATJKKllgXIXzPdGs70UlCjuB48803S1576623aObMmUl+LACgzKiOlsiHhO+xdc7he/yZzmdkwdomK0FeeOEFa9y4cdZdd91lbd261frRj35knXDCCdbjjz8u9P5Dhw7x12E/AgAql54eluHwhddjurstq6pq7N/5NV747+VCRtcSdXGcf/75tH79elq1ahWdddZZ9Hd/93fU2dlJCxcuTPJjAQAVHL6X1zhZRrtiSZdeeqm9AABAGuF7vZony8iAWhwAgEyF7w0oSpbRYYIR5UYBAEaUK73nHqKrriqIsVf4nlPsSEWyjC4TjInGQacZLwgAyE7tZz+BXLCgYNEWv86WM4uzI5w8Fi5tyuF3XurGgs7b2r7de4xOOJ/7vSriuGV1DQINAIiFamszTCC7uojq6oIvBlGTZRxx9/Nhh4m7CBBoAEAqqLY2VQrkOo8Lh9vadsO+Zm4uEEZPT/QJRm0yCQEA2SWJcDaVzV1bW4l27CiI6cqVhUcW9qALhm7V+DBJCACIRBLhbKLC9+STYtuULS2qWzU+WNAAgEgkYW2KCl9nZzKts3TrXQiBBgBEIglr0xHIMKpGam+ojk3WrXchBBoAoI21WSyQQcj4ok3uXQiBBgCU3dr84IPCujfeSMTViG+6SWwMSU3WRZlgTAJ
MEgIAYlubXnHQQeFsxfzVXxWyBIvdFdXV5Z+s06F3IRJVAADqMwn/OE+558NTC1mcv/td+c+rUpAwUi5k4qBhQQMA1FqbHF7xofDUQnZrsOUcRlVI7Y0sAx80AEAdEn2pfvADsSiMSZPKP1lXLmBBAwDUINmX6u23xTa7cCHRFVeoK8RkEhBoAEBZUgs/9CGxzZ5xRvkn68oFXBwAgLKkFn7jG+GWcC5XWK9SgUADAMqSWjh+PNEttwSvessthfUqFQg0AKBsqYXf+Q7R0qVjLWl+vnRp4e+VDOKgAQDqiFgpn0PuOKqDJw7ZN81ujfHjk+nWUm5QsB8AUD6iVMrXvDegSiDQAIDyosDsXZdwb8ByAYEGABhNGr0BywVaXgEAjEZl6yuTQaIKABWAaRNtuvUGzGSY3Z133klVVVUly5lnnpnkRwIAPHy57C7gbtVXX1145OdJtIxShW69ATNrQX/84x+nDRs2HPvAcTDaAUgLv4k2p3aRrhNtTkh1f793aQ/HB51Wb8BykbhasiBPnTpVaN3BwUF7KXamAwBSqV2klZvE6dYyfz7KjSbK1q1badq0aXT66afTwoULaSf3s/Fh+fLldiFrZ2ni2EkAdIOVauNGolWrCo+qO5dqNNHm7OrNNxOxnZWmm6RVo96AmcwkfOqpp+jw4cP00Y9+lAYGBqijo4P6+/vptddeo4kTJwpZ0CzSIp0HAEgFgzIn+PrBYhoG99xbsEBsV8sRj5w3bILT2Djo3/zmNzRz5ky655576Ctf+YrSHQEgcQzLnGDLly3dMLghqrucp9+u6hCPnDdcsLWNgz755JPpIx/5CG3bti3NjwUgeYcuww5djdwdEWoXhe5queOR1xkYkRKHVAWa3R1vv/02NWQ9NgZkDwMzJ5yJNsYt0kF9/cJ2tVzxyOvEu2llhkQFesmSJbRp0ybasWMHPf/88/SFL3yBcrkcLfByeAGgM4ZmTkSZaIuyC0nbXHnzbmD0D7PbtWuXLcYHDhyg+vp6+sxnPkNbtmyx/w+AURicOcEizKF0on5bmV1IKx65V66bVmZIVKBXr16d5OYBSA/DMydYjEWFK2xX3Xi5SVQzYOYNTGzQUQWAJB26GdvVYniCMa3AlQZzb2BiAYEGQJQKypzw21X2TrKvl0PzOLQurV1ujhiRYjpoeQVApQXiararop+xLlo3Le3QNlFFFiSqAJDta45sYuY6dd20ygYEGoAKJKrIlit7PWpiZt7wGxgINAAVRlSRLVf2epZbWhmb6g0A0CfDLtHkj5CKfwYmZpYFCDQABhNHZBMTSYGCGZUa1ywLBBoAg4kjsomIpKA5X6lxzbJAoAEwmDgiq1wkJcz5So1rlgUCDYDBXVXiiKxykZQw5ysoMTMWEGgADC5KHEdklYukpDlfQYmZkYFAA2BwUeK4IqtUJCOY87z9HTsKqePceivtFHLdQSYhyD6qMhtkg3dTzKiIm2GnZKjO8Qmr+JfF4GYJkKgCgCMYd91VMDEPHvTP4BBVJ5kmf/x5KafnaZFhl5WCGbqUsLA05tChQ/wN248ASNHdbVm1tSwRY5eqqsLC6/DS2Fj6d37Or7tZudJ7e+6lvb2w/aDPzTJex7SpKfv7nYCuwcUBsodIS2q26GpqCpauaJ6zqAVdV0e0f7//55p4my9rnmthzusJXBygcgnzE4viJaQiPlYW5337xNwgpvRmKlc1pYyCWhygconSklo0BU8gZGLrBQuzlcOsWdRKpYEwO5CtpA/VwufeXkBc2pYla+mrP20xM4fZ6zsoQyttTfJ/9MHSGEwSVgAyk3Qi9PSITeSJLrw9L4aGCn/jicOeHmtocMgedjUNWTup0cpzBKvH9oZ5kpAnzPj9un8HHR3xjpGiYXRnbG5RRtcg0KB88C9PdbQDCx//qr226144ysNvPUkhLb4ufIG6bYF2izQ/H+bXdFKcoO9A9CLGF6kEh1Gl2SFLU6Dh4gDlIanbZ5GW1LW1RN3
dRA8+qCzPudgTsp5aaT6tpX4qdYPsokb6ZbtGccAh34FwL7yY7poyeFKMAQINykOSFdv9/MQszB0dRHv2FNZRmOfs1igW6Vm0g+ZQDy2glfbjabSd8i2aiLPAd+BzeVNeck70VLjvvsrzTY8r9wBAhZJ0xXYW15aW8Fhc0fUEixYVR+ANU4420ZySqD2tymcKHlvenSreAa/MQAUl50S/4ptvrrwov9Qs6Lvvvtv+ktv5XgWANCq2s3BwrPGCBYVHPyERXS/ko4wrnyl4bP9hUgdZCZaci/IV91dIlF8qmYQvvvgiXXnllXbe+dy5c6mTz1TVOevALDJaWCdu0aJUyefpaMMsGr+vn6o9PM7DVGX7zdk18+wGojm5ZDIDw04FPww9RfRKVDl8+DAtXLiQHnroIZoyZUrSHwdMwUiTMxyjymfmcvTiwhWjYlyM87ydOm1XzcDe+HcZceZ1VU9TmELiAr148WK65JJL6OKLLw5dd3Bw0L66FC8gw2S0YrsCj0l6uSgt/hEn/DpPdqaRV+N3KmQpKVO7ScLVq1fTr371K9vFIcLy5cupg2fZQeWgaJIORCuncc89RC82ttJpu1roM9RLDTRAA9RAvdRsW85pTm66T4U9e0onBk1JyjTCB93X10fnnXcePfPMM/SJT3zCfm3OnDn0R3/0R74+aLageXFgC7qpqQk+aAASKO7nuBOWLCH63vcK//cK1CjXzUw+m9MUevigX375Zdq7dy998pOfpHHjxtnLpk2b6Pvf/779/7xHIOOECRPsARcvAIBkk0BWrybq6tLP05TL5jSFHi6OefPm0auvvlry2rXXXktnnnkm/fVf/zXlsnxUAdAE0SQQrpLKk5u6eZpaR3zTXu4ZLSNjTBHoiRMn0llnnVXy2oknnki1tbVjXgdASzJQdF4mH8iZ3NSN1gqepkAmISgfOgug16wam5nXXFNQC53GWuZ8oDTI5QqH3Dld+NGQryAelsag3GiG0bm2pF9pNR3HGrO4n47VT007XWRBNTugNzp36QiaVdNtrBUy0bZO49MladA0FujVM7AcsVPFrhbR4FvD4ryMSkHX/HRJM8wOPuhKQgefr0yZ0TRmrLyUS5S0x1qBE229gqfLnXdy5JgZ+yQDBLpS0KUzc9JlRlVkcGQ011jXKA0Vh/bv/76wpHZKp2TsQKArAT8hcpx4aWYjJB1WIPrDEfU1qx5rPk/5jb305sZCSnVuTjM1z8llyupTSYPkaZDKKZ2msWNpDKI4FE7j+0UjpD2Nv2aNZeVyyYxHZqpfRXNZ2bF2d1tHakvHxw1m/7K228hohDQYkmgxmcopraB5IprGAnkhUtSZOXb4WtQOobI/HG50GlecQ8Za3Pj71Y5uu2Fs3rUdp7lsK0Gkw77aKgmRTuSUVmTsQKCBvBAp6Mwc6+TmhS3rrq50fjiiF65777Ws9nbLqq8vfZ23FyDOxcZ8NQ3ZlrJbnItF+v9RkzWzcUj7eORy0e1xc5T6Ka3I2JERaPigs06aqWRB/t+w6Xjn/fX16USGeDUR9IrfuvHGwj5wuTfBSSG3y7+ZeqmJ/MfH3UxmUB/N2tVLvb1zjJvISwL3qdTSciwK5dlnCxOCqWdHlmGCGwKddUSFKG7B37CJkyRP7ijbdjI4WElFGqK6QiBsAdk4Vq+95h65xrIIdi1mMwJCEiXsVGpuJnr00eRPaS3y5i2NwSRhwk48iYkNoe0H+WmT9IXH2bbXvbPjvih2IvPjiP8haC7SayifJbHxtdG9Vs+GyvZxiE4ldCd8SieZNw8fNJATIhUnbZj/d3AwuaIQcX84XkLso8Kbl3YHCgi7q91/O+aDDp/lGja1wIQCZKcSuhM6pQNRcGWAQANvfCzCWMhYr0maPSq37WPGDVcVIi6+QN2+AuKeS3QWfo8TsVGyzTBTsYKIciM0lMApHUrMKwMEGgSj8qyWjRJJ0uxRse0QM86JuGCr2G9XWaS9rGwWabakhY4Xj6H
CQjp0CThK+jcEgQbJ1G30Oil1M3vibltwf9iv7PdndnP4GfM5GrL+c9G9YsesoyO13dYBnUL2kwQCDdRnQfkJO2cGZqHgsKQZ90VaGSgggca8TJIMq32I4malVnJWaleHAYEGarOgwoR96VJPk5Ez59hva5RSxLCg3YfQ16qNkmbuo7gKMo+1oizRGSkDgQbq7h9ZVWprw4W9q2tMnQn21f7Pk7rtO/WyWz2iPoAQM44vOrxf7KqILCAiWZUCiqtbmRVVlCU6I0Ug0EDdDAyrq8D7NnX02KLFliXf/vNj8UQaa3xqPzC3GHP6uIAPwH7bhiHrvy7vsKMr/CIsONQutoDwyrJWdJHi8lg5Cz3KNdcEsuBT9wMCDcYSdTKvpkbofYtr/H2yxYsykfb7BcsWbWAf+sjbuKpcYJRFkQorERDBi5/XxXD6dPG3aBH1AEaBQOuELqZAlBkYCV9pUFSDW+NiHwIWVXfAMe+b4wuXEbxcztp8a5ddTa4Qp1z697xjSSfhp4ni6giZoPRaNmzQ4xQEBSDQupDU9HpU0ZedgRF0ixw9qTYwLjj2LXfx/l51VSSrM2hhEd5HtYHV5oYbR7IhVStdhFqaohdD3iS7lrIQ4ZElINA6kNT0elzRl5mBEbSg37m2Q0oTpW65o9SZjCDQQuvW1SWjdN3dhRTv0HGGJ8kUn2ZBf8tKRISJQKDLTVLT66pEX1FEg73U1lpDg0O+u1vtMXEobEGLFPgv56JQ6Xhyko/PP1C77VJxp4Q7aeJ+aebuhb8PkeAbuDvSBwKdxZSoJERfRKjDbsFHxMlLS71Sm/tzjdbQGsGsxYQtZ2UirUDpir1JXseNLWdRcf7Slwp+Z9WnIMiYQP/gBz+wzj77bGvixIn28ulPf9r6+c9/nv1yo0kUFVAt+jKuEkG3CD91rLZjxYHGxhELWZ0qegZKLmPC6mSWmErn3l2vOw/RobA4G1XXosI4pItA/+QnP7F+9rOfWW+99Zb15ptvWt/61resP/iDP7Bee+21bAt0Eha0yl9cFFeJoFuEX/7fy4asXVX+LZ6ErM64PQMVCbOwf/rxx8W/S5/jFuSSEF14G1FLpET4uoHJAu3FlClTrIcffjjbAp1EUQFVoh/FVSL7a1Ux1qgW9JIl3tb+rbdG2t7vJvnUD3UvHPYXwxetSqCdIcQ5BbNS20NXtBTooaEha9WqVdb48eOt119/3XOdo0eP2oN2lr6+PjMFOomiAqpEX1Y8o/xaVVj7IhOU7oVD8BQmsQxXV1vWqlVCyhm37khcj47XVxLlFMxabQ8d0Uqg//u//9s68cQTrVwuZ02ePNl2efixbNkye+DuxUiBTqKogArRlxHPqL9WVda+bBTH7bcHX6B4ezIpeJIL+9yP1EabMIzj0QnabZlTMKu1PXRDK4EeHBy0tm7dar300kvWN7/5Tauurq4yLOiknHlxRV9UPHmmKeqvVcT65RRy/gy/9zvHjDP43PHHgqZk8WZe7eguWLgJibM7Fdvzqx889sLQhh47tM75m2jURZTrnOgpWCn1mMuNVgLtZt68edZXv/rVbPugdRZ9UVdJ3Dgt0Qw597251wVo2jTLmjRJbDweBY2O9QRMXpyduiTu+kwc1cIhhsXr7aF663JaM3oYZH3Qqi1aRH6kg4yujaOUGR4epsHBwbQ/NlvkckRz5kR/L/evnz+/0J+ef+sO/Jzp7CTau1dsewMD3q+3thKtXUvU1ka0a5f/+/v7C2PhdRn+f/GYmN27SRg2OqiKpn23nXZTC+8wNVMvNVHAGBTz2sEGuv/KY8+/QOtoLc2nqnzpfp1C+2gNXUHfoaV0W/93xux2NeXtsTfQAA1QA/VSMw1TbsxXxV+pChoa1K4HFJDklYJdGps2bbK2b99u+6L5eVVVlfX0008LvR8WdIIEuUpU1rLkbbE1HlQVj01BHoti/7BTs4JjidOwnL1SsR3r3S+Uzylrejl1lfzJK1llD9XZmYa8XzM
bh5RP2EnNQyMOz3wXx3XXXWfNnDnTjtyor6+33Rui4sxAoBPG60cmGukgc39dhqST4qpvLGhpiLNXKrboZ79L9aPC7pfkUyLsCcW9Cc1DIw4vGwIdFwh0yohGTcjGXKWYdOJlQR/zQaubJHRbxH6p2DLW+1zaIO4vTzDuLXAeGnF4sYFAA3lkal/IhgqmbEGzuLldDcesUh/TULKMKU/wddF8q4NuHxVWr1VvJ/Gi/PupRmr9JOPePD0YiMNTAgQayCMqouyblhUEEecm/33KFGXW7bdp6Zg/efl1Ry82olb+n/7pmLA/3qZjPXMkBi+8S4WLgniND5l1yxL3hji81AW6WsVEI8gAftEYbk49VT5swIkcKQ4/cHCe89/b20kVS+m7dDmNRIaM8OOqVjqNdtBzHT1EK1cS9fQQbd9eiDgRDU34138l2r+/5KXp1G9HaXC0xsGDRAcOkG2rr6A2+++uPfaFf4yuQA61311cRD8nrfFUABBoYPPc1oRjrJywu+nTS19vbCy8zn//m78hqq0N3o7AxaFqZFlNX6TLaU3JR3V15+h/3DGHaMGCQqiis73m5sIK7guIwGdXj8hqJ7UXnChVRJdOKoT2yf7AnPWHZd6UVtwb4vBSBwINaN06os8ta6Y+aqRhH3uPX99JTbRuX/Poa/k80caNRKtWFR75eSAswjt2FCxXx4Ldto2opqawkd5eogce8H4vqx4vvB6/7/bbQ7+5cZSnNXQlPde+rsRYjmzlB+wgi/QM6rPjltnvcMJ78a3IUJHmcTU1FS4uaRB2EUt7PJWApTGI4kie4nkfv4k05zVurOrMSSmJtPLbCDd/DUtnl4kMkZlI8xoTV6pj33OKoX08WRjYYbxc1YtUFwGrQA4hzC5lDA7ad8/7iHTz4PIYsSuehYVrcefuoGMqGxki272G129vH9s9XCK073f1ktX4XMkuTtF+Tk7hqJHAC5bJRcAqjEMQ6BQxPGjfyxAN6+YRlhQYarCqCNeSbYkl2zpEspJesbCODn+NXMfuoL6D42jQaqN7rb7LbihE0nCH8XLaGQYbJeUGAp0WGQjaTypEOdBgVVmONCkLWkL8i4V1zFfvZ216uHH8kl08wwMVGwGG2xlGAYFOg4wE7cvUxed1RCuulRisbmuL20OpsnrZFZLLqf0eJK9axcLqeafvZ20Wvf6f93r3HfRN+1bYyT0DdoZRQKDTwNCgfb/yG2F34s7f2f8stdt+k24qjx3X9gwatKzCiE5A3nDDmLrOUa/HXhfK0LRv0YtPgHmcETvDKCDQaWBg8dyg29iwGknFhe6EK54FmWZBxyyKKqicuBK9+PLVSiHuC6VwJEjQhSzEPOZGBgbaGUYDgU4DwyxokdvYYuuaK4Ty4mUZCkVayU7iuRe2imWJMnHl9R7RsfM6ik3L4uuMcKElPyNAwDw+XFtas8QAO8N4INCmdu5OeKipGqyiFzC/dlbuGSpVUQPudlp+txTSvpyQz5IYs/22DUPWS9fErMkt+B04oYEG2BmZAAKdFoYE7Sdl7Afqj6gLqK3N/6rhHENVIQYita6dz+UY6DjWK09eun3tomOW6T4edGUV/A64RZcBdkZmgECrQNT6MSBovyzu8rgWdFjYiOxFUCaumdeTmch0nyu33hq8bY+mtqOnmGwnc74QxPwOuMmtAXZGZoBAx0XWYtM8aF/Ggo69K84GOJSORS7INJPM0ots3kX1h/PFI8S0HHpiTSFj0KONle92m5qs7q6hMUOqmzJk/WaSf3ss3y8tbL8FzGMD7IzMAIGOQwaDQkV/p2yMxfIkePzKPcVK1o0QRaSKrzSi/RW93C8BpuUbLUtDW1P5LXM8/L5c+F96jGG3PRJuOM3tjMwAgY5KhoNCw36nnNgW67o08gHDIkXoHdNMURrj67evLP1KZHy4Acvg5DpraIl34abNt3aJtaYKKapUXG/jMB2n5uLk9d3APNYGCHSFhM7J4vc75Yi2oAzB0Ov
SyIXN79Y8P9IU9T9uenxsJp0CIWWBG7X0ZX24AQuPmy1kFuNi03Lot4PWHZPujT1mzxRu0UXWWIB5rA0Q6ApKPpHF63caO6JM8MJ2RX3PWD1h0z3gPcMnnWQN+zR7dRcoytGQdaQ2vuB7fQb7jG26u8f4nGUXvlhdTmsiu0dMdrcBS0qgx5W7HrVWqOoYwYXdufg8t/7hdbmAuWybqITgYXAjkeKhOnXqI3cyEmxxlNs3YB+W0c/P58niAvw+baH4LD5wZALV0BGyqGq0cwnjNBZop04appy9MhfLP+HALlKJU4h/yeJeaqk6SLkr59MElskIOO+6ke6je+lm+5VIHTO4Kw1/ab7dB9Si8emcedBRRXXHCG5PMmsW0dy5RFdfXXjk5/y6hvAPj/voRb0u8Y/3lT1iF7YBaijR8ufu6qWqXbt8e/bx63XWAXqEFlE/TSv52y5qpPm0ltbTMZGaSsn1whu3r59+/40224YV6THoJ+HfoaV0oPrUSO2wjm082gUiCoadztlD51uOsnRUiZN8YmAEiKhXh33UbveE49M+VtQn3BXhuEn4vQtEU5lHOmdzlxG/GtUqupgELVyLWdwtMnZitODW6JJL4S6zi8PA09kI4IOOS5RZb0MjQKLWBXL/eP3bZRWW+dRl7z7XmecaH1z0X0ZQg4rZO4tzofDzWUdZeOx8cbmaBEukjqzPPma/pgdKLiQJn08qT2fMT5YCgVaB7FllaASISD1ot/Xs9+MNikrg1x9r6S55X5jlHWSJ+4nG9bXd1rBEFxMRgeZejDzBKbL+w/Tl0OJDsvtdjvNJZU8FNAKILtCJ+qCXL19O559/Pk2cOJFOOeUUuuyyy+jNN98kI3Bm0xYsKDyGzYoITpQJr5cSQc2sHR588Njus8/5vvuIdu/K02dpI32RVtmP4+gDOkg1tJYut/2vbi9pI/XTNU/Op/N3HXNe8uReGxU+3K+buF/nbDfO2P/0wVaqWru2MJFWDM8dLF16rDu4IJ3UTuurWumL9/0xUV1d4Lq8z9fRo7Sx/cnApuPF+y3m0U7/fFJxOrOfev58ol2uedv+/sLr8GMLYCXI5z//eeuRRx6xXnvtNeuVV16x/vzP/9yaMWOGdfjw4ex19S6XBa3o/lHEq+Os42Up/55ykS1g2XhgJ8kjaKy+x0UyiaVlco+1eanEe0bu/bkaXdiqvN9jQvY4vfzyyy3rS18y2oI21ONX2S6OvXv32gPbtGlT9gS6HOVHFd8/Bmm943P2a8EkUz/Cq7wli7boRJzzfvaLR7ouDQ1Zz35rg7WfanzjkJ2LyaYb10Ryl7z0vZ4xzXWLm/FyqvfMxiFraDCgHVYZy9nG/XhDPX6VLdBbt261B/bqq696/v3o0aP2oJ2lr6/PHIFOu/yowin2MCPc+bGGtmCKYQE7AiYSDVJfUyjuEwfeT/+JzcJrHHXxu7pGJfvodZdgJ9WIRAWVqcxcnI+vgJyvbAl0Pp+3LrnkEuuiiy7yXWfZsmWO+7JkMUag06p7oPD+UcQId6whVWFsQQXi7Uk+GqsK/Bov/7Ws0EdP1SFs9RBOpwlsnP0t3sdYjV/LXEcj6sfDgjZMoL/2ta9ZM2fOtK1iP4y3oNOKK5I4+0XcFmG64VhDceN3g6Iw2B3A4XejNZG9Gs0G1T6OgLP/nCLuFRIXdX/ZH38Frba3tYAet/ZQfbzGr2WOU4vaScyQhkOpo51AL1682GpsbLTeeecdqfcZ5YNOE8H7x/9oX+lrHcsY4SosaMdtwBarkBHJFZzcxfxj+Nf9CJozjLq/nhX8BC6mWcOQhkOVK9DDw8O2OE+bNs166623pN8PgY5nQXvVHHZ+HDIFkhwxz8WI32XL+YeXlsZB+94up5zCxpa717DH0aBtDUuLbRSBVumM1SgzBJVONRbor3/969bkyZOtjRs3WgMDA6PLb3/728oTaJU/mpD7R07U2JULTugIKi/qpRuOZrb6TKx51YHmhescO24
DoY4tZYjP8rshSTJ13PNKqAINM0M0ul5ogTYC7TXhxwvHRleUQKv+0QTVCLWL5genREfVDZk4aGeizVdT/X61ZZhd8vvI2DUzRBaVFxwUzzACbQQ6LpkQaFU/GkfQuE1UUC+/pibruXYxceaJOdlJHHYNu2N6+ZHdAV4TbZ67GXTBKkN8lt8NSeIWtEqXDTJDjAECrQuqfjSi2W9sVQ8VXAki+sCry0ziRGmAUnyjwO9/taMQRud2iThhdEPLxJzjQxt6Ep/QUhX3HXQxVeZ6UBTZA5IHAq0LKm7XRVs4FYm9TIiTzCROlBaCPAHn7MaM6cGCx37t/lyjdaRmuu8GnSa019d0KXereh2LL0/yvqAELYHr8t0PdzxXrYwKIntAOkCgdSHu7XoUk3VE7GVCnEQtKtHdce8af1ZOIpX7DrozUOScEqC8TdXCUuxJcqL82Je+jwRnVYMuoEnGlimI7IFIpwMEOisWdBSTtUjsVYc4RRnOsmWW9Ze1csWQOuh2YbGx61lsUHu/7nXTwu6Ov6UOu37HmAPKCTTFVzh+nnb2n4LInkpNHEkbCLQuxE2nimKyusQ+xeg+z8U3zVmBQHMI3xjhj3m/HnbTwlY714bOPx5yQMvh6A24bRKN7Mlgvox2QKCzkk4lY7KmZAL57Y7XIjvJVnBdNFpzySdzxG0VjrxnjBhVVdmTkUHaqFGUn1p8bptEI3sqsXhR2kCgdSOqr0HUZE3ZiSgaVBIlTI19vdwuKriqXSHuOqxUqHM77zaqNYvyU4/H1cf4C0+GgEDrSNRbXhGTNcXqZu7dueEG/2FFSfRwshTvpqW+PQ6HJSvKFV+/wsLSZVLgTQLFi/QBAp01/Kq7cahBmQNZgyyz6MWGChYw12OWmVwMqsnM4suHcLp/BF/JOlmswtbdNWRPrC4QSSYCiQGBzqJhrVl2gTOcm25KrjkqiwiLtFPXI8r7o2i7bAKPqRd5/m544rAMN2AVzSGkepuBhnVthJBp68cC4GQJyirl1fR4pEy+sO7forHbZayTrxYfv47qJghADAi0AZha10Y0sbF44YgKu72TpFKKJra4xTkfs1iU41/W7KYlkbhB52LGWZ66nnOVLNDVIp2/gVryeaK2tsIvZGyX9cJje3thvbThz9y4kWjVqsJj8RiCxh3ED3/dSpMO7KA51EMLaCV9jjZQH02nYaryXJ9f30lNtJfqpce/ixppPq2l9dTq+feaGqIq74+1X29qImpuLjzP5YjmzCFasKDwyM+No7eXaNcu3z9Xk0UzqI9O7++l+fOJ1q1LdXQghHFhK4DUfzO2APb1FdZjYXDEkZ8PDBA1NBREhAXD7/Uo8I+TBbh4bI2NRCtWELW2ho/bjx/9iChPOdpEIztDRG30fVpL820xZpEYparKlu0tV3bS4Z/XEL0fvv12upf20Kk0QA3US800TP4HgEWYj6/zWPw609lpqBD7wSeGAFNpYNQwaGnJ2DEwGUtjMlFu1APZWFs/X/XSpep82CIulyiJjRMn+v+tUFfaFVbBIRQ8S7dypTX09Abrd3XTff3XUX3NvPkw/3Im3BuMYAB08YSqaSGEpgEftObIJA3I+nyj+LBFq6L6tYaKungV/reqq0ufO61fPLp8R/U1s+iGNdM1cfI2SgC010VO6yScDACBzkjSwOCgfDG7Yl0b7ZKt6ILB24s6nsg1OpyD5OrRdbj2WMcW2SVKdVfdJ28DGdkp952I34QqLOhkwSSh5rB/j/26jHvCqtgX+vzz0Xy+zIEDRBdfTDRrVsG3HDT5J+impL17j41blmrK02dpI32RVtFcepZWUJvdAS10lpo1gw/K8ccTbdhAtHIlUU8PvfjE9pKJwOLt8yM/D5sENGnyNhY8gbB2LVHj9MAJ1bDjA8qApTFZ9UE7hMXaFvt83S2mRP2uPgZoyS27bJ0G0XToYms5Tkagl2nHdwbOPnlt30nCELGAHXfH7WJF9My1MIeGrE0dhUzCOa5
zyOg7BMOAi8MggnyhjnCKCJDsUvyDlK3TINNHIEq5UdFa10Hbd7IPORPRfeGTafEYMgwjyVQSjoFAoDMCiwgXu/cWoPgJGbzwD9VpfSWT3ixSw0l5T78R09WxnkW2z1Xv/utv14ROAsrcqRhrQVsZjFIxEAh0VhgasjPwRMtqRl3YZRHFsuLXg4oPqeqKbU9uFZnwTjSJ6Pbt6ncjOxEUFRN2p5J2oSSIaDaBQGeFCDGsUZfiztuillVYTY4o5UaHfe4U/n3JsauE4ysW3b69zaYma2hwyHe8/q6SY3cqafpoMxXqB0qAQBuKWxzttkoCAvTqt1ZaNa5WebKLqGVY7LsN22YUC5pdEsXP+Q7BsWAdcXIEWnb7/3mv94UszFXi3Kn81a3pmM6ZDPUDo0CgDcTLYppfJx5eIdOKKmAz0mMMWsLKjRaEr9DiyvH5jqNBXx+wcxFxXByyPu5/v2FlrAsJ9yJM2r0hmjQEn7G5aCPQmzZtsi699FKroaHBHtD69eul3p/1MLswi0lE4LhTM9+6O9tx/7jd4XVRoxOiVLErdR2IJUmIXETcIXb5mBa0qKuE10t6ghCtqbLPIV2q2R05coTOOeccuv/++5P8GKMJSo7goj9tVMgMcVd+c57fmO+k3udzo/kIO3bYeRxOPgft2UPU0SE2Fi62JDvGMDgJgpMh+ik4SUIUTqrhRJ8HHzy2/Supi4YCCiQ5GRhnf6PZLv7kTg7iIktCn00NoUk9QQlBQp8xoHY9YDipXDLYVIcFHdli8oouKPbNsj84CLY4w1o9Bd02yzQX91uiJtr4WdCOH7yurvA6N5r16/Jd7Lj1cgWJuWIK0TJhaeJxJ/ZgQWefQ7q4OGQF+ujRo/agnaWvry/zLg7RCnFhAhcmArJxzlHGmOTiXES6usaKIIs0izU3BhgWiBP0EtJFE4NdMa0jraH8LmKqJvbQ3DX7HDJVoJctW2av516yINB+4WsqrFNHh8ImjqJmkMmMMc4kZdh2ubxqqAi6DjT7572Ou9f3sXmp950Ki3OQyKqe2ItzMQX6Y6xAK7OgNYvwD7r1DbOYZBaRCawoh0ZmjMVWrkqxXrZMXgSjuBy48zVHzxTfqYRdxJJwSyAdO7sYK9BKojg0i/AXufUNs5guvbT8NSLCwvjYxVAs+CrC/ooX2UJGcVwOshcx2QYMhtoZQBGVK9CaRfjL3PoGWUy6TBxFSQX3Wt+rE4wqgXaK8YdtnxN7ROtlh6HL9wPMQEbXqvifpCJEDh8+TNu2bbP/f+6559I999xDc+fOpZqaGpoxY0bo+9977z2aPHkyHTp0iCZNmhS8MsczcfFjvwLKHFvFMVbbt6fWcI3DrObODV+Pw+G492BQ30Hetf7+wk+9nLsm2wPRCTvjpWo4Ty01vXTu1AGyphb6B/a/m7NrLO/f7/1+Z9+uu04sXJCPJSNy3N09F6Oi0/cD9EdK15K8UvT09HhO+i1atEi9BR3RjEnyNlLlra+pE0eOFe1ZE3rE9RS2b2vWBIcJFm+Ovz+ZqBNVx8/U7wekj5YujihICXQENUzaXS1zzXAuFI8/bln33lt45Ofc9sq5gHDVObdQ6VzH1xEtv0JEdpW6EfVS4eJxqvLJRsaoSp/GxB4QoTIFWtKCTsNdLRrT6hXb6yy50tpB9nojTa+1njhy9j2sXsZwkenrdzcje+2NGhmjwkeMiT0QRmUKtESEf5oFacJuff1ie4PGZsIts3O9FK4455i/AdsKS9gpFtgoUSSmd0oBZlCZAi3hCEx71t3r1pfbLLW1HUtVLscteZI4Vq9UTWifq45zQW31Kajvl+UnW30PURagooolla178fTSwjz2FDq/PjJVn3ZBmuIiRhyxUFdHtG9fIXrAL3ohCJaTvr5CNIWuOIWXRAsRBbXM5siHNQvW0RqaT9OpNEpnOvXbr3d9cd2YCAnnuHMz8Joa/49FN2ugK9kSaL+SbhzfVBRH5Ve1zc3WreqGxeJx8GB
0UTatohmH3/F18ZfUTH3USMMib/K76uTz9OlVbVRF1pgTtposu67fp1f7i/u8eUQPPVQQYnclO+d5Z6d3CFzc6nQAxMLSmKTqQYtOIqn09cp0whZdONFC50wzx+PELoh8HEewqE8q5ICoSLRB2ykQl8r1QUvAP740fb2qiiI54+KC9e6QuzHioUFIgSNyt1NHdEewaBiHu++Xh5qKHhLNklJBhoBAC8KBA2lNHqkq2Rlk9ZeIRwLmX1S9t9+3Ycg6UtM4pims0NUw6tUtopqi7RRIksqdJJT0GZ5xRnq+XlG/txu3X5TnP2trvddlVWL+9avryJo/f2zaO+ci8+vr1kmPg9/C6cycQn311YVHfi6yKd6HOfNydMJDK6gqiiN4xKFd8DaPxbdWgXNAfCYf/WA3uF/FAFMmaUFGsDLk4pA1GtMMtxMJ0+bQu3/5F/9MQn50Gqb6LU5iSCRL1Urhdj9iut1jLX4F9Un5l5hUdToAKtbFEUVE0u5eoaJeQ5h4CCeGFAtWgO8ikdt9SV8J1+Lgz/Kq57GPapWrKarTgSSpOIGOIyJJFbnx06C49RrCxEM4McQRrJDbDv48kX6CSSV58HHjO4viO4TiscylkFuKCANE2ymQJBUn0HEtHtVFbsJcLXGCK8LEY46MBe132+EsHR3Wczd1eWbvOQ1rnSWscW1Uwr7bsIavUW+DUJ0OJEXFCbQKn6GqiLQ0wrOCxCNHQ9aRWgG/DTu3BQKzvTplO77gYpFmKzeJKD6R79aplmdXx1N40FGdDiRBxQm0Lj7DtIswuT+LY6M5dHBojYDfRvCg+U02siByQ1W/YkWqEP1uvzxJrKO3LBqEkoOMUXECrYvPMO0LBe8PC7JXfgZ3qA702ygKzGY/sMgdSlSlE83A5JKtUFOQNYEeRxmAQ2e5xgWH+HJILf9kRUNsVSISL11Neco/K9EzKoAnnyS6887S/XXCnf/4e6209okWaq33+ayogdkuGujYTvtukoOl29pKg4sFe00Vf7fu/XRYupToiivstQu9wwDIClbG46DT7DgSZkEHtX2SxW1ZuqMb2BcdeNcQtaq9hwUdeIeiyCnvV7KVQ/AAMImKc3Ho4jMM0jy/tk9RJ7KKLwZewu9EWgS6U8KiOAIWxwfNFwLf4St2ysMfDLJARQt0ufGKsAhr+xTFSe64kP2E34m0eK49RPhlq9q7ojgC71B0mb0FQCMquhaHjj0DmqmXmmiXf/HtCMUd2N/L/uwV1GZXo/Cqk8yc/6OQOhRO/eyODuHP/qC+kf69fS3d1NPqLrVdStqdEQDIGJmYJNQNFqyWloLesvac838GiP5erVDxfF9rXS817fev6sMifdy+EeEPmjzjmbg77iA666yxk3lNTUT33FNoAzMy2XhcczM1i0xsik5EKpqwBCBrQKATwq7g5mjixgYxgZYQKt7+0msGiDoVCr/7yhIzysR+L5feO3DAfx3+O68HABgDBDrN/k8c/+YVK8axgPx3SaGa3dIgJtAyFmrJlQUAUE7gg04DJ5iXka2FHIRTJ9m9zeJts4siTPiTarzHlniQ9czw31FYGQBPINCadRyPIvxVcYQ/TiX+MDBJCID+An3//ffTrFmz6LjjjqMLLriAXnjhBapIwjqOR7Fk4wg/i7Bs5xWZMWKSEIB4JB3zt3r1amv8+PHWP//zP1uvv/66df3111snn3yytWfPnkzGQUcmbg9B2SyOKEkksmPUpUgKABqhVaLK7NmzrcWLF48+z+fz1rRp06zly5ePWffo0aP2oJ2lr6+vMgS6HC2kZZNIoo4RhZUB0DNR5YMPPqCXX36ZLr744tHXqqur7eebN28es/7y5ctp8uTJo0sTT3BlHXYRcOyxV3RHxKanSv3D7O549lmi66+PNkY/Fww/j+p7B6BCSFSg9+/fT/l8nk499dSS1/n5u+++O2b92267jQ4dOjS69HF2XdZJqYX0GNfxKYKhdzffTMQX2IMH443RLe5
eYg8A0DcOesKECfZSUaQQ6eBV7XPG9Gb6v7WNdMJBn9hsh337xD/Ia4zORKT7M3bvLrweZkXzlUVV4gwAhpGoBV1XV0e5XI727NlT8jo/nzp1apIfbQ4JRzr4BWr07c7Rlw6sKOimXxy1LO4xxnXfJBkCCEClC/T48ePpU5/6FD3LPswRhoeH7ecXXnhhkh9tXpZh3GQTD8L0cX1VK/2v2rVkuf3D9fVyH+Q3xjjumyghgABkjMRdHLfccgstWrSIzjvvPJo9ezZ1dnbSkSNH6Nprr036o6nS28GI6ONDB1rp6g0tNCdX5EZgEbzmGrEPCRpjVPdNyJWFMycHv9ZOP/5dC02dnoPXA2QXKwXuu+8+a8aMGXY8NIfdbdmyReh9FR8HHbMdTORu56IheGFjjFoPWvB9Tj/EiE1pACgLMrpWxf+Qprz33nt2uB1HdEyaNIkyj+IJMY7WYLdtGJzQWFIficfBvl6/4k5OFbonnii80W+MYdtxikRxNmXxNjjUhH3OISyglbSaFowa8YjaAyYgo2uoxaETTiW5BQuChS9p93ZYcSdeHnyQaN684DFGLRIlOCE6QA2Jh4sDUE4g0BkmVhE9VcWdomwn5MoyTFW0k5qol5pVh4sDoBVwcVSAi8QrDpotZxbnUJ1V5XaR3Y4TxcEUuUdYnJn5tJbW09jBcw0qvgEBIAsuDgh0VvBSYbZC2YRubTUz38Njn9hybqdOT3H29KcDoBkQ6ArBEd3ck+voM51sbVoj9uUIWZg9G9nJ4f4Buqq9gdbvb6Y85YTnGwEwWaC1SvUG8sbl7l152kFtZHl09rZdA6xcPHvGvQZNVK6RiVPetwXHE3VzuDgpDRcHQFswSWgCrkpH69bkR5PsmqmXmmiX/xeZodmzJJrSAKAzsKDJPD/sp3ONdJm1wvbDNlDyxZZ0QnXjcQB0BgKtMz6V4Kbm+2ktzbcjGZxY4KSKLekIGo+DSgECnQZRQigC6lFUk2WHm3VSO32ItlEfNdJ06rdf9509i1BsCQBQXuCDTpqoJTNDKh2xGM+gPrqInqc2WlESIzwKZs8AMBoIdJLEKZn55JNCH8E+aPZFs7ujnzB7BkCWQKJKUjiFgvys4KDAXRbuyy8X+pg51EObqJCZkaO8HdXx7fYBmt2C2TMAdARx0DogU6y+OPXN8T2HwN7m3bkm6s0f8y1Pa8rRjZ1zaDbCzQDIBJgkTIqoxerDhH0E9jZPXd1Jz9blEG4GQEaBQOvWa1BU2NvbKTe/dcS5Abwwsv4IAEVgkjApwooxM6wW7q7ZosLO2RrAF/SbBVkAAp1GMeYgE++qq0qjORJsIlspoN8syAoQ6KTzkru6wu+ri1uBxKqyD8I6mbsPNwA6A4FOmrq6YDXwKmaEqkCpBM8AoDuYJNQ1mgNVgVI93ADoCARa12gOBlWBUj3cAOgGXBxJg0m/VMHhBlkCAp00mPRLFRxukCUg0GmASb9UweEGWSGxYkl33XUX/exnP6NXXnmFxo8fT7/5zW8SLSpiBEhtw+EGFc97OjSN/eCDD+iKK66gCy+8kH74wx9W/Jdig0m/VMHhBqaTmEB3dHTYj48++mhSHwEAAJlGqzC7wcFBeym+FQAAgEpFq0nC5cuX274ZZ2nimhMAAFChSAn0N7/5Taqqqgpc3njjjciDue2222zHubP0cU4uAABUKFIujltvvZW+/OUvB65z+umnRx7MhAkT7AUAAICkQNfX19sLAAAAgycJd+7cSQcPHrQf8/m8HQ/NfPjDH6aTTjopqY8FAIDMkJhA33HHHfTYY4+NPj/33HPtx56eHppT3CQVAABAupmEKlCeSYhMPgBAmdEik1DLPkjcaqO4mju3luLuJVy8AQAANEOrOOjEQJM6AICBZF+g0aQOAGAo2RdoNKkDABhK9gUaTeoAAIaS/UnCpJrUISIEAJAw2begk2hSx5OOs2YRzZ1LdPXVhUd+zq8DAIAisi/QqpvUISIEAJAS2Rd
olU3qEBECAEiR7PugHViEW1oKUR08ccg+Z3ZriFrOshEhSGcHAMSkcgRaRZM6RIQAAFKkMlwcukeEAACABxDockeEAACADxDockaEAABAABDockWEAABACJU1SahTRAgAAIQAgS5XRAgAAIQAFwcAAGgKBBoAADQFAg0AAJoCgQYAAE2BQAMAgKZAoAEAQFO0DrOzuDocEb333nvlHgoAACjB0TNH34wV6Pfff99+bOL6FgAAkCFY3yZPnhy4TpUlIuNlYnh4mHbv3k0TJ06kKr8CRZrBV0e+oPT19dGkSZPIFEwct4ljZjDuyj7elmXZ4jxt2jSqrq4214LmwTdyjQsD4RNBh5OhEsZt4pgZjLtyj/fkEMvZAZOEAACgKRBoAADQFAi0YiZMmEDLli2zH03CxHGbOGYG48bxFkXrSUIAAKhkYEEDAICmQKABAEBTINAAAKApEGgAANAUCDQAAGgKBFoh999/P82aNYuOO+44uuCCC+iFF14g3XnuuefoL/7iL+y0U06n//GPf0y6s3z5cjr//PPtEgCnnHIKXXbZZfTmm2+S7jzwwAP0iU98YjSj7cILL6SnnnqKTOLuu++2z5P29nbSnTvvvNMea/Fy5plnkklAoBXxxBNP0C233GLH5f7qV7+ic845hz7/+c/T3r17SWeOHDlij5UvLqawadMmWrx4MW3ZsoWeeeYZ+v3vf09/8id/Yu+LznDZAha4l19+mV566SX63Oc+Ry0tLfT666+TCbz44ov0T//0T/ZFxhQ+/vGP08DAwOjyy1/+koyC46BBfGbPnm0tXrx49Hk+n7emTZtmLV++3JjDy6fD+vXrLdPYu3evPfZNmzZZpjFlyhTr4YcftnTn/ffft8444wzrmWeesT772c9abW1tlu4sW7bMOueccyyTgQWtgA8++MC2ii6++OKSQk/8fPPmzSo+AgRw6NAh+7GmpsaY45TP52n16tW21c+uDt3hO5ZLLrmk5Bw3ga1bt9ruu9NPP50WLlxIO3fuJJPQupqdKezfv9/+wZ166qklr/PzN954o2zjqgS4JC37Qy+66CI666yzSHdeffVVW5CPHj1KJ510Eq1fv54+9rGPkc7whYTdduziMIkLLriAHn30UfroRz9quzc6OjqoubmZXnvtNXv+wgQg0MBo2LLjH5wpvkUWi1deecW2+teuXUuLFi2yfeq6ijTXUG5ra7N9/Tz5bRJ/9md/Nvp/9puzYM+cOZO6urroK1/5CpkABFoBdXV1lMvlaM+ePSWv8/OpU6eq+AjgwQ033EA//elP7UgUU+qGjx8/nj784Q/b///Upz5lW6UrVqywJ990hF13PNH9yU9+cvQ1vlvkY/6P//iPNDg4aJ/7JnDyySfTRz7yEdq2bRuZAnzQin50/GN79tlnS269+bkJ/kXT4PlMFmd2D/zbv/0bnXbaaWQqfJ6wyOnKvHnzbLcMW/3Oct5559n+XP6/KeLMHD58mN5++21qaGggU4AFrQgOsePbVT55Z8+eTZ2dnfYE0LXXXku6n7TFFsX27dvtHx5PuM2YMYN0dWusXLmSnnzySduX+O677452qTj++ONJV2677Tb7tpuPK7c84n3YuHEj/eIXvyBd4ePr9u2feOKJVFtbq73Pf8mSJXaMP7s1uHUeh8DyBWXBggVkDOUOI8kS9913nzVjxgxr/Pjxdtjdli1bLN3p6emxQ9Tcy6JFiyxd8RovL4888oilM9ddd501c+ZM+/yor6+35s2bZz399NOWaZgSZnfVVVdZDQ0N9vGePn26/Xzbtm2WSaAeNAAAaAp80AAAoCkQaAAA0BQINAAAaAoEGgAANAUCDQAAmgKBBgAATYFAAwCApkCgAQBAUyDQAACgKRBoAADQFAg0AACQnvx/VtB/6WQIoaoAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -132,7 +132,7 @@ "\n", "# Initialize classifiers\n", "knn = KNeighborsClassifier(n_neighbors=5)\n", - "logreg = LogisticRegression()\n", + "logreg = LogisticRegression(random_state=SEED)\n", "\n", "# Train classifiers\n", "knn.fit(X_train, y_train)\n", @@ -158,7 +158,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ef0aadb94674c699fcc3271964e750d", + "model_id": "55df656e5d814921aa30b0e7dbef7f89", "version_major": 2, "version_minor": 0 }, @@ -178,12 +178,12 @@ "\\toprule \n", "\\textbf{Model} & \\textbf{ROCAUC} & \\textbf{AP} \\\\ \n", "\\midrule \n", - "logreg & $0.45$ [$0.36$-$0.54$] & $0.48$ [$0.39$-$0.58$] \\\\ \n", - "kNN & $0.56$ [$0.48$-$0.65$] & $0.54$ [$0.44$-$0.65$] \\\\ \n", + "logreg & $0.44$ [$0.34$-$0.54$] & $0.47$ [$0.38$-$0.58$] \\\\ \n", + "kNN & $0.59$ [$0.49$-$0.69$] & $0.55$ [$0.44$-$0.67$] \\\\ \n", "\\midrule\n", - "Effect size & $0.12$ [$-0.01$-$0.24]$ & $0.06$ [$-0.00$-$0.13]$ \\\\ \n", + "Effect size & $0.15$ [$0.03$-$0.28]$ & $0.08$ [$0.01$-$0.15]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.04$ & $0.05$ \\\\ \n", + "$p$-value & $0.02$ & $0.03$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -209,7 +209,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "690504f2694d4722a4cfc24289601429", + "model_id": "f7d0150a30374b5b9bd243f3c5df1b97", "version_major": 2, "version_minor": 0 }, @@ -229,12 +229,12 @@ "\\toprule \n", "\\textbf{Model} & \\textbf{ROCAUC} & \\textbf{AP} \\\\ \n", "\\midrule \n", - "LR & $0.45$ [$0.30$-$0.62$] & $0.48$ [$0.24$-$0.74$] \\\\ \n", - "kNN & $0.56$ [$0.44$-$0.73$] & $0.54$ [$0.34$-$0.74$] \\\\ \n", + "LR & $0.44$ [$0.29$-$0.64$] & $0.47$ [$0.25$-$0.70$] \\\\ \n", + "kNN & $0.59$ [$0.46$-$0.75$] & $0.55$ [$0.35$-$0.74$] \\\\ \n", "\\midrule\n", - "Effect size & $0.12$ [$-0.10$-$0.34]$ & $0.06$ [$-0.05$-$0.19]$ \\\\ \n", + "Effect size & $0.15$ [$-0.00$-$0.31]$ & $0.08$ [$-0.00$-$0.18]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.20$ & $0.18$ \\\\ \n", + 
"$p$-value & $0.06$ & $0.12$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -249,7 +249,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Summary:** According to conventional bootstrap, we did not find any significant difference between the two models. Once we take into account the subject-level structure, we see that the difference is significant, which is valuable in applications." + "**Summary:** According to conventional bootstrap, we found a statistically significant difference between the two models. Once we take into account the subject-level structure, the evidence becomes weaker (larger $p$-values and wider CIs). Depending on the split/seed and effect size, the result may or may not remain significant." ] } ], diff --git a/notebooks/Regression.ipynb b/notebooks/Regression.ipynb index 4a52e68..b4660fe 100644 --- a/notebooks/Regression.ipynb +++ b/notebooks/Regression.ipynb @@ -31,7 +31,7 @@ { "data": { "text/plain": [ - "'0.1.5'" + "'0.1.6'" ] }, "execution_count": 1, @@ -126,7 +126,7 @@ "\n", "As stated in the documentation, the testing routine returns the `dict` of `tuple`. 
The keys in the dict are the metric tags, and the values are tuples that store the data in the following format:\n", "\n", - "* p-value ($H_0: model_1 \\leq model_2$)\n", + "* p-value ($H_0: model_1 = model_2$)\n", "* Empirical value (model 1)\n", "* CI low (model 1)\n", "* CI high (model 1)\n", @@ -156,7 +156,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ef125fc1d67468ab28cb2f52a47f2fe", + "model_id": "8966ef7c555d4b0aa12ba0ab900b2ddb", "version_major": 2, "version_minor": 0 }, @@ -189,12 +189,20 @@ { "data": { "text/plain": [ - "{'MAE': array([ 3.29934013e-02, 3.85996728e+00, -1.36873670e-01, 7.93333005e+00,\n", - " 4.60042861e+01, 4.17109824e+01, 5.05579091e+01, 4.98642534e+01,\n", - " 4.47314857e+01, 5.51450226e+01]),\n", - " 'MSE': array([4.99900020e-03, 8.02532883e+02, 2.27838928e+02, 1.41044449e+03,\n", - " 3.22590754e+03, 2.69030996e+03, 3.82864762e+03, 4.02844042e+03,\n", - " 3.25671296e+03, 4.88786525e+03])}" + "{'MAE': {'p_value': 0.06598680263947211,\n", + " 'diff': 3.859967279672837,\n", + " 'ci_es': (-0.13687366958812033, 7.933330050547908),\n", + " 'ci_s1': (41.71098240871023, 50.557909066351826),\n", + " 'ci_s2': (44.731485671191564, 55.14502262443438),\n", + " 'emp_s1': 46.00428611399232,\n", + " 'emp_s2': 49.86425339366516},\n", + " 'MSE': {'p_value': 0.009998000399920015,\n", + " 'diff': 802.5328829652262,\n", + " 'ci_es': (227.83892760580275, 1410.444493286724),\n", + " 'ci_s1': (2690.309962073994, 3828.647621254201),\n", + " 'ci_s2': (3256.712958773253, 4887.865246354953),\n", + " 'emp_s1': 3225.907539357549,\n", + " 'emp_s2': 4028.4404223227752}}" ] }, "execution_count": 5, @@ -234,7 +242,7 @@ "\\midrule\n", "Effect size & $3.86$ [$-0.14$-$7.93]$ & $802.53$ [$227.84$-$1410.44]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.03$ & $0.00$ \\\\ \n", + "$p$-value & $0.07$ & $0.01$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] diff --git a/notebooks/Two_sample_test.ipynb b/notebooks/Two_sample_test.ipynb index 
ac0c9cd..32f71cd 100644 --- a/notebooks/Two_sample_test.ipynb +++ b/notebooks/Two_sample_test.ipynb @@ -33,7 +33,7 @@ { "data": { "text/plain": [ - "'0.1.5'" + "'0.1.6'" ] }, "execution_count": 1, @@ -93,7 +93,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4950e740873a4c6caec761f58726ef43", + "model_id": "90b9d7f85b47419ebfc863e3540313ab", "version_major": 2, "version_minor": 0 }, @@ -103,6 +103,20 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "ename": "KeyError", + "evalue": "(slice(None, None, None), 0)", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m res = \u001b[43mstambo\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtwo_sample_test\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample_1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatistics\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMean\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmean\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_paired\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Dev/stambo/stambo/_stambo.py:337\u001b[39m, in \u001b[36mtwo_sample_test\u001b[39m\u001b[34m(sample_1, sample_2, statistics, groups, alpha, n_bootstrap, seed, non_paired, 
silent)\u001b[39m\n\u001b[32m 329\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 330\u001b[39m bootstrap_result = bootstrap_arrays(\n\u001b[32m 331\u001b[39m arrays=(sample_1, sample_2), \n\u001b[32m 332\u001b[39m statistics=statistics, \n\u001b[32m (...)\u001b[39m\u001b[32m 335\u001b[39m silent=silent\n\u001b[32m 336\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m337\u001b[39m result_final = \u001b[43mpairwise_bootstrap_test\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 338\u001b[39m \u001b[43m \u001b[49m\u001b[43msamples\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample_1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_2\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 339\u001b[39m \u001b[43m \u001b[49m\u001b[43mstatistics\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstatistics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 340\u001b[39m \u001b[43m \u001b[49m\u001b[43mbootstrap_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbootstrap_result\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 341\u001b[39m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 342\u001b[39m \u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m=\u001b[49m\u001b[43malpha\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 343\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 345\u001b[39m results_return = {}\n\u001b[32m 346\u001b[39m \u001b[38;5;66;03m# This is a necessary post-processing step\u001b[39;00m\n\u001b[32m 347\u001b[39m \u001b[38;5;66;03m# The pairwise bootstrap has only two models, so, the labels are not needed\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Dev/stambo/stambo/_stambo.py:224\u001b[39m, in \u001b[36mpairwise_bootstrap_test\u001b[39m\u001b[34m(samples, statistics, bootstrap_results, labels, adjusted_p_value, alpha, n_bootstrap, silent)\u001b[39m\n\u001b[32m 220\u001b[39m result_final[s_tag][label] = 
{}\n\u001b[32m 222\u001b[39m \u001b[38;5;66;03m# And we report the p-value, empirical values, as well as the confidence intervals. \u001b[39;00m\n\u001b[32m 223\u001b[39m \u001b[38;5;66;03m# The format in the documentation.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m224\u001b[39m b_res = \u001b[43mcompute_bootstrap_model_test\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 225\u001b[39m \u001b[43m \u001b[49m\u001b[43mbootstrap_results\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbootstrap_results\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 226\u001b[39m \u001b[43m \u001b[49m\u001b[43msamples\u001b[49m\u001b[43m=\u001b[49m\u001b[43msamples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 227\u001b[39m \u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m=\u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mj\u001b[49m\u001b[43m=\u001b[49m\u001b[43mj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 228\u001b[39m \u001b[43m \u001b[49m\u001b[43mstatistic\u001b[49m\u001b[43m=\u001b[49m\u001b[43ms_tag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 229\u001b[39m \u001b[43m \u001b[49m\u001b[43mstatistics_dict\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstatistics\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[32m 230\u001b[39m \u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m=\u001b[49m\u001b[43malpha\u001b[49m\n\u001b[32m 231\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 233\u001b[39m result_final[s_tag][label][\u001b[33m\"\u001b[39m\u001b[33mp_value\u001b[39m\u001b[33m\"\u001b[39m] = b_res[\u001b[33m\"\u001b[39m\u001b[33mp_value\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 234\u001b[39m result_final[s_tag][label][\u001b[33m\"\u001b[39m\u001b[33mdiff\u001b[39m\u001b[33m\"\u001b[39m] = b_res[\u001b[33m\"\u001b[39m\u001b[33mdiff\u001b[39m\u001b[33m\"\u001b[39m]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Dev/stambo/stambo/_stambo.py:71\u001b[39m, in 
\u001b[36mcompute_bootstrap_model_test\u001b[39m\u001b[34m(bootstrap_results, samples, i, j, statistic, statistics_dict, alpha)\u001b[39m\n\u001b[32m 61\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcompute_bootstrap_model_test\u001b[39m(\n\u001b[32m 62\u001b[39m bootstrap_results: Dict[\u001b[38;5;28mstr\u001b[39m, npt.NDArray[\u001b[38;5;28mfloat\u001b[39m]], \n\u001b[32m 63\u001b[39m samples: \u001b[38;5;28mtuple\u001b[39m[Union[npt.NDArray[\u001b[38;5;28mint\u001b[39m], npt.NDArray[\u001b[38;5;28mfloat\u001b[39m], PredSampleWrapper]],\n\u001b[32m (...)\u001b[39m\u001b[32m 67\u001b[39m statistics_dict: Dict[\u001b[38;5;28mstr\u001b[39m, Callable],\n\u001b[32m 68\u001b[39m alpha: \u001b[38;5;28mfloat\u001b[39m=\u001b[32m0.05\u001b[39m) -> Dict[\u001b[38;5;28mstr\u001b[39m, Tuple[\u001b[38;5;28mfloat\u001b[39m]]:\n\u001b[32m 69\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33mr\u001b[39m\u001b[33;03m\"\"\"Computes the bootstrap test for a model comparison.\u001b[39;00m\n\u001b[32m 70\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m71\u001b[39m n_bootstrap_i = \u001b[38;5;28mlen\u001b[39m(\u001b[43mbootstrap_results\u001b[49m\u001b[43m[\u001b[49m\u001b[43mstatistic\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[32m 72\u001b[39m n_bootstrap_j = \u001b[38;5;28mlen\u001b[39m(bootstrap_results[statistic][:, j])\n\u001b[32m 73\u001b[39m n_bootstrap = n_bootstrap_i\n", + "\u001b[31mKeyError\u001b[39m: (slice(None, None, None), 0)" + ] } ], "source": [ @@ -111,15 +125,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "199be76d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'Mean': array([ 0.1039792 , 0.23771615, -0.11272705, 0.58867682, 0.08127468,\n", - " -0.17035326, 0.34184593, 0.31899082, 0.07648143, 0.56946271])}" + "{'Mean': 
{'p_value': np.float64(0.17436512697460507),\n", + " 'diff': np.float64(0.2377161467355364),\n", + " 'ci_es': (np.float64(-0.09487459471684045), np.float64(0.576572599564563)),\n", + " 'ci_s1': (np.float64(-0.1671848876765046), np.float64(0.33110024529537907)),\n", + " 'ci_s2': (np.float64(0.07607755235809846), np.float64(0.5669838280353934)),\n", + " 'emp_s1': np.float64(0.08127467643600941),\n", + " 'emp_s2': np.float64(0.3189908231715458)}}" ] }, "execution_count": 4, @@ -141,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "fdb0a272-f9fb-4b29-96d1-e28fb91d7103", "metadata": {}, "outputs": [ @@ -154,12 +173,12 @@ "\\toprule \n", "\\textbf{Model} & \\textbf{Mean} \\\\ \n", "\\midrule \n", - "Control & $0.08$ [$-0.17$-$0.34$] \\\\ \n", + "Control & $0.08$ [$-0.17$-$0.33$] \\\\ \n", "Treated & $0.32$ [$0.08$-$0.57$] \\\\ \n", "\\midrule\n", - "Effect size & $0.24$ [$-0.11$-$0.59]$ \\\\ \n", + "Effect size & $0.24$ [$-0.09$-$0.58]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.10$ \\\\ \n", + "$p$-value & $0.17$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -191,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "23e578c9", "metadata": {}, "outputs": [ @@ -319,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "08f27af5", "metadata": {}, "outputs": [ @@ -352,14 +371,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "b9975d69", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "582e4122248740b4a896c9d14555453b", + "model_id": "8f455096335f475587419a643412ea39", "version_major": 2, "version_minor": 0 }, @@ -384,7 +403,7 @@ "\\midrule\n", "Effect size & $0.13$ [$0.04$-$0.23]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.00$ \\\\ \n", + "$p$-value & $0.01$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] @@ -405,14 +424,14 @@ }, { "cell_type": 
"code", - "execution_count": 9, + "execution_count": null, "id": "9b314b2a", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48fe455f3cf946468b439e1392e59aee", + "model_id": "e53d20ec51774b7db784c2bee510f330", "version_major": 2, "version_minor": 0 }, @@ -437,7 +456,7 @@ "\\midrule\n", "Effect size & $0.1323$ [$-0.1588$-$0.4641]$ \\\\ \n", "\\midrule\n", - "$p$-value & $0.2056$ \\\\ \n", + "$p$-value & $0.4111$ \\\\ \n", "\\bottomrule\n", "\\end{tabular}\n" ] diff --git a/pyproject.toml b/pyproject.toml index 0c65f9a..d30a838 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "stambo" -version = "0.1.5" +version = "0.1.6" description = "A library for statistical model comparison using bootstrap." readme = "README.md" authors = [ @@ -22,6 +22,9 @@ dependencies = [ keywords = [ "statistical testing", "machine learning", + "model comparison", + "AI", + "Medical imaging" ] classifiers = [ diff --git a/stambo/__init__.py b/stambo/__init__.py index eef564c..2add390 100644 --- a/stambo/__init__.py +++ b/stambo/__init__.py @@ -1,8 +1,12 @@ -__version__ = "0.1.5" +__version__ = "0.1.6" -__all__ = ["metrics", "synthetic", "compare_models", "two_sample_test", "to_latex", "PredSampleWrapper"] +__all__ = [ + "metrics", "synthetic", "compare_models", "two_sample_test", "to_latex", "PredSampleWrapper", + "bootstrap_arrays", "pairwise_bootstrap_test" +] from . 
import metrics, synthetic from ._stambo import compare_models, two_sample_test from ._utils import to_latex from ._predsamplewrapper import PredSampleWrapper +from ._stambo import bootstrap_arrays, pairwise_bootstrap_test diff --git a/stambo/_predsamplewrapper.py b/stambo/_predsamplewrapper.py index 162ca38..fbcb3db 100644 --- a/stambo/_predsamplewrapper.py +++ b/stambo/_predsamplewrapper.py @@ -20,10 +20,11 @@ class PredSampleWrapper: cached_am: Optional cached argmax / thresholded predictions to reuse. """ def __init__(self: PredSampleWrapperType, predictions: PredGtType, - gt: PredGtType, multiclass: bool=True, threshold: Optional[float]=0.5, + gt: PredGtType, groups: Optional[npt.NDArray[int]]=None, multiclass: bool=True, threshold: Optional[float]=0.5, cached_am: Optional[npt.NDArray[int]]=None): self.multiclass = multiclass + self.groups = groups self.predictions = predictions self.predictions_am = None self.threshold = threshold diff --git a/stambo/_stambo.py b/stambo/_stambo.py index d1c4d27..1494bad 100644 --- a/stambo/_stambo.py +++ b/stambo/_stambo.py @@ -8,6 +8,332 @@ from . import metrics as metricslib +def bootstrap_arrays(arrays: tuple[Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper]], + statistics: Dict[str, Callable], + groups: Optional[npt.NDArray[int]]=None, + n_bootstrap: int=5000, + silent: bool=False) -> tuple[Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper]]: + r""" + Bootstraps an array of mutually paired samples. + Perfect if we want to establish how a set of models are related to each other. + + Note: this function is for the future releases of STAMBO, where, we will introduce a new API. + For now, it is only used internally to power the two-model comparison case. + You can still use it in your analyses and build on top of it. + + Args: + arrays: A tuple of arrays to bootstrap. Each array is a sample. + statistics: A dictionary of statistics to compute. 
+ groups: Groups indicating the subject for each measurement. Defaults to None. + n_bootstrap: The number of bootstrap iterations. Defaults to 5000. + silent: Whether to execute the function silently, i.e. not showing the progress bar. Defaults to False. + + Returns: + A dictionary of statistics values for each bootstrap iteration and each sample. + """ + def _same_groups(a: Optional[np.ndarray], b: Optional[np.ndarray]) -> bool: + if a is None and b is None: + return True + if (a is None) != (b is None): + return False + return bool(np.array_equal(a, b)) + + # Sanity checks + # Note: numpy.typing.NDArray[...] is a typing construct and cannot be used with isinstance() checks. + if not all(isinstance(arr, (np.ndarray, PredSampleWrapper)) for arr in arrays): + raise ValueError("All arrays must be numpy.ndarray or PredSampleWrapper") + # Checking the lengths of the arrays + arr_lengths = np.array([len(arr) for arr in arrays]) + assert np.all(arr_lengths == arr_lengths[0]), "All arrays must have the same length" + + # Checking the types of the arrays + if isinstance(arrays[0], PredSampleWrapper): + assert all(isinstance(arr, PredSampleWrapper) for arr in arrays), "All arrays must be of type PredSampleWrapper" + assert all(arr.multiclass == arrays[0].multiclass for arr in arrays), "All arrays must have the same multiclass setting" + assert all(arr.threshold == arrays[0].threshold for arr in arrays), "All arrays must have the same threshold" + assert all(_same_groups(arr.groups, arrays[0].groups) for arr in arrays), "All arrays must have the same groups" + else: + assert all(isinstance(arr, np.ndarray) for arr in arrays), "All arrays must be of type numpy.ndarray" + assert all(arr.dtype == arrays[0].dtype for arr in arrays), "All arrays must have the same dtype" + + # This makes sure that the groups are set correctly + if isinstance(arrays[0], PredSampleWrapper): + if groups is not None and arrays[0].groups is not None: + raise Warning("Groups are provided when using 
PredSampleWrapper. They will be ignored, since the arrays already contain the groups.") + elif arrays[0].groups is not None: + assert all(_same_groups(arr.groups, arrays[0].groups) for arr in arrays), "All arrays must have the same groups" + groups = arrays[0].groups + + # This creates the group data dictionary to make sure + # That we sample correctly + # This works exactly the same way for plain arrays and PredSampleWrapper objects. + if groups is not None: + groups_ids = np.unique(groups) + group_data = {} + for group_id in groups_ids: + group_data[group_id] = {"indices": np.where(groups == group_id)[0]} + else: + group_data = None + + result = {s_tag: np.zeros((n_bootstrap, len(arrays))) for s_tag in statistics} + for bootstrap_iter in pbar(range(n_bootstrap), total=n_bootstrap, desc="Bootstrapping", silent=silent): + # We are here in a paired design, so we need to sample the same indices for both samples + if group_data is None: + ind = np.random.choice(arr_lengths[0], arr_lengths[0], replace=True) + else: + # When we have groups, we need to sample them with replacement + groups_ind = np.random.choice(groups_ids, len(groups_ids), replace=True) + # Once the groups are sampled, we can concatenate the indices + ind = np.concatenate([group_data[grp]["indices"] for grp in groups_ind]) + + # We are always in the paired design setting + for s_tag in statistics: + for sample_idx in range(len(arrays)): + v = statistics[s_tag](arrays[sample_idx][ind]) + result[s_tag][bootstrap_iter, sample_idx] = v + + return result + + +def compute_bootstrap_model_test( + bootstrap_results: Dict[str, npt.NDArray[float]], + samples: tuple[Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper]], + i: int, + j: int, + statistic: str, + statistics_dict: Dict[str, Callable], + alpha: float=0.05) -> Dict[str, Tuple[float]]: + r"""Computes the bootstrap test for a model comparison. + + Args: + bootstrap_results: A dictionary of bootstrap results. + samples: A tuple of samples. 
+ i: The index of the first sample. + j: The index of the second sample. + statistic: The statistic to compute. + statistics_dict: A dictionary of statistics to compute. + alpha: A significance level for confidence intervals (from 0 to 1). Defaults to 0.05. + """ + n_bootstrap_i = len(bootstrap_results[statistic][:, i]) + n_bootstrap_j = len(bootstrap_results[statistic][:, j]) + n_bootstrap = n_bootstrap_i + # Some sanity checks + assert alpha >= 0.0 and alpha <= 1.0, "The alpha must be between 0 and 1" + assert n_bootstrap_i == n_bootstrap_j, "The number of bootstrap samples must be the same for both models" + assert n_bootstrap > 0, "The number of bootstrap samples must be greater than 0" + assert i < len(samples), "The index i must be less than the number of samples" + assert j < len(samples), "The index j must be less than the number of samples" + assert i != j, "The index i and j must be different" + assert statistic in bootstrap_results, "The statistic must be in the bootstrap results" + assert statistic in statistics_dict, "The statistic must be in the statistics dictionary" + sample_1_b = bootstrap_results[statistic][:, i] + sample_2_b = bootstrap_results[statistic][:, j] + + emp_s1 = statistics_dict[statistic](samples[i]) + emp_s2 = statistics_dict[statistic](samples[j]) + + # Observed difference: Delta + observed = emp_s2 - emp_s1 + diff_array = sample_2_b - sample_1_b + # Generating the null + # Model 2 != Model 1 is the alternative hypothesis in two-tailed test + null = diff_array - observed + p_val_right = ((null >= observed).sum() + 1.) / (n_bootstrap + 1) + p_val_left = ((null <= observed).sum() + 1.) / (n_bootstrap + 1) + p_val = 2 * min(p_val_right, p_val_left) + # Numerical / finite-sample guard: two-tailed doubling can exceed 1.0 + if p_val > 1.0: + p_val = 1.0 + # Compute the effect size + # We also want to compute the confidence intervals + # Public API uses alpha as a fraction in [0, 1]. 
+ alpha_pct = 100.0 * alpha + ci_es = ( + np.percentile(diff_array, alpha_pct / 2.0), + np.percentile(diff_array, 100.0 - alpha_pct / 2.0), + ) + ci_s1 = ( + np.percentile(sample_1_b, alpha_pct / 2.0), + np.percentile(sample_1_b, 100.0 - alpha_pct / 2.0), + ) + ci_s2 = ( + np.percentile(sample_2_b, alpha_pct / 2.0), + np.percentile(sample_2_b, 100.0 - alpha_pct / 2.0), + ) + + return { + "p_value": float(p_val), + "diff": float(observed), + "ci_es": (float(ci_es[0]), float(ci_es[1])), + "ci_s1": (float(ci_s1[0]), float(ci_s1[1])), + "ci_s2": (float(ci_s2[0]), float(ci_s2[1])), + "emp_s1": float(emp_s1), + "emp_s2": float(emp_s2), + } + +def apply_correction( + results: Dict[str, Dict[str, Dict[str, float]]]) -> Dict[str, Dict[str, Dict[str, float]]]: + r"""Applies the Holm-Bonferroni correction to the p-values. + Args: + results: A dictionary of results. + The format is: + { + "statistic": { + "comparison_label": { + "p_value": float, + "diff": float, + "ci_es": (float, float), + "ci_s1": (float, float), + "ci_s2": (float, float), + "emp_s1": float, + "emp_s2": float, + } + } + } + Returns: + A dictionary of results with adjusted p-values. + """ + # Holm-Bonferroni step-down correction across *all* performed tests. + # We adjust and write back into the nested dict structure under the "p_value" key. + + p_values = [] + comparisons = [] + s_tags = [] + for s_tag in results: + for comparison in results[s_tag]: + p_values.append(results[s_tag][comparison]["p_value"]) + comparisons.append(comparison) + s_tags.append(s_tag) + if len(p_values) <= 1: + return results + p_values = np.asarray(p_values, dtype=float) + comparisons = np.asarray(comparisons, dtype=str) + s_tags = np.asarray(s_tags, dtype=str) + + m = len(p_values) + order = np.argsort(p_values) # increasing p-values + + p_sorted = p_values[order] + comparisons_sorted = comparisons[order] + s_tags_sorted = s_tags[order] + + # Holm adjusted p-values: p_(k) * (m - k), with monotonicity enforcement. 
+    p_adj_sorted = np.empty_like(p_sorted)
+    running_max = 0.0
+    for k in range(m):
+        factor = (m - k)
+        adj = p_sorted[k] * factor
+        if adj > 1.0:
+            adj = 1.0
+        if adj < running_max:
+            adj = running_max
+        running_max = adj
+        p_adj_sorted[k] = adj
+
+    # Map back to original order
+    p_adj = np.empty_like(p_values)
+    p_adj[order] = p_adj_sorted
+
+    # Write back into results dict
+    for k in range(m):
+        results[s_tags_sorted[k]][comparisons_sorted[k]]["p_value"] = float(p_adj_sorted[k])
+
+    return results
+
+def pairwise_bootstrap_test(
+        samples: tuple[Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper]],
+        statistics: Dict[str, Callable],
+        groups: Optional[npt.NDArray[int]]=None,
+        bootstrap_results: Optional[Dict[str, npt.NDArray[float]]]=None,
+        labels: Optional[Tuple[str, ...]]=None,
+        adjusted_p_value: bool=False,
+        alpha: float=0.05,
+        n_bootstrap: int=5000,
+        silent: bool=False) -> Dict[str, Tuple[float]]:
+    r"""
+    Performs a pairwise bootstrap test to compare the statistics of the bootstrap results.
+    Note: if N samples are compared, there will be `N(N-1)/2` comparisons.
+    When N samples are compared, the p-values are adjusted using the Bonferroni-Holm correction.
+
+    Note: this function is for the future releases of STAMBO, where we will introduce a new API.
+    For now, it is only used internally to power the two-model comparison case.
+    You can still use it in your analyses and build on top of it.
+
+    Args:
+        samples: A tuple of samples.
+        statistics: A dictionary of statistics to compute.
+        groups: Groups indicating the subject for each measurement. Defaults to None.
+        bootstrap_results: A dictionary of bootstrap results. If None, bootstrap results are computed internally.
+        labels: A tuple of labels for the samples. Defaults to None. If None, the labels are automatically generated as "0", "1", "2", ..., "N-1".
+        adjusted_p_value: Whether to adjust the p-value for multiple testing. Defaults to False. 
+ alpha: A significance level for confidence intervals (from 0 to 1). Defaults to 0.05. + n_bootstrap: Number of bootstrap iterations used when bootstrap_results is None. Defaults to 5000. + silent: Whether to execute silently (no progress bar) when bootstrap_results is None. Defaults to False. + + Returns: + A dictionary of statistics values for each bootstrap iteration and each sample. + If adjusted_p_value is True, the p-values are adjusted. + """ + result_final = {} + arr_lengths = np.array([len(arr) for arr in samples]) + if labels is None: + labels = [f"{i}" for i in range(len(samples))] + assert len(labels) == len(samples), "The number of labels must be the same as the number of samples" + assert np.all(arr_lengths == arr_lengths[0]), "All arrays must have the same length" + p_val_array = [] + comparisons_array = [] + s_tags_array = [] + # Going over statistics + if bootstrap_results is None: + bootstrap_results = bootstrap_arrays( + arrays=samples, + statistics=statistics, + groups=groups, + n_bootstrap=n_bootstrap, + silent=silent + ) + else: + assert len(bootstrap_results) == len(statistics), "The number of bootstrap results must be the same as the number of statistics" + assert all(s_tag in bootstrap_results for s_tag in statistics), "All statistics must be in the bootstrap results" + assert all(s_tag in statistics for s_tag in bootstrap_results), "All statistics must be in the statistics dictionary" + + for s_tag in statistics: + result_final[s_tag] = {} + for i in range(len(samples)): + for j in range(i+1, len(samples)): + # We always take the second model as the improved + label = f"{labels[i]} / {labels[j]}" + + result_final[s_tag][label] = {} + + # And we report the p-value, empirical values, as well as the confidence intervals. + # The format in the documentation. 
+ b_res = compute_bootstrap_model_test( + bootstrap_results=bootstrap_results, + samples=samples, + i=i, j=j, + statistic=s_tag, + statistics_dict=statistics, + alpha=alpha + ) + + result_final[s_tag][label]["p_value"] = b_res["p_value"] + result_final[s_tag][label]["diff"] = b_res["diff"] + result_final[s_tag][label]["ci_es"] = b_res["ci_es"] + result_final[s_tag][label]["ci_s1"] = b_res["ci_s1"] + result_final[s_tag][label]["ci_s2"] = b_res["ci_s2"] + result_final[s_tag][label]["emp_s1"] = b_res["emp_s1"] + result_final[s_tag][label]["emp_s2"] = b_res["emp_s2"] + + p_val_array.append(b_res["p_value"]) + comparisons_array.append(label) + s_tags_array.append(s_tag) + + if adjusted_p_value: + # This is done in place + apply_correction(results=result_final) + return result_final + def two_sample_test(sample_1: Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper], sample_2: Union[npt.NDArray[int], npt.NDArray[float], PredSampleWrapper], statistics: Dict[str, Callable], @@ -22,8 +348,8 @@ def two_sample_test(sample_1: Union[npt.NDArray[int], npt.NDArray[float], PredSa .. math:: - H_0: f(x_1) \leq f(x_2) \\ - H_1: f(x_1) > f(x_2), + H_0: f(x_1) = f(x_2) \\ + H_1: f(x_1) \neq f(x_2), where :math:`f` is a function of interest, and :math:`x_1` and :math:`x_2` are the samples to be compared. Note that the statistics are computed independently, and should thus be treated independently. @@ -41,10 +367,9 @@ def two_sample_test(sample_1: Union[npt.NDArray[int], npt.NDArray[float], PredSa silent: Whether to execute the function silently, i.e. not showing the progress bar. Defaults to False. Returns: - A dictionary containing a tuple with the empirical value of - the metric, and the p-value. 
Each entry in the dictionary contains, in order: + A dictionary containing: - * Right-tailed :math:`p(H_0 \mid \texttt{data})` + * Two-tailed :math:`p(H_0 \mid \texttt{data})` * Observed difference (effect size) * CI low (effect size) * CI high (effect size) @@ -59,12 +384,7 @@ def two_sample_test(sample_1: Union[npt.NDArray[int], npt.NDArray[float], PredSa if seed is not None: np.random.seed(seed) - alpha = 100 * alpha - - - # Dict to store the null bootstrap distribution - result = {s_tag: np.zeros((n_bootstrap, 2)) for s_tag in statistics} - + # Dict to store the null bootstrap distribution if groups is not None: assert len(groups) == len(sample_1), "Groups must be of the same length as the samples" assert len(groups) == len(sample_2), "Groups must be of the same length as the samples" @@ -76,49 +396,39 @@ def two_sample_test(sample_1: Union[npt.NDArray[int], npt.NDArray[float], PredSa for group_id in groups_ids: group_data[group_id] = {"indices": np.where(groups == group_id)[0]} - for bootstrap_iter in pbar(range(n_bootstrap), total=n_bootstrap, desc="Bootstrapping", silent=silent): - # We are here in a paired design, so we need to sample the same indices for both samples - if groups is None: - ind = np.random.choice(len(sample_1), len(sample_1), replace=True) - ind1 = ind - ind2 = ind - if non_paired: - ind2 = np.random.choice(len(sample_2), len(sample_2), replace=True) - else: - # When we have groups, we need to sample them with replacement - groups_ind = np.random.choice(groups_ids, len(groups_ids), replace=True) - # Once the groups are sampled, we can concatenate the indices - ind = np.concatenate([group_data[grp]["indices"] for grp in groups_ind]) - ind1 = ind - ind2 = ind - - for s_tag in statistics: - result[s_tag][bootstrap_iter, 0] = statistics[s_tag](sample_1[ind1]) - result[s_tag][bootstrap_iter, 1] = statistics[s_tag](sample_2[ind2]) - - result_final = {} - for s_tag in result: - emp_s1 = statistics[s_tag](sample_1) - emp_s2 = 
statistics[s_tag](sample_2) - - # Observed difference: Delta - observed = emp_s2 - emp_s1 - diff_array = result[s_tag][:, 1] - result[s_tag][:, 0] - # Generating the null - # Model 2 > Model 1 is the alternative hypothesis in one-tailed test - null = diff_array - observed - p_val_right = ((null >= observed).sum() + 1.) / (n_bootstrap + 1) - # Compute the effect size - # We also want to compute the confidence intervals - # In this version of STAMBO, we use the simple percentile method - ci_es = (np.percentile(diff_array, alpha / 2.), np.percentile(diff_array, 100 - alpha / 2.)) - ci_s1 = (np.percentile(result[s_tag][:, 0], alpha / 2.), np.percentile(result[s_tag][:, 0], 100 - alpha / 2.)) - ci_s2 = (np.percentile(result[s_tag][:, 1], alpha / 2.), np.percentile(result[s_tag][:, 1], 100 - alpha / 2.)) - # And we report the p-value, empirical values, as well as the confidence intervals. - # The format in the documentation. - result_final[s_tag] = [p_val_right, observed, ci_es[0], ci_es[1], emp_s1, ci_s1[0], ci_s1[1], emp_s2, ci_s2[0], ci_s2[1]] - result_final[s_tag] = np.array(result_final[s_tag]) - return result_final + if non_paired: + # This is a simple case of non-paired, non grouped design + # We will rarely use it in practice, but it is still a valid test + bootstrap_result = {s_tag: {"0 / 1": np.zeros((n_bootstrap, 2))} for s_tag in statistics} + for bootstrap_iter in pbar(range(n_bootstrap), total=n_bootstrap, desc="Bootstrapping", silent=silent): + ind1 = np.random.choice(len(sample_1), len(sample_1), replace=True) + ind2 = np.random.choice(len(sample_2), len(sample_2), replace=True) + for s_tag in statistics: + bootstrap_result[s_tag]["0 / 1"][bootstrap_iter, 0] = statistics[s_tag](sample_1[ind1]) + bootstrap_result[s_tag]["0 / 1"][bootstrap_iter, 1] = statistics[s_tag](sample_2[ind2]) + else: + bootstrap_result = bootstrap_arrays( + arrays=(sample_1, sample_2), + statistics=statistics, + groups=groups, + n_bootstrap=n_bootstrap, + silent=silent + ) + 
result_final = pairwise_bootstrap_test(
+        samples=(sample_1, sample_2),
+        statistics=statistics,
+        bootstrap_results=bootstrap_result,
+        labels=None,
+        alpha=alpha,
+    )
+
+    results_return = {}
+    # This is a necessary post-processing step
+    # The pairwise bootstrap has only two models, so the labels are not needed
+    for s_tag in statistics:
+        comparison_label = list(result_final[s_tag].keys())[0]
+        results_return[s_tag] = result_final[s_tag][comparison_label]
+    return results_return


def compare_models(y_test: Union[npt.NDArray[int], npt.NDArray[float]],
@@ -131,14 +441,14 @@ def compare_models(y_test: Union[npt.NDArray[int], npt.NDArray[float]],
                    seed: Optional[int]=None,
                    silent: bool=False) -> Dict[str, Tuple[float]]:
     r"""Compares predictions from two models :math:`f_1(x)` and :math:`f_2(x)` that yield prediction vectors :math:`\hat y_{1}` and :math:`\hat y_{2}`
-    with a one-tailed bootstrap hypothesis test. Note: you must make sure that the metric is defined as more is better (e.g. accuracy, AUC, and others).
+    with a two-tailed bootstrap hypothesis test. Note: you must make sure that the metric is defined as more is better (e.g. accuracy, AUC, and others).
 
     I.e., we state the following null and alternative hypotheses:
 
     .. math::
 
-        H_0: M(y_{gt}, \hat y_{1}) \leq M(y_{gt}, \hat y_{2})
+        H_0: M(y_{gt}, \hat y_{1}) = M(y_{gt}, \hat y_{2})
 
-        H_1: M(y_{gt}, \hat y_{2}) > M(y_{gt}, \hat y_{1}),
+        H_1: M(y_{gt}, \hat y_{2}) \neq M(y_{gt}, \hat y_{1}),
 
     where :math:`M` is a metric, :math:`y_{gt}` is the vector of ground truth labels, and :math:`\hat y_{i}, i=1,2` are the vectors of predictions
     for model 1 and 2, respectively.
@@ -177,10 +487,10 @@ def compare_models(y_test: Union[npt.NDArray[int], npt.NDArray[float]],
 
     Returns:
         A dictionary containing a tuple with the empirical value of
-        the metric, and the one-tailed p-value. The expected format in the output in
+        the metric, and the two-tailed p-value. 
The expected format in the output in every dict entry is: - * One-sided :math:`p`-value + * Two-tailed :math:`p`-value * Observed difference (effect size) * Effect size CI low * Effect size CI high diff --git a/stambo/_utils.py b/stambo/_utils.py index 35a9be0..359605c 100644 --- a/stambo/_utils.py +++ b/stambo/_utils.py @@ -63,22 +63,32 @@ def to_latex(report: Dict[str, Tuple[float]], m1_name: Optional[str]="M1", m2_na tbl += m1_name # Filling the first row for metric in report: - tbl += " & " + f"${report[metric][4]:.{n_digits}f}$ [${report[metric][5]:.{n_digits}f}$-${report[metric][6]:.{n_digits}f}$]" + emp_s1 = report[metric]["emp_s1"] + ci_low_s1 = report[metric]["ci_s1"][0] + ci_high_s1 = report[metric]["ci_s1"][1] + tbl += " & " + f"${emp_s1:.{n_digits}f}$ [${ci_low_s1:.{n_digits}f}$-${ci_high_s1:.{n_digits}f}$]" tbl += " \\\\ \n" tbl += m2_name # Filling the second row for metric in report: - tbl += " & " + f"${report[metric][7]:.{n_digits}f}$ [${report[metric][8]:.{n_digits}f}$-${report[metric][9]:.{n_digits}f}$]" + emp_s2 = report[metric]["emp_s2"] + ci_low_s2 = report[metric]["ci_s2"][0] + ci_high_s2 = report[metric]["ci_s2"][1] + tbl += " & " + f"${emp_s2:.{n_digits}f}$ [${ci_low_s2:.{n_digits}f}$-${ci_high_s2:.{n_digits}f}$]" tbl += " \\\\ \n\\midrule\n" # Filling the final row with p-value per metric tbl += "Effect size" for metric in report: - tbl += " & " + f"${report[metric][1]:.{n_digits}f}$ [${report[metric][2]:.{n_digits}f}$-${report[metric][3]:.{n_digits}f}]$" + diff = report[metric]["diff"] + ci_low_diff = report[metric]["ci_es"][0] + ci_high_diff = report[metric]["ci_es"][1] + tbl += " & " + f"${diff:.{n_digits}f}$ [${ci_low_diff:.{n_digits}f}$-${ci_high_diff:.{n_digits}f}]$" tbl += " \\\\ \n\\midrule\n" tbl += "$p$-value" for metric in report: - tbl += " & " + f"${report[metric][0]:.{n_digits}f}$" + p_value = report[metric]["p_value"] + tbl += " & " + f"${p_value:.{n_digits}f}$" tbl += " \\\\ \n\\bottomrule\n" # Final row tbl += 
"\\end{tabular}" diff --git a/stambo/metrics.py b/stambo/metrics.py index 91c585e..fc21f53 100644 --- a/stambo/metrics.py +++ b/stambo/metrics.py @@ -1,13 +1,14 @@ from typing import Callable +import numpy as np from sklearn.metrics import roc_auc_score, average_precision_score, f1_score from sklearn.metrics import cohen_kappa_score, balanced_accuracy_score, matthews_corrcoef -from sklearn.metrics import mean_absolute_error, mean_squared_error +from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score from functools import partial from ._predsamplewrapper import PredSampleWrapper -__all__ = ["Metric", "ROCAUC", "AP", "F1Score", "QKappa", "BACC", "MCC", "MSE", "MAE"] +__all__ = ["Metric", "ROCAUC", "AP", "F1Score", "QKappa", "BACC", "Accuracy", "MCC", "MSE", "MAE", "LogOddsRatio"] # Base metric class class Metric: @@ -86,6 +87,15 @@ def __init__(self) -> None: def __str__(self) -> str: return "BACC" +class Accuracy(Metric): + r"""The accuracy score. + """ + def __init__(self) -> None: + Metric.__init__(self, accuracy_score, int_input=True) + + def __str__(self) -> str: + return "Accuracy" + class MCC(Metric): r"""The Matthew's correlation coefficient """ @@ -115,7 +125,32 @@ def __init__(self) -> None: def __str__(self) -> str: return "MAE" +class LogOddsRatio(Metric): + r"""The log odds ratio. + """ + epsilon = 1e-4 + def __init__(self) -> None: + Metric.__init__(self, None, int_input=False) + def __call__(self, sample: PredSampleWrapper) -> float: + # For binary classification, PredSampleWrapper stores thresholded predictions in `predictions_am`. + # Using raw `predictions` would typically mean operating on probabilities, which is incorrect + # for a 2x2 contingency-table-based odds ratio. 
+ gt = sample.gt + pred = sample.predictions_am + counts = np.bincount(2 * gt + pred, minlength=4) + tn, fp, fn, tp = counts + + # Use a tiny epsilon to prevent log(0) without shifting the OR significantly + # Or stick to 0.5 only if you expect very frequent zeros. + log_tp = np.log(tp + self.epsilon) + log_tn = np.log(tn + self.epsilon) + log_fp = np.log(fp + self.epsilon) + log_fn = np.log(fn + self.epsilon) + return float((log_tp + log_tn) - (log_fp + log_fn)) + + def __str__(self) -> str: + return "LogOddsRatio" diff --git a/tests/conftest.py b/tests/conftest.py index 30b7f6d..ad476c2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,3 +34,59 @@ def grouped_gaussian_samples() -> Tuple[np.ndarray, np.ndarray, np.ndarray]: np.concatenate(sample_2), np.concatenate(groups), ) + + +@pytest.fixture +def clustered_binary_accuracy_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Generate a deterministic non-iid binary dataset with strong within-group correlation. 
+ + Mirrors the AR(1) block-covariance idea used in notebooks/Two_sample_test.ipynb: + - variable group sizes + - within-group AR(1) covariance with high rho + - two independent correlated draws per group -> two "models" + + Returns: + preds_1, preds_2, y, groups + """ + rng = np.random.default_rng(SEED) + + def generate_covariance(size: int, rho: float, sigma: float = 1.0) -> np.ndarray: + idx = np.arange(size) + diff = np.abs(np.subtract.outer(idx, idx)) + return (sigma**2) * (rho**diff) + + n_groups = 40 + n_total = 800 + rho = 0.95 + noise_sigma = 1.0 + + if n_total < n_groups: + raise ValueError("n_total must be >= n_groups") + + base = np.ones(n_groups, dtype=int) + remaining = n_total - n_groups + probs = rng.dirichlet(np.ones(n_groups) * 0.9) + extra = rng.multinomial(remaining, probs) + counts = base + extra + assert counts.sum() == n_total + + groups = np.concatenate([np.full(counts[i], i, dtype=int) for i in range(n_groups)]) + + preds_1 = [] + preds_2 = [] + for i in range(n_groups): + size = int(counts[i]) + sigma_block = generate_covariance(size, rho, noise_sigma) + L = np.linalg.cholesky(sigma_block) + e1 = L @ rng.normal(0.0, 1.0, size=size) + e2 = L @ rng.normal(0.0, 1.0, size=size) + preds_1.append((e1 > 0.0).astype(int)) + preds_2.append((e2 > 0.0).astype(int)) + + preds_1 = np.concatenate(preds_1) + preds_2 = np.concatenate(preds_2) + y = np.ones(n_total, dtype=int) + + return preds_1, preds_2, y, groups + diff --git a/tests/test_alpha_and_correction_api.py b/tests/test_alpha_and_correction_api.py new file mode 100644 index 0000000..2733859 --- /dev/null +++ b/tests/test_alpha_and_correction_api.py @@ -0,0 +1,58 @@ +import numpy as np + +from stambo._stambo import apply_correction, pairwise_bootstrap_test + + +def test_apply_correction_returns_results_and_adjusts_in_place(): + # Two p-values -> Holm should adjust the smallest by factor 2. 
+ results = { + "mean": { + "A / B": {"p_value": 0.01, "diff": 0.0}, + "A / C": {"p_value": 0.04, "diff": 0.0}, + } + } + + out = apply_correction(results) + + # API: return the (mutated) results dict + assert out is results + + assert results["mean"]["A / B"]["p_value"] == 0.02 + assert results["mean"]["A / C"]["p_value"] == 0.04 + + +def test_pairwise_bootstrap_ci_uses_alpha_as_fraction(): + # Construct a tiny deterministic bootstrap distribution + # col 0 = model A, col 1 = model B + sample_1_b = np.array([0.0, 0.0, 0.0, 0.0]) + sample_2_b = np.array([0.0, 1.0, 2.0, 3.0]) + bootstrap_results = {"mean": np.stack([sample_1_b, sample_2_b], axis=1)} + + # samples are only used for empirical stats; keep them simple + samples = (np.array([0.0, 0.0]), np.array([0.0, 0.0])) + + alpha = 0.05 + res = pairwise_bootstrap_test( + samples=samples, + statistics={"mean": np.mean}, + bootstrap_results=bootstrap_results, + labels=("A", "B"), + adjusted_p_value=False, + alpha=alpha, + ) + + label = "A / B" + diff_array = sample_2_b - sample_1_b + expected_ci_es = ( + float(np.percentile(diff_array, 100 * alpha / 2.0)), + float(np.percentile(diff_array, 100 - 100 * alpha / 2.0)), + ) + expected_ci_s2 = ( + float(np.percentile(sample_2_b, 100 * alpha / 2.0)), + float(np.percentile(sample_2_b, 100 - 100 * alpha / 2.0)), + ) + + assert np.allclose(res["mean"][label]["ci_es"], expected_ci_es) + assert np.allclose(res["mean"][label]["ci_s2"], expected_ci_s2) + + diff --git a/tests/test_bootstrap_arrays_sanity_checks.py b/tests/test_bootstrap_arrays_sanity_checks.py new file mode 100644 index 0000000..8b1193f --- /dev/null +++ b/tests/test_bootstrap_arrays_sanity_checks.py @@ -0,0 +1,87 @@ +import numpy as np +import pytest + +from stambo._predsamplewrapper import PredSampleWrapper +from stambo._stambo import bootstrap_arrays + + +def test_bootstrap_arrays_rejects_non_array_inputs(): + arrays = (np.array([1, 2, 3], dtype=float), "not-an-array") + with pytest.raises(ValueError): + 
bootstrap_arrays(arrays=arrays, statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_requires_same_lengths(): + arrays = (np.array([1, 2, 3], dtype=float), np.array([1, 2], dtype=float)) + with pytest.raises(AssertionError, match="same length"): + bootstrap_arrays(arrays=arrays, statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_numpy_branch_requires_same_dtype(): + arrays = (np.array([1, 2, 3], dtype=int), np.array([1, 2, 3], dtype=float)) + with pytest.raises(AssertionError, match="same dtype"): + bootstrap_arrays(arrays=arrays, statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_predsamples_must_not_mix_with_numpy(): + gt = np.array([0, 1, 0], dtype=int) + preds = np.array([0.1, 0.9, 0.2], dtype=float) + w = PredSampleWrapper(preds, gt, multiclass=False, threshold=0.5) + + arrays = (w, np.array([1, 2, 3], dtype=float)) + with pytest.raises(AssertionError, match="PredSampleWrapper"): + bootstrap_arrays(arrays=arrays, statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_predsamples_require_same_multiclass_setting(): + gt = np.array([0, 1, 0], dtype=int) + preds_bin = np.array([0.1, 0.9, 0.2], dtype=float) + preds_mc = np.array([[0.2, 0.8], [0.9, 0.1], [0.6, 0.4]], dtype=float) + w_bin = PredSampleWrapper(preds_bin, gt, multiclass=False, threshold=0.5) + w_mc = PredSampleWrapper(preds_mc, gt, multiclass=True) + + with pytest.raises(AssertionError, match="multiclass"): + bootstrap_arrays(arrays=(w_bin, w_mc), statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_predsamples_require_same_threshold(): + gt = np.array([0, 1, 0, 1], dtype=int) + preds = np.array([0.49, 0.51, 0.2, 0.8], dtype=float) + w1 = PredSampleWrapper(preds, gt, multiclass=False, threshold=0.5) + w2 = PredSampleWrapper(preds, gt, multiclass=False, threshold=0.7) + + with pytest.raises(AssertionError, match="threshold"): 
+ bootstrap_arrays(arrays=(w1, w2), statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_predsamples_require_same_groups(): + gt = np.array([0, 1, 0, 1], dtype=int) + preds = np.array([0.49, 0.51, 0.2, 0.8], dtype=float) + g1 = np.array([0, 0, 1, 1], dtype=int) + g2 = np.array([0, 1, 0, 1], dtype=int) + w1 = PredSampleWrapper(preds, gt, groups=g1, multiclass=False, threshold=0.5) + w2 = PredSampleWrapper(preds, gt, groups=g2, multiclass=False, threshold=0.5) + + with pytest.raises(AssertionError, match="groups"): + bootstrap_arrays(arrays=(w1, w2), statistics={"mean": np.mean}, n_bootstrap=1, silent=True) + + +def test_bootstrap_arrays_accepts_valid_numpy_inputs(): + arrays = (np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])) + np.random.seed(0) + out = bootstrap_arrays(arrays=arrays, statistics={"mean": np.mean}, n_bootstrap=3, silent=True) + assert out["mean"].shape == (3, 2) + + +def test_bootstrap_arrays_accepts_valid_predsamples_inputs(): + gt = np.array([0, 1, 0, 1], dtype=int) + preds1 = np.array([0.49, 0.51, 0.2, 0.8], dtype=float) + preds2 = np.array([0.6, 0.7, 0.1, 0.9], dtype=float) + groups = np.array([0, 0, 1, 1], dtype=int) + w1 = PredSampleWrapper(preds1, gt, groups=groups, multiclass=False, threshold=0.5) + w2 = PredSampleWrapper(preds2, gt, groups=groups, multiclass=False, threshold=0.5) + + np.random.seed(0) + out = bootstrap_arrays(arrays=(w1, w2), statistics={"mean": lambda s: float(np.mean(s.gt))}, n_bootstrap=3, silent=True) + assert out["mean"].shape == (3, 2) + diff --git a/tests/test_clustered_bootstrap.py b/tests/test_clustered_bootstrap.py index 193fffb..9dbc6dc 100644 --- a/tests/test_clustered_bootstrap.py +++ b/tests/test_clustered_bootstrap.py @@ -2,6 +2,9 @@ import stambo +from stambo._predsamplewrapper import PredSampleWrapper +from stambo.metrics import Accuracy + SEED = 2025 @@ -29,9 +32,61 @@ def test_clustered_bootstrap_reduces_false_positives(grouped_gaussian_samples): silent=True, ) - 
naive_p = naive["mean"][0] - clustered_p = clustered["mean"][0] + naive_p = naive["mean"]["p_value"] + clustered_p = clustered["mean"]["p_value"] assert naive_p < 0.01 assert clustered_p > 0.2 assert clustered_p - naive_p > 0.15 + + +def test_clustered_bootstrap_with_predsamplewrapper_and_metric_reduces_false_positives(clustered_binary_accuracy_data): + """ + Follow the same idea as the Two_sample_test notebook: + generate *within-subject correlated* observations via AR(1) block covariance. + + We create binary "correctness" indicators per model from correlated Gaussian noise. + Using gt=1 everywhere, Accuracy(pred, gt) reduces to mean(pred == 1), so we can test the + clustered bootstrap logic with PredSampleWrapper + Metric in a controlled non-iid setting. + + Expectation under the null: the two models have the same accuracy distribution. + But due to strong within-group correlation, naive resampling (treating observations iid) + becomes overconfident and can yield a false positive, whereas clustered bootstrap (resampling + groups) corrects this and yields a non-significant result. 
+ """ + preds_1, preds_2, y, groups = clustered_binary_accuracy_data + + # Naive: no grouping information + s1_naive = PredSampleWrapper(preds_1, y, multiclass=False, threshold=0.5, groups=None) + s2_naive = PredSampleWrapper(preds_2, y, multiclass=False, threshold=0.5, groups=None) + + # Clustered: grouping stored on the wrapper (bootstrap_arrays will use it) + s1_clustered = PredSampleWrapper(preds_1, y, multiclass=False, threshold=0.5, groups=groups) + s2_clustered = PredSampleWrapper(preds_2, y, multiclass=False, threshold=0.5, groups=groups) + + statistics = {"Accuracy": Accuracy()} + + naive = stambo.two_sample_test( + s1_naive, + s2_naive, + statistics, + n_bootstrap=1500, + seed=SEED, + silent=True, + ) + + clustered = stambo.two_sample_test( + s1_clustered, + s2_clustered, + statistics, + n_bootstrap=1500, + seed=SEED, + silent=True, + ) + + naive_p = naive["Accuracy"]["p_value"] + clustered_p = clustered["Accuracy"]["p_value"] + + # With this fixed seed + strong within-group correlation, naive bootstrap becomes overconfident. 
+ assert naive_p < 0.05 + assert clustered_p > 0.2 diff --git a/tests/test_log_odds_ratio.py b/tests/test_log_odds_ratio.py new file mode 100644 index 0000000..38fb331 --- /dev/null +++ b/tests/test_log_odds_ratio.py @@ -0,0 +1,173 @@ +import numpy as np + +from stambo._predsamplewrapper import PredSampleWrapper +from stambo._stambo import bootstrap_arrays, compare_models +from stambo.metrics import LogOddsRatio + + +def _log_odds_ratio_from_labels(gt: np.ndarray, pred: np.ndarray) -> float: + """Log odds ratio matching stambo.metrics.LogOddsRatio (epsilon-stabilized).""" + gt = np.asarray(gt).astype(int) + pred = np.asarray(pred).astype(int) + counts = np.bincount(2 * gt + pred, minlength=4) + tn, fp, fn, tp = counts + eps = float(LogOddsRatio.epsilon) + return float((np.log(tp + eps) + np.log(tn + eps)) - (np.log(fp + eps) + np.log(fn + eps))) + + +def _odds_from_counts(tp: int, tn: int, fp: int, fn: int, eps: float) -> float: + """Odds ratio in exponentiated space, matching LogOddsRatio's epsilon stabilization.""" + return float(((tp + eps) * (tn + eps)) / ((fp + eps) * (fn + eps))) + + +def _pred_from_counts_for_fixed_gt(gt: np.ndarray, tp: int, fp: int) -> np.ndarray: + """Construct predicted labels for a fixed gt with desired TP and FP counts. + + Assumes gt contains 1s then 0s (but we only rely on the indices). + """ + gt = np.asarray(gt).astype(int) + pos_idx = np.where(gt == 1)[0] + neg_idx = np.where(gt == 0)[0] + if tp > len(pos_idx): + raise ValueError("tp exceeds number of positives in gt") + if fp > len(neg_idx): + raise ValueError("fp exceeds number of negatives in gt") + + pred = np.zeros_like(gt, dtype=int) + pred[pos_idx[:tp]] = 1 + pred[neg_idx[:fp]] = 1 + return pred + + +def test_log_odds_ratio_bootstrap_uses_thresholded_predictions(): + # predictions are *probabilities*; the log-odds ratio must be computed on thresholded labels. 
+ gt = np.array([1, 1, 1, 0, 0, 0], dtype=int) + probs = np.array([0.9, 0.8, 0.6, 0.4, 0.2, 0.1], dtype=float) # threshold=0.5 -> [1,1,1,0,0,0] + + sample = PredSampleWrapper(probs, gt, multiclass=False, threshold=0.5) + + n_bootstrap = 50 + np.random.seed(123) + out = bootstrap_arrays( + arrays=(sample,), + statistics={"LogOddsRatio": LogOddsRatio()}, + n_bootstrap=n_bootstrap, + silent=True, + ) + + # Re-generate the same bootstrap indices and compute the expected statistic manually. + np.random.seed(123) + expected = np.zeros((n_bootstrap, 1), dtype=float) + for b in range(n_bootstrap): + ind = np.random.choice(len(sample), len(sample), replace=True) + expected[b, 0] = _log_odds_ratio_from_labels(gt[ind], sample.predictions_am[ind]) + + assert np.allclose(out["LogOddsRatio"], expected) + + +def test_log_odds_ratio_diff_matches_relative_odds_between_models(): + # Build two models with different confusion tables (via probabilities + threshold). + y = np.array([1, 1, 1, 1, 0, 0, 0, 0], dtype=int) + + # model 1 predicted labels (thr=0.5): [1,1,0,0, 1,0,0,0] + p1 = np.array([0.9, 0.8, 0.1, 0.2, 0.9, 0.1, 0.2, 0.4], dtype=float) + # model 2 predicted labels (thr=0.5): [1,1,1,0, 0,0,0,0] + p2 = np.array([0.9, 0.8, 0.7, 0.2, 0.4, 0.1, 0.2, 0.3], dtype=float) + + # We only need empirical values/diff; bootstrap size can be small. + res = compare_models( + y_test=y, + preds_1=p1, + preds_2=p2, + metrics=("LogOddsRatio",), + n_bootstrap=50, + seed=2027, + silent=True, + ) + + # Compute expected log ORs on thresholded predictions. 
+ pred1 = (p1 > 0.5).astype(int) + pred2 = (p2 > 0.5).astype(int) + log_or_1 = _log_odds_ratio_from_labels(y, pred1) + log_or_2 = _log_odds_ratio_from_labels(y, pred2) + expected_diff = log_or_2 - log_or_1 # log(relative odds) + + assert np.isclose(res["LogOddsRatio"]["emp_s1"], log_or_1) + assert np.isclose(res["LogOddsRatio"]["emp_s2"], log_or_2) + assert np.isclose(res["LogOddsRatio"]["diff"], expected_diff) + + # "Relative odds" (odds ratio between models) is exp(log OR2 - log OR1). + relative_odds = float(np.exp(res["LogOddsRatio"]["diff"])) + expected_relative_odds = float(np.exp(log_or_2) / np.exp(log_or_1)) + assert np.isclose(relative_odds, expected_relative_odds) + + +def test_log_odds_ratio_exact_odds_and_relative_odds_for_three_models_exponentiated(): + """ + "Clever" deterministic construction: + - We pick a fixed gt with P positives and N negatives. + - For each model, we construct *binary predicted labels* that realize exact (TP,FP) counts. + This fixes (FN, TN) automatically since gt is fixed. + - We then assert in *odds space* (exponentiated) that: + exp(emp_s*) equals the model's odds ratio + exp(diff) equals the relative odds ratio between models + - Finally, we assert the test generalizes to multiple samples by checking all pairwise comparisons. + """ + eps = float(LogOddsRatio.epsilon) + + # Fixed ground-truth: 40 positives, 60 negatives + p = 40 + n = 60 + gt = np.array([1] * p + [0] * n, dtype=int) + + # Define 3 models by specifying TP and FP counts (with fixed gt these determine FN and TN). + # All counts are strictly > 0 to avoid degenerate odds. 
+ models = { + "A": {"tp": 30, "fp": 5}, # fn=10, tn=55 + "B": {"tp": 20, "fp": 10}, # fn=20, tn=50 + "C": {"tp": 35, "fp": 8}, # fn=5, tn=52 + } + + samples = [] + expected_odds = {} + expected_log_odds = {} + for name, spec in models.items(): + tp = int(spec["tp"]) + fp = int(spec["fp"]) + fn = p - tp + tn = n - fp + pred = _pred_from_counts_for_fixed_gt(gt, tp=tp, fp=fp) + samples.append(PredSampleWrapper(pred, gt, multiclass=False, threshold=0.5)) + + expected_odds[name] = _odds_from_counts(tp=tp, tn=tn, fp=fp, fn=fn, eps=eps) + expected_log_odds[name] = float(np.log(expected_odds[name])) + + # Deterministic bootstrap_results: constant per model, no RNG. + emp = np.array([expected_log_odds["A"], expected_log_odds["B"], expected_log_odds["C"]], dtype=float) + bootstrap_results = {"LogOddsRatio": np.tile(emp, (7, 1))} + + from stambo._stambo import pairwise_bootstrap_test + + res = pairwise_bootstrap_test( + samples=tuple(samples), + statistics={"LogOddsRatio": LogOddsRatio()}, + bootstrap_results=bootstrap_results, + labels=("A", "B", "C"), + adjusted_p_value=False, + alpha=0.05, + ) + + # Generalizes to >2 samples: 3 models -> 3 pairwise comparisons. + assert len(res["LogOddsRatio"]) == 3 + + # Check each comparison in exponentiated space. + # Labels follow "i / j" where j is the "improved" (second) model in the code. 
+ for label, out in res["LogOddsRatio"].items(): + left, right = [s.strip() for s in label.split("/")] + odds_left = expected_odds[left] + odds_right = expected_odds[right] + + assert np.isclose(float(np.exp(out["emp_s1"])), odds_left) + assert np.isclose(float(np.exp(out["emp_s2"])), odds_right) + assert np.isclose(float(np.exp(out["diff"])), odds_right / odds_left) + diff --git a/tests/test_pairwise_bootstrap_conservatism.py b/tests/test_pairwise_bootstrap_conservatism.py new file mode 100644 index 0000000..34d3dd7 --- /dev/null +++ b/tests/test_pairwise_bootstrap_conservatism.py @@ -0,0 +1,98 @@ +import numpy as np + +from stambo._stambo import bootstrap_arrays, pairwise_bootstrap_test + + +def _count_significant(results: dict, alpha: float) -> int: + pvals = np.array([results["mean"][k]["p_value"] for k in results["mean"]], dtype=float) + return int((pvals < alpha).sum()) + + +def test_pairwise_bootstrap_conservatism_one_shifted_vs_four_null(): + """ + 5 samples, n=50. + 1 sample from N(0.3, 1), 4 samples from N(0, 1). + Expect exactly 4 significant differences (shifted vs each null). + """ + n_models = 5 + n_obs = 50 + alpha = 0.05 + n_bootstrap = 400 + + rng = np.random.default_rng(2026) + base = rng.normal(loc=0.0, scale=1.0, size=n_obs) + + # Paired construction (still marginally N(mu, 1)) to make the expected discoveries stable. 
+ samples = ( + base + 0.3, # shifted + base + 0.0, + base + 0.0, + base + 0.0, + base + 0.0, + ) + assert len(samples) == n_models + + np.random.seed(2026) + bootstrap_results = bootstrap_arrays( + arrays=samples, + statistics={"mean": np.mean}, + n_bootstrap=n_bootstrap, + silent=True, + ) + + adjusted = pairwise_bootstrap_test( + bootstrap_results=bootstrap_results, + samples=samples, + statistics={"mean": np.mean}, + adjusted_p_value=True, + alpha=alpha, + ) + + # Total comparisons: 5*4/2 = 10 + assert len(adjusted["mean"]) == (n_models * (n_models - 1) // 2) + assert _count_significant(adjusted, alpha=alpha) == 4 + + +def test_pairwise_bootstrap_conservatism_two_shifted_vs_three_null(): + """ + 5 samples, n=50. + 2 samples from N(0.3, 1), 3 samples from N(0, 1). + Expect exactly 6 significant differences (each shifted vs each null). + """ + n_models = 5 + n_obs = 50 + alpha = 0.05 + n_bootstrap = 400 + + rng = np.random.default_rng(2027) + base = rng.normal(loc=0.0, scale=1.0, size=n_obs) + + samples = ( + base + 0.3, # shifted + base + 0.3, # shifted + base + 0.0, + base + 0.0, + base + 0.0, + ) + assert len(samples) == n_models + + np.random.seed(2027) + bootstrap_results = bootstrap_arrays( + arrays=samples, + statistics={"mean": np.mean}, + n_bootstrap=n_bootstrap, + silent=True, + ) + + adjusted = pairwise_bootstrap_test( + bootstrap_results=bootstrap_results, + samples=samples, + statistics={"mean": np.mean}, + adjusted_p_value=True, + alpha=alpha, + ) + + assert len(adjusted["mean"]) == (n_models * (n_models - 1) // 2) + assert _count_significant(adjusted, alpha=alpha) == 6 + + diff --git a/tests/test_pairwise_bootstrap_many_samples.py b/tests/test_pairwise_bootstrap_many_samples.py new file mode 100644 index 0000000..04d4935 --- /dev/null +++ b/tests/test_pairwise_bootstrap_many_samples.py @@ -0,0 +1,68 @@ +import numpy as np + +from stambo._stambo import bootstrap_arrays, pairwise_bootstrap_test + + +def 
test_pairwise_bootstrap_many_samples_holm_adjustment(): + """ + Create 50 paired samples from the same N(0,1) distribution. + + Expect: + - Exactly N*(N-1)/2 pairwise comparisons. + - At least one false positive without p-value adjustment (with many comparisons). + - No significant comparisons after Holm-Bonferroni adjustment (with coarse bootstrap p-value resolution). + """ + n_models = 50 + n_obs = 20 + n_bootstrap = 300 # coarse p-value grid -> makes Holm adjustment very conservative/stable + alpha = 0.05 + + rng = np.random.default_rng(2026) + x = rng.normal(loc=0.0, scale=1.0, size=(n_obs, n_models)) + samples = tuple(x[:, i] for i in range(n_models)) + + # bootstrap_arrays uses the legacy global RNG (np.random.*), so seed it explicitly. + np.random.seed(2026) + bootstrap_results = bootstrap_arrays( + arrays=samples, + statistics={"mean": np.mean}, + n_bootstrap=n_bootstrap, + silent=True, + ) + + unadjusted = pairwise_bootstrap_test( + bootstrap_results=bootstrap_results, + samples=samples, + statistics={"mean": np.mean}, + adjusted_p_value=False, + alpha=alpha, + ) + adjusted = pairwise_bootstrap_test( + bootstrap_results=bootstrap_results, + samples=samples, + statistics={"mean": np.mean}, + adjusted_p_value=True, + alpha=alpha, + ) + + expected_comparisons = n_models * (n_models - 1) // 2 + assert len(unadjusted["mean"]) == expected_comparisons + assert len(adjusted["mean"]) == expected_comparisons + + pvals_unadj = np.array([unadjusted["mean"][k]["p_value"] for k in unadjusted["mean"]], dtype=float) + pvals_adj = np.array([adjusted["mean"][k]["p_value"] for k in adjusted["mean"]], dtype=float) + + assert np.all((0.0 <= pvals_unadj) & (pvals_unadj <= 1.0)) + assert np.all((0.0 <= pvals_adj) & (pvals_adj <= 1.0)) + + # With many comparisons under the complete null, we should see at least one false positive pre-adjustment. 
+ assert (pvals_unadj < alpha).sum() >= 1 + + # With Holm-Bonferroni and a coarse bootstrap p-value grid, no result should survive adjustment. + assert (pvals_adj < alpha).sum() == 0 + + # Adjustment must not make p-values smaller. + for label in unadjusted["mean"]: + assert adjusted["mean"][label]["p_value"] >= unadjusted["mean"][label]["p_value"] - 1e-12 + + diff --git a/tests/test_pvalue_regressions.py b/tests/test_pvalue_regressions.py new file mode 100644 index 0000000..242ced1 --- /dev/null +++ b/tests/test_pvalue_regressions.py @@ -0,0 +1,97 @@ +import numpy as np + +import stambo + +from sklearn.neighbors import KNeighborsClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import StratifiedGroupKFold + + +def _make_classification_non_iid_fixture(seed: int = 2025): + # Mirrors notebooks/Classification_non_iid.ipynb + data, y, subject_ids = stambo.synthetic.generate_non_iid_measurements( + n_data=300, + n_subjects=100, + rho=0.9, + subj_sigma=1.0, + noise_sigma=0.5, + gamma=0.8, + mu_cls_1=[2, 2], + mu_cls_2=[2.1, 2.4], + overlap=0.2, + feat_corr=0.5, + seed=42, + ) + + gss = StratifiedGroupKFold(n_splits=3, shuffle=True, random_state=seed) + train_idx, test_idx = next(gss.split(data, y, groups=subject_ids)) + + X_train, X_test = data[train_idx], data[test_idx] + y_train, y_test = y[train_idx], y[test_idx] + groups_test = subject_ids[test_idx] + + knn = KNeighborsClassifier(n_neighbors=5) + logreg = LogisticRegression() + knn.fit(X_train, y_train) + logreg.fit(X_train, y_train) + + return y_test, groups_test, logreg.predict(X_test), knn.predict(X_test) + + +def test_compare_models_p_values_never_zero_and_respect_bootstrap_grid(): + seed = 2025 + n_bootstrap = 400 + y_test, groups_test, logreg_predictions, knn_predictions = _make_classification_non_iid_fixture(seed=seed) + + naive = stambo.compare_models( + y_test, + logreg_predictions, + knn_predictions, + metrics=("ROCAUC", "AP"), + seed=seed, + 
n_bootstrap=n_bootstrap, + silent=True, + ) + + # With the +1 smoothing and two-tailed doubling used in stambo, + # the theoretical minimum non-zero p-value is 2/(B+1). + p_min = 2.0 / (n_bootstrap + 1) + eps = 1e-12 + + for metric in naive: + p = float(naive[metric]["p_value"]) + assert 0.0 < p <= 1.0 + assert p + eps >= p_min + + +def test_compare_models_clustered_p_values_not_smaller_than_naive_for_fixture(): + # Regression guard for the specific synthetic non-iid notebook scenario: + # clustered bootstrap should not make p-values smaller than naive bootstrap. + seed = 2025 + n_bootstrap = 400 + y_test, groups_test, logreg_predictions, knn_predictions = _make_classification_non_iid_fixture(seed=seed) + + naive = stambo.compare_models( + y_test, + logreg_predictions, + knn_predictions, + metrics=("ROCAUC", "AP"), + seed=seed, + n_bootstrap=n_bootstrap, + silent=True, + ) + clustered = stambo.compare_models( + y_test, + logreg_predictions, + knn_predictions, + metrics=("ROCAUC", "AP"), + groups=groups_test, + seed=seed, + n_bootstrap=n_bootstrap, + silent=True, + ) + + for metric in naive: + assert float(clustered[metric]["p_value"]) >= float(naive[metric]["p_value"]) - 1e-12 + + diff --git a/tests/test_type_i_errors.py b/tests/test_type_i_errors.py index c5e1434..be6a4f2 100644 --- a/tests/test_type_i_errors.py +++ b/tests/test_type_i_errors.py @@ -18,7 +18,6 @@ def test_two_sample_test_has_no_type_i_errors(identical_gaussian_samples): mean_result = results["mean"] - # Index 0 -> p-value, 1 -> observed diff, 4 & 7 -> empirical metric per sample. - assert mean_result[0] == pytest.approx(1.0, rel=0, abs=1e-9) - assert mean_result[1] == pytest.approx(0.0, abs=1e-9) - assert mean_result[4] == pytest.approx(mean_result[7], abs=1e-9) + assert mean_result["p_value"] == pytest.approx(1.0, rel=0, abs=1e-9) + assert mean_result["diff"] == pytest.approx(0.0, abs=1e-9) + assert mean_result["emp_s1"] == pytest.approx(mean_result["emp_s2"], abs=1e-9)