diff --git a/01_materials/notebooks/Classification-1.ipynb b/01_materials/notebooks/Classification-1.ipynb index 7b6959a7a..93ed13871 100644 --- a/01_materials/notebooks/Classification-1.ipynb +++ b/01_materials/notebooks/Classification-1.ipynb @@ -2326,7 +2326,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -2340,7 +2340,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.11.15" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index b0a47da71..d07cba25d 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -51,18 +51,295 @@ "from sklearn.metrics import recall_score, precision_score\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.metrics import accuracy_score" + "from sklearn.metrics import accuracy_score\n", + "from sklearn.datasets import load_wine\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... 
\n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from sklearn.datasets import load_wine\n", - "\n", "# Load the Wine dataset\n", "wine_data = load_wine()\n", "\n", @@ -73,7 +350,7 @@ "wine_df['class'] = wine_data.target\n", "\n", "# Display the DataFrame\n", - "wine_df\n" + "wine_df" ] }, { @@ -91,12 +368,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "178" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "wine_df.shape[0]" ] }, { @@ -109,12 +397,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "14" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "wine_df.shape[1]" ] }, { @@ -127,12 +426,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(dtype('int64'), array([0, 1, 2]))" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your answer here" + "wine_df['class'].dtype, wine_df['class'].unique()" ] }, { @@ -146,12 +456,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# 
Your answer here" + "wine_df.shape[1] - 1" ] }, { @@ -175,10 +496,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -204,7 +552,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "> KNN uses distance calculations to identify nearest neighbors. If variables are measured on different scales, variables with larger values will dominate the distance calculation. Standardization ensures that all predictors contribute equally." ] }, { @@ -220,7 +568,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "> The response variable contains categorical labels (0, 1, and 2). These labels are identifiers rather than measurements, so scaling them would have no meaning." 
] }, { @@ -236,7 +584,10 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "> `random.seed(123)`\n", + ">\n", + "> Setting a seed ensures reproducibility, meaning the same train/test split and results will be obtained each time the code is run.\n", + "> The specific value (123) is arbitrary; any fixed number would work as long as it is used consistently." ] }, { @@ -251,17 +602,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "72c101f2", "metadata": {}, "outputs": [], "source": [ - "# set a seed for reproducibility\n", - "np.random.seed(123)\n", - "\n", - "# split the data into a training and testing set. hint: use train_test_split !\n", - "\n", - "# Your code here ..." + "X_train, X_test, y_train, y_test = train_test_split(predictors_standardized, wine_df['class'], test_size=0.25, random_state=123)\n" ] }, { @@ -284,12 +630,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "grid_search = GridSearchCV(KNeighborsClassifier(), {'n_neighbors': range(1, 51)}, cv=10)\n", + "grid_search.fit(X_train, y_train)\n", + "grid_search.best_params_['n_neighbors']" ] }, { @@ -305,12 +664,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test set accuracy: 0.9333\n" + ] + } + ], "source": [ - "# Your code here..." 
+ "knn_model = KNeighborsClassifier(n_neighbors=grid_search.best_params_['n_neighbors'])\n", + "knn_model.fit(X_train, y_train)\n", + "y_pred = knn_model.predict(X_test)\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(f\"Test set accuracy: {accuracy:.4f}\")" ] }, { @@ -365,7 +736,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -379,12 +750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.9" } }, "nbformat": 4,