diff --git a/01_materials/notebooks/Classification-1.ipynb b/01_materials/notebooks/Classification-1.ipynb
index 7b6959a7a..93ed13871 100644
--- a/01_materials/notebooks/Classification-1.ipynb
+++ b/01_materials/notebooks/Classification-1.ipynb
@@ -2326,7 +2326,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "base",
+ "display_name": "lcr-env",
"language": "python",
"name": "python3"
},
@@ -2340,7 +2340,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.14"
+ "version": "3.11.15"
}
},
"nbformat": 4,
diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb
index b0a47da71..44b239e69 100644
--- a/02_activities/assignments/assignment_1.ipynb
+++ b/02_activities/assignments/assignment_1.ipynb
@@ -34,10 +34,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "4a3485d6-ba58-4660-a983-5680821c5719",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Matplotlib is building the font cache; this may take a moment.\n"
+ ]
+ }
+ ],
"source": [
"# Import standard libraries\n",
"import pandas as pd\n",
@@ -51,18 +59,295 @@
"from sklearn.metrics import recall_score, precision_score\n",
"from sklearn.model_selection import cross_validate\n",
"from sklearn.model_selection import GridSearchCV\n",
- "from sklearn.metrics import accuracy_score"
+ "from sklearn.metrics import accuracy_score\n",
+ "from sklearn.datasets import load_wine\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " alcohol | \n",
+ " malic_acid | \n",
+ " ash | \n",
+ " alcalinity_of_ash | \n",
+ " magnesium | \n",
+ " total_phenols | \n",
+ " flavanoids | \n",
+ " nonflavanoid_phenols | \n",
+ " proanthocyanins | \n",
+ " color_intensity | \n",
+ " hue | \n",
+ " od280/od315_of_diluted_wines | \n",
+ " proline | \n",
+ " class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 14.23 | \n",
+ " 1.71 | \n",
+ " 2.43 | \n",
+ " 15.6 | \n",
+ " 127.0 | \n",
+ " 2.80 | \n",
+ " 3.06 | \n",
+ " 0.28 | \n",
+ " 2.29 | \n",
+ " 5.64 | \n",
+ " 1.04 | \n",
+ " 3.92 | \n",
+ " 1065.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 13.20 | \n",
+ " 1.78 | \n",
+ " 2.14 | \n",
+ " 11.2 | \n",
+ " 100.0 | \n",
+ " 2.65 | \n",
+ " 2.76 | \n",
+ " 0.26 | \n",
+ " 1.28 | \n",
+ " 4.38 | \n",
+ " 1.05 | \n",
+ " 3.40 | \n",
+ " 1050.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 13.16 | \n",
+ " 2.36 | \n",
+ " 2.67 | \n",
+ " 18.6 | \n",
+ " 101.0 | \n",
+ " 2.80 | \n",
+ " 3.24 | \n",
+ " 0.30 | \n",
+ " 2.81 | \n",
+ " 5.68 | \n",
+ " 1.03 | \n",
+ " 3.17 | \n",
+ " 1185.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 14.37 | \n",
+ " 1.95 | \n",
+ " 2.50 | \n",
+ " 16.8 | \n",
+ " 113.0 | \n",
+ " 3.85 | \n",
+ " 3.49 | \n",
+ " 0.24 | \n",
+ " 2.18 | \n",
+ " 7.80 | \n",
+ " 0.86 | \n",
+ " 3.45 | \n",
+ " 1480.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 13.24 | \n",
+ " 2.59 | \n",
+ " 2.87 | \n",
+ " 21.0 | \n",
+ " 118.0 | \n",
+ " 2.80 | \n",
+ " 2.69 | \n",
+ " 0.39 | \n",
+ " 1.82 | \n",
+ " 4.32 | \n",
+ " 1.04 | \n",
+ " 2.93 | \n",
+ " 735.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 173 | \n",
+ " 13.71 | \n",
+ " 5.65 | \n",
+ " 2.45 | \n",
+ " 20.5 | \n",
+ " 95.0 | \n",
+ " 1.68 | \n",
+ " 0.61 | \n",
+ " 0.52 | \n",
+ " 1.06 | \n",
+ " 7.70 | \n",
+ " 0.64 | \n",
+ " 1.74 | \n",
+ " 740.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 174 | \n",
+ " 13.40 | \n",
+ " 3.91 | \n",
+ " 2.48 | \n",
+ " 23.0 | \n",
+ " 102.0 | \n",
+ " 1.80 | \n",
+ " 0.75 | \n",
+ " 0.43 | \n",
+ " 1.41 | \n",
+ " 7.30 | \n",
+ " 0.70 | \n",
+ " 1.56 | \n",
+ " 750.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 175 | \n",
+ " 13.27 | \n",
+ " 4.28 | \n",
+ " 2.26 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.59 | \n",
+ " 0.69 | \n",
+ " 0.43 | \n",
+ " 1.35 | \n",
+ " 10.20 | \n",
+ " 0.59 | \n",
+ " 1.56 | \n",
+ " 835.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 176 | \n",
+ " 13.17 | \n",
+ " 2.59 | \n",
+ " 2.37 | \n",
+ " 20.0 | \n",
+ " 120.0 | \n",
+ " 1.65 | \n",
+ " 0.68 | \n",
+ " 0.53 | \n",
+ " 1.46 | \n",
+ " 9.30 | \n",
+ " 0.60 | \n",
+ " 1.62 | \n",
+ " 840.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " 14.13 | \n",
+ " 4.10 | \n",
+ " 2.74 | \n",
+ " 24.5 | \n",
+ " 96.0 | \n",
+ " 2.05 | \n",
+ " 0.76 | \n",
+ " 0.56 | \n",
+ " 1.35 | \n",
+ " 9.20 | \n",
+ " 0.61 | \n",
+ " 1.60 | \n",
+ " 560.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
178 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n",
+ "0 14.23 1.71 2.43 15.6 127.0 2.80 \n",
+ "1 13.20 1.78 2.14 11.2 100.0 2.65 \n",
+ "2 13.16 2.36 2.67 18.6 101.0 2.80 \n",
+ "3 14.37 1.95 2.50 16.8 113.0 3.85 \n",
+ "4 13.24 2.59 2.87 21.0 118.0 2.80 \n",
+ ".. ... ... ... ... ... ... \n",
+ "173 13.71 5.65 2.45 20.5 95.0 1.68 \n",
+ "174 13.40 3.91 2.48 23.0 102.0 1.80 \n",
+ "175 13.27 4.28 2.26 20.0 120.0 1.59 \n",
+ "176 13.17 2.59 2.37 20.0 120.0 1.65 \n",
+ "177 14.13 4.10 2.74 24.5 96.0 2.05 \n",
+ "\n",
+ " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n",
+ "0 3.06 0.28 2.29 5.64 1.04 \n",
+ "1 2.76 0.26 1.28 4.38 1.05 \n",
+ "2 3.24 0.30 2.81 5.68 1.03 \n",
+ "3 3.49 0.24 2.18 7.80 0.86 \n",
+ "4 2.69 0.39 1.82 4.32 1.04 \n",
+ ".. ... ... ... ... ... \n",
+ "173 0.61 0.52 1.06 7.70 0.64 \n",
+ "174 0.75 0.43 1.41 7.30 0.70 \n",
+ "175 0.69 0.43 1.35 10.20 0.59 \n",
+ "176 0.68 0.53 1.46 9.30 0.60 \n",
+ "177 0.76 0.56 1.35 9.20 0.61 \n",
+ "\n",
+ " od280/od315_of_diluted_wines proline class \n",
+ "0 3.92 1065.0 0 \n",
+ "1 3.40 1050.0 0 \n",
+ "2 3.17 1185.0 0 \n",
+ "3 3.45 1480.0 0 \n",
+ "4 2.93 735.0 0 \n",
+ ".. ... ... ... \n",
+ "173 1.74 740.0 2 \n",
+ "174 1.56 750.0 2 \n",
+ "175 1.56 835.0 2 \n",
+ "176 1.62 840.0 2 \n",
+ "177 1.60 560.0 2 \n",
+ "\n",
+ "[178 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "from sklearn.datasets import load_wine\n",
- "\n",
"# Load the Wine dataset\n",
"wine_data = load_wine()\n",
"\n",
@@ -73,7 +358,7 @@
"wine_df['class'] = wine_data.target\n",
"\n",
"# Display the DataFrame\n",
- "wine_df\n"
+ "wine_df"
]
},
{
@@ -91,12 +376,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"id": "56916892",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "178"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "wine_df.shape[0]"
]
},
{
@@ -109,12 +405,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"id": "df0ef103",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "wine_df.shape[1]"
]
},
{
@@ -127,12 +434,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"id": "47989426",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(dtype('int64'), array([0, 1, 2]))"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "wine_df['class'].dtype, wine_df['class'].unique()"
]
},
{
@@ -146,12 +464,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "bd7b0910",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your answer here"
+ "wine_df.shape[1] - 1"
]
},
{
@@ -175,10 +504,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "cc899b59",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n",
+ "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n",
+ "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n",
+ "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n",
+ "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n",
+ "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n",
+ "\n",
+ " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n",
+ "0 0.808997 1.034819 -0.659563 1.224884 \n",
+ "1 0.568648 0.733629 -0.820719 -0.544721 \n",
+ "2 0.808997 1.215533 -0.498407 2.135968 \n",
+ "3 2.491446 1.466525 -0.981875 1.032155 \n",
+ "4 0.808997 0.663351 0.226796 0.401404 \n",
+ "\n",
+ " color_intensity hue od280/od315_of_diluted_wines proline \n",
+ "0 0.251717 0.362177 1.847920 1.013009 \n",
+ "1 -0.293321 0.406051 1.113449 0.965242 \n",
+ "2 0.269020 0.318304 0.788587 1.395148 \n",
+ "3 1.186068 -0.427544 1.184071 2.334574 \n",
+ "4 -0.319276 0.362177 0.449601 -0.037874 \n"
+ ]
+ }
+ ],
"source": [
"# Select predictors (excluding the last column)\n",
"predictors = wine_df.iloc[:, :-1]\n",
@@ -204,7 +560,7 @@
"id": "403ef0bb",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> KNN uses distance calculations to identify nearest neighbors. If variables are measured on different scales, variables with larger values will dominate the distance calculation. Standardization ensures that all predictors contribute equally."
]
},
{
@@ -220,7 +576,7 @@
"id": "fdee5a15",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> The response variable contains categorical labels (0, 1, and 2). These labels are identifiers rather than measurements, so scaling them would have no meaning."
]
},
{
@@ -236,7 +592,10 @@
"id": "f0676c21",
"metadata": {},
"source": [
- "> Your answer here..."
+ "> `np.random.seed(123)`\n",
+ "\n",
+ "Setting a seed ensures reproducibility, meaning the same train/test split and results will be obtained each time the code is run.\n",
+ "The specific value (123) is arbitrary; any fixed number would work as long as it is used consistently."
]
},
{
@@ -251,17 +610,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"id": "72c101f2",
"metadata": {},
"outputs": [],
"source": [
- "# set a seed for reproducibility\n",
- "np.random.seed(123)\n",
- "\n",
- "# split the data into a training and testing set. hint: use train_test_split !\n",
- "\n",
- "# Your code here ..."
+ "X_train, X_test, y_train, y_test = train_test_split(predictors_standardized, wine_df['class'], test_size=0.25, random_state=123)\n"
]
},
{
@@ -284,12 +638,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "08818c64",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "15"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here..."
+ "grid_search = GridSearchCV(KNeighborsClassifier(), {'n_neighbors': range(1, 51)}, cv=10)\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "grid_search.best_params_['n_neighbors']"
]
},
{
@@ -305,12 +672,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"id": "ffefa9f2",
"metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test set accuracy: 0.9333\n"
+ ]
+ }
+ ],
+ "source": [
+ "knn_model = KNeighborsClassifier(n_neighbors=grid_search.best_params_['n_neighbors'])\n",
+ "knn_model.fit(X_train, y_train)\n",
+ "y_pred = knn_model.predict(X_test)\n",
+ "accuracy = accuracy_score(y_test, y_pred)\n",
+ "print(f\"Test set accuracy: {accuracy:.4f}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ad6151c",
+ "metadata": {},
"outputs": [],
"source": [
- "# Your code here..."
+ "# CONCLUSION\n",
+ "# The KNN model with the optimal number of neighbors (as determined by GridSearchCV) \n",
+ "# achieved a test set accuracy of approximately 0.9333. \n",
+ "# This indicates that the model is highly effective in classifying the wine samples based on the standardized \n",
+ "# features.\n"
]
},
{
@@ -359,13 +752,13 @@
"- [ ] Reviewed the PR description guidelines and adhered to them.\n",
"- [ ] Verify that the link is accessible in a private browser window.\n",
"\n",
- "If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#dsf1-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges.\n"
+ "If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack at `#dsf1-help`. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges.\n"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3.10.4",
+ "display_name": "lcr-env",
"language": "python",
"name": "python3"
},
@@ -379,12 +772,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.19"
- },
- "vscode": {
- "interpreter": {
- "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e"
- }
+ "version": "3.11.15"
}
},
"nbformat": 4,
diff --git a/LCR-main b/LCR-main
new file mode 160000
index 000000000..55e4db8c7
--- /dev/null
+++ b/LCR-main
@@ -0,0 +1 @@
+Subproject commit 55e4db8c7e38f61b880701d2c7d4e17279284c42