Normalisierung und Erstellung von Train/Test

This commit is contained in:
Ibrahim El Sayed
2021-07-13 17:51:08 +02:00
parent c5a2c85788
commit e79bab7e2c
8 changed files with 480 additions and 436 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "879144d9",
"id": "8301251c",
"metadata": {},
"source": [
"### Load MNIST dataset"
@@ -11,7 +11,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "bd032860",
"id": "3368e2c3",
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "30da011c",
"id": "0dc2fe45",
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "4f555050",
"id": "30459411",
"metadata": {},
"outputs": [],
"source": [
@@ -46,7 +46,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "e4de4331",
"id": "0be717f8",
"metadata": {},
"outputs": [
{
@@ -74,7 +74,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "b5221963",
"id": "ae9b3d51",
"metadata": {},
"outputs": [],
"source": [
@@ -83,7 +83,7 @@
},
{
"cell_type": "markdown",
"id": "811db75a",
"id": "7c89b6d3",
"metadata": {},
"source": [
"### labels to int"
@@ -92,7 +92,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "2bcc19ad",
"id": "30880538",
"metadata": {},
"outputs": [],
"source": [
@@ -104,7 +104,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "cc4b728f",
"id": "34b6be41",
"metadata": {},
"outputs": [],
"source": [
@@ -114,7 +114,7 @@
},
{
"cell_type": "markdown",
"id": "d7113df3",
"id": "361fea4c",
"metadata": {},
"source": [
"### Prepare data for machine learning"
@@ -122,7 +122,7 @@
},
{
"cell_type": "markdown",
"id": "570f328e",
"id": "cab5977a",
"metadata": {},
"source": [
"### Identify Train Set and Test Set"
@@ -131,7 +131,7 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "80e1ca03",
"id": "9bb80760",
"metadata": {},
"outputs": [
{
@@ -158,7 +158,7 @@
},
{
"cell_type": "markdown",
"id": "ade8a1f6",
"id": "aac09882",
"metadata": {},
"source": [
"## Pipeline Declaration"
@@ -167,7 +167,7 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "bc5896c2",
"id": "ca389b56",
"metadata": {},
"outputs": [],
"source": [
@@ -195,7 +195,7 @@
},
{
"cell_type": "markdown",
"id": "9e905584",
"id": "b7c97601",
"metadata": {},
"source": [
"# Crossvalidation"
@@ -204,7 +204,7 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "bbbb447c",
"id": "cd37833d",
"metadata": {},
"outputs": [],
"source": [
@@ -222,7 +222,7 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "4a8240c4",
"id": "f738a4ca",
"metadata": {},
"outputs": [],
"source": [
@@ -240,7 +240,7 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "f397cf42",
"id": "756c8015",
"metadata": {},
"outputs": [],
"source": [
@@ -263,7 +263,7 @@
},
{
"cell_type": "markdown",
"id": "a543706f",
"id": "5d3b1484",
"metadata": {},
"source": [
"# Fitting"
@@ -272,7 +272,7 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "45452ceb",
"id": "1ea6a154",
"metadata": {},
"outputs": [],
"source": [
@@ -282,7 +282,7 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "03c01cd0",
"id": "ac4c7a18",
"metadata": {},
"outputs": [
{
@@ -333,7 +333,7 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "18f02d0c",
"id": "23c51b9e",
"metadata": {},
"outputs": [
{
@@ -385,7 +385,7 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "b2e7ee09",
"id": "8c92a008",
"metadata": {},
"outputs": [
{
@@ -437,7 +437,7 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "23ae34c3",
"id": "811c3930",
"metadata": {},
"outputs": [
{
@@ -490,7 +490,7 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "cac23616",
"id": "3c7440ff",
"metadata": {},
"outputs": [
{
@@ -543,7 +543,7 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "a57eb660",
"id": "8b491b79",
"metadata": {},
"outputs": [
{
@@ -596,7 +596,7 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "bcbedf38",
"id": "080ea6b8",
"metadata": {},
"outputs": [
{
@@ -649,7 +649,7 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "5bc4f44b",
"id": "6ee320cd",
"metadata": {},
"outputs": [
{
@@ -702,7 +702,7 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "a901ad5d",
"id": "17934567",
"metadata": {},
"outputs": [
{
@@ -755,7 +755,7 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "19e87457",
"id": "88fb14a4",
"metadata": {},
"outputs": [
{
@@ -816,7 +816,7 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "6146ccb1",
"id": "378c092b",
"metadata": {},
"outputs": [
{
@@ -877,7 +877,7 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "66a7637b",
"id": "3005da1d",
"metadata": {},
"outputs": [
{
@@ -930,7 +930,7 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "d4fadaac",
"id": "cbf8e245",
"metadata": {},
"outputs": [
{
@@ -983,7 +983,7 @@
{
"cell_type": "code",
"execution_count": 27,
"id": "d15fb11c",
"id": "cc1c7c77",
"metadata": {},
"outputs": [
{
@@ -1035,7 +1035,7 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "2db8577b",
"id": "562c937f",
"metadata": {},
"outputs": [
{
@@ -1087,7 +1087,7 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "a5702428",
"id": "0c661938",
"metadata": {},
"outputs": [],
"source": [
@@ -1104,7 +1104,7 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "0ee57cfe",
"id": "05b7b881",
"metadata": {},
"outputs": [],
"source": [
@@ -1120,7 +1120,7 @@
},
{
"cell_type": "markdown",
"id": "7fbbc930",
"id": "0a37b9d8",
"metadata": {},
"source": [
"# Auswertung"
@@ -1129,7 +1129,7 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "e5d609aa",
"id": "480adf73",
"metadata": {},
"outputs": [
{
@@ -1147,7 +1147,7 @@
{
"cell_type": "code",
"execution_count": 38,
"id": "234f14bb",
"id": "202ff9a7",
"metadata": {},
"outputs": [
{
@@ -1179,7 +1179,7 @@
},
{
"cell_type": "markdown",
"id": "a6ddb6f2",
"id": "94f1af95",
"metadata": {},
"source": [
"Default n=3\\\n",
@@ -1202,7 +1202,7 @@
},
{
"cell_type": "markdown",
"id": "bde6e847",
"id": "99ad8309",
"metadata": {},
"source": [
"n=3 euclid distance\\\n",
@@ -1225,7 +1225,7 @@
},
{
"cell_type": "markdown",
"id": "4c625bc3",
"id": "497d3216",
"metadata": {},
"source": [
"# Hyper Parameter Optimization"
@@ -1234,7 +1234,7 @@
{
"cell_type": "code",
"execution_count": 34,
"id": "24ff7ea2",
"id": "e5e0c930",
"metadata": {},
"outputs": [
{
@@ -1269,7 +1269,7 @@
{
"cell_type": "code",
"execution_count": 36,
"id": "b3b0eac3",
"id": "41349c36",
"metadata": {},
"outputs": [],
"source": [
@@ -1279,7 +1279,7 @@
{
"cell_type": "code",
"execution_count": 37,
"id": "b68589fe",
"id": "91d2f4bc",
"metadata": {},
"outputs": [],
"source": [
@@ -1289,7 +1289,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b162e908",
"id": "c031b179",
"metadata": {},
"outputs": [],
"source": []
@@ -1311,7 +1311,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
}
},
"nbformat": 4,

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "7a0c752a",
"id": "6904e7ae",
"metadata": {},
"source": [
"### Load MNIST dataset"
@@ -11,7 +11,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "e07d82fe",
"id": "e3d41c8f",
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "1f97dcb1",
"id": "55990ccc",
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "01f83832",
"id": "933f52fb",
"metadata": {},
"outputs": [],
"source": [
@@ -46,7 +46,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "affa0e2b",
"id": "41435175",
"metadata": {},
"outputs": [
{
@@ -73,7 +73,7 @@
},
{
"cell_type": "markdown",
"id": "4d51fd43",
"id": "fe14558d",
"metadata": {},
"source": [
"Bunch objects are sometimes used as an output for functions and methods. They extend dictionaries by enabling values to be accessed by key, bunch[\"value_key\"], or by an attribute, bunch.value_key.\\\n",
@@ -83,7 +83,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "78be57ab",
"id": "80a39a2e",
"metadata": {},
"outputs": [
{
@@ -106,7 +106,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "d0450c41",
"id": "5c211b9c",
"metadata": {},
"outputs": [
{
@@ -127,7 +127,7 @@
},
{
"cell_type": "markdown",
"id": "e61e2adb",
"id": "a4c5fb8d",
"metadata": {},
"source": [
"Datasets loaded by Scikit-Learn generally have a similar dictionary structure, including the following:\\\n",
@@ -139,7 +139,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "fe285433",
"id": "7c077427",
"metadata": {},
"outputs": [
{
@@ -159,7 +159,7 @@
},
{
"cell_type": "markdown",
"id": "5a70a746",
"id": "f3f2e42a",
"metadata": {},
"source": [
"### Prepare the MNIST dataset"
@@ -167,7 +167,7 @@
},
{
"cell_type": "markdown",
"id": "a9b7a120",
"id": "6d5d2658",
"metadata": {},
"source": [
"$f(X) = y$\n",
@@ -181,7 +181,7 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "4e02cf2a",
"id": "99784cec",
"metadata": {},
"outputs": [],
"source": [
@@ -191,7 +191,7 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "001d736f",
"id": "923676c7",
"metadata": {},
"outputs": [
{
@@ -212,7 +212,7 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "b344be1d",
"id": "ed44fc7a",
"metadata": {},
"outputs": [
{
@@ -233,7 +233,7 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "cef23e9f",
"id": "94ee3e59",
"metadata": {},
"outputs": [
{
@@ -253,7 +253,7 @@
},
{
"cell_type": "markdown",
"id": "fe3b1259",
"id": "478cb336",
"metadata": {},
"source": [
"### Plot data"
@@ -262,7 +262,7 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "953d9415",
"id": "f3cfebc6",
"metadata": {},
"outputs": [],
"source": [
@@ -274,7 +274,7 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "b68f6cee",
"id": "6d799c25",
"metadata": {},
"outputs": [
{
@@ -297,7 +297,7 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "8779b1a2",
"id": "72c7305b",
"metadata": {},
"outputs": [
{
@@ -317,7 +317,7 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "dcc605cf",
"id": "f5b0b349",
"metadata": {},
"outputs": [
{
@@ -343,7 +343,7 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "6d41d752",
"id": "c70641f8",
"metadata": {},
"outputs": [
{
@@ -364,7 +364,7 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "230cfd35",
"id": "98f8561b",
"metadata": {},
"outputs": [],
"source": [
@@ -375,7 +375,7 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "25a3a2e7",
"id": "33e244b2",
"metadata": {},
"outputs": [],
"source": [
@@ -389,7 +389,7 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "f1552762",
"id": "20043b74",
"metadata": {},
"outputs": [
{
@@ -413,7 +413,7 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "74b3a063",
"id": "6e4369de",
"metadata": {},
"outputs": [],
"source": [
@@ -429,7 +429,7 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "949b3914",
"id": "fb3f6d95",
"metadata": {},
"outputs": [
{
@@ -454,7 +454,7 @@
},
{
"cell_type": "markdown",
"id": "ec8a9d34",
"id": "3a69d0fd",
"metadata": {},
"source": [
"### Prepare data for machine learning"
@@ -463,7 +463,7 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "febbd286",
"id": "dcc31672",
"metadata": {},
"outputs": [
{
@@ -485,7 +485,7 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "fff839b6",
"id": "282ba914",
"metadata": {},
"outputs": [],
"source": [
@@ -495,7 +495,7 @@
},
{
"cell_type": "markdown",
"id": "2bdbeb4e",
"id": "5c34bbd5",
"metadata": {},
"source": [
"### Train classifier"
@@ -504,7 +504,7 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "4c32ae9f",
"id": "f0b6e1c9",
"metadata": {},
"outputs": [],
"source": [
@@ -515,7 +515,7 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "fe06ae55",
"id": "54e3fb64",
"metadata": {},
"outputs": [
{
@@ -540,7 +540,7 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "e6209258",
"id": "29446c32",
"metadata": {},
"outputs": [
{
@@ -565,7 +565,7 @@
{
"cell_type": "code",
"execution_count": 27,
"id": "62773b1b",
"id": "5030ccc3",
"metadata": {},
"outputs": [
{
@@ -584,7 +584,7 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "0ce21474",
"id": "22c780fb",
"metadata": {},
"outputs": [
{
@@ -603,7 +603,7 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "78a8e8a7",
"id": "990d3925",
"metadata": {},
"outputs": [
{
@@ -626,7 +626,7 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "45d93a99",
"id": "b6e1d70c",
"metadata": {},
"outputs": [
{
@@ -647,7 +647,7 @@
},
{
"cell_type": "markdown",
"id": "fc739051",
"id": "a65ed630",
"metadata": {},
"source": [
"### Evaluation"
@@ -656,7 +656,7 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "990a5b7c",
"id": "4c93c5f9",
"metadata": {},
"outputs": [
{
@@ -677,7 +677,7 @@
{
"cell_type": "code",
"execution_count": 32,
"id": "f125a37d",
"id": "f01ec3dd",
"metadata": {},
"outputs": [
{
@@ -697,7 +697,7 @@
},
{
"cell_type": "markdown",
"id": "bdcb6e6e",
"id": "c41db6b0",
"metadata": {},
"source": [
"Accuracy is strongly influenced by the distribution of the classes in the test data."
@@ -705,7 +705,7 @@
},
{
"cell_type": "markdown",
"id": "be858cd5",
"id": "3f830f36",
"metadata": {},
"source": [
"#### Cross Validation\n",
@@ -715,7 +715,7 @@
{
"cell_type": "code",
"execution_count": 33,
"id": "7adb1ea7",
"id": "eeec5311",
"metadata": {},
"outputs": [
{
@@ -736,7 +736,7 @@
{
"cell_type": "code",
"execution_count": 34,
"id": "11d22c5e",
"id": "d1ed46a3",
"metadata": {},
"outputs": [
{
@@ -759,7 +759,7 @@
},
{
"cell_type": "markdown",
"id": "b54e83a5",
"id": "539cfa0c",
"metadata": {},
"source": [
"#### Precision"
@@ -768,7 +768,7 @@
{
"cell_type": "code",
"execution_count": 35,
"id": "ef7a9e7e",
"id": "abfe8383",
"metadata": {},
"outputs": [
{
@@ -790,7 +790,7 @@
},
{
"cell_type": "markdown",
"id": "da723740",
"id": "d899dd6f",
"metadata": {},
"source": [
"#### Recall"
@@ -799,7 +799,7 @@
{
"cell_type": "code",
"execution_count": 36,
"id": "cb77bf58",
"id": "15d30ae5",
"metadata": {},
"outputs": [
{
@@ -821,7 +821,7 @@
},
{
"cell_type": "markdown",
"id": "28867d1b",
"id": "393c3b1c",
"metadata": {},
"source": [
"#### F1 Score"
@@ -830,7 +830,7 @@
{
"cell_type": "code",
"execution_count": 37,
"id": "0674e0de",
"id": "53fa1823",
"metadata": {},
"outputs": [
{
@@ -852,7 +852,7 @@
},
{
"cell_type": "markdown",
"id": "da59da11",
"id": "08b6bdc2",
"metadata": {},
"source": [
"#### Confusion Matrix"
@@ -861,7 +861,7 @@
{
"cell_type": "code",
"execution_count": 38,
"id": "adbdeece",
"id": "e205d359",
"metadata": {},
"outputs": [
{
@@ -891,7 +891,7 @@
{
"cell_type": "code",
"execution_count": 39,
"id": "fb50c5a4",
"id": "4ec777ac",
"metadata": {},
"outputs": [
{
@@ -929,7 +929,7 @@
{
"cell_type": "code",
"execution_count": 40,
"id": "2f0d536a",
"id": "9c53a0a7",
"metadata": {},
"outputs": [],
"source": [
@@ -940,7 +940,7 @@
{
"cell_type": "code",
"execution_count": 41,
"id": "dddf5fe8",
"id": "c47e7c69",
"metadata": {},
"outputs": [
{
@@ -970,7 +970,7 @@
{
"cell_type": "code",
"execution_count": 42,
"id": "44537aae",
"id": "ef09fc40",
"metadata": {},
"outputs": [
{
@@ -1006,7 +1006,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "57d96f56",
"id": "6f9816f1",
"metadata": {},
"outputs": [],
"source": []
@@ -1028,7 +1028,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
}
},
"nbformat": 4,