diff --git a/0-pilot-project/MNIST-kNN-pipeline.ipynb b/0-pilot-project/MNIST-kNN-pipeline.ipynb index ddb78d6..833c8a1 100644 --- a/0-pilot-project/MNIST-kNN-pipeline.ipynb +++ b/0-pilot-project/MNIST-kNN-pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "03d83636", + "id": "a3c5e9f5", "metadata": {}, "source": [ "### Load MNIST dataset" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "242d14f0", + "id": "faf6a6a2", "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "cfd3a54a", + "id": "827c4abe", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "7e1587b3", + "id": "f48143ed", "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "bbccfc32", + "id": "837cf0fb", "metadata": {}, "outputs": [ { @@ -74,7 +74,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "6db7c96a", + "id": "c9b983ad", "metadata": {}, "outputs": [], "source": [ @@ -83,16 +83,16 @@ }, { "cell_type": "markdown", - "id": "459780d0", + "id": "7bda23fd", "metadata": {}, "source": [ - "### Fix labels" + "### labels to int" ] }, { "cell_type": "code", "execution_count": 6, - "id": "48c4e861", + "id": "4740ebf7", "metadata": {}, "outputs": [], "source": [ @@ -104,7 +104,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "72876ab1", + "id": "315c76e6", "metadata": {}, "outputs": [], "source": [ @@ -114,7 +114,7 @@ }, { "cell_type": "markdown", - "id": "c9dacae4", + "id": "91d3d66e", "metadata": {}, "source": [ "### Prepare data for machine learning" @@ -122,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "b44b3f87", + "id": "a8c06c27", "metadata": {}, "source": [ "### Identify Train Set and Test Set" @@ -131,7 +131,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "51c5da44", + "id": "44f347bf", "metadata": {}, "outputs": [ { @@ -148,7 +148,7 @@ 
"source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1337)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=177013)\n", "\n", "print(f\"X_train: {len(X_train)}, {X_train.shape}\")\n", "print(f\"X_test: {len(X_test)}, {X_test.shape}\")\n", @@ -158,7 +158,7 @@ }, { "cell_type": "markdown", - "id": "673e237d", + "id": "50224021", "metadata": {}, "source": [ "## Pipeline Declaration" @@ -167,106 +167,35 @@ { "cell_type": "code", "execution_count": 9, - "id": "8ca34ce2", + "id": "d03586ff", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11, 11)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", - "from sklearn.decomposition import PCA\n", + "from sklearn.decomposition import PCA, KernelPCA\n", "from sklearn.preprocessing import (StandardScaler, \n", " MinMaxScaler, \n", - " MaxAbsScaler, \n", - " PowerTransformer)\n", + " MaxAbsScaler,\n", + " PowerTransformer,\n", + " Binarizer)\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.metrics import classification_report, accuracy_score\n", "\n", "n_neighbors = 3\n", + "metric = 'euclidean'\n", + "weights = 'distance'\n", "n95_components = 0.95\n", "n99_components = 0.99\n", "\n", - "names = ['knn (baseline)', \n", - " 'scalar+knn', \n", - " 'minmax+knn', \n", - " 'standard+pca95+knn', \n", - " 'minmax+pca95+knn', # Best so far w/ 97.0429%\n", - " 'standard+pca99+knn', \n", - " 'minmax+pca99+knn',\n", - " 'maxabs+pca95+knn', \n", - " 'maxabs+pca99+knn', # Best so far w/ 97.0429%\n", - " 'power+pca95+knn',\n", - " 'power+pca99+knn',\n", - " ]\n", - "\n", - "classifiers = [\n", - " Pipeline([('knn', KNeighborsClassifier(n_neighbors=n_neighbors, 
n_jobs=-1))]),\n", - " Pipeline([\n", - " ('standard', StandardScaler()),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('minmax', MinMaxScaler()),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('standard', StandardScaler()),\n", - " ('pca', PCA(n_components=n95_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('minmax', MinMaxScaler()),\n", - " ('pca', PCA(n_components=n95_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('standard', StandardScaler()),\n", - " ('pca', PCA(n_components=n99_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('minmax', MinMaxScaler()),\n", - " ('pca', PCA(n_components=n99_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('maxabs', MaxAbsScaler()),\n", - " ('pca', PCA(n_components=n99_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('maxabs', MaxAbsScaler()),\n", - " ('pca', PCA(n_components=n95_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('power', PowerTransformer()),\n", - " ('pca', PCA(n_components=n99_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - " Pipeline([\n", - " ('power', PowerTransformer()),\n", - " ('pca', PCA(n_components=n95_components)),\n", - " ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1))\n", - " ]),\n", - "]\n", - "\n", - "len(names), len(classifiers)" + "names = []\n", + "classifiers = []\n", + "accuracies = []" ] }, { "cell_type": "markdown", 
- "id": "f38b2bb2", + "id": "582d15bf", "metadata": {}, "source": [ "# Crossvalidation" @@ -275,20 +204,11 @@ { "cell_type": "code", "execution_count": 10, - "id": "3465f546", - "metadata": {}, - "outputs": [], - "source": [ - "accuracies = []" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1f087f92", + "id": "3d3c4023", "metadata": {}, "outputs": [], "source": [ + "# NOTE: not used by the fitting cells below -- they call cv() instead\n", "def cv_train(num,cv):\n", " name = names[num]\n", " clf = classifiers[num]\n", @@ -301,11 +221,12 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "9cb9b3e7", + "execution_count": 11, + "id": "620f88eb", "metadata": {}, "outputs": [], "source": [ + "# NOTE: also not used by the fitting cells below -- see cv()\n", "def cv_test(num):\n", " name = names[num]\n", " clf = classifiers[num]\n", @@ -319,352 +240,522 @@ { "cell_type": "code", "execution_count": 12, - "id": "462e340f", + "id": "3575c029", "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "\n", + "# Helper the fitting cells below call: fit pipeline `num` on the train split, report accuracy on the held-out test split, and print the k-fold cross-validation table\n", "def cv(num,cv_arg=10):\n", " name = names[num]\n", " clf = classifiers[num]\n", " clf = clf.fit(X_train, y_train)\n", - " cv = cross_validate(clf, X_train, y_train, cv=cv_arg, n_jobs=-1, return_estimator=True) \n", - " cv_clf = cv['estimator'][np.argmax(cv['test_score'])]\n", - " y_test_pred = cv_clf.predict(X_test)\n", + " cv = cross_validate(clf, X_train, y_train, cv=cv_arg, n_jobs=-1)\n", + " # cv_clf = cv['estimator'][np.argmax(cv['test_score'])] # would select the fold estimator with the highest cross-validation test_score\n", + " y_test_pred = clf.predict(X_test)\n", " accuracy = accuracy_score(y_test, y_test_pred, normalize=True)*100\n", - " print(f\"Pipeline: {name} ({accuracy:.4f}%)\")\n", + " print(f\"Pipeline: {name} ({accuracy:.4f}%) ({cv['fit_time'][np.argmax(cv['test_score'])]:.4})\")\n", " print(classification_report(y_test, y_test_pred))\n", + " print(pd.DataFrame.from_dict(cv))\n", " return accuracy" ] }, + { + "cell_type": "markdown", + "id": "e485d1cc", + "metadata": {}, + "source": [ + "# Fitting" + ] +
}, + { + "cell_type": "code", + "execution_count": 13, + "id": "1bbfa763", + "metadata": {}, + "outputs": [], + "source": [ + "op_kNN = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1, metric=metric, weights=weights)" + ] + }, { "cell_type": "code", "execution_count": 14, - "id": "cbca3b1f", + "id": "24c79306", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: knn (baseline) (96.9857%)\n", + "Pipeline: knn (baseline) (97.3071%) (0.1153)\n", " precision recall f1-score support\n", "\n", - " 0 0.96 0.99 0.98 1404\n", - " 1 0.97 0.99 0.98 1590\n", - " 2 0.98 0.97 0.97 1395\n", - " 3 0.98 0.96 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.96 0.96 0.96 1245\n", - " 6 0.98 0.98 0.98 1334\n", - " 7 0.97 0.97 0.97 1447\n", - " 8 0.99 0.92 0.95 1321\n", - " 9 0.95 0.96 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.96 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.94 0.96 1411\n", + " 9 0.96 0.97 0.96 1449\n", "\n", " accuracy 0.97 14000\n", " macro avg 0.97 0.97 0.97 14000\n", "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + "\n", + " fit_time score_time test_score\n", + "0 0.148709 8.932784 0.972143\n", + "1 0.209577 9.215023 0.971429\n", + "2 0.179098 9.244114 0.971607\n", + "3 0.262929 9.187294 0.970714\n", + "4 0.183352 9.243924 0.972143\n", + "5 0.116970 9.330330 0.972857\n", + "6 0.114284 8.733218 0.975179\n", + "7 0.117283 8.884361 0.972500\n", + "8 0.115274 8.905413 0.975357\n", + "9 0.117665 9.335545 0.973393\n" ] } ], "source": [ - "from sklearn.model_selection import LeaveOneOut\n", - "# accuracies.append(cv(0,5))\n", - "accuracies.append(cv(0,10))" + "names.append('knn (baseline)')\n", + "classifiers.append(Pipeline([\n", + " ('knn', op_kNN)\n", + "]))\n", + " \n", + "accuracies.append(cv(0))" ] }, { "cell_type": "code", 
"execution_count": 15, - "id": "ad92d1f0", + "id": "52b4ce78", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: scalar+knn (94.2143%)\n", + "Pipeline: scalar+knn (94.8000%) (1.203)\n", " precision recall f1-score support\n", "\n", - " 0 0.95 0.98 0.96 1404\n", - " 1 0.96 0.99 0.97 1590\n", - " 2 0.93 0.95 0.94 1395\n", - " 3 0.94 0.94 0.94 1462\n", - " 4 0.95 0.93 0.94 1374\n", - " 5 0.93 0.92 0.93 1245\n", - " 6 0.96 0.97 0.96 1334\n", - " 7 0.93 0.94 0.94 1447\n", - " 8 0.97 0.88 0.92 1321\n", - " 9 0.91 0.91 0.91 1428\n", + " 0 0.97 0.99 0.98 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.95 0.93 0.94 1369\n", + " 3 0.94 0.95 0.95 1415\n", + " 4 0.94 0.93 0.94 1373\n", + " 5 0.93 0.91 0.92 1257\n", + " 6 0.96 0.97 0.97 1351\n", + " 7 0.94 0.95 0.94 1422\n", + " 8 0.96 0.91 0.93 1411\n", + " 9 0.92 0.94 0.93 1449\n", "\n", - " accuracy 0.94 14000\n", - " macro avg 0.94 0.94 0.94 14000\n", - "weighted avg 0.94 0.94 0.94 14000\n", - "\n" + " accuracy 0.95 14000\n", + " macro avg 0.95 0.95 0.95 14000\n", + "weighted avg 0.95 0.95 0.95 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 1.009821 8.804876 0.949643\n", + "1 0.896719 8.698193 0.945000\n", + "2 1.011811 8.871731 0.949107\n", + "3 0.807173 8.469129 0.942500\n", + "4 1.330411 8.438243 0.948393\n", + "5 0.753516 8.551000 0.945179\n", + "6 0.755293 8.546287 0.943214\n", + "7 1.497854 8.511951 0.946607\n", + "8 1.203481 8.682229 0.951607\n", + "9 1.530921 8.592839 0.947143\n" ] } ], "source": [ - "# accuracies.append(cv(1,5))\n", - "accuracies.append(cv(1,10))" + "names.append('scalar+knn') \n", + "classifiers.append(Pipeline([\n", + " ('standard', StandardScaler()),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(1,10)) # Pipeline: standard+knn (94.3714%)" ] }, { "cell_type": "code", "execution_count": 16, - "id": "881d8a07", + "id": "0dfeaba7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
- "Pipeline: minmax+knn (96.9857%)\n", + "Pipeline: minmax+knn (97.3071%) (0.4171)\n", " precision recall f1-score support\n", "\n", - " 0 0.96 0.99 0.98 1404\n", - " 1 0.97 0.99 0.98 1590\n", - " 2 0.98 0.97 0.97 1395\n", - " 3 0.98 0.96 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.96 0.96 0.96 1245\n", - " 6 0.98 0.98 0.98 1334\n", - " 7 0.97 0.97 0.97 1447\n", - " 8 0.99 0.92 0.95 1321\n", - " 9 0.95 0.96 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.96 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.94 0.96 1411\n", + " 9 0.96 0.97 0.96 1449\n", "\n", " accuracy 0.97 14000\n", " macro avg 0.97 0.97 0.97 14000\n", "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + "\n", + " fit_time score_time test_score\n", + "0 0.471567 9.110645 0.972143\n", + "1 0.471289 9.058701 0.971429\n", + "2 0.464118 8.920339 0.971786\n", + "3 0.392877 8.670509 0.970714\n", + "4 0.560493 9.075067 0.972143\n", + "5 0.439241 9.117617 0.972857\n", + "6 0.417131 8.821410 0.975179\n", + "7 0.502502 9.211790 0.972679\n", + "8 0.562765 8.894657 0.975179\n", + "9 0.365910 8.664754 0.973393\n" ] } ], "source": [ - "# accuracies.append(cv(2,5))\n", - "accuracies.append(cv(2,10))" + "names.append('minmax+knn')\n", + "classifiers.append(Pipeline([\n", + " ('minmax', MinMaxScaler()),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(2))" ] }, { "cell_type": "code", "execution_count": 17, - "id": "1402e10b", + "id": "954f303d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: standard+pca95+knn (94.7000%)\n", + "Pipeline: standard+pca95+knn (95.3929%) (7.125)\n", " precision recall f1-score support\n", "\n", - " 0 0.95 0.98 0.97 1404\n", - " 1 0.96 0.99 0.98 1590\n", - " 2 0.94 0.95 0.94 1395\n", - " 3 0.94 0.94 0.94 1462\n", - " 4 0.96 0.94 0.95 1374\n", - " 5 
0.93 0.93 0.93 1245\n", - " 6 0.96 0.97 0.97 1334\n", - " 7 0.94 0.95 0.94 1447\n", - " 8 0.97 0.89 0.93 1321\n", - " 9 0.92 0.92 0.92 1428\n", + " 0 0.97 0.99 0.98 1359\n", + " 1 0.98 0.99 0.98 1594\n", + " 2 0.96 0.94 0.95 1369\n", + " 3 0.94 0.96 0.95 1415\n", + " 4 0.95 0.94 0.95 1373\n", + " 5 0.94 0.92 0.93 1257\n", + " 6 0.96 0.97 0.97 1351\n", + " 7 0.95 0.96 0.95 1422\n", + " 8 0.97 0.92 0.94 1411\n", + " 9 0.92 0.94 0.93 1449\n", "\n", " accuracy 0.95 14000\n", " macro avg 0.95 0.95 0.95 14000\n", "weighted avg 0.95 0.95 0.95 14000\n", - "\n" + "\n", + " fit_time score_time test_score\n", + "0 7.513900 6.548804 0.953036\n", + "1 9.749506 5.649257 0.950357\n", + "2 9.862697 5.623950 0.953571\n", + "3 9.706705 5.707787 0.945179\n", + "4 7.644520 6.525581 0.952500\n", + "5 9.088006 5.999788 0.950000\n", + "6 9.022692 5.937026 0.948214\n", + "7 9.722373 5.648439 0.950714\n", + "8 7.124760 5.989992 0.955893\n", + "9 7.688745 6.399412 0.953571\n" ] } ], "source": [ - "# accuracies.append(cv(3,5))\n", - "accuracies.append(cv(3,10))" + "names.append('standard+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('standard', StandardScaler()),\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(3,10)) # Pipeline: standard+pca95+knn (95.0500%)" ] }, { "cell_type": "code", "execution_count": 18, - "id": "24035514", + "id": "375615c0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: minmax+pca95+knn (97.2571%)\n", + "Pipeline: minmax+pca95+knn (97.5500%) (8.169)\n", " precision recall f1-score support\n", "\n", - " 0 0.97 0.99 0.98 1404\n", - " 1 0.98 0.99 0.99 1590\n", - " 2 0.98 0.97 0.97 1395\n", - " 3 0.98 0.97 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.97 0.96 0.97 1245\n", - " 6 0.98 0.99 0.98 1334\n", - " 7 0.97 0.98 0.97 1447\n", - " 8 0.99 0.93 0.96 1321\n", - " 9 0.95 0.97 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 
0.98 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.97 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", "\n", - " accuracy 0.97 14000\n", - " macro avg 0.97 0.97 0.97 14000\n", - "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + " accuracy 0.98 14000\n", + " macro avg 0.98 0.98 0.98 14000\n", + "weighted avg 0.98 0.98 0.98 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 8.054958 5.493052 0.973750\n", + "1 8.554230 4.945253 0.972857\n", + "2 7.932777 5.341562 0.974286\n", + "3 7.616275 5.701586 0.973036\n", + "4 8.145786 5.175171 0.974464\n", + "5 7.198905 5.762730 0.975179\n", + "6 7.349528 5.753122 0.975893\n", + "7 6.830007 5.986614 0.975357\n", + "8 8.169036 5.468261 0.976607\n", + "9 8.215247 5.327437 0.975536\n" ] } ], "source": [ - "# accuracies.append(cv(4,5))\n", - "accuracies.append(cv(4,10))" + "names.append('minmax+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('minmax', MinMaxScaler()),\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(4))" ] }, { "cell_type": "code", "execution_count": 19, - "id": "1c27528e", + "id": "0c04fd54", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: standard+pca99+knn (94.2929%)\n", + "Pipeline: standard+pca99+knn (94.9071%) (8.42)\n", " precision recall f1-score support\n", "\n", - " 0 0.95 0.98 0.96 1404\n", - " 1 0.96 0.99 0.97 1590\n", - " 2 0.94 0.95 0.94 1395\n", - " 3 0.94 0.94 0.94 1462\n", - " 4 0.96 0.93 0.94 1374\n", - " 5 0.93 0.92 0.92 1245\n", - " 6 0.96 0.97 0.96 1334\n", - " 7 0.93 0.94 0.94 1447\n", - " 8 0.97 0.88 0.92 1321\n", - " 9 0.91 0.91 0.91 1428\n", + " 0 0.97 0.99 0.98 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.95 0.93 0.94 1369\n", + " 3 0.94 0.95 0.95 1415\n", + " 4 0.95 0.93 0.94 1373\n", + " 
5 0.93 0.91 0.92 1257\n", + " 6 0.96 0.97 0.97 1351\n", + " 7 0.94 0.95 0.95 1422\n", + " 8 0.97 0.91 0.94 1411\n", + " 9 0.92 0.94 0.93 1449\n", "\n", - " accuracy 0.94 14000\n", - " macro avg 0.94 0.94 0.94 14000\n", - "weighted avg 0.94 0.94 0.94 14000\n", - "\n" + " accuracy 0.95 14000\n", + " macro avg 0.95 0.95 0.95 14000\n", + "weighted avg 0.95 0.95 0.95 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 8.228997 7.326211 0.949821\n", + "1 8.296160 7.341953 0.946786\n", + "2 7.655164 7.585432 0.950179\n", + "3 8.579497 7.275667 0.943036\n", + "4 8.647136 7.235398 0.948750\n", + "5 8.524955 7.204496 0.945893\n", + "6 8.622762 7.377557 0.946071\n", + "7 8.968744 7.107852 0.947679\n", + "8 8.420293 7.159137 0.953214\n", + "9 7.535510 7.697132 0.947679\n" ] } ], "source": [ - "# accuracies.append(cv(5,5))\n", - "accuracies.append(cv(5,10))" + "names.append('standard+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('standard', StandardScaler()),\n", + " ('pca99', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(5,10)) # Pipeline: standard+pca99+knn (94.5357%)" ] }, { "cell_type": "code", "execution_count": 20, - "id": "46bcb35f", + "id": "22c4ca56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: minmax+pca99+knn (96.9929%)\n", + "Pipeline: minmax+pca99+knn (97.3643%) (7.296)\n", " precision recall f1-score support\n", "\n", - " 0 0.96 0.99 0.98 1404\n", - " 1 0.97 0.99 0.98 1590\n", - " 2 0.97 0.97 0.97 1395\n", - " 3 0.97 0.96 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.97 0.96 0.96 1245\n", - " 6 0.98 0.98 0.98 1334\n", - " 7 0.97 0.97 0.97 1447\n", - " 8 0.99 0.93 0.96 1321\n", - " 9 0.95 0.96 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.96 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 
0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", "\n", " accuracy 0.97 14000\n", " macro avg 0.97 0.97 0.97 14000\n", "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + "\n", + " fit_time score_time test_score\n", + "0 7.414330 6.520805 0.972679\n", + "1 7.380615 6.424363 0.971429\n", + "2 7.606246 6.410645 0.972143\n", + "3 8.741697 5.490113 0.970179\n", + "4 8.586719 5.866966 0.972679\n", + "5 8.284014 6.071847 0.973214\n", + "6 8.925915 5.541398 0.975536\n", + "7 7.440964 6.649676 0.972857\n", + "8 7.296453 6.530696 0.975714\n", + "9 7.390439 6.666066 0.974286\n" ] } ], "source": [ - "# accuracies.append(cv(6,5))\n", - "accuracies.append(cv(6,10))" + "names.append('minmax+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('minmax', MinMaxScaler()),\n", + " ('pca99', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(6))" ] }, { "cell_type": "code", "execution_count": 21, - "id": "45d8092d", + "id": "eb454722", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: maxabs+pca95+knn (96.9929%)\n", + "Pipeline: maxabs+pca95+knn (97.5500%) (9.02)\n", " precision recall f1-score support\n", "\n", - " 0 0.96 0.99 0.98 1404\n", - " 1 0.97 0.99 0.98 1590\n", - " 2 0.97 0.97 0.97 1395\n", - " 3 0.97 0.96 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.97 0.96 0.96 1245\n", - " 6 0.98 0.98 0.98 1334\n", - " 7 0.97 0.97 0.97 1447\n", - " 8 0.99 0.93 0.96 1321\n", - " 9 0.95 0.96 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.98 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.97 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", "\n", - " accuracy 0.97 14000\n", - " macro avg 0.97 0.97 0.97 14000\n", - "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + " accuracy 0.98 14000\n", + " 
macro avg 0.98 0.98 0.98 14000\n", + "weighted avg 0.98 0.98 0.98 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 6.947509 5.675072 0.973750\n", + "1 9.039265 5.126696 0.972857\n", + "2 8.992979 4.936873 0.974286\n", + "3 7.849278 5.546096 0.973036\n", + "4 7.618944 5.663611 0.974464\n", + "5 8.973946 5.126191 0.975179\n", + "6 7.703779 5.666359 0.975893\n", + "7 8.414855 5.337673 0.975357\n", + "8 9.020215 5.024463 0.976607\n", + "9 8.363406 5.144763 0.975536\n" ] } ], "source": [ - "# accuracies.append(cv(7,5))\n", - "accuracies.append(cv(7,10))" + "names.append('maxabs+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('maxabs', MaxAbsScaler()),\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(7))" ] }, { "cell_type": "code", "execution_count": 22, - "id": "a805b3fd", + "id": "a5e2b93b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: maxabs+pca99+knn (97.2571%)\n", + "Pipeline: maxabs+pca99+knn (97.3643%) (7.742)\n", " precision recall f1-score support\n", "\n", - " 0 0.97 0.99 0.98 1404\n", - " 1 0.98 0.99 0.99 1590\n", - " 2 0.98 0.97 0.97 1395\n", - " 3 0.98 0.97 0.97 1462\n", - " 4 0.98 0.97 0.97 1374\n", - " 5 0.97 0.96 0.97 1245\n", - " 6 0.98 0.99 0.98 1334\n", - " 7 0.97 0.98 0.97 1447\n", - " 8 0.99 0.93 0.96 1321\n", - " 9 0.95 0.97 0.96 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.96 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", "\n", " accuracy 0.97 14000\n", " macro avg 0.97 0.97 0.97 14000\n", "weighted avg 0.97 0.97 0.97 14000\n", - "\n" + "\n", + " fit_time score_time test_score\n", + "0 7.651374 6.346306 0.972679\n", + "1 8.849486 5.692084 0.971429\n", + "2 8.812459 5.638000 0.972143\n", + 
"3 7.636452 6.359757 0.970179\n", + "4 7.290244 6.555213 0.972679\n", + "5 7.486546 6.490154 0.973214\n", + "6 8.782154 5.571597 0.975536\n", + "7 8.695853 5.666834 0.972857\n", + "8 7.742428 6.395004 0.975714\n", + "9 7.034753 6.624636 0.974286\n" ] } ], "source": [ - "# accuracies.append(cv(8,5))\n", - "accuracies.append(cv(8,10))" + "names.append('maxabs+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('maxabs', MaxAbsScaler()),\n", + " ('pca99', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(8))" ] }, { "cell_type": "code", "execution_count": 23, - "id": "3af8abf8", + "id": "91b4cbc2", "metadata": {}, "outputs": [ { @@ -679,79 +770,357 @@ "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: power+pca95+knn (94.1071%)\n", + "Pipeline: power+pca95+knn (94.9786%) (38.93)\n", " precision recall f1-score support\n", "\n", - " 0 0.95 0.98 0.97 1404\n", - " 1 0.96 0.99 0.97 1590\n", - " 2 0.94 0.95 0.94 1395\n", - " 3 0.94 0.93 0.94 1462\n", - " 4 0.95 0.93 0.94 1374\n", - " 5 0.93 0.91 0.92 1245\n", - " 6 0.95 0.97 0.96 1334\n", - " 7 0.94 0.94 0.94 1447\n", - " 8 0.95 0.89 0.92 1321\n", - " 9 0.90 0.92 0.91 1428\n", + " 0 0.97 0.99 0.98 1359\n", + " 1 0.98 0.98 0.98 1594\n", + " 2 0.95 0.94 0.95 1369\n", + " 3 0.94 0.94 0.94 1415\n", + " 4 0.96 0.93 0.95 1373\n", + " 5 0.94 0.91 0.93 1257\n", + " 6 0.95 0.98 0.96 1351\n", + " 7 0.94 0.95 0.94 1422\n", + " 8 0.96 0.92 0.94 1411\n", + " 9 0.92 0.94 0.93 1449\n", "\n", - " accuracy 0.94 14000\n", - " macro avg 0.94 0.94 0.94 14000\n", - "weighted avg 0.94 0.94 0.94 14000\n", - "\n" + " accuracy 0.95 14000\n", + " macro avg 0.95 0.95 0.95 14000\n", + "weighted avg 0.95 0.95 0.95 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 36.886070 6.967202 0.946429\n", + "1 37.332160 6.735316 0.945357\n", + "2 37.508776 6.800920 0.948929\n", + "3 38.891379 6.085247 0.945714\n", + "4 38.397027 6.117569 0.949107\n", + "5 37.785432 6.497440 
0.945357\n", + "6 39.151339 5.717458 0.949643\n", + "7 38.417527 6.293112 0.948571\n", + "8 38.930055 6.054872 0.956607\n", + "9 37.403106 6.885915 0.949821\n" ] } ], "source": [ - "# accuracies.append(cv(9,5))\n", - "accuracies.append(cv(9,10)) # likes to die" + "names.append('power+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('power', PowerTransformer()),\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(9,10)) # likes to die # Pipeline: power+pca95+knn (94.3714%)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ea870baa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/jupyterhub/lib/python3.8/site-packages/sklearn/preprocessing/_data.py:3237: RuntimeWarning: divide by zero encountered in log\n", + " loglike = -n_samples / 2 * np.log(x_trans.var())\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline: power+pca99+knn (94.9786%) (38.16)\n", + " precision recall f1-score support\n", + "\n", + " 0 0.97 0.99 0.98 1359\n", + " 1 0.98 0.98 0.98 1594\n", + " 2 0.95 0.94 0.95 1369\n", + " 3 0.94 0.94 0.94 1415\n", + " 4 0.96 0.93 0.95 1373\n", + " 5 0.94 0.91 0.93 1257\n", + " 6 0.95 0.98 0.96 1351\n", + " 7 0.94 0.95 0.94 1422\n", + " 8 0.96 0.92 0.94 1411\n", + " 9 0.92 0.94 0.93 1449\n", + "\n", + " accuracy 0.95 14000\n", + " macro avg 0.95 0.95 0.95 14000\n", + "weighted avg 0.95 0.95 0.95 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 38.908710 6.235653 0.946429\n", + "1 39.838243 5.715713 0.945357\n", + "2 37.945147 6.488883 0.948929\n", + "3 37.087087 7.073378 0.945714\n", + "4 37.081666 6.340769 0.949107\n", + "5 38.896161 6.251284 0.945357\n", + "6 39.849142 5.632420 0.949643\n", + "7 38.063409 6.452322 0.948571\n", + "8 38.157108 6.539793 0.956607\n", + "9 39.793286 5.645674 0.949821\n" + ] + } + ], + "source": [ + 
"names.append('power+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('power', PowerTransformer()),\n", + " ('pca99', PCA(n_components=n99_components)), # was n95_components, which made this run a duplicate of power+pca95; re-run to refresh the stored output\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(10,10)) # likes to die # Pipeline: power+pca99+knn (94.6429%)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "0faded62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline: bin+pca95+knn (96.8286%) (8.109)\n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 0.99 0.99 1359\n", + " 1 0.96 0.99 0.98 1594\n", + " 2 0.98 0.97 0.98 1369\n", + " 3 0.96 0.96 0.96 1415\n", + " 4 0.98 0.95 0.97 1373\n", + " 5 0.97 0.94 0.95 1257\n", + " 6 0.97 0.99 0.98 1351\n", + " 7 0.97 0.97 0.97 1422\n", + " 8 0.97 0.95 0.96 1411\n", + " 9 0.94 0.97 0.95 1449\n", + "\n", + " accuracy 0.97 14000\n", + " macro avg 0.97 0.97 0.97 14000\n", + "weighted avg 0.97 0.97 0.97 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 7.204604 6.520073 0.967679\n", + "1 8.737834 5.673134 0.969107\n", + "2 7.240958 6.520879 0.968571\n", + "3 7.118583 6.613050 0.965357\n", + "4 8.733715 5.555613 0.969643\n", + "5 7.919914 6.100565 0.966429\n", + "6 8.739508 5.720392 0.969821\n", + "7 8.712544 5.598128 0.967679\n", + "8 7.183513 6.587496 0.970536\n", + "9 8.109238 5.849805 0.970714\n" + ] + } + ], + "source": [ + "names.append('bin+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('bin', Binarizer()),\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(11,10))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "0dbf3a59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline: bin+pca99+knn (96.6571%) (7.486)\n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 0.99 0.98 1359\n", + " 1 0.96 0.99 0.98 1594\n", + " 2
0.98 0.97 0.97 1369\n", + " 3 0.96 0.96 0.96 1415\n", + " 4 0.98 0.95 0.97 1373\n", + " 5 0.97 0.94 0.95 1257\n", + " 6 0.96 0.99 0.97 1351\n", + " 7 0.97 0.97 0.97 1422\n", + " 8 0.97 0.94 0.96 1411\n", + " 9 0.93 0.97 0.95 1449\n", + "\n", + " accuracy 0.97 14000\n", + " macro avg 0.97 0.97 0.97 14000\n", + "weighted avg 0.97 0.97 0.97 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 8.838046 5.829949 0.965536\n", + "1 8.505749 5.961209 0.967143\n", + "2 8.716863 6.040388 0.966607\n", + "3 7.011340 6.811409 0.965179\n", + "4 7.140592 6.693154 0.966964\n", + "5 6.975613 6.828256 0.964821\n", + "6 7.691550 6.389498 0.968393\n", + "7 7.684490 6.294239 0.966429\n", + "8 7.486223 6.413954 0.969107\n", + "9 8.752805 5.730480 0.968750\n" + ] + } + ], + "source": [ + "names.append('bin+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('bin', Binarizer()),\n", + " ('pca99', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(12))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "5a0b6d1b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pipeline: pca95+knn (97.5500%) (7.655)\n", + " precision recall f1-score support\n", + "\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.98 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.97 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", + "\n", + " accuracy 0.98 14000\n", + " macro avg 0.98 0.98 0.98 14000\n", + "weighted avg 0.98 0.98 0.98 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 7.638611 5.326603 0.973750\n", + "1 6.768761 5.676790 0.972857\n", + "2 9.021050 4.722342 0.974286\n", + "3 9.075169 4.697984 0.973036\n", + "4 7.276331 5.603889 0.974464\n", + "5 6.989354 5.469358 0.975179\n", + "6 8.597729 4.884387 
0.975893\n", + "7 8.666089 4.922805 0.975357\n", + "8 7.654789 5.129449 0.976607\n", + "9 6.945298 5.348174 0.975536\n" + ] + } + ], + "source": [ + "names.append('pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('pca95', PCA(n_components=n95_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(13))" ] }, { "cell_type": "code", "execution_count": 28, - "id": "d971b4df", + "id": "48696165", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/jupyterhub/lib/python3.8/site-packages/sklearn/preprocessing/_data.py:3237: RuntimeWarning: divide by zero encountered in log\n", - " loglike = -n_samples / 2 * np.log(x_trans.var())\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Pipeline: power+pca99+knn (94.4429%)\n", + "Pipeline: pca99+knn (97.3643%) (8.14)\n", " precision recall f1-score support\n", "\n", - " 0 0.95 0.98 0.97 1404\n", - " 1 0.96 0.99 0.98 1590\n", - " 2 0.94 0.95 0.94 1395\n", - " 3 0.94 0.93 0.94 1462\n", - " 4 0.95 0.93 0.94 1374\n", - " 5 0.94 0.91 0.93 1245\n", - " 6 0.95 0.97 0.96 1334\n", - " 7 0.94 0.94 0.94 1447\n", - " 8 0.95 0.90 0.93 1321\n", - " 9 0.91 0.92 0.92 1428\n", + " 0 0.98 1.00 0.99 1359\n", + " 1 0.97 0.99 0.98 1594\n", + " 2 0.98 0.96 0.97 1369\n", + " 3 0.97 0.97 0.97 1415\n", + " 4 0.98 0.97 0.97 1373\n", + " 5 0.96 0.96 0.96 1257\n", + " 6 0.98 0.99 0.98 1351\n", + " 7 0.97 0.98 0.97 1422\n", + " 8 0.99 0.95 0.97 1411\n", + " 9 0.96 0.97 0.97 1449\n", "\n", - " accuracy 0.94 14000\n", - " macro avg 0.94 0.94 0.94 14000\n", - "weighted avg 0.94 0.94 0.94 14000\n", - "\n" + " accuracy 0.97 14000\n", + " macro avg 0.97 0.97 0.97 14000\n", + "weighted avg 0.97 0.97 0.97 14000\n", + "\n", + " fit_time score_time test_score\n", + "0 8.260046 5.959701 0.972857\n", + "1 8.729834 5.585496 0.971429\n", + "2 8.598983 5.930078 0.972143\n", + "3 7.718438 6.483653 0.970179\n", + "4 6.770636 6.657404 0.972679\n", + "5 6.947608 6.525910 
0.973214\n", + "6 7.537212 6.372437 0.975536\n", + "7 8.566531 5.778190 0.972857\n", + "8 8.140383 6.020524 0.975714\n", + "9 8.087441 6.029204 0.974286\n" ] } ], "source": [ - "# accuracies.append(cv(10,5))\n", - "accuracies.append(cv(10,10)) # likes to die" + "names.append('pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('pca99', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "accuracies.append(cv(14))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e72be17c", + "metadata": {}, + "outputs": [], + "source": [ + "names.append('kpca+pca95+knn')\n", + "classifiers.append(Pipeline([\n", + " ('kpca', KernelPCA(n_jobs=-1)),\n", + " ('pca', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "# accuracies.append(cv(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "168ade90", + "metadata": {}, + "outputs": [], + "source": [ + "names.append('kpca+pca99+knn')\n", + "classifiers.append(Pipeline([\n", + " ('kpca', KernelPCA(n_jobs=-1)),\n", + " ('pca', PCA(n_components=n99_components)),\n", + " ('knn', op_kNN)\n", + "]))\n", + "\n", + "# accuracies.append(cv(16))" ] }, { "cell_type": "markdown", - "id": "281e0f59", + "id": "e818ffa3", "metadata": {}, "source": [ "# Auswertung" @@ -759,43 +1128,47 @@ }, { "cell_type": "code", - "execution_count": 29, - "id": "e3eeabc7", + "execution_count": 31, + "id": "5e3edce3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Maximum accuracy (97.2571%) at: ['minmax+pca95+knn', 'maxabs+pca99+knn']\n" + "Maximum accuracy (97.55%) for ['minmax+pca95+knn', 'maxabs+pca95+knn', 'pca95+knn']\n" ] } ], "source": [ - "print(f\"Maximum accuracy ({max(accuracies):.6}%) at: {[names[n] for n in np.where(accuracies==max(accuracies))[0]]}\")" + "print(f\"Maximum accuracy ({max(accuracies):.6}%) for {[names[n] for n in np.where(accuracies==max(accuracies))[0]]}\")" ] }, { "cell_type": "code", 
- "execution_count": 30, - "id": "7754b1e8", + "execution_count": 32, + "id": "908d1f54", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "name: knn (baseline) (96.9857%)\n", - "name: scalar+knn (94.2143%)\n", - "name: minmax+knn (96.9857%)\n", - "name: standard+pca95+knn (94.7000%)\n", - "name: minmax+pca95+knn (97.2571%)\n", - "name: standard+pca99+knn (94.2929%)\n", - "name: minmax+pca99+knn (96.9929%)\n", - "name: maxabs+pca95+knn (96.9929%)\n", - "name: maxabs+pca99+knn (97.2571%)\n", - "name: power+pca95+knn (94.1071%)\n", - "name: power+pca99+knn (94.4429%)\n" + "name: knn (baseline) (97.3071%)\n", + "name: scalar+knn (94.8000%)\n", + "name: minmax+knn (97.3071%)\n", + "name: standard+pca95+knn (95.3929%)\n", + "name: minmax+pca95+knn (97.5500%)\n", + "name: standard+pca99+knn (94.9071%)\n", + "name: minmax+pca99+knn (97.3643%)\n", + "name: maxabs+pca95+knn (97.5500%)\n", + "name: maxabs+pca99+knn (97.3643%)\n", + "name: power+pca95+knn (94.9786%)\n", + "name: power+pca99+knn (94.9786%)\n", + "name: bin+pca95+knn (96.8286%)\n", + "name: bin+pca99+knn (96.6571%)\n", + "name: pca95+knn (97.5500%)\n", + "name: pca99+knn (97.3643%)\n" ] } ], @@ -804,10 +1177,111 @@ " print(f\"name: {n:20} ({a:.4f}%)\")" ] }, + { + "cell_type": "markdown", + "id": "810fb969", + "metadata": {}, + "source": [ + "Default n=3\\\n", + "name: knn (baseline) (97.2143%)\\\n", + "name: scalar+knn (94.6286%)\\\n", + "name: minmax+knn (97.2143%)\\\n", + "name: standard+pca95+knn (95.1429%)\\\n", + "name: minmax+pca95+knn (97.4357%)\\\n", + "name: standard+pca99+knn (94.7214%)\\\n", + "name: minmax+pca99+knn (97.2714%)\\\n", + "name: maxabs+pca95+knn (97.4357%)\\\n", + "name: maxabs+pca99+knn (97.2714%)\\\n", + "name: power+pca95+knn (94.8071%)\\\n", + "name: power+pca99+knn (94.8071%)\\\n", + "name: bin+pca95+knn (96.6643%)\\\n", + "name: bin+pca99+knn (96.5500%)\\\n", + "name: pca95+knn (97.4357%)\\\n", + "name: pca99+knn (97.2714%)\\" + ] + }, + { + 
"cell_type": "markdown", + "id": "78b1a366", + "metadata": {}, + "source": [ + "n=3 euclid distance\\\n", + "name: knn (baseline) (97.3071%)\\\n", + "name: scalar+knn (94.8000%)\\\n", + "name: minmax+knn (97.3071%)\\\n", + "name: standard+pca95+knn (95.3929%)\\\n", + "name: minmax+pca95+knn (97.5500%)\\\n", + "name: standard+pca99+knn (94.9071%)\\\n", + "name: minmax+pca99+knn (97.3643%)\\\n", + "name: maxabs+pca95+knn (97.5500%)\\\n", + "name: maxabs+pca99+knn (97.3643%)\\\n", + "name: power+pca95+knn (94.9786%)\\\n", + "name: power+pca99+knn (94.9786%)\\\n", + "name: bin+pca95+knn (96.8286%)\\\n", + "name: bin+pca99+knn (96.6571%)\\\n", + "name: pca95+knn (97.5500%)\\\n", + "name: pca99+knn (97.3643%)\\\n" + ] + }, + { + "cell_type": "markdown", + "id": "9300d407", + "metadata": {}, + "source": [ + "# Hyper Parameter Optimization" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "138aacf6", + "metadata": {}, + "outputs": [], + "source": [ + "# from sklearn.model_selection import GridSearchCV\n", + "\n", + "# grid_params = {\n", + "# 'n_neighbors': [3, 5, 7 , 11],\n", + "# 'weights': ['uniform', 'distance'],\n", + "# 'metrics': ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],\n", + "# }\n", + "\n", + "# gs = GridSearchCV(\n", + "# KNeighborsClassifier(n_jobs=-1),\n", + "# grid_params,\n", + "# cv = 3,\n", + "# verbose=1,\n", + "# n_jobs = -1\n", + "# )\n", + "\n", + "# gs_results = gs.fit(X_train, y_train)\n", + "# Results in gs_results.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "94f98446", + "metadata": {}, + "outputs": [], + "source": [ + "# pd.DataFrame.from_dict(gs_results.cv_results_).sort_values('rank_test_score')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "869ef7c5", + "metadata": {}, + "outputs": [], + "source": [ + "# pd.DataFrame.from_dict(gs_results.cv_results_).sort_values('rank_test_score')[['rank_test_score','mean_fit_time','param_n_neighbors', 'param_metric', 
'param_weights', 'mean_test_score']].to_csv('./gs_result_filtered.csv')" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "22316563", + "id": "bcb8d197", "metadata": {}, "outputs": [], "source": [] diff --git a/0-pilot-project/MNIST-kNN-prepoc-vis.ipynb b/0-pilot-project/MNIST-kNN-prepoc-vis.ipynb index b6d7a5a..1e12466 100644 --- a/0-pilot-project/MNIST-kNN-prepoc-vis.ipynb +++ b/0-pilot-project/MNIST-kNN-prepoc-vis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "804dacb6", + "id": "f2885b56", "metadata": {}, "source": [ "### Load MNIST dataset" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "7d09885b", + "id": "805542e2", "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "bf4121a0", + "id": "26d38ac4", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "71d91fd8", + "id": "749c3ec9", "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "1dc68441", + "id": "810daa97", "metadata": {}, "outputs": [ { @@ -74,7 +74,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "2c7a4966", + "id": "48b3d387", "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "markdown", - "id": "e2684670", + "id": "96ed2a09", "metadata": {}, "source": [ "### Fix labels" @@ -92,7 +92,7 @@ { "cell_type": "code", "execution_count": 113, - "id": "dbdbc64f", + "id": "4c537948", "metadata": {}, "outputs": [], "source": [ @@ -105,7 +105,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "4c94aaf6", + "id": "4d138b55", "metadata": {}, "outputs": [], "source": [ @@ -116,7 +116,7 @@ { "cell_type": "code", "execution_count": 126, - "id": "f1ba6703", + "id": "b5284df4", "metadata": {}, "outputs": [], "source": [ @@ -129,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "eec5415d", + "id": "a572aebf", "metadata": {}, 
"source": [ "### Prepare data for machine learning" @@ -137,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "27ed1cdb", + "id": "3b5bc85f", "metadata": {}, "source": [ "### Identify Train Set and Test Set" @@ -146,7 +146,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "09446324", + "id": "3db579b6", "metadata": {}, "outputs": [ { @@ -173,7 +173,7 @@ }, { "cell_type": "markdown", - "id": "2c3041ac", + "id": "7c035dc8", "metadata": {}, "source": [ "## Pipeline Declaration" @@ -181,14 +181,14 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "99f24362", + "execution_count": 140, + "id": "4bd42611", "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.decomposition import PCA\n", - "from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler\n", + "from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, Binarizer\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.model_selection import cross_val_predict\n", "from sklearn.metrics import classification_report, accuracy_score\n", @@ -200,17 +200,17 @@ }, { "cell_type": "code", - "execution_count": 122, - "id": "a6ee7588", + "execution_count": 143, + "id": "c347de5b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(3, 3)" + "(4, 4)" ] }, - "execution_count": 122, + "execution_count": 143, "metadata": {}, "output_type": "execute_result" } @@ -218,13 +218,15 @@ "source": [ "names = ['scaler', \n", " 'minmax', \n", - " 'maxabs', \n", + " 'maxabs',\n", + " 'bin'\n", " ]\n", "\n", "classifiers = [\n", " Pipeline([('scaler', StandardScaler())]),\n", " Pipeline([('minmax', MinMaxScaler())]),\n", " Pipeline([('maxabs', MaxAbsScaler())]),\n", + " Pipeline([('bin', Binarizer())]),\n", "]\n", "\n", "len(names), len(classifiers)" @@ -232,7 +234,7 @@ }, { "cell_type": "markdown", - "id": "650c96b4", + "id": "bd566c8d", "metadata": {}, "source": [ "# Crossvalidation" @@ -241,7 +243,7 @@ { 
"cell_type": "code", "execution_count": 123, - "id": "584cb66b", + "id": "77f6d632", "metadata": {}, "outputs": [], "source": [ @@ -258,7 +260,7 @@ { "cell_type": "code", "execution_count": 128, - "id": "0b815be6", + "id": "bb8eb2e0", "metadata": {}, "outputs": [ { @@ -290,7 +292,7 @@ { "cell_type": "code", "execution_count": 132, - "id": "8640f2ad", + "id": "70f4411f", "metadata": {}, "outputs": [ { @@ -320,7 +322,7 @@ { "cell_type": "code", "execution_count": 133, - "id": "3ef8cf89", + "id": "70f3533d", "metadata": {}, "outputs": [ { @@ -350,7 +352,7 @@ { "cell_type": "code", "execution_count": 134, - "id": "fe0246a2", + "id": "2ec8d300", "metadata": {}, "outputs": [ { @@ -377,10 +379,71 @@ "a = cv(2)" ] }, + { + "cell_type": "code", + "execution_count": 137, + "id": "b1f285c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n", + "3\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAADaklEQVR4nO3dQWqDUBRAUX/JwrI0l5ad2VHpRCJto17Tc4aVgB1cHuTx88eyLBPQ83H2CwDrxAlR4oQocUKUOCHqtvHcV7mwv7H2R5MTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBB1O/sFrmiM8afPL8vyojd5vWf/W/m935HJCVHihChxQpQ4IUqcECVOiBInRI2N3ZXFFuxvdblsckKUOCFKnBAlTogSJ0SJE6IcGftnto67ORbWYXJClDghSpwQJU6IEidEiROixAlRl91zPh6Pp8/v9/sh73E1f9lj2pEey+SEKHFClDghSpwQJU6IEidEiROiLrvn3JMd6jp7zGOZnBAlTogSJ0SJE6LECVHihChxQpQrAOF8rgCEKxEnRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQrAE8wxuovIU7T5Jo9vpmcECVOiBInRIkTosQJUeKEKHFClD3nCcq7zHmef/WM1zM5IUqcECVOiBInRIkTosQJUeKEqLGxc+su5OB9rB7wNTkhSpwQJU6IEidEiROixAlR4oQo5zk5zLPf652m9jnXM5icECVOiBInRIkTosQJUeKEKKsUDmNV8jMmJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBDld2tjXJPHF5MTosQJUeKEKHFClDghSpwQZZUSs/
eqZJ7nUz7Lz5mcECVOiBInRIkTosQJUeKEKHFC1NjYqzmfBPtbPSdockKUOCFKnBAlTogSJ0SJE6LECVHOc17M1plKZy7fh8kJUeKEKHFClDghSpwQJU6IEidEOc8J53OeE65EnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVG3jeerV5MB+zM5IUqcECVOiBInRIkTosQJUZ8VcUzDwD2AfQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_digit(cv(2)-cv(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "2c7323b6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAADlUlEQVR4nO3dwWrbQBhG0ar0/V9ZWWURIjzU9lh3pHOW7cY4XH7Ih6Jt3/c/QM/fsz8AcEycECVOiBInRIkTov4N/t+vcmG+7egfXU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocULU6BWATLBth298y9t3b4T8JJcTosQJUeKEKHFClDghSpwQJU6IsnMeWHWHnG30vdhB38vlhChxQpQ4IUqcECVOiBInRIkTom65c9oxWYHLCVHihChxQpQ4IUqcECVOiFp2SrnqHPLqY1dX/V7uyOWEKHFClDghSpwQJU6IEidEiROilt05y/yJSN7B5YQocUKUOCFKnBAlTogSJ0SJE6LsnE+wY/IJLidEiROixAlR4oQocUKUOCFKnBC17M452hof/f1WOyUrcDkhSpwQJU6IEidEiROixAlR4oSoZXfOkVW3zPL7NVf9TlflckKUOCFKnBAlTogSJ0SJE6IuO6WUleeSR1793KaY/+NyQpQ4IUqcECVOiBInRIkTosQJUXbOCVbdMWcbfS920J9cTogSJ0SJE6LECVHihChxQpQ4IWobbEuGpwlm7qCvvBpxZYtvpIc/FJcTosQJUeKEKHFClDghSpwQJU6I8jznYl7d82bugVfdUM/ickKUOCFKnBAlTogSJ0SJE6LECVF2zhMs/uwhH+JyQpQ4IUqcECVOiBInRIkTokwpvM1d/yznLC4nRIkTosQJUeKEKHFClDghSpwQZed8wpl7ncfNjo1+Jit+by4nRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBDlec7FlJ9b9Jzre7mcECVOiBInRIkTosQJUeKEKFPKE8qvuvOavetwOSFKnBAlTogSJ0SJE6LECVHihCg75wTlHXRVV3wkbMTlhChxQpQ4IUqcECVOiBInRIkTouycJ3i02d15A73jlvmIywlR4oQocUKUOCFKnBAlTogSJ0TZOWNsfXxzOSFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVGjVwBuH/kUwC8uJ0SJE6LECVHihChxQpQ4IeoLGL5a6fFroA8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "a = cv(3)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "87a073e1", + "id": "b608bd89", "metadata": {}, "outputs": [], "source": [] diff --git a/0-pilot-project/gs_result.csv b/0-pilot-project/gs_result.csv new file mode 100644 index 0000000..3deade6 --- /dev/null +++ b/0-pilot-project/gs_result.csv @@ -0,0 +1,25 @@ +,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_metric,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score +1,0.6206035614013672,0.09060096919428257,1070.8031173547108,58.40420733215167,euclidean,3,distance,"{'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}",0.9682862806021321,0.970589810896234,0.9706953819779278,0.9698571578254312,0.001111613767256926,1 +0,0.3251535892486572,0.127680187632828,1156.720083475113,103.6800414474859,euclidean,3,uniform,"{'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}",0.9673755825788826,0.968714844377779,0.9698917818493518,0.9686607362686711,0.0010279463208361468,2 +3,0.581209659576416,0.10390654521562699,1097.628236611684,49.50409271683481,euclidean,5,distance,"{'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}",0.9670541597471474,0.9677505758825735,0.9693024750883961,0.968035736906039,0.0009397581516544441,3 +5,0.3833950360616048,0.09814461155343633,1019.8115065892538,49.683140897762506,euclidean,7,distance,"{'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}",0.9656077570043392,0.9673755825788826,0.9683917282760098,0.9671250226197438,0.0011502780041406022,4 +2,0.5214482148488363,0.1338813546209848,1234.2422309716542,11.319769031350406,euclidean,5,uniform,"{'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}",0.9659291798360744,0.9661970321958536,0.9679095681988642,0.9666785934102641,0.0008772724469576776,5 
+4,0.38950196901957196,0.10594237544622603,1119.6926170190175,64.25975988670193,euclidean,7,uniform,"{'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}",0.9643756361493545,0.9659827503080303,0.9674809814636237,0.9659464559736696,0.0012680116558989624,6 +7,0.44870662689208984,0.1539214539086042,1095.8349254131317,95.22915205238016,euclidean,11,distance,"{'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'distance'}",0.9646434885091337,0.9632506562382814,0.9647487410264652,0.9642142952579601,0.0006827491697964685,7 +6,0.6263217926025391,0.045471428070326544,1231.947474082311,10.357145120994318,euclidean,11,uniform,"{'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'uniform'}",0.9637327904858842,0.961911394439385,0.9634094074788385,0.9630178641347026,0.000793452595644972,8 +9,0.6597681840260824,0.08367258300110442,1603.911631822586,11.793869730582724,manhattan,3,distance,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}",0.961161407832003,0.9625006696308994,0.9638915675559842,0.9625178816729623,0.0011146494876286455,9 +11,0.743593692779541,0.12585558461284738,1623.6791274547577,20.225285349079297,manhattan,5,distance,"{'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'distance'}",0.9598757165050624,0.9628220924626346,0.9633558341369335,0.9620178810348768,0.001530331488777754,10 +8,0.5049304962158203,0.03915105846837655,1631.9402144749959,11.72758222926691,manhattan,3,uniform,"{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}",0.9600899983928859,0.9608935554722237,0.9630343940855031,0.9613393159835374,0.0012426834737134394,11 +13,0.7563843727111816,0.0361495198811297,1650.8071510791779,2.759115546096175,manhattan,7,distance,"{'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}",0.9593935822574597,0.9614828306637382,0.9609986070931105,0.9606250066714361,0.0008929063713852963,12 +10,0.5714000860850016,0.200821086554179,1670.9476985931396,21.43965257048912,manhattan,5,uniform,"{'metric': 'manhattan', 
'n_neighbors': 5, 'weights': 'uniform'}",0.9581078909305191,0.9610006964161354,0.9616950605378763,0.9602678826281769,0.0015534281409951467,13 +12,0.7014182408650717,0.07630416360771637,1602.6147842407227,13.998164063202559,manhattan,7,uniform,"{'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'uniform'}",0.9576793271548723,0.9597150050891948,0.9595521268616737,0.9589821530352469,0.0009236336882771337,14 +15,0.5463813940684,0.20986627389723325,1621.221689303716,10.57220404161948,manhattan,11,distance,"{'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'distance'}",0.9573043338511812,0.957036481491402,0.9578913532626165,0.9574107228683998,0.00035701578260487497,15 +14,0.659561554590861,0.16166694480866448,1647.0242857138317,15.482865038769452,manhattan,11,uniform,"{'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'uniform'}",0.9554829378046821,0.9555900787485938,0.9565520197149898,0.9558750120894218,0.00048071078572838955,16 +19,5.247008244196574,3.982239002878492,1610.0853408177693,4.880005667545898,chebyshev,5,distance,"{'metric': 'chebyshev', 'n_neighbors': 5, 'weights': 'distance'}",0.798253602614239,0.8040392135854717,0.8051001821493625,0.8024643327830244,0.003008776051168824,17 +21,20.442909558614094,10.268306972202161,1091.167144536972,373.74252764500056,chebyshev,7,distance,"{'metric': 'chebyshev', 'n_neighbors': 7, 'weights': 'distance'}",0.7973429045909894,0.8016285423474581,0.7997964213007608,0.7995892894130695,0.0017557240593918855,18 +17,3.0204404989878335,3.242756624385942,1615.3644462426503,47.3978612160567,chebyshev,3,distance,"{'metric': 'chebyshev', 'n_neighbors': 3, 'weights': 'distance'}",0.7828788771629078,0.80393207264156,0.8089038894246223,0.7985716130763634,0.011280549953147328,19 +23,27.638134558995564,4.867014100523201,734.1344640254974,31.858155565000086,chebyshev,11,distance,"{'metric': 'chebyshev', 'n_neighbors': 11, 'weights': 
'distance'}",0.7919858573954036,0.7913965822038892,0.7912782599378549,0.7915535665123826,0.00030946900255889454,20 +18,1.1796804269154866,0.7346351248980267,1684.378450314204,9.234898911008068,chebyshev,5,uniform,"{'metric': 'chebyshev', 'n_neighbors': 5, 'weights': 'uniform'}",0.783575293298334,0.7888251995500081,0.7891353262616522,0.7871786063699981,0.0025510708161656103,21 +20,0.49048900604248047,0.05161896574410176,1679.1593386332195,1.1331046631334827,chebyshev,7,uniform,"{'metric': 'chebyshev', 'n_neighbors': 7, 'weights': 'uniform'}",0.7845395617935395,0.7868966625595971,0.7855994856959178,0.7856785700163514,0.0009639058573122337,22 +16,0.6336509386698405,0.19319871048710188,1672.988091468811,6.364915306048909,chebyshev,3,uniform,"{'metric': 'chebyshev', 'n_neighbors': 3, 'weights': 'uniform'}",0.7702898162532812,0.7848074141533187,0.7878495660559306,0.7809822654875102,0.007662028670422474,23 +22,21.680665890375774,3.469126600885206,794.0563353697459,13.966075572059058,chebyshev,11,uniform,"{'metric': 'chebyshev', 'n_neighbors': 11, 'weights': 'uniform'}",0.78052177639685,0.7794503669577328,0.7796528447444552,0.7798749960330128,0.0004647529399678837,24 diff --git a/0-pilot-project/gs_result_filtered.csv b/0-pilot-project/gs_result_filtered.csv new file mode 100644 index 0000000..9e931a1 --- /dev/null +++ b/0-pilot-project/gs_result_filtered.csv @@ -0,0 +1,25 @@ +,rank_test_score,mean_fit_time,param_n_neighbors,param_metric,param_weights,mean_test_score +1,1,0.6206035614013672,3,euclidean,distance,0.9698571578254312 +0,2,0.3251535892486572,3,euclidean,uniform,0.9686607362686711 +3,3,0.581209659576416,5,euclidean,distance,0.968035736906039 +5,4,0.3833950360616048,7,euclidean,distance,0.9671250226197438 +2,5,0.5214482148488363,5,euclidean,uniform,0.9666785934102641 +4,6,0.38950196901957196,7,euclidean,uniform,0.9659464559736696 +7,7,0.44870662689208984,11,euclidean,distance,0.9642142952579601 +6,8,0.6263217926025391,11,euclidean,uniform,0.9630178641347026 
+9,9,0.6597681840260824,3,manhattan,distance,0.9625178816729623 +11,10,0.743593692779541,5,manhattan,distance,0.9620178810348768 +8,11,0.5049304962158203,3,manhattan,uniform,0.9613393159835374 +13,12,0.7563843727111816,7,manhattan,distance,0.9606250066714361 +10,13,0.5714000860850016,5,manhattan,uniform,0.9602678826281769 +12,14,0.7014182408650717,7,manhattan,uniform,0.9589821530352469 +15,15,0.5463813940684,11,manhattan,distance,0.9574107228683998 +14,16,0.659561554590861,11,manhattan,uniform,0.9558750120894218 +19,17,5.247008244196574,5,chebyshev,distance,0.8024643327830244 +21,18,20.442909558614094,7,chebyshev,distance,0.7995892894130695 +17,19,3.0204404989878335,3,chebyshev,distance,0.7985716130763634 +23,20,27.638134558995564,11,chebyshev,distance,0.7915535665123826 +18,21,1.1796804269154866,5,chebyshev,uniform,0.7871786063699981 +20,22,0.49048900604248047,7,chebyshev,uniform,0.7856785700163514 +16,23,0.6336509386698405,3,chebyshev,uniform,0.7809822654875102 +22,24,21.680665890375774,11,chebyshev,uniform,0.7798749960330128