Added Binarizer in Preprocessing and Hyperparameter optimization in pipeline

This commit is contained in:
Tuan-Dat Tran
2021-05-18 17:18:26 +00:00
parent 8b647de135
commit b127bc6b84
4 changed files with 943 additions and 356 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "804dacb6",
"id": "f2885b56",
"metadata": {},
"source": [
"### Load MNIST dataset"
@@ -11,7 +11,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "7d09885b",
"id": "805542e2",
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "bf4121a0",
"id": "26d38ac4",
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "71d91fd8",
"id": "749c3ec9",
"metadata": {},
"outputs": [],
"source": [
@@ -46,7 +46,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "1dc68441",
"id": "810daa97",
"metadata": {},
"outputs": [
{
@@ -74,7 +74,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "2c7a4966",
"id": "48b3d387",
"metadata": {},
"outputs": [],
"source": [
@@ -83,7 +83,7 @@
},
{
"cell_type": "markdown",
"id": "e2684670",
"id": "96ed2a09",
"metadata": {},
"source": [
"### Fix labels"
@@ -92,7 +92,7 @@
{
"cell_type": "code",
"execution_count": 113,
"id": "dbdbc64f",
"id": "4c537948",
"metadata": {},
"outputs": [],
"source": [
@@ -105,7 +105,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "4c94aaf6",
"id": "4d138b55",
"metadata": {},
"outputs": [],
"source": [
@@ -116,7 +116,7 @@
{
"cell_type": "code",
"execution_count": 126,
"id": "f1ba6703",
"id": "b5284df4",
"metadata": {},
"outputs": [],
"source": [
@@ -129,7 +129,7 @@
},
{
"cell_type": "markdown",
"id": "eec5415d",
"id": "a572aebf",
"metadata": {},
"source": [
"### Prepare data for machine learning"
@@ -137,7 +137,7 @@
},
{
"cell_type": "markdown",
"id": "27ed1cdb",
"id": "3b5bc85f",
"metadata": {},
"source": [
"### Identify Train Set and Test Set"
@@ -146,7 +146,7 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "09446324",
"id": "3db579b6",
"metadata": {},
"outputs": [
{
@@ -173,7 +173,7 @@
},
{
"cell_type": "markdown",
"id": "2c3041ac",
"id": "7c035dc8",
"metadata": {},
"source": [
"## Pipeline Declaration"
@@ -181,14 +181,14 @@
},
{
"cell_type": "code",
"execution_count": 10,
"id": "99f24362",
"execution_count": 140,
"id": "4bd42611",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler\n",
"from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, Binarizer\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.model_selection import cross_val_predict\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
@@ -200,17 +200,17 @@
},
{
"cell_type": "code",
"execution_count": 122,
"id": "a6ee7588",
"execution_count": 143,
"id": "c347de5b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3, 3)"
"(4, 4)"
]
},
"execution_count": 122,
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
@@ -218,13 +218,15 @@
"source": [
"names = ['scaler', \n",
" 'minmax', \n",
" 'maxabs', \n",
" 'maxabs',\n",
" 'bin'\n",
" ]\n",
"\n",
"classifiers = [\n",
" Pipeline([('scaler', StandardScaler())]),\n",
" Pipeline([('minmax', MinMaxScaler())]),\n",
" Pipeline([('maxabs', MaxAbsScaler())]),\n",
" Pipeline([('bin', Binarizer())]),\n",
"]\n",
"\n",
"len(names), len(classifiers)"
@@ -232,7 +234,7 @@
},
{
"cell_type": "markdown",
"id": "650c96b4",
"id": "bd566c8d",
"metadata": {},
"source": [
"# Crossvalidation"
@@ -241,7 +243,7 @@
{
"cell_type": "code",
"execution_count": 123,
"id": "584cb66b",
"id": "77f6d632",
"metadata": {},
"outputs": [],
"source": [
@@ -258,7 +260,7 @@
{
"cell_type": "code",
"execution_count": 128,
"id": "0b815be6",
"id": "bb8eb2e0",
"metadata": {},
"outputs": [
{
@@ -290,7 +292,7 @@
{
"cell_type": "code",
"execution_count": 132,
"id": "8640f2ad",
"id": "70f4411f",
"metadata": {},
"outputs": [
{
@@ -320,7 +322,7 @@
{
"cell_type": "code",
"execution_count": 133,
"id": "3ef8cf89",
"id": "70f3533d",
"metadata": {},
"outputs": [
{
@@ -350,7 +352,7 @@
{
"cell_type": "code",
"execution_count": 134,
"id": "fe0246a2",
"id": "2ec8d300",
"metadata": {},
"outputs": [
{
@@ -377,10 +379,71 @@
"a = cv(2)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "b1f285c2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n",
"3\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAADaklEQVR4nO3dQWqDUBRAUX/JwrI0l5ad2VHpRCJto17Tc4aVgB1cHuTx88eyLBPQ83H2CwDrxAlR4oQocUKUOCHqtvHcV7mwv7H2R5MTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBB1O/sFrmiM8afPL8vyojd5vWf/W/m935HJCVHihChxQpQ4IUqcECVOiBInRI2N3ZXFFuxvdblsckKUOCFKnBAlTogSJ0SJE6IcGftnto67ORbWYXJClDghSpwQJU6IEidEiROixAlRl91zPh6Pp8/v9/sh73E1f9lj2pEey+SEKHFClDghSpwQJU6IEidEiROiLrvn3JMd6jp7zGOZnBAlTogSJ0SJE6LECVHihChxQpQrAOF8rgCEKxEnRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQrAE8wxuovIU7T5Jo9vpmcECVOiBInRIkTosQJUeKEKHFClD3nCcq7zHmef/WM1zM5IUqcECVOiBInRIkTosQJUeKEqLGxc+su5OB9rB7wNTkhSpwQJU6IEidEiROixAlR4oQo5zk5zLPf652m9jnXM5icECVOiBInRIkTosQJUeKEKKsUDmNV8jMmJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBDld2tjXJPHF5MTosQJUeKEKHFClDghSpwQZZUSs/eqZJ7nUz7Lz5mcECVOiBInRIkTosQJUeKEKHFC1NjYqzmfBPtbPSdockKUOCFKnBAlTogSJ0SJE6LECVHOc17M1plKZy7fh8kJUeKEKHFClDghSpwQJU6IEidEOc8J53OeE65EnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVG3jeerV5MB+zM5IUqcECVOiBInRIkTosQJUZ8VcUzDwD2AfQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_digit(cv(2)-cv(1))"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "2c7323b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAADlUlEQVR4nO3dwWrbQBhG0ar0/V9ZWWURIjzU9lh3pHOW7cY4XH7Ih6Jt3/c/QM/fsz8AcEycECVOiBInRIkTov4N/t+vcmG+7egfXU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocULU6BWATLBth298y9t3b4T8JJcTosQJUeKEKHFClDghSpwQJU6IsnMeWHWHnG30vdhB38vlhChxQpQ4IUqcECVOiBInRIkTom65c9oxWYHLCVHihChxQpQ4IUqcECVOiFp2SrnqHPLqY1dX/V7uyOWEKHFClDghSpwQJU6IEidEiROilt05y/yJSN7B5YQocUKUOCFKnBAlTogSJ0SJE6LsnE+wY/IJLidEiROixAlR4oQocUKUOCFKnBC17M452hof/f1WOyUrcDkhSpwQJU6IEidEiROixAlR4oSoZXfOkVW3zPL7NVf9TlflckKUOCFKnBAlTogSJ0SJE6IuO6WUleeSR1793KaY/+NyQpQ4IUqcECVOiBInRIkTosQJUXbOCVbdMWcbfS920J9cTogSJ0SJE6LECVHihChxQpQ4IWobbEuGpwlm7qCvvBpxZYtvpIc/FJcTosQJUeKEKHFClDghSpwQJU6I8jznYl7d82bugVfdUM/ickKUOCFKnBAlTogSJ0SJE6LECVF2zhMs/uwhH+JyQpQ4IUqcECVOiBInRIkTokwpvM1d/yznLC4nRIkTosQJUeKEKHFClDghSpwQZed8wpl7ncfNjo1+Jit+by4nRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBDlec7FlJ9b9Jzre7mcECVOiBInRIkTosQJUeKEKFPKE8qvuvOavetwOSFKnBAlTogSJ0SJE6LECVHihCg75wTlHXRVV3wkbMTlhChxQpQ4IUqcECVOiBInRIkTouycJ3i02d15A73jlvmIywlR4oQocUKUOCFKnBAlTogSJ0TZOWNsfXxzOSFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVGjVwBuH/kUwC8uJ0SJE6LECVHihChxQpQ4IeoLGL5a6fFroA8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"a = cv(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87a073e1",
"id": "b608bd89",
"metadata": {},
"outputs": [],
"source": []