iui-group-l-name-zensiert/2-second-project/tdt/DataViz.ipynb

{
"cells": [
{
"cell_type": "markdown",
"id": "eafd6e6c",
"metadata": {},
"source": [
"# Change Scenario here.\n",
"\n",
"| | GameType | HeightNorm | ArmNorm |\n",
"|:---:|:--------:|:----------:|:-------:|\n",
"| SYY | Sorting | ✅ | ✅ |\n",
"| SYN | Sorting | ✅ | ❌ |\n",
"| SNY | Sorting | ❌ | ✅ |\n",
"| SNN | Sorting | ❌ | ❌ |\n",
"| JYY | Jenga | ✅ | ✅ |\n",
"| JYN | Jenga | ✅ | ❌ |\n",
"| JNY | Jenga | ❌ | ✅ |\n",
"| JNN | Jenga | ❌ | ❌ |\n",
"\n",
"Weights for the corresponding scenario are loaded automatically."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "89c6a73c",
"metadata": {},
"outputs": [],
"source": [
"# Possibilities: 'SYY', 'SYN', 'SNY', 'SNN', \n",
"# 'JYY', 'JYN', 'JNY', 'JNN'\n",
"cenario = 'SNY'"
]
},
{
"cell_type": "markdown",
"id": "5c1dc34e",
"metadata": {},
"source": [
"## Constants"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6921bc6b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' # this is required\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9b20b30b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"glob_path = '/opt/iui-datarelease3-sose2021/*.csv'\n",
"\n",
"pickle_file = '../data.pickle'\n",
"\n",
"pd.set_option('display.float_format', lambda x: '%.2f' % x)"
]
},
{
"cell_type": "markdown",
"id": "047b3321",
"metadata": {},
"source": [
"# Config"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0c2275bf",
"metadata": {},
"outputs": [],
"source": [
"create_new = False\n",
"checkpoint_path = f\"training_{cenario}/cp.ckpt\"\n",
"checkpoint_dir = os.path.dirname(checkpoint_path)\n",
"\n",
"win_sz = 5\n",
"stride_sz = 1\n",
"\n",
"epoch = 50\n",
"\n",
"# divisor for neuron count step downs (hard to describe), e.g. dense_step = 3: layer1=900, layer2 = 300, layer3 = 100, layer4 = 33...\n",
"dense_steps = 3\n",
"# amount of dense/dropout layers\n",
"layer_count = 3\n",
"# how much to drop\n",
"drop_count = 0.1"
]
},
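{
"cell_type": "markdown",
"id": "a1b2c3d4",
"metadata": {},
"source": [
"A minimal illustration (not part of the original pipeline) of how `dense_steps` and `layer_count` translate into hidden-layer sizes; it mirrors the loop in `build_model` further below. The starting neuron count `ncount` used here is an assumed example value; in the model it is `shape[0]*shape[1]` of the input windows."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2c3d4e5",
"metadata": {},
"outputs": [],
"source": [
"# Illustration only: hidden-layer sizes produced by the config above.\n",
"# ncount = 2700 is an assumed example value, not derived from the data.\n",
"# (build_model additionally stops once neurons <= number of classes.)\n",
"ncount = 2700\n",
"for i in range(2, layer_count + 2):\n",
"    neurons = int(ncount / pow(dense_steps, i))\n",
"    print(f'Hidden_{i}: {neurons} neurons, dropout {drop_count * i:.1f}')"
]
},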
{
"cell_type": "markdown",
"id": "10d070f3",
"metadata": {},
"source": [
"# Helper Functions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "46f13510",
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"\n",
"def pplot(dd):\n",
" x = dd.shape[0]\n",
" fix = int(x/3)+1\n",
" fiy = 3\n",
" fig, axs = plt.subplots(fix, fiy, figsize=(3*fiy, 9*fix))\n",
" \n",
" for i in range(x):\n",
" axs[int(i/3)][i%3].plot(dd[i])"
]
},
{
"cell_type": "markdown",
"id": "8aa25439",
"metadata": {},
"source": [
"# Loading Data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "94f77686",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from glob import glob\n",
"from tqdm import tqdm\n",
"\n",
"def dl_from_blob(filename, user_filter=None):\n",
" \n",
" dic_data = []\n",
" \n",
" for p in tqdm(glob(glob_path)):\n",
" path = p\n",
" filename = path.split('/')[-1].split('.')[0]\n",
" splitname = filename.split('_')\n",
" user = int(splitname[0][1:])\n",
" if (user_filter):\n",
" if (user != user_filter):\n",
" continue\n",
" scenario = splitname[1][len('Scenario'):]\n",
" heightnorm = splitname[2][len('HeightNormalization'):] == 'True'\n",
" armnorm = splitname[3][len('ArmNormalization'):] == 'True'\n",
" rep = int(splitname[4][len('Repetition'):])\n",
" session = int(splitname[5][len('Session'):])\n",
" data = pd.read_csv(path)\n",
" dic_data.append(\n",
" {\n",
" 'filename': path,\n",
" 'user': user,\n",
" 'scenario': scenario,\n",
" 'heightnorm': heightnorm,\n",
" 'armnorm': armnorm,\n",
" 'rep': rep,\n",
" 'session': session,\n",
" 'data': data \n",
" }\n",
" )\n",
" return dic_data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f021e1d8",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"def save_pickle(f, structure):\n",
" _p = open(f, 'wb')\n",
" pickle.dump(structure, _p)\n",
" _p.close()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f4a1b342",
"metadata": {},
"outputs": [],
"source": [
"def load_pickles(f) -> list:\n",
" _p = open(pickle_file, 'rb')\n",
" _d = pickle.load(_p)\n",
" _p.close()\n",
" \n",
" return _d"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1540ece8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data...\n",
"../data.pickle found...\n",
"768\n",
"CPU times: user 535 ms, sys: 2.43 s, total: 2.97 s\n",
"Wall time: 2.97 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"def load_data() -> list:\n",
" if os.path.isfile(pickle_file):\n",
" print(f'{pickle_file} found...')\n",
" return load_pickles(pickle_file)\n",
" print(f'Didn\\'t find {pickle_file}...')\n",
" all_data = dl_from_blob(glob_path)\n",
" print(f'Creating {pickle_file}...')\n",
" save_pickle(pickle_file, all_data)\n",
" return all_data\n",
"\n",
"print(\"Loading data...\")\n",
"dic_data = load_data()\n",
"print(len(dic_data))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "25f648ae",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 343 µs, sys: 200 µs, total: 543 µs\n",
"Wall time: 549 µs\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Categorized Data\n",
"cdata = dict() \n",
"# Sorting, HeightNorm, ArmNorm\n",
"cdata['SYY'] = list() \n",
"cdata['SYN'] = list() \n",
"cdata['SNY'] = list() \n",
"cdata['SNN'] = list() \n",
"\n",
"# Jenga, HeightNorm, ArmNorm\n",
"cdata['JYY'] = list() \n",
"cdata['JYN'] = list() \n",
"cdata['JNY'] = list() \n",
"cdata['JNN'] = list() \n",
"\n",
"for d in dic_data:\n",
" if d['scenario'] == 'Sorting':\n",
" if d['heightnorm']:\n",
" if d['armnorm']:\n",
" cdata['SYY'].append(d)\n",
" else:\n",
" cdata['SYN'].append(d)\n",
" else:\n",
" if d['armnorm']:\n",
" cdata['SNY'].append(d)\n",
" else:\n",
" cdata['SNN'].append(d)\n",
" elif d['scenario'] == 'Jenga':\n",
" if d['heightnorm']:\n",
" if d['armnorm']:\n",
" cdata['JYY'].append(d)\n",
" else:\n",
" cdata['JYN'].append(d)\n",
" else:\n",
" if d['armnorm']:\n",
" cdata['JNY'].append(d)\n",
" else:\n",
" cdata['JNN'].append(d)"
]
},
{
"cell_type": "markdown",
"id": "049c83fa",
"metadata": {},
"source": [
"# Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "95a39c6e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def drop(entry, data=True) -> pd.DataFrame:\n",
" droptable = ['participantID', 'FrameID', 'Scenario', 'HeightNormalization', 'ArmNormalization', 'Repetition', 'Session', 'Unnamed: 0']\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
"\n",
" return centry.drop(droptable, axis=1)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5bc3de2b",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"left_Hand_ident='left'\n",
"right_Hand_ident='right'\n",
"\n",
"def rem_low_acc(entry, data=True) -> pd.DataFrame:\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
" \n",
" centry['LeftHandTrackingAccuracy'] = (centry['LeftHandTrackingAccuracy'] == 'High') * 1.0\n",
" centry['RightHandTrackingAccuracy'] = (centry['RightHandTrackingAccuracy'] == 'High') * 1.0\n",
" \n",
" left_Hand_cols = [c for c in centry if left_Hand_ident in c.lower() and c != 'LeftHandTrackingAccuracy']\n",
" right_Hand_cols = [c for c in centry if right_Hand_ident in c.lower() and c != 'RightHandTrackingAccuracy']\n",
" \n",
" centry.loc[centry['LeftHandTrackingAccuracy'] == 0.0, left_Hand_cols] = np.nan\n",
" centry.loc[centry['RightHandTrackingAccuracy'] == 0.0, right_Hand_cols] = np.nan\n",
"\n",
" return centry"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ca4a71d9",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"\n",
"def pad(entry, data=True) -> pd.DataFrame:\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
" \n",
" cols = centry.columns\n",
" pentry = pad_sequences(centry.T.to_numpy(),\n",
" maxlen=(int(centry.shape[0]/stride_sz)+1)*stride_sz,\n",
" dtype='float64',\n",
" padding='pre', \n",
" truncating='post',\n",
" value=np.nan\n",
" ) \n",
" pdentry = pd.DataFrame(pentry.T, columns=cols)\n",
" pdentry.loc[0] = [0 for _ in cols]\n",
" return pdentry"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "71aa29b6",
"metadata": {},
"outputs": [],
"source": [
"def interpol(entry, data=True) -> pd.DataFrame:\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
" \n",
" return centry.interpolate(limit_direction='both')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "bdbada4a",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing import timeseries_dataset_from_array\n",
"\n",
"def slicing(entry, label, data=True):\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
" \n",
" return timeseries_dataset_from_array(\n",
" data=centry, \n",
" targets=[label for _ in range(centry.shape[0])], \n",
" sequence_length=win_sz,\n",
" sequence_stride=stride_sz, \n",
" batch_size=8, \n",
" seed=177013\n",
" )"
]
},
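{
"cell_type": "markdown",
"id": "c3d4e5f6",
"metadata": {},
"source": [
"A quick sanity check (illustration only, not part of the original pipeline): windowing a small toy array with the `win_sz`/`stride_sz` settings from the config to show the shapes that `timeseries_dataset_from_array` yields. The toy data below is an assumption for demonstration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4e5f6a7",
"metadata": {},
"outputs": [],
"source": [
"# Toy example: 10 timesteps with 2 features; the values themselves are arbitrary.\n",
"toy = np.arange(20).reshape(10, 2).astype('float64')\n",
"toy_ds = timeseries_dataset_from_array(\n",
"    data=toy,\n",
"    targets=[0] * toy.shape[0],\n",
"    sequence_length=win_sz,\n",
"    sequence_stride=stride_sz,\n",
"    batch_size=8\n",
")\n",
"for X, y in toy_ds:\n",
"    print(X.shape, y.shape)  # with win_sz=5, stride_sz=1: expect (6, 5, 2) (6,)"
]
},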
{
"cell_type": "code",
"execution_count": null,
"id": "612c5b39",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 96/96 [00:05<00:00, 9.51it/s] "
]
}
],
"source": [
"acc_data = pd.DataFrame()\n",
"\n",
"for e in tqdm(cdata[cenario]):\n",
" acc_data = acc_data.append(e['data'], ignore_index=True)\n",
"\n",
"ddacc_data = rem_low_acc(drop(acc_data, False),False)\n",
"\n",
"eula = ddacc_data[[c for c in ddacc_data if 'euler' in c.lower()]]\n",
"posi = ddacc_data[[c for c in ddacc_data if 'pos' in c.lower()]]\n",
"eulamin = eula.min()\n",
"eulamax = eula.max()\n",
"eulamean = eula.mean()\n",
"eulastd = eula.std()\n",
"posimin = posi.min()\n",
"posimax = posi.max()\n",
"posimean = posi.mean()\n",
"posistd = posi.std()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8dde568",
"metadata": {},
"outputs": [],
"source": [
"def minmaxscaler(entry, minimum, maximum):\n",
" return (entry-minimum)/(maximum-minimum)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "238e7d22",
"metadata": {},
"outputs": [],
"source": [
"euler_ident = 'euler'\n",
"pos_ident = 'pos'\n",
"\n",
"def norm(entry, data=True) -> pd.DataFrame:\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
" \n",
" euler_cols = [c for c in centry if euler_ident in c.lower()]\n",
" pos_cols = [c for c in centry if pos_ident in c.lower()]\n",
" \n",
" centry[euler_cols] = minmaxscaler(centry[euler_cols], eulamin, eulamax)\n",
" centry[pos_cols] = minmaxscaler(centry[pos_cols], posimin, posimax)\n",
" return centry"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "311ad27d",
"metadata": {},
"outputs": [],
"source": [
"def drop_acc(entry, data=True) -> pd.DataFrame:\n",
" droptable = ['LeftHandTrackingAccuracy', 'RightHandTrackingAccuracy']\n",
" if data:\n",
" centry = pickle.loads(pickle.dumps(entry['data']))\n",
" else:\n",
" centry = pickle.loads(pickle.dumps(entry))\n",
"\n",
" return centry.drop(droptable, axis=1)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ce4736b",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"classes = 16 # dynamic\n",
"\n",
"def preproc(data):\n",
" res_list = list()\n",
" \n",
" for e in tqdm(data):\n",
" res_list.append(preproc_entry(e))\n",
" \n",
" return res_list\n",
" \n",
"def preproc_entry(entry, data = True):\n",
" entry2 = pickle.loads(pickle.dumps(entry))\n",
" entry2['data'] = drop(entry2, data)\n",
" \n",
" entry3 = pickle.loads(pickle.dumps(entry2))\n",
" entry3['data'] = rem_low_acc(entry3, data)\n",
" \n",
" entry1 = pickle.loads(pickle.dumps(entry3))\n",
" entry1['data'] = norm(entry1, data)\n",
" \n",
" entry8 = pickle.loads(pickle.dumps(entry1))\n",
" entry8['data'] = drop_acc(entry8, data)\n",
" \n",
"# entry5 = pickle.loads(pickle.dumps(entry4))\n",
"# entry5['data'] = pad(entry5, data)\n",
" \n",
"# entry6 = pickle.loads(pickle.dumps(entry8))\n",
"# entry6['data'] = interpol(entry6, data)\n",
" \n",
" entry7 = pickle.loads(pickle.dumps(entry8))\n",
" entry7['data'] = slicing(entry7, entry7['user'], data)\n",
" \n",
" return entry7\n",
"\n",
"pdata = preproc(cdata[cenario])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2684b1c6",
"metadata": {},
"outputs": [],
"source": [
"a = drop(cdata[cenario][0]['data'], False)\n",
"a['left_OVRHandPrefab_pos_X'].plot()\n",
"plt.plot((a['LeftHandTrackingAccuracy'] == 'High')*1.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "225ba8a8",
"metadata": {},
"outputs": [],
"source": [
"b = rem_low_acc(a, False)\n",
"b['left_OVRHandPrefab_pos_X'].plot()\n",
"plt.plot(b['LeftHandTrackingAccuracy'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d42fc99a",
"metadata": {},
"outputs": [],
"source": [
"c = norm(b, False)\n",
"c['left_OVRHandPrefab_pos_X'].plot()\n",
"plt.plot(c['LeftHandTrackingAccuracy'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d58b0f5",
"metadata": {},
"outputs": [],
"source": [
"d = interpol(c, False)\n",
"d['left_OVRHandPrefab_pos_X'].plot()\n",
"plt.plot(d['LeftHandTrackingAccuracy'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb02ea59",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"train = np.array([x['data'] for x in pdata if x['session'] == 1])\n",
"test = np.array([x['data'] for x in pdata if x['session'] == 2])\n",
"\n",
"len(train), len(test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64b1f388",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"X_train = list()\n",
"y_train = list()\n",
"\n",
"X_test = list()\n",
"y_test = list()\n",
"\n",
"train = list()\n",
"test = list()\n",
"\n",
"for x in tqdm(pdata):\n",
" if x['session'] == 1:\n",
" train.append(\n",
" {\n",
" 'label': x['user'],\n",
" 'data': list()\n",
" })\n",
" for y in x['data'].unbatch().as_numpy_iterator():\n",
" if not np.isnan(y[0]).any():\n",
" X_train.append(y[0])\n",
" y_train.append(y[1])\n",
" \n",
" train[-1]['data'].append(y[0])\n",
" if len(train[-1]['data']) == 0:\n",
" del train[-1]\n",
" if x['session'] == 2:\n",
" test.append(\n",
" {\n",
" 'label': x['user'],\n",
" 'data': list()\n",
" })\n",
" for y in x['data'].unbatch().as_numpy_iterator():\n",
" if not np.isnan(y[0]).any():\n",
" X_test.append(y[0])\n",
" y_test.append(y[1])\n",
" \n",
" test[-1]['data'].append(y[0])\n",
" \n",
" if len(test[-1]['data']) == 0:\n",
" del test[-1]\n",
" \n",
"X_train = np.array(X_train)\n",
"y_train = np.array(y_train)\n",
"X_test = np.array(X_test)\n",
"y_test = np.array(y_test)\n",
"\n",
"print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4bf9d67f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"Xy_train = list(zip(X_train, y_train))\n",
"Xy_test = list(zip(X_test, y_test))\n",
"train_dict = {\"1\":[], \"2\":[],\"3\":[], \"4\":[], \"5\":[],\"6\":[], \"7\":[], \"8\":[],\"9\":[], \"10\":[], \"11\":[],\"12\":[], \"13\":[], \"14\":[], \"15\": [], \"16\": []}\n",
"\n",
"[train_dict[str(e[1])].append(e[0]) for e in Xy_train]\n",
"[print(f'Key: {k}: {len(v)}') for k, v in train_dict.items()]\n",
"pd.DataFrame.from_dict({k: len(v) for k, v in train_dict.items()}, orient='index').plot.pie(subplots=True, legend=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e608f7f3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"Xy_test = list(zip(X_test, y_test))\n",
"test_dict = {\"1\":[], \"2\":[],\"3\":[], \"4\":[], \"5\":[],\"6\":[], \"7\":[], \"8\":[],\"9\":[], \"10\":[], \"11\":[],\"12\":[], \"13\":[], \"14\":[], \"15\": [], \"16\": []}\n",
"\n",
"[test_dict[str(e[1])].append(e[0]) for e in Xy_test]\n",
"[print(f'Key: {k}: {len(v)}') for k, v in test_dict.items()]\n",
"pd.DataFrame.from_dict({k: len(v) for k, v in test_dict.items()}, orient='index').plot.pie(subplots=True, legend=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b681b93c",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"from sklearn.preprocessing import LabelBinarizer\n",
"\n",
"\n",
"lb = LabelBinarizer()\n",
"yy_train = lb.fit_transform(y_train)\n",
"yy_test = lb.transform(y_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "630ad588",
"metadata": {},
"outputs": [],
"source": [
"for e in test:\n",
" e['label'] = lb.transform([e['label']])\n",
" e['data'] = np.array(e['data'])\n",
"\n",
" \n",
"for e in train:\n",
" e['label'] = lb.transform([e['label']])\n",
" e['data'] = np.array(e['data'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d127f9a",
"metadata": {},
"outputs": [],
"source": [
"print(X_train.shape)\n",
"print(yy_train.shape)\n",
"print(X_test.shape)\n",
"print(yy_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "5647746c",
"metadata": {},
"source": [
"# Building Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5b4f772",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras.regularizers import l2\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout\n",
"from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau\n",
"from tensorflow.keras.optimizers import Adam\n",
"\n",
"def build_model(shape, classes):\n",
" model = Sequential()\n",
" \n",
" ncount = shape[0]*shape[1]\n",
" \n",
" model.add(Flatten(input_shape=shape, name='flatten'))\n",
" \n",
" model.add(Dropout(drop_count, name=f'dropout_{drop_count*100}'))\n",
" model.add(BatchNormalization(name='batchNorm'))\n",
" \n",
" for i in range(2,layer_count+2):\n",
" neurons = int(ncount/pow(dense_steps,i))\n",
" if neurons <= classes:\n",
" break\n",
" model.add(Dropout(drop_count*i, name=f'HiddenDropout_{drop_count*i*100:.0f}'))\n",
" model.add(Dense(neurons, activation='relu', \n",
" kernel_regularizer=l2(0.001), name=f'Hidden_{i}')\n",
" )\n",
" \n",
" model.add(Dense(classes, activation='softmax', name='Output'))\n",
" \n",
" model.compile(\n",
" optimizer=Adam(),\n",
" loss=\"categorical_crossentropy\", \n",
" metrics=[\"acc\"],\n",
" )\n",
" \n",
" model.summary()\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4ad2401",
"metadata": {},
"outputs": [],
"source": [
"checkpoint_file = './goat.weights'\n",
"\n",
"def train_model(X_train, y_train, X_test, y_test):\n",
" model = build_model(X_train[0].shape, 16)\n",
" \n",
" # Create a callback that saves the model's weights\n",
" model_checkpoint = ModelCheckpoint(filepath=checkpoint_path, monitor='loss', \n",
"\t\t\tsave_best_only=True)\n",
" \n",
" reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=0.0001)\n",
"\n",
" callbacks = [model_checkpoint, reduce_lr]\n",
" \n",
" history = model.fit(X_train, \n",
" y_train,\n",
" epochs=epoch,\n",
" batch_size=32,\n",
" verbose=2,\n",
" validation_data=(X_test, y_test),\n",
" callbacks=callbacks\n",
" )\n",
" \n",
" model.load_weights(checkpoint_path)\n",
" return model, history"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "704b40fb",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"if not os.path.isdir(checkpoint_dir) or create_new:\n",
" tf.keras.backend.clear_session()\n",
" model, history = train_model(np.array(X_train), np.array(yy_train), np.array(X_test), np.array(yy_test))\n",
"else:\n",
" print(\"Loaded weights...\")\n",
" model = build_model(X_train[0].shape, 16)\n",
" model.load_weights(checkpoint_path)"
]
},
{
"cell_type": "markdown",
"id": "03971701",
"metadata": {},
"source": [
"# Eval"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "580b1b78",
"metadata": {},
"outputs": [],
"source": [
"def predict(model, entry):\n",
" p_dict = dict()\n",
" predictions = np.argmax(model.predict(entry), axis=-1)\n",
" for p in predictions:\n",
" if p in p_dict:\n",
" p_dict[p] += 1\n",
" else:\n",
" p_dict[p] = 1\n",
" prediction = max(p_dict, key=p_dict.get)\n",
" return prediction+1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3749d475",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"ltest = [lb.inverse_transform(e['label'])[0] for e in test]\n",
"ptest = [predict(model, e['data']) for e in test]\n",
"\n",
"len(ltest), len(ptest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3c48d92",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"ltrain = [lb.inverse_transform(e['label'])[0] for e in train]\n",
"ptrain = [predict(model, e['data']) for e in train]\n",
"\n",
"\n",
"len(ltrain), len(ptrain)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80f0ac46",
"metadata": {},
"outputs": [],
"source": [
"set(ltrain), set(ltest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8daae77e",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sn\n",
"\n",
"from sklearn.metrics import classification_report\n",
"\n",
"set_digits = set(ltrain)\n",
"\n",
"train_cm = confusion_matrix(ltrain, ptrain, labels=list(set_digits), normalize='true')\n",
"test_cm = confusion_matrix(ltest, ptest, labels=list(set_digits), normalize='true')\n",
"\n",
"df_cm = pd.DataFrame(test_cm, index=set_digits, columns=set_digits)\n",
"plt.figure(figsize = (10,7))\n",
"sn_plot = sn.heatmap(df_cm, annot=True, cmap=\"Greys\")\n",
"plt.ylabel(\"True Label\")\n",
"plt.xlabel(\"Predicted Label\")\n",
"plt.show()\n",
"\n",
"print(classification_report(ltest, ptest, zero_division=0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72055847",
"metadata": {},
"outputs": [],
"source": [
"def plot_keras_history(history, name='', acc='acc'):\n",
" \"\"\"Plots keras history.\"\"\"\n",
" import matplotlib.pyplot as plt\n",
"\n",
" training_acc = history.history[acc]\n",
" validation_acc = history.history['val_' + acc]\n",
" loss = history.history['loss']\n",
" val_loss = history.history['val_loss']\n",
"\n",
" epochs = range(len(training_acc))\n",
"\n",
" plt.ylim(0, 1)\n",
" plt.plot(epochs, training_acc, 'tab:blue', label='Training acc')\n",
" plt.plot(epochs, validation_acc, 'tab:orange', label='Validation acc')\n",
" plt.title('Training and validation accuracy ' + name)\n",
" plt.legend()\n",
"\n",
" plt.figure()\n",
"\n",
" plt.plot(epochs, loss, 'tab:green', label='Training loss')\n",
" plt.plot(epochs, val_loss, 'tab:red', label='Validation loss')\n",
" plt.title('Training and validation loss ' + name)\n",
" plt.legend()\n",
" plt.show()\n",
" plt.close()\n",
"if 'history' in locals():\n",
" plot_keras_history(history)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "deb72af6",
"metadata": {},
"outputs": [],
"source": [
"print(f'Scenario: {cenario}')\n",
"print(f'Window Size: {win_sz}')\n",
"print(f'Strides: {stride_sz}')\n",
"print(f'Epochs: {epoch}')\n",
"print(f'HiddenL Count: {layer_count}')\n",
"print(f'Neuron Factor: {dense_steps}')\n",
"print(f'Drop Factor: {drop_count}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b2ad872",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"toc-showtags": false
},
"nbformat": 4,
"nbformat_minor": 5
}