iui-group-l-name-zensiert/2-second-project/iel/Week2/DataVizNormTrial.ipynb

1130 lines
61 KiB
Plaintext
Raw Normal View History

2021-08-06 17:39:50 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "b6131d61",
"metadata": {},
"source": [
"# Constants"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6144a350",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# GPU settings, applied here ahead of the TensorFlow imports further down the notebook.\n",
"os.environ.update({\n",
"    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',  # this is required\n",
"    'CUDA_VISIBLE_DEVICES': '2',  # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal.\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7aa3948f",
"metadata": {},
"outputs": [],
"source": [
"# Glob pattern matching the raw recording CSV files.\n",
"glob_path = '/opt/iui-datarelease3-sose2021/*.csv'\n",
"\n",
"# Pickle cache of the parsed recordings: load_data() reads it when it exists and creates it otherwise.\n",
"pickle_file = '../data.pickle'"
]
},
{
"cell_type": "markdown",
"id": "89eb31ab",
"metadata": {},
"source": [
"# Config"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e2be13e5",
"metadata": {},
"outputs": [],
"source": [
"# Subset of recordings to train on, keyed as <scenario><height norm><arm norm>:\n",
"# S = Sorting, J = Jenga; Y/N = normalization on/off.\n",
"# Possibilities: 'SYY', 'SYN', 'SNY', 'SNN', \n",
"# 'JYY', 'JYN', 'JNY', 'JNN'\n",
"cenario = 'SYN' \n",
"\n",
"# Window length and window stride (in time steps) passed to timeseries_dataset_from_array() in slicing().\n",
"win_sz = 10\n",
"stride_sz = 5\n",
"\n",
"# Shrink factor between hidden layers: build_model() sizes hidden layer i as\n",
"# int(flattened_input_size / dense_steps**i), e.g. dense_steps = 3 with 900 inputs gives 300, 100, 33...\n",
"dense_steps = 3\n",
"# Bound of the hidden-layer loop: build_model() iterates range(1, layer_count),\n",
"# so at most layer_count - 1 Dropout/Dense pairs are added.\n",
"layer_count = 3\n",
"# Base dropout rate; the Dropout in front of hidden layer i uses drop_count * i.\n",
"drop_count = 0.1"
]
},
{
"cell_type": "markdown",
"id": "0eca097f",
"metadata": {},
"source": [
"# Helper Functions"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "82014801",
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"\n",
"def pplot(dd):\n",
"    \"\"\"Plot every row of ``dd`` in its own subplot, three subplots per grid row.\n",
"\n",
"    Args:\n",
"        dd: indexable object with a ``shape`` attribute; ``dd[i]`` is drawn for\n",
"            every ``i`` in ``range(dd.shape[0])``.\n",
"    \"\"\"\n",
"    count = dd.shape[0]\n",
"    ncols = 3\n",
"    # Ceiling division, but never fewer than one row so plt.subplots() gets a valid grid.\n",
"    nrows = max(1, -(-count // ncols))\n",
"    # squeeze=False keeps axs two-dimensional even when the grid has a single row;\n",
"    # with the default, axs[row][col] fails for fewer than three plots.\n",
"    fig, axs = plt.subplots(nrows, ncols, figsize=(3 * ncols, 9 * nrows), squeeze=False)\n",
"\n",
"    for i in range(count):\n",
"        axs[i // ncols][i % ncols].plot(dd[i])"
]
},
{
"cell_type": "markdown",
"id": "e8d9944b",
"metadata": {},
"source": [
"# Loading Data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0ae277a0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from glob import glob\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"def dl_from_blob(filename, user_filter=None):\n",
"    \"\"\"Load every CSV matching the glob pattern ``filename`` into a list of dicts.\n",
"\n",
"    The metadata is parsed from the file name (without directory and extension),\n",
"    which is split on '_' into: <one character><user number>, Scenario<name>,\n",
"    HeightNormalization<True|False>, ArmNormalization<True|False>,\n",
"    Repetition<int>, Session<int>.\n",
"\n",
"    Args:\n",
"        filename: glob pattern of the CSV files to read.\n",
"        user_filter: if truthy, only files of this user number are loaded.\n",
"\n",
"    Returns:\n",
"        One dict per loaded file with the keys 'filename', 'user', 'scenario',\n",
"        'heightnorm', 'armnorm', 'rep', 'session' and 'data' (the DataFrame\n",
"        returned by pd.read_csv).\n",
"    \"\"\"\n",
"    def _value(part, label):\n",
"        # Cut the fixed-length label off the front of a file-name field.\n",
"        return part[len(label):]\n",
"\n",
"    dic_data = []\n",
"\n",
"    # Use the pattern that was passed in; it used to be ignored in favour of the global glob_path.\n",
"    for path in tqdm(glob(filename)):\n",
"        parts = path.split('/')[-1].split('.')[0].split('_')\n",
"        user = int(parts[0][1:])\n",
"        if user_filter and user != user_filter:\n",
"            continue\n",
"        dic_data.append(\n",
"            {\n",
"                'filename': path,\n",
"                'user': user,\n",
"                'scenario': _value(parts[1], 'Scenario'),\n",
"                'heightnorm': _value(parts[2], 'HeightNormalization') == 'True',\n",
"                'armnorm': _value(parts[3], 'ArmNormalization') == 'True',\n",
"                'rep': int(_value(parts[4], 'Repetition')),\n",
"                'session': int(_value(parts[5], 'Session')),\n",
"                'data': pd.read_csv(path)\n",
"            }\n",
"        )\n",
"    return dic_data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a7b4f994",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"def save_pickle(f, structure):\n",
"    \"\"\"Pickle ``structure`` into the file at path ``f`` (overwriting it).\n",
"\n",
"    Args:\n",
"        f: path of the file to write.\n",
"        structure: any picklable object.\n",
"    \"\"\"\n",
"    # The context manager closes the file even when pickle.dump() raises.\n",
"    with open(f, 'wb') as fh:\n",
"        pickle.dump(structure, fh)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b6b6fa69",
"metadata": {},
"outputs": [],
"source": [
"def load_pickles(f) -> list:\n",
"    \"\"\"Load and return the object pickled in the file at path ``f``.\n",
"\n",
"    Args:\n",
"        f: path of the pickle file to read.\n",
"\n",
"    Returns:\n",
"        The unpickled object (annotated as list; load_data() stores the list of recordings).\n",
"    \"\"\"\n",
"    # NOTE: unpickling can execute arbitrary code -- only load files you created yourself.\n",
"    # Open the path that was passed in; it used to be ignored in favour of the global pickle_file.\n",
"    with open(f, 'rb') as fh:\n",
"        return pickle.load(fh)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "38d131f0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data...\n",
"../data.pickle found...\n",
"768\n",
"CPU times: user 596 ms, sys: 2.15 s, total: 2.75 s\n",
"Wall time: 2.75 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"def load_data() -> list:\n",
"    \"\"\"Return all recordings, preferring the pickle cache over re-reading the CSV files.\n",
"\n",
"    When ``pickle_file`` does not exist, the CSVs matching ``glob_path`` are\n",
"    parsed with dl_from_blob() and the result is written to ``pickle_file``.\n",
"    \"\"\"\n",
"    if not os.path.isfile(pickle_file):\n",
"        print(f'Didn\\'t find {pickle_file}...')\n",
"        recordings = dl_from_blob(glob_path)\n",
"        print(f'Creating {pickle_file}...')\n",
"        save_pickle(pickle_file, recordings)\n",
"        return recordings\n",
"\n",
"    print(f'{pickle_file} found...')\n",
"    return load_pickles(pickle_file)\n",
"\n",
"print(\"Loading data...\")\n",
"dic_data = load_data()\n",
"print(len(dic_data))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "967f81ef",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 398 µs, sys: 0 ns, total: 398 µs\n",
"Wall time: 402 µs\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Categorized data: one list of recordings per key.\n",
"# Key = scenario initial (S = Sorting, J = Jenga) + HeightNorm (Y/N) + ArmNorm (Y/N).\n",
"cdata = {key: list() for key in ('SYY', 'SYN', 'SNY', 'SNN',\n",
"                                 'JYY', 'JYN', 'JNY', 'JNN')}\n",
"\n",
"for rec in dic_data:\n",
"    # Recordings of any other scenario are left out.\n",
"    if rec['scenario'] not in ('Sorting', 'Jenga'):\n",
"        continue\n",
"    height_flag = 'Y' if rec['heightnorm'] else 'N'\n",
"    arm_flag = 'Y' if rec['armnorm'] else 'N'\n",
"    cdata[rec['scenario'][0] + height_flag + arm_flag].append(rec)"
]
},
{
"cell_type": "markdown",
"id": "588af385",
"metadata": {},
"source": [
"# Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "375fee1d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def drop(entry) -> pd.DataFrame:\n",
"    \"\"\"Return a copy of ``entry['data']`` without the identifier/bookkeeping columns.\n",
"\n",
"    The entry is deep-copied through a pickle round trip first, so the\n",
"    caller's DataFrame is never touched.\n",
"    \"\"\"\n",
"    snapshot = pickle.loads(pickle.dumps(entry))\n",
"    frame = snapshot['data']\n",
"    return frame.drop(columns=[\n",
"        'participantID', 'FrameID', 'Scenario', 'HeightNormalization',\n",
"        'ArmNormalization', 'Repetition', 'Session', 'Unnamed: 0',\n",
"    ])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e2b0b2fc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def floatize(entry) -> pd.DataFrame:\n",
"    \"\"\"Return a copy of ``entry['data']`` with both tracking-accuracy columns as floats.\n",
"\n",
"    'High' becomes 1.0, every other value becomes 0.0. The input entry is not modified.\n",
"    \"\"\"\n",
"    frame = pickle.loads(pickle.dumps(entry))['data']\n",
"    for column in ('LeftHandTrackingAccuracy', 'RightHandTrackingAccuracy'):\n",
"        is_high = entry['data'][column] == 'High'\n",
"        frame[column] = is_high * 1.0\n",
"    return frame"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9785e9f0",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"# Substrings that mark a column as belonging to the right / left hand.\n",
"# NOTE(review): the two identifiers differ in casing ('right_Hand' vs 'left_hand') and the match\n",
"# below is case-sensitive -- verify both against the real column names.\n",
"right_Hand_ident='right_Hand'\n",
"left_Hand_ident='left_hand'\n",
"\n",
"def rem_low_acc(entry) -> pd.DataFrame:\n",
"    \"\"\"Return a copy of ``entry['data']`` with hand columns blanked where tracking accuracy is 0.0.\n",
"\n",
"    For every row whose Right/LeftHandTrackingAccuracy equals 0.0, all columns whose\n",
"    name contains the matching hand identifier are set to NaN. The input is not modified.\n",
"    \"\"\"\n",
"    frame = pickle.loads(pickle.dumps(entry))['data']\n",
"    hands = (\n",
"        ('RightHandTrackingAccuracy', right_Hand_ident),\n",
"        ('LeftHandTrackingAccuracy', left_Hand_ident),\n",
"    )\n",
"    for accuracy_col, ident in hands:\n",
"        hand_cols = [name for name in frame if ident in name]\n",
"        low_accuracy = frame[accuracy_col] == 0.0\n",
"        frame.loc[low_accuracy, hand_cols] = np.nan\n",
"    return frame"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3ec4cc1d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"\n",
"# Padded recordings get a length that is a multiple of this many rows.\n",
"stride = 150\n",
"def pad(entry) -> pd.DataFrame:\n",
"    \"\"\"Return ``entry['data']`` front-padded with NaN rows up to the next multiple of ``stride``.\n",
"\n",
"    The target length is (rows // stride + 1) * stride, so a recording whose length\n",
"    already is a multiple of ``stride`` still gains one full stride of padding.\n",
"    Afterwards the first row is overwritten with zeros.\n",
"    \"\"\"\n",
"    frame = pickle.loads(pickle.dumps(entry))['data']\n",
"    col_names = frame.columns\n",
"    target_len = (int(frame.shape[0] / stride) + 1) * stride\n",
"    # pad_sequences pads each sequence (here: each column, hence the transposes).\n",
"    padded = pad_sequences(\n",
"        frame.T.to_numpy(),\n",
"        maxlen=target_len,\n",
"        dtype='float64',\n",
"        padding='pre',\n",
"        truncating='post',\n",
"        value=np.nan,\n",
"    )\n",
"    result = pd.DataFrame(padded.T, columns=col_names)\n",
"    # presumably the zero row gives the later linear interpolation a start value -- TODO confirm\n",
"    result.loc[0] = [0 for _ in col_names]\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0361a89c",
"metadata": {},
"outputs": [],
"source": [
"def interpol(entry) -> pd.DataFrame:\n",
"    \"\"\"Return a copy of ``entry['data']`` with NaN gaps filled by linear interpolation down each column.\"\"\"\n",
"    frame = pickle.loads(pickle.dumps(entry))['data']\n",
"    filled = frame.interpolate(method='linear', axis=0)\n",
"    return filled"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "52b62534",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing import timeseries_dataset_from_array\n",
"\n",
"def slicing(entry):\n",
"    \"\"\"Cut ``entry['data']`` into sliding windows labelled with ``entry['user']``.\n",
"\n",
"    Windows are ``win_sz`` rows long and start every ``stride_sz`` rows; the\n",
"    result is the dataset built by timeseries_dataset_from_array (batch size 8).\n",
"    \"\"\"\n",
"    snapshot = pickle.loads(pickle.dumps(entry))\n",
"    frame = snapshot['data']\n",
"    # One target per row, all equal to the recording's user.\n",
"    user_targets = [snapshot['user']] * frame.shape[0]\n",
"    return timeseries_dataset_from_array(\n",
"        data=frame,\n",
"        targets=user_targets,\n",
"        sequence_length=win_sz,\n",
"        sequence_stride=stride_sz,\n",
"        batch_size=8,\n",
"        seed=177013,\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "383a8b9f",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import (StandardScaler, \n",
"                                   MinMaxScaler, \n",
"                                   MaxAbsScaler,\n",
"                                   PowerTransformer,\n",
"                                   Binarizer)\n",
"\n",
"def _fit_scale(entry, scaler):\n",
"    \"\"\"Fit ``scaler`` on the float64 values of DataFrame ``entry`` and return the result as a new DataFrame.\"\"\"\n",
"    values = entry.to_numpy(dtype=np.float64)\n",
"    return pd.DataFrame(scaler.fit_transform(values))\n",
"\n",
"\n",
"def scaling(entry, scale):\n",
"    \"\"\"Scale DataFrame ``entry`` with the scaler selected by ``scale``.\n",
"\n",
"    Args:\n",
"        entry: DataFrame of numeric columns.\n",
"        scale: 0 = minmax, 1 = standard, 2 = maxabs, 3 = binarizer.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with default integer column labels. Any other ``scale``\n",
"        value returns the float64 data unscaled.\n",
"    \"\"\"\n",
"    scalers = {0: MinMaxScaler, 1: StandardScaler, 2: MaxAbsScaler, 3: Binarizer}\n",
"    if scale not in scalers:\n",
"        return pd.DataFrame(entry.to_numpy(dtype=np.float64))\n",
"    # A single fit_transform is enough; refitting once per row only repeated the same work.\n",
"    return _fit_scale(entry, scalers[scale]())\n",
"\n",
"\n",
"def minScale(entry):\n",
"    \"\"\"Scale ``entry`` with MinMaxScaler (default settings).\"\"\"\n",
"    return _fit_scale(entry, MinMaxScaler())\n",
"\n",
"\n",
"def stanScale(entry):\n",
"    \"\"\"Scale ``entry`` with StandardScaler.\"\"\"\n",
"    return _fit_scale(entry, StandardScaler())\n",
"\n",
"\n",
"def maxScale(entry):\n",
"    \"\"\"Scale ``entry`` with MaxAbsScaler (this used to apply Binarizer by mistake).\"\"\"\n",
"    return _fit_scale(entry, MaxAbsScaler())\n",
"\n",
"\n",
"def binScale(entry):\n",
"    \"\"\"Transform ``entry`` with Binarizer (this used to apply MinMaxScaler by mistake).\"\"\"\n",
"    return _fit_scale(entry, Binarizer())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "29134efc",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 96/96 [00:16<00:00, 5.95it/s]\n"
]
}
],
"source": [
"from sklearn.preprocessing import (StandardScaler, \n",
"                                   MinMaxScaler, \n",
"                                   MaxAbsScaler,\n",
"                                   PowerTransformer,\n",
"                                   Binarizer)\n",
"\n",
"#%%time\n",
"\n",
"classes = 16 # dynamic\n",
"\n",
"def preproc(data):\n",
"    \"\"\"Run preproc_entry() on every recording in ``data`` and return the results as a list.\"\"\"\n",
"    return [preproc_entry(e) for e in tqdm(data)]\n",
"\n",
"def preproc_entry(entry):\n",
"    \"\"\"Preprocess one recording and return a new entry whose 'data' is the windowed dataset.\n",
"\n",
"    Steps, in order: drop() -> floatize() -> rem_low_acc() -> pad() -> interpol()\n",
"    -> minScale() -> slicing(). The input entry is not modified.\n",
"    \"\"\"\n",
"    # One deep copy up front protects the caller's entry; the former copy before\n",
"    # every single step serialized the whole DataFrame again each time.\n",
"    work = pickle.loads(pickle.dumps(entry))\n",
"    for step in (drop, floatize, rem_low_acc, pad, interpol):\n",
"        work['data'] = step(work)\n",
"\n",
"    # The scalers take the DataFrame itself, not the entry dict.\n",
"    work['data'] = minScale(work['data'])\n",
"    work['data'] = slicing(work)\n",
"    return work\n",
"\n",
"\n",
"pdata = preproc(cdata[cenario])\n"
]
},
{
"cell_type": "markdown",
"id": "f8157b26",
"metadata": {},
"source": [
"# Building Model"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "2eb9c242",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, LSTM\n",
"import tensorflow.keras as keras\n",
"\n",
"def build_model(shape, classes):\n",
"    \"\"\"Build and compile the dense classifier.\n",
"\n",
"    Layout: Flatten -> Dropout -> BatchNormalization -> up to layer_count - 1\n",
"    (Dropout, Dense/relu) pairs of shrinking width -> Dense/softmax.\n",
"\n",
"    Args:\n",
"        shape: shape of one input sample; shape[0] * shape[1] is the flattened size.\n",
"        classes: number of output units.\n",
"\n",
"    Returns:\n",
"        The compiled Sequential model (Adam, categorical cross-entropy, metric 'acc').\n",
"    \"\"\"\n",
"    flat_size = shape[0] * shape[1]\n",
"\n",
"    model = Sequential()\n",
"    model.add(Flatten(input_shape=shape))\n",
"    model.add(Dropout(drop_count))\n",
"    model.add(BatchNormalization())\n",
"\n",
"    for depth in range(1, layer_count):\n",
"        width = int(flat_size / dense_steps ** depth)\n",
"        # Stop shrinking once a hidden layer would be no wider than classes * dense_steps.\n",
"        if width <= classes * dense_steps:\n",
"            break\n",
"        model.add(Dropout(drop_count * depth))\n",
"        model.add(Dense(width, activation='relu'))\n",
"\n",
"    model.add(Dense(classes, activation='softmax'))\n",
"\n",
"    adam = tf.keras.optimizers.Adam(0.001)\n",
"    model.compile(optimizer=adam, loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n",
"\n",
"    return model\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "eb3212ae",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Where ModelCheckpoint stores the weights of the best epoch.\n",
"checkpoint_file = './goat.weights'\n",
"\n",
"def train_model(X_train, y_train, X_test, y_test):\n",
"    \"\"\"Build the model, train it for 30 epochs and return ``(model, history)``.\n",
"\n",
"    Args:\n",
"        X_train, X_test: window arrays; X_train[0].shape is used as the model input shape.\n",
"        y_train, y_test: one-hot label arrays (the model uses categorical cross-entropy).\n",
"\n",
"    Returns:\n",
"        The model in its state after the last epoch, and the History object returned by fit().\n",
"        The weights of the epoch with the highest val_acc are saved to ``checkpoint_file``.\n",
"    \"\"\"\n",
"    # Take the class count from the one-hot label width instead of hard-coding 16.\n",
"    n_classes = np.asarray(y_train).shape[1]\n",
"    model = build_model(X_train[0].shape, n_classes)\n",
"\n",
"    model.summary()\n",
"\n",
"    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(\n",
"        filepath = checkpoint_file,\n",
"        save_weights_only=True,\n",
"        monitor='val_acc',\n",
"        mode='max',\n",
"        save_best_only=True\n",
"    )\n",
"\n",
"    history = model.fit(X_train,\n",
"                        y_train,\n",
"                        epochs=30,\n",
"                        batch_size=128,\n",
"                        shuffle=True,\n",
"                        verbose=2,\n",
"                        validation_data=(X_test, y_test),\n",
"                        callbacks=[model_checkpoint_callback]\n",
"                        )\n",
"    return model, history"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "cb296665",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 375 µs, sys: 0 ns, total: 375 µs\n",
"Wall time: 396 µs\n"
]
},
{
"data": {
"text/plain": [
"(48, 48)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# Split the preprocessed recordings by session (1 -> train, 2 -> test) and show how many\n",
"# recordings each side has; `train` and `test` are rebuilt as lists of dicts in the next cell.\n",
"train = np.array([x['data'] for x in pdata if x['session'] == 1])\n",
"test = np.array([x['data'] for x in pdata if x['session'] == 2])\n",
"\n",
"len(train), len(test)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "bf378c00",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 25.7 s, sys: 6.87 s, total: 32.6 s\n",
"Wall time: 9.2 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Flat window/label lists for fitting, plus one {'label', 'data'} dict per recording for evaluation.\n",
"X_train, y_train = list(), list()\n",
"X_test, y_test = list(), list()\n",
"train, test = list(), list()\n",
"\n",
"# Session 1 feeds the training containers, session 2 the test containers.\n",
"_targets = {\n",
"    1: (train, X_train, y_train),\n",
"    2: (test, X_test, y_test),\n",
"}\n",
"\n",
"for rec in pdata:\n",
"    if rec['session'] not in _targets:\n",
"        continue\n",
"    per_recording, windows, labels = _targets[rec['session']]\n",
"    per_recording.append({'label': rec['user'], 'data': list()})\n",
"    for window, user in rec['data'].unbatch().as_numpy_iterator():\n",
"        windows.append(window)\n",
"        labels.append(user)\n",
"        per_recording[-1]['data'].append(window)\n",
"\n",
"X_train, y_train = np.array(X_train), np.array(y_train)\n",
"X_test, y_test = np.array(X_test), np.array(y_test)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "fdb1b754",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"((30432, 10, 338), (30432,), (20502, 10, 338), (20502,))"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape, y_train.shape, X_test.shape, y_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "4b29f6dd",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 241 ms, sys: 116 ms, total: 358 ms\n",
"Wall time: 357 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"from sklearn.preprocessing import LabelBinarizer\n",
"\n",
"# One-hot encode the user labels. The encoder is fitted on the training labels only and\n",
"# then reused, so train and test share one label -> column mapping (refitting on y_test\n",
"# would change the mapping whenever the two label sets differ).\n",
"lb = LabelBinarizer()\n",
"yy_train = lb.fit_transform(y_train)\n",
"yy_test = lb.transform(y_test)\n",
"\n",
"# Encode the per-recording labels the same way and turn each window list into an array.\n",
"for e in test + train:\n",
"    e['label'] = lb.transform([e['label']])\n",
"    e['data'] = np.array(e['data'])\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e50d9d82",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(30432, 10, 338)\n",
"(30432, 16)\n",
"(20502, 10, 338)\n",
"(20502, 16)\n"
]
}
],
"source": [
"# Shapes of the model inputs and one-hot targets, train first, then test.\n",
"for arr in (X_train, yy_train, X_test, yy_test):\n",
"    print(arr.shape)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "29cab8e3",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten (Flatten) (None, 3380) 0 \n",
"_________________________________________________________________\n",
"dropout (Dropout) (None, 3380) 0 \n",
"_________________________________________________________________\n",
"batch_normalization (BatchNo (None, 3380) 13520 \n",
"_________________________________________________________________\n",
"dropout_1 (Dropout) (None, 3380) 0 \n",
"_________________________________________________________________\n",
"dense (Dense) (None, 1126) 3807006 \n",
"_________________________________________________________________\n",
"dropout_2 (Dropout) (None, 1126) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 375) 422625 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 16) 6016 \n",
"=================================================================\n",
"Total params: 4,249,167\n",
"Trainable params: 4,242,407\n",
"Non-trainable params: 6,760\n",
"_________________________________________________________________\n",
"Epoch 1/30\n",
"238/238 - 2s - loss: 0.2785 - acc: 0.9139 - val_loss: 7.5618 - val_acc: 0.1295\n",
"Epoch 2/30\n",
"238/238 - 1s - loss: 0.0757 - acc: 0.9780 - val_loss: 9.8776 - val_acc: 0.1766\n",
"Epoch 3/30\n",
"238/238 - 1s - loss: 0.0565 - acc: 0.9830 - val_loss: 12.0728 - val_acc: 0.1515\n",
"Epoch 4/30\n",
"238/238 - 1s - loss: 0.0534 - acc: 0.9857 - val_loss: 14.3411 - val_acc: 0.1648\n",
"Epoch 5/30\n",
"238/238 - 1s - loss: 0.0376 - acc: 0.9897 - val_loss: 15.7724 - val_acc: 0.1598\n",
"Epoch 6/30\n",
"238/238 - 1s - loss: 0.0464 - acc: 0.9881 - val_loss: 17.0488 - val_acc: 0.1536\n",
"Epoch 7/30\n",
"238/238 - 1s - loss: 0.0417 - acc: 0.9889 - val_loss: 19.5126 - val_acc: 0.1550\n",
"Epoch 8/30\n",
"238/238 - 1s - loss: 0.0387 - acc: 0.9901 - val_loss: 19.9876 - val_acc: 0.1788\n",
"Epoch 9/30\n",
"238/238 - 1s - loss: 0.0339 - acc: 0.9908 - val_loss: 19.5807 - val_acc: 0.1572\n",
"Epoch 10/30\n",
"238/238 - 1s - loss: 0.0291 - acc: 0.9930 - val_loss: 20.1623 - val_acc: 0.1779\n",
"Epoch 11/30\n",
"238/238 - 1s - loss: 0.0433 - acc: 0.9914 - val_loss: 23.2585 - val_acc: 0.1521\n",
"Epoch 12/30\n",
"238/238 - 1s - loss: 0.0393 - acc: 0.9913 - val_loss: 25.2286 - val_acc: 0.1594\n",
"Epoch 13/30\n",
"238/238 - 1s - loss: 0.0262 - acc: 0.9946 - val_loss: 24.5537 - val_acc: 0.1794\n",
"Epoch 14/30\n",
"238/238 - 1s - loss: 0.0264 - acc: 0.9947 - val_loss: 26.0528 - val_acc: 0.1804\n",
"Epoch 15/30\n",
"238/238 - 1s - loss: 0.0484 - acc: 0.9910 - val_loss: 25.6410 - val_acc: 0.1610\n",
"Epoch 16/30\n",
"238/238 - 1s - loss: 0.0211 - acc: 0.9948 - val_loss: 28.7820 - val_acc: 0.1696\n",
"Epoch 17/30\n",
"238/238 - 1s - loss: 0.0177 - acc: 0.9957 - val_loss: 25.7378 - val_acc: 0.1955\n",
"Epoch 18/30\n",
"238/238 - 1s - loss: 0.0233 - acc: 0.9956 - val_loss: 27.1410 - val_acc: 0.1924\n",
"Epoch 19/30\n",
"238/238 - 1s - loss: 0.0380 - acc: 0.9934 - val_loss: 30.4740 - val_acc: 0.1707\n",
"Epoch 20/30\n",
"238/238 - 1s - loss: 0.0286 - acc: 0.9938 - val_loss: 27.3403 - val_acc: 0.1771\n",
"Epoch 21/30\n",
"238/238 - 1s - loss: 0.0205 - acc: 0.9954 - val_loss: 30.5033 - val_acc: 0.1706\n",
"Epoch 22/30\n",
"238/238 - 1s - loss: 0.0288 - acc: 0.9949 - val_loss: 31.7822 - val_acc: 0.1682\n",
"Epoch 23/30\n",
"238/238 - 1s - loss: 0.0309 - acc: 0.9950 - val_loss: 28.9407 - val_acc: 0.1791\n",
"Epoch 24/30\n",
"238/238 - 1s - loss: 0.0173 - acc: 0.9961 - val_loss: 32.9953 - val_acc: 0.1817\n",
"Epoch 25/30\n",
"238/238 - 1s - loss: 0.0189 - acc: 0.9965 - val_loss: 33.6316 - val_acc: 0.1817\n",
"Epoch 26/30\n",
"238/238 - 1s - loss: 0.0276 - acc: 0.9953 - val_loss: 33.3303 - val_acc: 0.1635\n",
"Epoch 27/30\n",
"238/238 - 1s - loss: 0.0243 - acc: 0.9961 - val_loss: 35.7127 - val_acc: 0.1422\n",
"Epoch 28/30\n",
"238/238 - 1s - loss: 0.0308 - acc: 0.9949 - val_loss: 33.3842 - val_acc: 0.1697\n",
"Epoch 29/30\n",
"238/238 - 1s - loss: 0.0342 - acc: 0.9952 - val_loss: 39.3381 - val_acc: 0.1698\n",
"Epoch 30/30\n",
"238/238 - 1s - loss: 0.0231 - acc: 0.9959 - val_loss: 38.9394 - val_acc: 0.1641\n",
"CPU times: user 1min 7s, sys: 26.2 s, total: 1min 33s\n",
"Wall time: 29.6 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# X_train/X_test and yy_train/yy_test already are numpy arrays; pass them as they are\n",
"# instead of making a full copy of each with np.array().\n",
"model, history = train_model(X_train, yy_train, X_test, yy_test)"
]
},
{
"cell_type": "markdown",
"id": "941c82f8",
"metadata": {},
"source": [
"# Eval"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "bdf45d51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def predict(model, entry):\n",
"    \"\"\"Predict one label for a whole recording by majority vote over its windows.\n",
"\n",
"    Args:\n",
"        model: object whose predict() returns one score row per window.\n",
"        entry: dict whose 'data' holds the windows of one recording.\n",
"\n",
"    Returns:\n",
"        The most frequent arg-max class index plus 1 (on a tie, the class seen first wins).\n",
"    \"\"\"\n",
"    votes = dict()\n",
"    for class_idx in np.argmax(model.predict(entry['data']), axis=-1):\n",
"        votes[class_idx] = votes[class_idx] + 1 if class_idx in votes else 1\n",
"    winner = max(votes, key=votes.get)\n",
"    # NOTE(review): the +1 assumes class index i stands for user i + 1 -- confirm against lb.classes_.\n",
"    return winner+1"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "5dbc1e1e",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.59 s, sys: 335 ms, total: 2.92 s\n",
"Wall time: 2.17 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Per test recording: the label decoded back from its one-hot row, and the majority-vote prediction.\n",
"ltest = [lb.inverse_transform(e['label'])[0] for e in test]\n",
"ptest = [predict(model, e) for e in test]\n",
"\n",
"# for e in test:\n",
"# print(f\"Label: {lb.inverse_transform(e['label'])[0]:2d}\")\n",
"# print(f\"Prediction: {predict(model, e):2d}\\n_______________\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "10056f7d",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.2 s, sys: 264 ms, total: 3.47 s\n",
"Wall time: 2.44 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Same as for the test recordings, on the training recordings.\n",
"ltrain = [lb.inverse_transform(e['label'])[0] for e in train]\n",
"ptrain = [predict(model, e) for e in train]\n",
"\n",
"# for e in train:\n",
"# print(f\"Label: {lb.inverse_transform(e['label'])[0]:2d}\")\n",
"# print(f\"Prediction: {predict(model, e):2d}\\n_______________\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "48aad447",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGtCAYAAADnIyVRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABVCklEQVR4nO3dfXhU533n//dXo/BgQmLJRENRCFEx3g1basuQKNoE6ghcOwEntECaxNDQDWjjbZPu7g+oN43B0IVta+p23aZuSEBp3OJs0pKsQWntIpJAY0JMwRoMacGoBIXAWIDIZkMWwcz394eGiSQQIGnO0Zwzn9d1zcWchzmf+76Zh1v3eTJ3R0RERCRqyoa7ACIiIiKDoU6MiIiIRJI6MSIiIhJJ6sSIiIhIJKkTIyIiIpGkToyIiIhEkjoxIiIiEjgz22Jmr5nZK/0sNzN7ysxeNbOUmd17s22qEyMiIiJh+ALw4A2WvxeYkns0Ak/fbIPqxIiIiEjg3H03cP4Gq3wA+KJ3+w5wu5n93I22WV7IAhbS3r17Q72UcH19fZhxIgOyd+/e0LLC/iyEWTfQZ12KnoUaZlbI39r/SPcIylWb3H3TAF5fDbT3mP5Bbt7p/l5QtJ0YERERiY5ch2UgnZYhUydGRESkRJmFOvBzM6eAiT2m35yb1y8dEyMiIlKizKxgjwJ4Dvj13FlK7wR+5O797koCjcSIiIhICMzsWeA+YJyZ/QBYA7wOwN3/Avg68D7gVeAi8Bs326Y6MSIiIiUqzN1J7v7hmyx34DcHsk11YkREREpUWVm0jyqJdulFRESkZEVqJCaVSrF161ay2SyzZs1i3rx5vZbv2rWLXbt2YWaMGjWKpUuXUl1dTVtbG01NTfn15s+fz/Tp04dcnt27d7N+/Xqy2SyLFi2isbHx5i+KQJbyij8v7p+FuNevmPLiXLdSyBuqIjs7aeDcvSgfL774ovd8/OM//qO/613v8q997Wu+e/dunz17tn/lK1/ptU5LS0v++Wc+8xn/1V/9VX/xxRf9G9/4hu/Zs8dffPFF//rXv+7Tp0/PT199DNSVK1d89uzZfvLkSb906ZI/9NBDfuzYsQFvp9iylFeceXH+LOizPnx5ca5bhPNC/a0dOXKkF+oRdtndPTq7k9ra2kgmk1RVVVFeXk5dXR0HDx7stc7o0aPzzy9dupTvYY4cOZJEIgHA5cuXC9LzTKVSTJo0iYkTJzJixAjmzp1LS0vLkLc73FnKK/68uH8W4l6/YsqLc91KIU+GYXeSmf2GuzfdfM3eOjs7qayszE9XVFTQ1tZ2zXo7d+7k+eefJ5PJsGrVqvz848ePs3nzZs6dO0djY2P+i26w0uk048ePz08nk0lSqdSQtlkMWcor/ry4fxbiXr9iyotz3UohrxCivjtpOEZi1va3wMwazWy/me3/2te+NqiNz5kzhyeeeIJFixaxffv2/PzJkyezYcMG1qxZw44dO+jq6hrU9kWiIu6fhbjXTyQMRXaxuwELpBNjZql+HoeAZH+vc/dN7j7D3WfMnz+/17KKigrOn//ZzS87OzupqKjotwx1dXUcOHDgmvkTJkxg1KhRnDp1wysZ31QymeTMmTP56XQ6TTLZb9Uik6W84s+L+2ch7vUrprw4160U8iS4kZgk8OvAQ9d5nBvMBmtqakin03R0dHDlyhX27dtHbW1tr3V6vnlaW1vzb56Ojg4ymQwAZ8+e5fTp04wbN24wxcibNm0aJ06coL29na6uLpqbm2loaBjSNoshS3nFnxf3z0Lc61dMeXGuWynkFULUR2KCOiZmB/B6d3+57wIz++ZgNphIJFi8eDEbN24km80yc+ZMqqur2bZtGzU1NdTW1tLS0sLhw4dJJBKMGTOG5cuXA3D06FGam5tJJBKUlZWxZMkSxo4dO5T6UV5ezurVq1m2bBmZTIYFCxYwZcqUIW2zGLKUV/x5cf8sxL1+xZQX57qVQl4hRP
1id+buw12G69q7d2+oBauvrw8zTmRA9u7dG1pW2J+FMOsG+qxL0Qt1SGPs2LEF+6398Y9/HPpwTKQudiciIiKFE/Wzk9SJERERKVFR78REe2eYiIiIlCyNxIiIiJSoqI/EqBMjIiJSoqLeidHuJBEREYkkjcSIiIiUqKiPxBRtJ0bXcoi2Rx55JNS8p59+OtQ8KRx91kWGT9Qvdhft0ouIiEjJKtqRGBEREQmWdieJiIhIJEW9E6PdSSIiIhJJGokREREpUVEfiVEnRkREpESpEyMiIiKRpE5MEdm9ezfr168nm82yaNEiGhsbY5MX9bpNnTqVD37wg5gZ3/72t3nhhReuWefee+9l3rx5uDunTp1iy5Yt3HXXXSxcuDC/zvjx49m8eTOtra1DKk/U2zOVSrF161ay2SyzZs1i3rx5vZbv2rWLXbt2YWaMGjWKpUuXUl1dTVtbG01NTfn15s+fz/Tp04dUFoh+e5ZyXpzrVgp5pS42nZhMJsO6detoamoimUyycOFCGhoauPPOOyOfF/W6mRkf+tCHeOqpp+js7OTRRx8llUpx5syZ/DpvetObePDBB9m4cSMXL15k7NixABw9epQNGzYAcNttt7Fu3TqOHDlSVPULOy+bzfLMM8+wcuVKKisrWbt2LbW1tVRXV+fXqa+vp6GhAYCDBw/y7LPPsmLFCqqrq3n88cdJJBJcuHCBxx57jHvuuYdEIlE09VOevluUFx5d7K4fZvZvzWy2mb2+z/wHg8hLpVJMmjSJiRMnMmLECObOnUtLS0sQUaHnRb1ub33rW+no6ODs2bNkMhn279/P3Xff3Wudd7/73XzrW9/i4sWLAPz4xz++Zjv33nsvhw8f5vLly4MuC0S/Pdva2kgmk1RVVVFeXk5dXR0HDx7stc7o0aPzzy9dupQfMh45cmS+w3L58uWCDCVHvT1LOS/OdSuFvEIws4I9hkMgnRgz+yTwv4FPAK+Y2Qd6LN4QRGY6nWb8+PH56WQySTqdDiIq9Lyo1+3222+ns7MzP93Z2cntt9/ea52qqiqqqqpYsWIFq1atYurUqddsZ8aMGbz00kuDLsdVUW/Pzs5OKisr89MVFRW92veqnTt3snLlSr785S/z8MMP5+cfP36cT33qU3z605/mox/96JBGYSD67VnKeXGuWynkSXAjMcuB6e4+H7gPeMzMfju3rN/umpk1mtl+M9u/adOmgIomxSiRSFBVVcWTTz7J5s2befjhh3uNJrzhDW9gwoQJQ96VVErmzJnDE088waJFi9i+fXt+/uTJk9mwYQNr1qxhx44ddHV1DWMpRWQ4RX0kJqhjYsrc/f8CuPsJM7sP+Bszm8QNOjHuvgm42nvxgQQmk8lex1ik02mSyeQAi12ceVGv24ULF6ioqMhPV1RUcOHChV7rdHZ2cuLECbLZLOfOneO1116jqqqK73//+wBMnz6dl19+mWw2O+hyXBX19qyoqOD8+fP56c7Ozl7t21ddXR1f/OIXr5k/YcIERo0axalTp6ipqRl0eaLenqWcF+e6lUJeIUT97KSgRmLSZnbP1Ylch2YeMA6YFkTgtGnTOHHiBO3t7XR1ddHc3Jw/sDHqeVGv2/e//32qqqq44447SCQSzJgxg1Qq1Wud1tZW7rrrLgDGjBlDVVUVZ8+ezS9/+9vfzv79+wddhp6i3p41NTWk02k6Ojq4cuUK+/bto7a2ttc6Pb9IW1tb81+kHR0dZDIZAM6ePcvp06cZN27coMsC0W/PUs6Lc91KIU+CG4n5deBKzxnufgX4dTP7bBCB5eXlrF69mmXLlpHJZFiwYAFTpkwJIir0vKjXLZvN8qUvfYlPfOITlJWV8eKLL3L69GnmzZvHyZMnSaVSHDlyhLe97W2sXr2abDbLV7/6VX7yk58AUFlZSUVFBceOHSvK+oWdl0gkWLx4MRs3biSbzTJz5kyqq6vZtm0bNTU11NbW0tLSwuHDh0kkEowZM4bly5cD3Wd7NT
c3k0gkKCsrY8mSJfkzwYqlfsrTd4vywhP1kRhzH9BemzAVbcHk5h555JFQ855++ulQ88K2d+/e0LLq6+tDyxKRa4T
"text/plain": [
"<Figure size 720x504 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 1 0.33 0.33 0.33 3\n",
" 2 0.00 0.00 0.00 3\n",
" 3 0.00 0.00 0.00 3\n",
" 4 0.00 0.00 0.00 3\n",
" 5 0.14 0.33 0.20 3\n",
" 6 0.00 0.00 0.00 3\n",
" 7 0.00 0.00 0.00 3\n",
" 8 0.00 0.00 0.00 3\n",
" 9 0.75 1.00 0.86 3\n",
" 10 0.00 0.00 0.00 3\n",
" 11 0.00 0.00 0.00 3\n",
" 12 0.38 1.00 0.55 3\n",
" 13 0.50 0.33 0.40 3\n",
" 14 0.00 0.00 0.00 3\n",
" 15 0.17 0.33 0.22 3\n",
" 16 0.00 0.00 0.00 3\n",
"\n",
" accuracy 0.21 48\n",
" macro avg 0.14 0.21 0.16 48\n",
"weighted avg 0.14 0.21 0.16 48\n",
"\n",
"CPU times: user 649 ms, sys: 204 ms, total: 853 ms\n",
"Wall time: 623 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sn\n",
"\n",
"from sklearn.metrics import classification_report\n",
"\n",
"# Ordered list of the 16 user labels. A set has no defined order and is not a valid\n",
"# axis for a DataFrame, so the heatmap ticks could not be trusted to line up.\n",
"set_digits = list(range(1, 17))\n",
"\n",
"# labels= pins the row/column order and keeps the matrix 16x16 even when a user\n",
"# shows up in neither the true nor the predicted labels.\n",
"train_cm = confusion_matrix(ltrain, ptrain, labels=set_digits, normalize='true')\n",
"test_cm = confusion_matrix(ltest, ptest, labels=set_digits, normalize='true')\n",
"\n",
"df_cm = pd.DataFrame(test_cm, index=set_digits, columns=set_digits)\n",
"plt.figure(figsize = (10,7))\n",
"sn_plot = sn.heatmap(df_cm, annot=True, cmap=\"Greys\")\n",
"plt.ylabel(\"True Label\")\n",
"plt.xlabel(\"Predicted Label\")\n",
"plt.show()\n",
"\n",
"print(classification_report(ltest, ptest, zero_division=0))"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "9c334bde",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cenario: SYN\n",
"win_sz: 10\n",
"stride_sz: 5\n",
"dense_steps: 3\n",
"layer_count: 3\n",
"drop_count: 0.1\n"
]
}
],
"source": [
"# Echo the run configuration next to the results.\n",
"for label, value in (('cenario', cenario), ('win_sz', win_sz), ('stride_sz', stride_sz),\n",
"                     ('dense_steps', dense_steps), ('layer_count', layer_count),\n",
"                     ('drop_count', drop_count)):\n",
"    print(f'{label}: {value}')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "15fa9b96",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): leftover exit() at the end of the notebook -- presumably meant to end the\n",
"# session (and release the GPU) after a full run; confirm it is still wanted.\n",
"exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}