import os

# GPU environment — must be set before TensorFlow is imported anywhere.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # this is required
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check "gpustat" in a terminal.

# Paths.
glob_path = '/opt/iui-datarelease3-sose2021/*.csv'  # raw per-recording CSVs
pickle_file = '../data.pickle'                      # cached parse of all CSVs
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Config
# Scenario key: 'S'orting/'J'enga + HeightNormalization Y/N + ArmNormalization Y/N.
# Possibilities: 'SYY', 'SYN', 'SNY', 'SNN',
#                'JYY', 'JYN', 'JNY', 'JNN'
cenario = 'SYN'  # (sic) misspelled name kept — later cells reference `cenario`

win_sz = 30    # sliding-window length in frames
stride_sz = 2  # sliding-window stride in frames

# divisor for neuron count step downs (hard to describe), e.g.
# dense_steps = 3: layer1=900, layer2=300, layer3=100, layer4=33...
dense_steps = 3
# amount of dense/dropout layers
layer_count = 5
# how much to drop
drop_count = 0.2

# Helper Functions
from matplotlib import pyplot as plt

def pplot(dd):
    """Plot each entry along axis 0 of `dd` on a 3-column grid of subplots.

    Fixes over the original:
    - row count is now the exact ceiling of n/3 (the old ``int(n/3)+1`` left a
      blank row whenever n was a multiple of 3);
    - ``squeeze=False`` keeps `axs` two-dimensional, so n < 3 no longer crashes
      on the double index (``plt.subplots(1, 3)`` returns a 1-D array otherwise).
    """
    n = dd.shape[0]
    rows = max(1, -(-n // 3))  # ceil(n / 3), at least one row
    fig, axs = plt.subplots(rows, 3, figsize=(9, 9 * rows), squeeze=False)
    for i in range(n):
        axs[i // 3][i % 3].plot(dd[i])

# Loading Data
from glob import glob
import pandas as pd
from tqdm import tqdm

def dl_from_blob(filename, user_filter=None):
    """Parse every CSV matched by the module-level `glob_path` into record dicts.

    NOTE(review): `filename` is ignored — the original immediately shadowed it
    inside the loop; it is kept only for call-site compatibility (`load_data`
    passes `glob_path` here).

    Expected file-name shape:
        P<user>_Scenario<name>_HeightNormalization<bool>_ArmNormalization<bool>_
        Repetition<n>_Session<n>.csv

    user_filter: if truthy, keep only recordings of that user id.
    Returns a list of dicts with keys: filename, user, scenario, heightnorm,
    armnorm, rep, session, data (the loaded DataFrame).
    """
    dic_data = []
    for path in tqdm(glob(glob_path)):
        stem = path.split('/')[-1].split('.')[0]
        parts = stem.split('_')
        user = int(parts[0][1:])  # strip the leading 'P'
        if user_filter and user != user_filter:
            continue
        dic_data.append({
            'filename': path,
            'user': user,
            'scenario': parts[1][len('Scenario'):],
            'heightnorm': parts[2][len('HeightNormalization'):] == 'True',
            'armnorm': parts[3][len('ArmNormalization'):] == 'True',
            'rep': int(parts[4][len('Repetition'):]),
            'session': int(parts[5][len('Session'):]),
            'data': pd.read_csv(path),
        })
    return dic_data
import pickle

def save_pickle(f, structure):
    """Serialize `structure` to path `f` with pickle (handle closed via `with`)."""
    with open(f, 'wb') as fh:
        pickle.dump(structure, fh)

def load_pickles(f) -> list:
    """Deserialize and return the object stored at path `f`.

    Bug fix: the original ignored `f` and always opened the global
    `pickle_file`; it only worked because every caller happened to pass that
    same path. NOTE(review): unpickling untrusted files can execute arbitrary
    code — only point this at the local cache.
    """
    with open(f, 'rb') as fh:
        return pickle.load(fh)

def load_data() -> list:
    """Return the cached record list, rebuilding the cache from CSVs if absent."""
    if os.path.isfile(pickle_file):
        print(f'{pickle_file} found...')
        return load_pickles(pickle_file)
    print(f'Didn\'t find {pickle_file}...')
    all_data = dl_from_blob(glob_path)
    print(f'Creating {pickle_file}...')
    save_pickle(pickle_file, all_data)
    return all_data

print("Loading data...")
dic_data = load_data()
print(len(dic_data))

# Categorized Data — key = scenario initial + HeightNorm Y/N + ArmNorm Y/N.
cdata = {k: [] for k in ('SYY', 'SYN', 'SNY', 'SNN',
                         'JYY', 'JYN', 'JNY', 'JNN')}

for d in dic_data:
    prefix = {'Sorting': 'S', 'Jenga': 'J'}.get(d['scenario'])
    if prefix is None:
        continue  # matches the original: records of other scenarios are dropped
    key = prefix + ('Y' if d['heightnorm'] else 'N') + ('Y' if d['armnorm'] else 'N')
    cdata[key].append(d)
def drop(entry, data=True) -> pd.DataFrame:
    """Return a copy of a recording's frame without the metadata columns.

    entry: a record dict (data=True, frame under entry['data']) or a bare
    DataFrame (data=False). The input is never mutated: the frame is
    deep-copied via a pickle round-trip before dropping.
    """
    droptable = ['participantID', 'FrameID', 'Scenario', 'HeightNormalization', 'ArmNormalization', 'Repetition', 'Session', 'Unnamed: 0']
    frame = entry['data'] if data else entry
    frame = pickle.loads(pickle.dumps(frame))  # deep copy, as in the rest of the pipeline
    return frame.drop(droptable, axis=1)

import numpy as np
right_Hand_ident = 'right_Hand'
left_Hand_ident = 'left_hand'

def rem_low_acc(entry, data=True) -> pd.DataFrame:
    """Binarize the hand-tracking-accuracy columns and NaN-out low-accuracy rows.

    'High' accuracy becomes 1.0, anything else 0.0. For every row where a
    hand's accuracy is 0.0, all of that hand's feature columns (those whose
    names contain the respective ident substring) are set to NaN.

    entry: record dict (data=True) or bare DataFrame (data=False); the input
    frame is deep-copied, never mutated.
    """
    frame = entry['data'] if data else entry
    frame = pickle.loads(pickle.dumps(frame))

    # Note: the accuracy columns themselves never match the ident substrings
    # (case differs), so the two hands can be processed independently.
    for acc_col, ident in (('LeftHandTrackingAccuracy', left_Hand_ident),
                           ('RightHandTrackingAccuracy', right_Hand_ident)):
        frame[acc_col] = (frame[acc_col] == 'High') * 1.0
        hand_cols = [c for c in frame if ident in c]
        frame.loc[frame[acc_col] == 0.0, hand_cols] = np.nan
    return frame
from tensorflow.keras.preprocessing.sequence import pad_sequences

def pad(entry, data=True) -> pd.DataFrame:
    """Pre-pad a recording with NaNs up to the next multiple of `stride_sz`.

    The frame is transposed so each column is one sequence for pad_sequences,
    then transposed back. Row 0 is overwritten with zeros afterwards so
    downstream interpolation/windowing has a defined start value.
    The input frame is deep-copied, never mutated.
    """
    frame = entry['data'] if data else entry
    frame = pickle.loads(pickle.dumps(frame))

    cols = frame.columns
    target_len = (int(frame.shape[0] / stride_sz) + 1) * stride_sz
    padded = pad_sequences(frame.T.to_numpy(),
                           maxlen=target_len,
                           dtype='float64',
                           padding='pre',
                           truncating='post',
                           value=np.nan)
    result = pd.DataFrame(padded.T, columns=cols)
    result.loc[0] = [0 for _ in cols]
    return result

def interpol(entry, data=True) -> pd.DataFrame:
    """Linearly interpolate NaN gaps along the time axis (axis 0).

    NOTE(review): the pipeline's call to this step is currently commented out.
    """
    frame = entry['data'] if data else entry
    frame = pickle.loads(pickle.dumps(frame))
    return frame.interpolate(method='linear', axis=0)

from tensorflow.keras.preprocessing import timeseries_dataset_from_array

def slicing(entry, label, data=True):
    """Cut a recording into windows of length `win_sz` with stride `stride_sz`.

    Every window gets the same target `label` (one per source row). Returns a
    batched (batch_size=8) dataset of (window, label) pairs.
    """
    frame = entry['data'] if data else entry
    frame = pickle.loads(pickle.dumps(frame))

    return timeseries_dataset_from_array(
        data=frame,
        targets=[label for _ in range(frame.shape[0])],
        sequence_length=win_sz,
        sequence_stride=stride_sz,
        batch_size=8,
        seed=177013,
    )
classes = 16  # dynamic

def preproc(data):
    """Run `preproc_entry` over every record, with a progress bar."""
    return [preproc_entry(e) for e in tqdm(data)]

def preproc_entry(entry, data=True):
    """Pipeline for one record: drop metadata -> mask low accuracy -> pad -> window.

    Works on a deep copy, so the input record is never mutated; each helper
    additionally deep-copies the frame it receives. The result's 'data' field
    is the windowed dataset labelled with the record's user id.
    """
    stage = pickle.loads(pickle.dumps(entry))
    stage['data'] = drop(stage, data)
    stage['data'] = rem_low_acc(stage, data)
    stage['data'] = pad(stage, data)
    # interpolation step intentionally disabled:
    # stage['data'] = interpol(stage, data)
    stage['data'] = slicing(stage, stage['user'], data)
    return stage

pdata = preproc(cdata[cenario])

# Split recordings by session: session 1 -> train, session 2 -> test.
train = np.array([x['data'] for x in pdata if x['session'] == 1])
test = np.array([x['data'] for x in pdata if x['session'] == 2])

len(train), len(test)
X_train = []
y_train = []

X_test = []
y_test = []

# NOTE(review): this rebinds `test` (previously the array of session-2
# records) to a list of per-recording dicts, exactly as the original cell
# did; the `train` counterpart stays disabled.
# train = []
test = []

for rec in pdata:
    if rec['session'] == 1:
        # train.append({'label': rec['user'], 'data': []})
        for window, label in rec['data'].unbatch().as_numpy_iterator():
            X_train.append(window)
            y_train.append(label)
            # train[-1]['data'].append(window)
    if rec['session'] == 2:
        test.append({'label': rec['user'], 'data': []})
        for window, label in rec['data'].unbatch().as_numpy_iterator():
            X_test.append(window)
            y_test.append(label)
            # test[-1]['data'].append(window)

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Keep only windows free of NaNs (padding that interpolation would otherwise
# have filled) — the XX_*/yy_* arrays are the cleaned training material.
XX_train = []
yy_train = []
XX_test = []
yy_test = []

for window, label in zip(X_train, y_train):
    if not np.isnan(window).any():
        XX_train.append(window)
        yy_train.append(label)

for window, label in zip(X_test, y_test):
    if not np.isnan(window).any():
        XX_test.append(window)
        yy_test.append(label)

XX_train = np.array(XX_train)
yy_train = np.array(yy_train)
XX_test = np.array(XX_test)
yy_test = np.array(yy_test)

print(np.array(XX_train).shape, X_train.shape, np.array(yy_train).shape, np.array(y_train).shape)
print(np.array(XX_test).shape, X_test.shape, np.array(yy_test).shape, np.array(y_test).shape)
"bd805e81", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "
---|---|
0 | \n", "11 | \n", "
1 | \n", "11 | \n", "
2 | \n", "11 | \n", "
3 | \n", "11 | \n", "
4 | \n", "11 | \n", "
... | \n", "... | \n", "
37897 | \n", "9 | \n", "
37898 | \n", "9 | \n", "
37899 | \n", "9 | \n", "
37900 | \n", "9 | \n", "
37901 | \n", "9 | \n", "
37902 rows × 1 columns
\n", "\n", " | 0 | \n", "
---|---|
0 | \n", "\n", " |
16 | \n", "0.113055 | \n", "
11 | \n", "0.110680 | \n", "
10 | \n", "0.103662 | \n", "
3 | \n", "0.102739 | \n", "
5 | \n", "0.095404 | \n", "
13 | \n", "0.069469 | \n", "
15 | \n", "0.061738 | \n", "
7 | \n", "0.048625 | \n", "
9 | \n", "0.044826 | \n", "
6 | \n", "0.043955 | \n", "
14 | \n", "0.043375 | \n", "
1 | \n", "0.038890 | \n", "
4 | \n", "0.036489 | \n", "
8 | \n", "0.035433 | \n", "
12 | \n", "0.032030 | \n", "
2 | \n", "0.019630 | \n", "