iui-group-l-name-zensiert/1-first-project/jw/j_Data_Norm_wth_SW.ipynb

383 lines
23 KiB
Plaintext
Raw Normal View History

2021-06-08 16:39:33 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "650490a6",
"metadata": {},
"outputs": [],
"source": [
"# Needed Imports\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import os\n",
"import pickle\n",
"import matplotlib.pyplot as plt\n",
"from math import isqrt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1f88b35",
"metadata": {},
"outputs": [],
"source": [
"# GPU setup for TensorFlow, passed through environment variables.\n",
"os.environ.update({\n",
"    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',  # this is required\n",
"    'CUDA_VISIBLE_DEVICES': '2',  # '0' for GPU0, '1' for GPU1 or '2' for GPU2; check \"gpustat\" in a terminal\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "af650242",
"metadata": {},
"outputs": [],
"source": [
"# Dataset layout and pickle-cache locations used by the loading/saving functions\n",
"delim = ';'  # field separator handed to pd.read_csv\n",
"user_count = 100  # user folders 0 .. user_count - 1 are read\n",
"base_path = '/opt/iui-datarelease1-sose2021/'  # root folder with one sub-folder per user\n",
"Xpickle_file, ypickle_file = './X.pickle', './y.pickle'  # cached samples / cached labels\n",
"\n",
"# Read the cached samples and labels back from their pickle files\n",
"def load_pickles(x_path=None, y_path=None):\n",
"    \"\"\"Load the pickled samples and labels and return them as NumPy arrays.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x_path, y_path : str, optional\n",
"        Pickle files to read. When omitted, the module-level\n",
"        ``Xpickle_file`` / ``ypickle_file`` are used, so a bare\n",
"        ``load_pickles()`` call reads the default cache files.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(X, y)``: the unpickled samples and the unpickled labels, each\n",
"        converted with ``np.asarray`` (labels as ``str``).\n",
"    \"\"\"\n",
"    x_path = Xpickle_file if x_path is None else x_path\n",
"    y_path = ypickle_file if y_path is None else y_path\n",
"\n",
"    # SECURITY: pickle.load can execute arbitrary code - only read pickle\n",
"    # files that were written by this project.\n",
"    # The with-blocks close the handles even when unpickling raises.\n",
"    with open(x_path, 'rb') as x_file:\n",
"        X = pickle.load(x_file)\n",
"    with open(y_path, 'rb') as y_file:\n",
"        y = pickle.load(y_file)\n",
"\n",
"    # NOTE(review): NumPy should resolve dtype=pd.DataFrame to the generic object dtype - confirm\n",
"    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1b1469fb",
"metadata": {},
"outputs": [],
"source": [
"# Persist the samples and labels as pickle files\n",
"def save_pickle(data=None, label=None, x_path=None, y_path=None):\n",
"    \"\"\"Write samples and labels to their pickle files.\n",
"\n",
"    Every argument is optional, so a bare ``save_pickle()`` dumps the\n",
"    notebook globals ``X`` and ``y`` to ``Xpickle_file`` / ``ypickle_file``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : object, optional\n",
"        Samples to pickle; defaults to the global ``X``.\n",
"    label : object, optional\n",
"        Labels to pickle; defaults to the global ``y``.\n",
"    x_path, y_path : str, optional\n",
"        Target files; default to ``Xpickle_file`` / ``ypickle_file``.\n",
"    \"\"\"\n",
"    # The globals are only looked up when the matching argument is omitted\n",
"    data = X if data is None else data\n",
"    label = y if label is None else label\n",
"    x_path = Xpickle_file if x_path is None else x_path\n",
"    y_path = ypickle_file if y_path is None else y_path\n",
"\n",
"    # The with-blocks flush and close the files even when pickling raises\n",
"    with open(x_path, 'wb') as x_file:\n",
"        pickle.dump(data, x_file)\n",
"    with open(y_path, 'wb') as y_file:\n",
"        pickle.dump(label, y_file)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bd7d0ecd",
"metadata": {},
"outputs": [],
"source": [
"# Load the dataset: reuse the pickle cache when present, otherwise parse every user's CSV files\n",
"def load_data():\n",
"    \"\"\"Return the samples and their labels as two NumPy arrays.\n",
"\n",
"    When both ``Xpickle_file`` and ``ypickle_file`` exist, their content is\n",
"    returned through ``load_pickles()``. Otherwise every file found in\n",
"    ``<base_path>/<user>/split_letters_csv/`` for users ``0 .. user_count - 1``\n",
"    is parsed with ``pd.read_csv`` using ``delim`` as separator.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(data, label)``: one DataFrame per file, and per file the first\n",
"        alphabetic character of its name as label (``str`` array).\n",
"\n",
"    Raises\n",
"    ------\n",
"    IndexError\n",
"        If a file name contains no alphabetic character.\n",
"    \"\"\"\n",
"    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
"        return load_pickles()\n",
"\n",
"    data = []\n",
"    label = []\n",
"    for user in range(user_count):\n",
"        user_path = os.path.join(base_path, str(user), 'split_letters_csv')\n",
"        # os.listdir returns entries in arbitrary order; sorting makes the sample order reproducible\n",
"        for file in sorted(os.listdir(user_path)):\n",
"            letter = [ch for ch in file if ch.isalpha()][0]\n",
"            # sep is passed by keyword: pandas 2.x no longer accepts it positionally\n",
"            data.append(pd.read_csv(os.path.join(user_path, file), sep=delim))\n",
"            label.append(letter)\n",
"    # NOTE(review): NumPy should resolve dtype=pd.DataFrame to the generic object dtype - confirm\n",
"    return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9d1991fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(13102, 13102)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the dataset, then display how many samples and labels were loaded\n",
"X, y = load_data()\n",
"tuple(map(len, (X, y)))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d1faf687",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(13102,)\n",
"(13102,)\n"
]
}
],
"source": [
"# Print the shape of the sample array, then the shape of the label array\n",
"for _arr in (X, y):\n",
"    print(_arr.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "752650c3",
"metadata": {},
"outputs": [],
"source": [
"# Cumulative distribution of the sample lengths: which fraction of the samples has at most a given length\n",
"X_len = np.asarray([len(sample) for sample in X])\n",
"sq_xlen = pd.Series(X_len)\n",
"ptiles = [x*0.01 for x in range(100)]\n",
"l = [sq_xlen.quantile(p) for p in ptiles]  # sample length at each percentile\n",
"plt.plot(l, ptiles)\n",
"# Summary statistics with the upper percentiles (90 % .. 99 %) listed individually\n",
"sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "09e4c32f",
"metadata": {},
"outputs": [],
"source": [
"# Drop outliers: keep only the samples whose length does not exceed the 99th-percentile length\n",
"threshold_p = 0.99\n",
"threshold = int(sq_xlen.quantile(threshold_p))\n",
"len_mask = np.nonzero(X_len <= threshold)  # positions of the samples to keep\n",
"\n",
"X_filter, y_filter = X[len_mask], y[len_mask]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "ab5cc1ec",
"metadata": {},
"outputs": [],
"source": [
"# Sliding Window Function\n",
"def sliding_window(data, window_sz=10, thresh=70):\n",
"    \"\"\"Return the window means of the first sample's force signal that reach ``thresh``.\n",
"\n",
"    The ``'Force'`` entry of ``data[0]`` is cut into consecutive,\n",
"    non-overlapping windows of ``window_sz`` values (the last window may be\n",
"    shorter) and the mean of every window is compared with ``thresh``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : sequence\n",
"        Only ``data[0]['Force']`` is read.\n",
"    window_sz : int, optional\n",
"        Number of values per window (default 10).\n",
"    thresh : float, optional\n",
"        Smallest window mean that is kept (default 70).\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of float\n",
"        Means of the windows whose mean is ``>= thresh``, in signal order;\n",
"        empty when no window qualifies or the signal is empty.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``window_sz`` is smaller than 1.\n",
"    \"\"\"\n",
"    if window_sz < 1:\n",
"        raise ValueError('window_sz must be a positive integer')\n",
"\n",
"    # Plain float array: values are then addressed by position, not by index label\n",
"    input_data = np.asarray(data[0]['Force'], dtype=float)\n",
"\n",
"    data_above_thresh = []\n",
"    for start in range(0, len(input_data), window_sz):\n",
"        window = input_data[start:start + window_sz]  # slicing clips the last window\n",
"        # Divide by the real window length so a short last window still yields a true mean\n",
"        window_mean = float(window.sum() / len(window))\n",
"        if window_mean >= thresh:\n",
"            data_above_thresh.append(window_mean)\n",
"\n",
"    return data_above_thresh"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "d2bcca2e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f08c64055e0>]"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAApV0lEQVR4nO2de6xlV33fP7/zuI+587RnGA/2GNvJpNRIwbgjQ5oUkZACdqSaSBSZP8BCVK4aaBOpreokUqCJoqZRk0pIhIgICxNRwCUP3MgNOC4tTVIeY2KMbYI9gI1nPPbMeDyve2fuea3+sdY+Z5895z7OOftx7t7fj3S191l7n33Wvufe7/md7/qt3zLnHEIIIapBregOCCGEyA+JvhBCVAiJvhBCVAiJvhBCVAiJvhBCVIhG0R1Yj71797obbrih6G4IIcSW4tFHHz3tnNs36thMi/4NN9zAkSNHiu6GEEJsKczsubWOyd4RQogKIdEXQogKIdEXQogKIdEXQogKIdEXQogKIdEXQogKIdEXQogKIdEXYhKWX4a/+wyoNLnYYkj0hZiE//tf4Iu/BMc0eVBsLST6QoxLtw2PP+D3n/rzQrsixLhI9IUYlx/8b1g5Ddv2wpN/fqXF8/cPwV/+WhE9E2JDJPpCjMuLj/vtT/8bOH8MLpwYHGstw1/8CnztY3D+xMinC1EkEn0hxuWV52BpH+z9Cf/4wouDY9/6NFx8ye//8Kv5902IDZDoCzEuZ38Eu6+HHdf4x/FI/+RT/gNhcQ/88P8U0z8h1mGmSysLMZOc/REc+EnYccA/jov+xVOw/Rq46kY4+lfQWYXGfDH9FGIEivSFGIdeD8497yP9pX1gtWF7Z/kkLO2Fw+/3Ns+R+4rr60Y88afwybdDt1N0T0SOSPSFGIeLL0K3BbtfA7U6bN8/ItJ/Fdz0s3DDP4G/+ehsTuDqtuEL74fnvwYnnyy6NyJHJPpCjMPZH/nt7tf47Y5rBpG+cyHS3wdm8LpfhAsvwCs/LKav6/Hknw/2n/9GYd0Q+SPRF2IcTj/jt7uv99sdrx6I/uoF6Fz2kT7A9T/ltz/6er593AzHvgnNJVh6ld8XlUGiL8Q4PPMlP1B79Y/7xzuu8dF/ZxWWT/m2pSD6+14LC7vgR/+vmL6ux+mnYe8hOHgbPD+DH0oiMzYUfTM7aGZfMbOnzOxJM/vl0H6VmT1sZs+E7Z7Qbmb2UTM7amaPm9mtsWvdHc5/xszuzu62hMiA9iU4+gi89hegFv51dh6A1fPwe//AD/ACbN/nt7UaHHzjbEbSLx/1ov/qW+CVZ/2kMlEJNhPpd4B/65y7GXgT8EEzuxm4F3jEOXcIeCQ8BrgdOBR+7gE+Dv5DAvgw8EbgNuDD0QeFEFuCZ/8a2ite9CPe8F5v41x6xadowiDSB9hzA5x/IddubkhrxX9AXX1oMDYxa30UmbGh6DvnTjjnvhX2LwDfBa4F7gTuD6fdD7wz7N8JfNp5vgbsNrMDwNuBh51zZ5xzrwAPA+9I82aEyJRoEPdVNw/adlwD//xTfv/vH/LbpX2D44t74PI56HVz6eKmOPN9v93747DzWr8ffUsRpWcsT9/MbgDeAHwd2O+ci3LVXgT2h/1rgfhf0LHQtlZ78jXuMbMjZnbk1KlT43QvNVZaHY6fvVTIa4sZ5tIZv9121XD7jmtg1/VBTM3n6Ucs7gGcF/5ZIRqM3vsTsOs6v3/ueHH9EbmyadE3s+3AnwC/4pw7Hz/mnHNAKsnIzrlPOOcOO+cO79u3b+MnpMw3fniGm3/jS7z5d7/C8qomrYgYl876jJdRM2x3hfjl9XdBvTloXwwO5qVXMu/epolSSK+6CXa+GjA4d6zQLon82JTom1kTL/ifcc79aWh+Kdg2hO3J0H4cOBh7+nWhba32meJ7L10AoNtzXGrP0FdyUTwrZ66M8iP+8b+GG98Mt//ucHtf9M9m2rWxOH8CFnbD3JL/gN
q+31cLFZVgM9k7BnwS+K5z7vdjhx4Eogycu4EvxtrfF7J43gScCzbQl4C3mdmeMID7ttA2U7jY7MneLM6kFMVx6Qws7h597LW/AHf/D1jYOdzeF/0zmXZtLM6/ECL8wK7rFOlXiM0UXPtp4L3Ad8zssdD2a8DvAA+Y2QeA54B3h2MPAXcAR4EV4P0AzrkzZvZbQJS/9pvOuRn6T/DEdV6aL4ZYOQOLa0T6axGdP0v2zvnjCdG/Fl56qrj+iFzZUPSdc38N2BqH3zrifAd8cI1r3QfMcAWq4ehekb4Y4tIrA+9+s8yip3/hhK8SGrHrIDz9ZR/l2Fr/6qIsaEZugp4bvS+Et3fGjPQXdoXnzojod1pw8aQvHxGxfT90LvkyEqL0SPQTDHn6Un0R0et54V4ccz5hvQHzu2ZH9C++CLhheyeaV7BcTIq0yBeJfgJ5+mIkq+fA9dbO3lmPxd2zI/rRur07YzZVVDbi4skrzxelQ6KfIO7jd6X6ImIl5ByMa++A/3YwM6IfsqR3Hhi0RWUjFOlXAol+gmFPX6IvAlGe/USR/gyJfiTs2/cP2qJS0MuK9KuARD9BXOidRF9ERHn243r64MsyXHgp3f5MymqYTD+/Y9C2bS9gftUvUXok+uugcVzRZxp7Z//r4NyPBtcoktWLUGsOl5KoN/w3GEX6lUCinyCesRNF/X/7/dP8hy88XlSXxCywVrG1zXDtP/LbF76VXn8mpXUR5rdf2b70Kg3kVgSJfoIhT7/nt3979GU+f0SlZyvNyhnABnn343DgFv/c4zMg+qsXYW7Hle1LezWQWxEk+gkcV0b6UZs8/gpz6RWfelmrj//chZ2+jPHxR1Pv1tisFelvf5VEvyJI9BOMyt6J2uTxV5hLZyYbxI048Hp46cn0+jMpqxdgbi17R6JfBST6CYarbEZbN7QVFWSSYmtx9tzgc+S77dS6NBFrRfoLO/2xyNMUpUWin2BUwTXnrjwmKsaldWrpb4Y9r/EzeotelnD14uhIf24JcL4Gjyg1Ev0Ew2UYXGJbRI/ETHDplekjfYBXnk2jN5PTujicox/R3BaOL+fbH5E7Ev0Eo6psJm0eUUFWJii2Fmf3a/z2lefS6c+krBnph7bWxXz7I3JHop9gVJXN5ICuqBidFrQuTGfv7Hy1nxRVZKTvnL+PUZH+3JLfKtIvPRL9BL0RA7ny9CtOVDdnmki/VofdB+FsgZF++5IfVxg1kNsX/ZV8+yRyR6KfYF1PX4kN1WSa2bhx9twAp743dXcmJrJuZO9UGol+griF070iT1+RfiVJI9IHOPR2OPkUnCiopEe0MpbsnUoj0U8wyt5Rnn7FOf+C30Z15yflJ98NjQX41v3T92kS1o30JfpVQaKfwI3I09eM3Irz/Dd8SuO+1053nW1XwU1vgWf/JpVujc1qEP11PX3ZO2VHop8gruuDDwDV3qk0z3/dV8qsN6a/1s5rwzq1BdCP9GXvVBmJfoIhe6eX2Erzq8fqRXjxO3Dwjelcb/t+P0bQWU3neuPQ9/RHRPqNRcAk+hVAop9gdME1efqV5cS3wXXh4G3pXG9HWKawiNr1keiP8vRrNR/tS/RLj0Q/weiCa9FWol85Vl72252vTud626/x24sFLJ8Y3cu2q0cfn1uSp18BJPoJ3IhIX7V3Kkw7TFaKatNMS7QIeVGiP7cDmgujj88tDe5XlBaJfoKRVTZHHBMVIbI7ooHOadkRIv0LBQzmLp+CpTWifJC9UxEk+glGF1xT7Z3KEolgWpH+0j7Aion0l0/Dtr1rH2/K3qkCEv0E8WjeaUauiOyOtCL9etN76oXYO6f9WrhroUi/Ekj0k6yTvaM8/QrSWvazaCdZG3ctdlwDFwqK9CX6lUein6DnHGZhPyqwphm51aW9kp61E7G010fdeeLcxvbO3HaJfgWQ6CfoOWjULOwrT7/ytFbSs3YiGovQvpzuNTdi9Tz02puI9OXplx2JfoKec9RCqJ+so6
81oytI62L6kX5zIf+1aJfDN4t1B3IXfc19UWok+gkcg0hfpZUF7RWYS1n0i4j0I9Ff2rf2Oc1F6FzWhJSSI9FP4Jy
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Mean force of consecutive, non-overlapping windows of one sample, drawn over the raw signal\n",
"input_data = X[5]['Force']\n",
"window_sz = 10\n",
"thresh = 70  # compared against the window means in a later cell\n",
"\n",
"force_values = input_data.to_numpy()  # positional access, independent of the index labels\n",
"window_starts = list(range(0, len(force_values), window_sz))\n",
"\n",
"win_above_thresh = []  # despite the name: the mean of every window, not only those above thresh\n",
"for i in window_starts:\n",
"    window = force_values[i:i + window_sz]  # slicing clips the last window at the end of the signal\n",
"    # Divide by the actual window length so a short last window still yields a true mean\n",
"    win_above_thresh.append(window.sum() / len(window))\n",
"\n",
"# Draw each mean at its window's first row so both curves share the same x axis\n",
"plt.plot(window_starts, win_above_thresh)\n",
"plt.plot(input_data)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "79e01286",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(array([14, 15, 16, 17, 19, 20, 21]),)\n"
]
}
],
"source": [
"# Positions of the windows whose mean force is strictly above the threshold\n",
"_blep = np.nonzero(np.asarray(win_above_thresh) > thresh)\n",
"print(_blep)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "1503bb77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"150\n"
]
}
],
"source": [
"# First row of the second above-threshold window: window position times the window length\n",
"print(_blep[0][1] * window_sz)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "eb6d91d5",
"metadata": {},
"outputs": [],
"source": [
"# _blep[0] holds window positions inside the force signal of X[5]; it is not an index into the\n",
"# sample array X, so it is mapped to rows of that one sample here.\n",
"# NOTE(review): presumed intent is to keep the rows of X[5] lying in above-threshold windows - confirm\n",
"_row_window = np.arange(len(X[5])) // window_sz  # window position of every row\n",
"X_new = X[5][np.isin(_row_window, _blep[0])]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "6cea3dbc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(13102,)\n",
"(7,)\n"
]
}
],
"source": [
"# Print the shape of the full sample array, then the shape of the selection\n",
"for _arr in (X, X_new):\n",
"    print(_arr.shape)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "35f783ed",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 0.0\n",
"1 0.0\n",
"2 0.0\n",
"3 0.0\n",
"4 0.0\n",
" ... \n",
"307 0.0\n",
"308 0.0\n",
"309 0.0\n",
"310 0.0\n",
"311 0.0\n",
"Name: Force, Length: 312, dtype: float64\n"
]
}
],
"source": [
"# Print the force column of the inspected sample\n",
"print(X[5].loc[:, 'Force'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3a05a73",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}