# Needed imports
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import pickle
import matplotlib.pyplot as plt
from math import isqrt

# GPU configuration
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # this is required
os.environ['CUDA_VISIBLE_DEVICES'] = '2'  # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check "gpustat" in a terminal.

# Dataset configuration
delim = ';'                                    # column separator of the per-letter CSV files
user_count = 100                               # user directories are named 0 .. user_count - 1
base_path = '/opt/iui-datarelease1-sose2021/'  # root directory holding one sub-directory per user
Xpickle_file = './X.pickle'                    # cache file for the samples
ypickle_file = './y.pickle'                    # cache file for the labels


def _to_object_array(items):
    """Pack ``items`` into a 1-D object array, one element per item.

    ``np.asarray`` may unpack equally shaped DataFrames into an N-D array;
    filling element by element always keeps one DataFrame per slot.
    """
    packed = np.empty(len(items), dtype=object)
    for pos, item in enumerate(items):
        packed[pos] = item
    return packed


def load_pickles():
    """Read the cached samples and labels from the two pickle files.

    Returns:
        tuple: ``(X, y)`` where ``X`` is an object array of samples and ``y``
        a string array of labels.
    """
    # NOTE: pickle.load can execute arbitrary code; only load caches that
    # were written by this notebook.
    with open(Xpickle_file, 'rb') as handle:
        X = pickle.load(handle)
    with open(ypickle_file, 'rb') as handle:
        y = pickle.load(handle)

    return (np.asarray(X, dtype=object), np.asarray(y, dtype=str))


def save_pickle(data=None, label=None):
    """Write samples and labels to the two pickle cache files.

    Args:
        data: Samples to store. Defaults to the notebook-level ``X``.
        label: Labels to store. Defaults to the notebook-level ``y``.
    """
    if data is None:
        data = X
    if label is None:
        label = y

    # Context managers close the files even if pickling fails half-way.
    with open(Xpickle_file, 'wb') as handle:
        pickle.dump(data, handle)
    with open(ypickle_file, 'wb') as handle:
        pickle.dump(label, handle)


def load_data():
    """Load all samples and labels, preferring the pickle cache when present.

    Without a cache, every file in ``<base_path>/<user>/split_letters_csv/``
    is read as a CSV and labelled with the first alphabetic character of its
    file name.

    Returns:
        tuple: ``(data, label)`` — an object array of DataFrames and a string
        array of single-character labels, in matching order.
    """
    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):
        return load_pickles()

    data = []
    label = []
    for user in range(user_count):
        user_path = os.path.join(base_path, str(user), 'split_letters_csv')
        # os.listdir order is unspecified; sorting keeps sample indices
        # reproducible between runs.
        for file in sorted(os.listdir(user_path)):
            file_name = os.path.join(user_path, file)
            letter = ''.join(filter(str.isalpha, file))[0]
            # `sep` must be passed by keyword on current pandas versions.
            data.append(pd.read_csv(file_name, sep=delim))
            label.append(letter)

    return (_to_object_array(data), np.asarray(label, dtype=str))


# Load data
X, y = load_data()
len(X), len(y)

# Show data shape
print(X.shape)
print(y.shape)

# Show which sample length corresponds to which percentile of the dataset
X_len = np.asarray(list(map(len, X)))
sq_xlen = pd.Series(X_len)
ptiles = [x * 0.01 for x in range(100)]
len_quantiles = sq_xlen.quantile(ptiles)

fig, ax = plt.subplots()
ax.plot(len_quantiles.to_numpy(), ptiles)
ax.set(xlabel='sample length (rows)', ylabel='quantile',
       title='Cumulative distribution of sample lengths')
sq_xlen.describe(percentiles=[x * 0.01 for x in range(90, 100)])

# Remove outlier samples (longer than the chosen length quantile) from the dataset
threshold_p = 0.99
threshold = int(sq_xlen.quantile(threshold_p))
len_mask = np.where(X_len <= threshold)

X_filter = X[len_mask]
y_filter = y[len_mask]
def sliding_window(data, window_sz=10):
    """Return the mean of each consecutive, non-overlapping window of ``data``.

    The signal is cut into chunks of ``window_sz`` values, starting at
    position 0. The last chunk may be shorter; its mean is taken over the
    values it actually contains.

    Args:
        data: 1-D sequence of numbers (list, NumPy array or pandas Series).
            Values are read by position, so a Series with a non-default
            index works as well.
        window_sz: Number of values per window. Must be positive.

    Returns:
        list[float]: One mean per window, in order. Empty for empty input.

    Raises:
        ValueError: If ``window_sz`` is not positive.
    """
    if window_sz <= 0:
        raise ValueError('window_sz must be a positive integer')

    values = np.asarray(data, dtype=float)
    # Each window gets its own mean; slicing restarts the sum for every
    # window instead of carrying it over from the previous ones.
    return [
        float(values[start:start + window_sz].mean())
        for start in range(0, len(values), window_sz)
    ]
# Windowed mean of the force signal of one example recording
sample_idx = 5   # recording inspected in this exploration
window_sz = 10   # number of rows per window
thresh = 70      # a window counts as active when its mean force exceeds this

input_data = X[sample_idx]['Force'].to_numpy(dtype=float)
window_starts = np.arange(0, len(input_data), window_sz)
# The last window may be shorter; its mean uses the rows it really contains.
win_means = np.array([input_data[start:start + window_sz].mean()
                      for start in window_starts])

fig, ax = plt.subplots()
ax.plot(input_data, label='Force')
# Plot each mean at the row where its window starts so both curves share
# the same x axis.
ax.plot(window_starts, win_means, label='window mean')
ax.axhline(thresh, linestyle='--', color='grey', label='threshold')
ax.set(xlabel='row', ylabel='Force', title='Recording {}'.format(sample_idx))
ax.legend();

# Start rows of all windows whose mean force is above the threshold.
# Observed for this recording: the data is above the threshold from 140 to 180
# and from 190 to 220; everything before 140 and after 220 is 0 and can be cut.
active_starts = window_starts[win_means > thresh]
print(active_starts)

# Crop the recording itself to the active range. The active starts are row
# offsets inside this one recording, so they must slice its rows and must not
# be used as indices into the array of recordings X.
if active_starts.size:
    crop_start = int(active_starts[0])
    crop_stop = int(active_starts[-1]) + window_sz
else:
    crop_start = crop_stop = 0
sample_cropped = X[sample_idx].iloc[crop_start:crop_stop]

print(X.shape)
print(sample_cropped['Force'].shape)

X.shape, X[sample_idx].shape, sample_cropped.shape

# Compare the original and the cropped force signal
fig, ax = plt.subplots()
ax.plot(X[sample_idx]['Force'], label='original')
ax.plot(sample_cropped['Force'], label='cropped')
ax.set(xlabel='row', ylabel='Force',
       title='Recording {}: original vs. cropped'.format(sample_idx))
ax.legend();