318 lines
36 KiB
Plaintext
318 lines
36 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "7a7d7566",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Needed Imports\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import tensorflow as tf\n",
|
|
"import os\n",
|
|
"import pickle\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from math import isqrt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "72dca74e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Configure the GPU runtime through environment variables\n",
"os.environ.update({\n",
"    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',  # this is required\n",
"    'CUDA_VISIBLE_DEVICES': '2',  # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal.\n",
"})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "148e8cc9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Dataset configuration\n",
"delim = ';'  # separator passed to pd.read_csv for the per-letter CSV files\n",
"user_count = 100  # user directories 0 .. user_count - 1 are scanned\n",
"base_path = '/opt/iui-datarelease1-sose2021/'\n",
"Xpickle_file = './X.pickle'  # pickle cache for the samples\n",
"ypickle_file = './y.pickle'  # pickle cache for the labels\n",
"\n",
"\n",
"def load_pickles():\n",
"    \"\"\"Load the cached samples and labels from the pickle files.\n",
"\n",
"    Returns:\n",
"        tuple: (X, y). X is the content of Xpickle_file converted with\n",
"        np.asarray(..., dtype=pd.DataFrame); y is the content of ypickle_file\n",
"        converted to a NumPy array of strings.\n",
"    \"\"\"\n",
"    # NOTE(review): pickle.load can execute arbitrary code while unpickling;\n",
"    # only load cache files that were written by this notebook.\n",
"    # The with-blocks close the files even if unpickling raises.\n",
"    with open(Xpickle_file, 'rb') as x_file:\n",
"        X = pickle.load(x_file)\n",
"\n",
"    with open(ypickle_file, 'rb') as y_file:\n",
"        y = pickle.load(y_file)\n",
"\n",
"    return (np.asarray(X, dtype = pd.DataFrame), np.asarray(y, dtype = str))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "863651d8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def save_pickle(data=None, label=None):\n",
"    \"\"\"Write samples and labels to the pickle cache files.\n",
"\n",
"    Args:\n",
"        data: Samples to store in Xpickle_file. Defaults to the global X.\n",
"        label: Labels to store in ypickle_file. Defaults to the global y.\n",
"    \"\"\"\n",
"    if data is None:\n",
"        data = X\n",
"    if label is None:\n",
"        label = y\n",
"\n",
"    # The with-blocks close (and flush) the files even if pickling raises.\n",
"    with open(Xpickle_file, 'wb') as x_file:\n",
"        pickle.dump(data, x_file)\n",
"\n",
"    with open(ypickle_file, 'wb') as y_file:\n",
"        pickle.dump(label, y_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "703abfd3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def load_data():\n",
"    \"\"\"Load all samples and their letter labels.\n",
"\n",
"    If both pickle cache files exist they are read via load_pickles().\n",
"    Otherwise every file below base_path/USER/split_letters_csv/ is parsed\n",
"    with pandas for USER = 0 .. user_count - 1.\n",
"\n",
"    Returns:\n",
"        tuple: (data, label). data holds one parsed CSV per file, label holds\n",
"        the first alphabetic character of the corresponding file name.\n",
"    \"\"\"\n",
"    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
"        return load_pickles()\n",
"\n",
"    data = []\n",
"    label = []\n",
"    for user in range(user_count):\n",
"        user_path = os.path.join(base_path, str(user), 'split_letters_csv')\n",
"        # os.listdir returns entries in arbitrary order; sort for reproducible sample indices\n",
"        for file in sorted(os.listdir(user_path)):\n",
"            # The label is the first alphabetic character of the file name\n",
"            letter = ''.join(filter(str.isalpha, file))[0]\n",
"            # sep is passed by keyword: it is keyword-only in pandas 2.0 and later\n",
"            data.append(pd.read_csv(os.path.join(user_path, file), sep=delim))\n",
"            label.append(letter)\n",
"\n",
"    return (np.asarray(data, dtype = pd.DataFrame), np.asarray(label, dtype = str))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "c08e44d1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the samples (X) and labels (y): from the pickle cache if both files exist, otherwise from the CSV files\n",
|
|
"X, y = load_data()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "fc1766db",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"count 13102.000000\n",
|
|
"mean 208.304457\n",
|
|
"std 206.732342\n",
|
|
"min 42.000000\n",
|
|
"50% 185.000000\n",
|
|
"90% 270.000000\n",
|
|
"91% 276.000000\n",
|
|
"92% 286.000000\n",
|
|
"93% 299.000000\n",
|
|
"94% 312.000000\n",
|
|
"95% 333.000000\n",
|
|
"96% 355.000000\n",
|
|
"97% 388.000000\n",
|
|
"98% 456.980000\n",
|
|
"99% 701.940000\n",
|
|
"max 11073.000000\n",
|
|
"dtype: float64"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Empirical quantile curve and summary statistics of the sample lengths\n",
"X_len = np.asarray([len(sample) for sample in X])\n",
"sq_xlen = pd.Series(X_len)\n",
"ptiles = [x*0.01 for x in range(100)]\n",
"l = [sq_xlen.quantile(q) for q in ptiles]\n",
"plt.plot(l, ptiles)\n",
"sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "bbca15d8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Drop outliers: keep only samples whose length is within the chosen quantile\n",
"threshold_p = 0.99\n",
"threshold = int(sq_xlen.quantile(threshold_p))\n",
"len_mask = np.nonzero(X_len <= threshold)\n",
"\n",
"X_filter, y_filter = X[len_mask], y[len_mask]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "0577b868",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def sliding_window(data, window_sz=10):\n",
"    \"\"\"Average the input over consecutive, non-overlapping windows.\n",
"\n",
"    Args:\n",
"        data: Iterable of numeric values.\n",
"        window_sz: Number of values per window (default 10).\n",
"\n",
"    Returns:\n",
"        list: One value per window, the sum of the window's values divided by\n",
"        window_sz. A shorter trailing window is also divided by window_sz.\n",
"    \"\"\"\n",
"    values = list(data)\n",
"    window_means = []\n",
"    for start in range(0, len(values), window_sz):\n",
"        # Sum each window on its own; carrying the sum over from the previous\n",
"        # window would turn the result into a running total.\n",
"        window_sum = sum(values[start:start + window_sz])\n",
"        window_means.append(window_sum / window_sz)\n",
"\n",
"    return window_means"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "ae4b01be",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Average the 'Force' column of sample X[5] over consecutive windows of window_sz values\n",
"input_data = X[5]['Force']\n",
"window_sz = 10\n",
"sum_windows_passed = 0\n",
"\n",
"win_above_thresh = [\n",
"    sum(input_data[pos] for pos in range(start, min(start + window_sz, len(input_data)))) / window_sz\n",
"    for start in range(0, len(input_data), window_sz)\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "7945cb5f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"thresh = 35\n",
"\n",
"# Positions of the windows whose average exceeds the threshold\n",
"_blep = np.where(np.asarray(win_above_thresh) > thresh)\n",
"# Expand every selected window into the sample indices it covers\n",
"# (integer dtype even when no window is selected)\n",
"ranges = (_blep[0][:, None] * window_sz + np.arange(window_sz)).flatten()\n",
"# The last window can be shorter than window_sz; drop indices past the end of the signal\n",
"ranges = ranges[ranges < len(input_data)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "08ce93bc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7f6a3ccd33a0>]"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Overlay the above-threshold part of the signal on the full signal\n",
"blepped = input_data[ranges]\n",
"plt.plot(np.arange(len(blepped)), blepped)\n",
"plt.plot(np.arange(len(input_data)), input_data)\n",
"# NOTE(review): this draws 2000 identical points at (140, 140); presumably a reference line was intended - confirm\n",
"plt.plot([140] * 2000, [140] * 2000)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3455b49f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|