461 lines
65 KiB
Plaintext
461 lines
65 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "b5fd075a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Needed Imports\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import tensorflow as tf\n",
|
|
"import os\n",
|
|
"import pickle\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from math import isqrt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "805e21e0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# GPU runtime settings; both environment variables are set in a single call.\n",
"os.environ.update({\n",
"    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',  # this is required\n",
"    'CUDA_VISIBLE_DEVICES': '2',  # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal.\n",
"})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "52b164a4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"delim = ';'\n",
|
|
"user_count = 100\n",
|
|
"base_path = '/opt/iui-datarelease1-sose2021/'\n",
|
|
"Xpickle_file = './X.pickle'\n",
|
|
"ypickle_file = './y.pickle'\n",
|
|
"\n",
"# Read the cached samples and labels back from the two pickle files.\n",
"def load_pickles():\n",
"    \"\"\"Load the pickled dataset from `Xpickle_file` and `ypickle_file`.\n",
"\n",
"    Returns:\n",
"        A tuple `(X, y)`: `X` is the unpickled sample collection converted with\n",
"        `np.asarray(..., dtype=pd.DataFrame)`, `y` the unpickled labels converted\n",
"        to a string array.\n",
"    \"\"\"\n",
"    # NOTE(review): pickle.load can execute arbitrary code; only load files this notebook wrote itself.\n",
"    # `with` closes each file even when unpickling raises.\n",
"    with open(Xpickle_file, 'rb') as x_file:\n",
"        X = pickle.load(x_file)\n",
"\n",
"    with open(ypickle_file, 'rb') as y_file:\n",
"        y = pickle.load(y_file)\n",
"\n",
"    return (np.asarray(X, dtype = pd.DataFrame), np.asarray(y, dtype = str))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "2b75bbc1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Function used to save data as a pickle file\n",
"def save_pickle(data=None, label=None):\n",
"    \"\"\"Write the samples to `Xpickle_file` and the labels to `ypickle_file`.\n",
"\n",
"    Args:\n",
"        data: Samples to store; when omitted, the notebook-level `X` is written.\n",
"        label: Labels to store; when omitted, the notebook-level `y` is written.\n",
"    \"\"\"\n",
"    samples = X if data is None else data\n",
"    labels = y if label is None else label\n",
"\n",
"    # `with` closes each file even when pickling raises.\n",
"    with open(Xpickle_file, 'wb') as x_file:\n",
"        pickle.dump(samples, x_file)\n",
"\n",
"    with open(ypickle_file, 'wb') as y_file:\n",
"        pickle.dump(labels, y_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "03037493",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Load the dataset as two NumPy arrays (one for the data and one for the labels)\n",
"def load_data():\n",
"    \"\"\"Return samples and labels, preferring the pickle cache over the raw files.\n",
"\n",
"    When both pickle files exist they are loaded via `load_pickles()`. Otherwise every\n",
"    file in `<base_path><user>/split_letters_csv/` for users `0 .. user_count - 1` is\n",
"    parsed with `delim` as separator; its label is the first alphabetic character of\n",
"    the file name.\n",
"\n",
"    Returns:\n",
"        A tuple `(data, labels)` of NumPy arrays with one entry per file read.\n",
"    \"\"\"\n",
"    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
"        return load_pickles()\n",
"\n",
"    data = []\n",
"    label = []\n",
"    for user in range(0, user_count):\n",
"        user_path = base_path + str(user) + '/split_letters_csv/'\n",
"        # os.listdir returns entries in arbitrary order; sort so sample indices are reproducible.\n",
"        for file in sorted(os.listdir(user_path)):\n",
"            file_name = user_path + file\n",
"            letter = ''.join(filter(lambda x: x.isalpha(), file))[0]\n",
"            # Pass the separator by keyword: pandas 2.x only accepts the path positionally.\n",
"            data.append(pd.read_csv(file_name, sep=delim))\n",
"            label.append(letter)\n",
"    return (np.asarray(data, dtype = pd.DataFrame), np.asarray(label, dtype = str))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "b91b4622",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(13102, 13102)"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load Data\n",
|
|
"X, y = load_data()\n",
|
|
"len(X), len(y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "817f4cef",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(13102,)\n",
|
|
"(13102,)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Show Data Shape\n",
|
|
"print(X.shape)\n",
|
|
"print(y.shape) "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3c11cf82",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Plot which share of the samples (y-axis) has at most a given length (x-axis)\n",
"X_len = np.asarray([len(sample) for sample in X])\n",
"sq_xlen = pd.Series(X_len)\n",
"ptiles = [x*0.01 for x in range(100)]\n",
"# Sample length at each percentile from 0.00 to 0.99\n",
"l = [sq_xlen.quantile(p) for p in ptiles]\n",
"plt.plot(l, ptiles)\n",
"sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "c34dd9d0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Remove outlier data from the dataset: keep samples no longer than the 99th length percentile\n",
"threshold_p = 0.99\n",
"threshold = int(sq_xlen.quantile(threshold_p))\n",
"len_mask = np.where(X_len <= threshold)\n",
"\n",
"X_filter, y_filter = X[len_mask], y[len_mask]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 98,
|
|
"id": "eb03d293",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Sliding Window Function\n",
"def sliding_window(data, window_sz=10):\n",
"    \"\"\"Average `data` over consecutive, non-overlapping windows.\n",
"\n",
"    Args:\n",
"        data: Sequence of numbers supporting `len()` and integer indexing.\n",
"        window_sz: Number of values per window (default 10).\n",
"\n",
"    Returns:\n",
"        A list with one value per window: the window's sum divided by `window_sz`.\n",
"        A shorter trailing window is also divided by `window_sz`, matching the\n",
"        inline computation used elsewhere in this notebook.\n",
"    \"\"\"\n",
"    window_means = []\n",
"    for start in range(0, len(data), window_sz):\n",
"        stop = min(start + window_sz, len(data))\n",
"        # Restart the accumulator for every window; carrying it over would turn the\n",
"        # result into a running total instead of per-window means.\n",
"        window_sum = 0\n",
"        for j in range(start, stop):\n",
"            window_sum += data[j]\n",
"        window_means.append(window_sum / window_sz)\n",
"\n",
"    return window_means"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 75,
|
|
"id": "1581a370",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7f08c64055e0>]"
|
|
]
|
|
},
|
|
"execution_count": 75,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
"# Windowed mean of the force signal of sample 5, plotted together with the raw signal\n",
"input_data = X[5]['Force']\n",
"window_sz = 10\n",
"sum_windows_passed = 0\n",
"thresh = 70\n",
"\n",
"# One entry per non-overlapping window: sum of the window's values divided by window_sz\n",
"win_above_thresh = [\n",
"    sum(input_data[j] for j in range(start, min(start + window_sz, len(input_data)))) / window_sz\n",
"    for start in range(0, len(input_data), window_sz)\n",
"]\n",
"\n",
"plt.plot(win_above_thresh)\n",
"plt.plot(X[5]['Force'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 111,
|
|
"id": "f26eca93",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(array([140, 150, 160, 170, 190, 200, 210]),)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
"# Numbers of the windows whose mean exceeds the threshold, scaled to start offsets in the raw signal\n",
"_blep = (np.where(np.asarray(win_above_thresh) > thresh)[0] * window_sz,)\n",
"\n",
"print(_blep) # see output below: the data above the threshold ranges from 140 to 180 and from 190 to 220;\n",
"             # everything before 140 and after 220 is 0 and can be cut"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 120,
|
|
"id": "407f8efe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_new = X[_blep]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 121,
|
|
"id": "1c886109",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(13102,)\n",
|
|
"(124,)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(X.shape)\n",
|
|
"print(X_new[5]['Force'].shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 134,
|
|
"id": "cfa4732e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"((13102,), (257, 15), (257, 15))"
|
|
]
|
|
},
|
|
"execution_count": 134,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"X.shape, X[140].shape, X_new[0].shape\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 141,
|
|
"id": "4a15e2ac",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7f08c2e739a0>]"
|
|
]
|
|
},
|
|
"execution_count": 141,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.plot(X[140]['Force'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 142,
|
|
"id": "4128a3cd",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7f08c2dcca90>]"
|
|
]
|
|
},
|
|
"execution_count": 142,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.plot(X_new[0]['Force'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 144,
|
|
"id": "9af3f711",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7f08c2c16f40>]"
|
|
]
|
|
},
|
|
"execution_count": 144,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.plot(X_new[1]['Force'])\n",
|
|
"plt.plot(X[150]['Force'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "775983d4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|