Neuer Datensatz als Pickle in Main Folder+Versuch diese zu shorten

This commit is contained in:
Ibrahim El Sayed
2021-06-13 14:01:19 +00:00
parent 39095c6697
commit 250c1cabaa
10 changed files with 2170 additions and 0 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "6be7788e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import matplotlib.pyplot as plt\n",
"from math import isqrt\n",
"import pickle\n",
"from tqdm import tqdm\n",
"import os\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder, LabelBinarizer\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Flatten, BatchNormalization\n",
"\n",
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '2'\n",
"\n",
"delim = ';'\n",
"user_count = 100\n",
"base_path = '/opt/iui-datarelease2-sose2021/'\n",
"Xpickle_file = './X2.pickle'\n",
"ypickle_file = './y2.pickle'\n",
"xshorted_pickle_file = './X2_shorted.pickle'\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9af358ae",
"metadata": {},
"outputs": [],
"source": [
"def load_pickles():\n",
" _p = open(Xpickle_file, 'rb')\n",
" X = pickle.load(_p)\n",
" _p.close()\n",
" \n",
" _p = open(ypickle_file, 'rb')\n",
" y = pickle.load(_p)\n",
" _p.close()\n",
" \n",
" return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "12a870d7",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def load_data():\n",
" if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
" return load_pickles()\n",
" data = []\n",
" label = []\n",
" for user in range(0, user_count):\n",
" user_path = base_path + str(user) + '/split_letters_csv/'\n",
" for file in os.listdir(user_path):\n",
" file_name = user_path + file\n",
" letter = ''.join(filter(lambda x: x.isalpha(), file))[0]\n",
" data.append(pd.read_csv(file_name, delim))\n",
" label.append(letter)\n",
" return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "610ab5e0",
"metadata": {},
"outputs": [],
"source": [
"def shorten_pickle(l):\n",
" \n",
" temp = l\n",
" thresh = 80\n",
" temp_over_T = 0\n",
" isOver = False\n",
" temp_short = [] ##Daten nachdem vorne abgeschnitten wird\n",
" temp_final = [] ##Daten nachdem auch hinten abgeschnitten wurde\n",
" \n",
" temp_X = [] ## Zweite Dimension von Temp, beinhaltet force \n",
" temp_short_X = []\n",
" \n",
" for a in tqdm(range (0, len(temp))): \n",
" temp_X = temp[a] \n",
" \n",
" temp_X = l['Force']\n",
" thresh = 80\n",
" temp_over_T = 0\n",
" \n",
" temp_short = [] ##Daten nachdem vorne abgeschnitten wird\n",
" temp_final = [] ##Daten nachdem auch hinten abgeschnitten wurde\n",
" temp_short_X = []\n",
" \n",
" \n",
" \n",
" for b in range (0, len(temp_X)): \n",
" if(temp_X[b]>thresh):\n",
" temp_over_T = b\n",
" break\n",
" \n",
" for x in range (temp_over_T,len(temp_X)):\n",
" temp_short_X.append(temp_X[x]) ##hier werden die Daten von y appended in eine liste ( Von form data[x][y])\n",
" \n",
" \n",
" for y in range ((len(temp_short_X)-1),0, -1): \n",
" if(temp_short_X[y] > thresh):\n",
" temp_over_T = y\n",
" break\n",
" \n",
" for z in range(0, temp_over_T+1):\n",
" temp_short.append(temp_short_X[z]) ##hier wird [y] als einzelne Datei gespeichert\n",
" \n",
" temp_short.append(0) # Damit beim Plot die linie bis nach unten geht\n",
" \n",
" \n",
" temp_final.append(temp_short)\n",
" \n",
" return temp_final\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7f232eeb",
"metadata": {},
"outputs": [],
"source": [
"def shorten_v2(npList):\n",
" temp = npList['Force']\n",
" \n",
" print (\"I was here\")\n",
" \n",
" thresh = 80\n",
" leeway = 1\n",
" \n",
" temps_over_T = np.where(temp > thresh)[0]\n",
" print(\"I Was here too\")\n",
" return npList[max(temps_over_T[0],0):temps_over_T[-1]]\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "429f89ae",
"metadata": {},
"outputs": [],
"source": [
"def save_pickle():\n",
"# _p = open(np.asarray(data, dtype=pd.DataFrame), 'wb')\n",
" _p = open(Xpickle_file, 'wb')\n",
" pickle.dump(x, _p)\n",
" _p.close()\n",
"\n",
"# _p = open(np.asarray(label, dtype=str), 'wb')\n",
" _p = open(ypickle_file, 'wb')\n",
" pickle.dump(y, _p)\n",
" _p.close()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5bb89103",
"metadata": {},
"outputs": [],
"source": [
"## save_pickle() pickles sind erstellt"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "78b21c1c",
"metadata": {},
"outputs": [],
"source": [
"x,y = load_pickles()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ce783824",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"26179\n"
]
}
],
"source": [
"print (len(x))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f1c165b1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/185 [00:00<?, ?it/s]\n"
]
},
{
"ename": "KeyError",
"evalue": "0",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/opt/jupyterhub/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3080\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 0",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
"\u001b[0;32m<ipython-input-4-9fb2e4a993ea>\u001b[0m in \u001b[0;36mshorten_pickle\u001b[0;34m(l)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ma\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mtemp_X\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mtemp_X\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Force'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/jupyterhub/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3022\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3023\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3024\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3025\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3026\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/jupyterhub/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3080\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3082\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3084\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 0"
]
}
],
"source": [
"%%time\n",
"f_data = np.array(list(map(shorten_pickle, x)))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7540198d",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'f_data' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-5fc433f33b3d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'f_data' is not defined"
]
}
],
"source": [
"plt.plot(f_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f003da0",
"metadata": {},
"outputs": [],
"source": [
"exit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long