iui-group-l-name-zensiert/2-second-project/iel/Week1 /Test1.ipynb

277 lines
7.8 KiB
Plaintext
Raw Normal View History

2021-07-12 14:40:16 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2288179b",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 768/768 [01:11<00:00, 10.72it/s]\n"
]
}
],
2021-07-12 14:40:16 +02:00
"source": [
"import os\n",
"from glob import glob\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
2021-07-12 14:40:16 +02:00
"from sklearn.pipeline import Pipeline\n",
"from sklearn.decomposition import PCA, KernelPCA\n",
"from sklearn.preprocessing import (StandardScaler, \n",
" MinMaxScaler, \n",
" MaxAbsScaler,\n",
" PowerTransformer,\n",
" Binarizer)\n",
"\n",
2021-07-12 14:40:16 +02:00
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.model_selection import cross_validate\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from math import isqrt\n",
"import pickle\n",
"from tqdm import tqdm\n",
"import os\n",
"\n",
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '2'\n",
"\n",
"def load_data(user_filter=None, data_glob='/opt/iui-datarelease3-sose2021/*.csv'):\n",
"    \"\"\"Read every recording CSV matching ``data_glob`` and return one dict per file.\n",
"\n",
"    The metadata is parsed from the file name, which is expected to consist of\n",
"    six '_'-separated fields: a one-character prefix followed by the user number,\n",
"    'Scenario<name>', 'HeightNormalization<True|False>',\n",
"    'ArmNormalization<True|False>', 'Repetition<n>' and 'Session<id>'.\n",
"\n",
"    Args:\n",
"        user_filter: if not None, only files of this user number are loaded.\n",
"        data_glob: glob pattern of the CSV files to read.\n",
"\n",
"    Returns:\n",
"        A list of dicts with the keys 'filename' (the full path), 'user' (int),\n",
"        'scenario' (str), 'heightnorm' (bool), 'armnorm' (bool), 'rep' (int),\n",
"        'session' (str) and 'data' (the DataFrame returned by ``pd.read_csv``).\n",
"    \"\"\"\n",
"    dic_data = []\n",
"\n",
"    for path in tqdm(glob(data_glob)):\n",
"        # Strip directory and extension once instead of re-splitting per field.\n",
"        stem = os.path.splitext(os.path.basename(path))[0]\n",
"        fields = stem.split('_')\n",
"\n",
"        user = int(fields[0][1:])\n",
"        # Compare against None so a filter value of 0 is not read as 'no filter'.\n",
"        # Checked before read_csv so that skipped files are never parsed.\n",
"        if user_filter is not None and user != user_filter:\n",
"            continue\n",
"\n",
"        dic_data.append(\n",
"            {\n",
"                'filename': path,\n",
"                'user': user,\n",
"                'scenario': fields[1][len('Scenario'):],\n",
"                'heightnorm': fields[2][len('HeightNormalization'):] == 'True',\n",
"                'armnorm': fields[3][len('ArmNormalization'):] == 'True',\n",
"                'rep': int(fields[4][len('Repetition'):]),\n",
"                'session': fields[5][len('Session'):],\n",
"                'data': pd.read_csv(path),\n",
"            }\n",
"        )\n",
"    return dic_data\n",
"\n",
"dic_data = load_data()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3df066af",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# dataP = pd.DataFrame.from_dict(fil_dic_data) #pandas Dataframe Form mit 'data0' nur die daten\n",
"\n",
"# tempP = dataP['data']\n",
"\n",
"# tempP = tempP[0].drop(columns=['Scenario','HeightNormalization','ArmNormalization','LeftHandTrackingAccuracy','RightHandTrackingAccuracy']) #P without String Data\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "30296cad",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Keep only the 'Sorting' recordings whose two normalization flags agree\n",
"# (both True or both False).\n",
"fil_dic_data = [\n",
"    entry\n",
"    for entry in dic_data\n",
"    if entry['scenario'] == 'Sorting' and entry['heightnorm'] == entry['armnorm']\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "7a808f50",
2021-07-12 14:40:16 +02:00
"metadata": {},
"source": [
"Test\n"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dc206ded",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [],
"source": [
"# One shared instance per scikit-learn preprocessing strategy.\n",
"min_Max, standard, max_Abs, binarizer = (\n",
"    MinMaxScaler(),\n",
"    StandardScaler(),\n",
"    MaxAbsScaler(),\n",
"    Binarizer(),\n",
")\n"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cf5b5695",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Split the filtered recordings by session id (stored as the strings '1' / '2').\n",
"session_data_1 = [rec for rec in fil_dic_data if rec['session'] == '1']  # session 1: training data\n",
"session_data_2 = [rec for rec in fil_dic_data if rec['session'] == '2']  # session 2: validation data\n",
"\n",
"# Take label and sample from the same record, so that user_data_N[i] is the\n",
"# user who produced data_N[i].\n",
"user_data_1 = [rec['user'] for rec in session_data_1]  # labels for session 1\n",
"data_1 = [rec['data'] for rec in session_data_1]\n",
"\n",
"user_data_2 = [rec['user'] for rec in session_data_2]  # labels for session 2\n",
"data_2 = [rec['data'] for rec in session_data_2]"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "82465bca",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
2021-07-12 14:40:16 +02:00
"output_type": "stream",
"text": [
"100%|██████████| 96/96 [00:00<00:00, 575.65it/s]\n",
"100%|██████████| 96/96 [00:00<00:00, 646.85it/s]\n"
2021-07-12 14:40:16 +02:00
]
}
],
"source": [
"# Bookkeeping columns that are removed from every recording before scaling.\n",
"DROP_COLUMNS = [\n",
"    'Scenario',\n",
"    'HeightNormalization',\n",
"    'ArmNormalization',\n",
"    'LeftHandTrackingAccuracy',\n",
"    'RightHandTrackingAccuracy',\n",
"    'Unnamed: 0',\n",
"    'FrameID',\n",
"    'participantID',\n",
"    'Repetition',\n",
"]\n",
"\n",
"# Filtered data per session. Each list is built from the frames of its own\n",
"# session; drop() returns a new frame, so data_1 / data_2 stay unmodified.\n",
"dataF_1 = [frame.drop(columns=DROP_COLUMNS) for frame in tqdm(data_1)]  # session 1\n",
"dataF_2 = [frame.drop(columns=DROP_COLUMNS) for frame in tqdm(data_2)]  # session 2\n"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a6f7076e",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
2021-07-12 14:40:16 +02:00
"output_type": "stream",
"text": [
"100%|██████████| 96/96 [00:03<00:00, 25.87it/s]\n"
2021-07-12 14:40:16 +02:00
]
}
],
"source": [
"minD = []  # session-1 frames scaled with MinMaxScaler\n",
"staD = []  # session-1 frames scaled with StandardScaler\n",
"maxD = []  # session-1 frames scaled with MaxAbsScaler\n",
"binD = []  # session-1 frames transformed with Binarizer\n",
"\n",
"# Pair every transformer with the list that collects its output.\n",
"scaler_outputs = (\n",
"    (min_Max, minD),\n",
"    (standard, staD),\n",
"    (max_Abs, maxD),\n",
"    (binarizer, binD),\n",
")\n",
"\n",
"# Each transformer is re-fitted on every single recording.\n",
"for frame in tqdm(dataF_1):\n",
"    for scaler, collected in scaler_outputs:\n",
"        collected.append(scaler.fit_transform(frame))\n"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "94676652",
2021-07-12 14:40:16 +02:00
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out part of the min-max-scaled session-1 recordings; the fixed\n",
"# random_state keeps the split reproducible.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    minD,\n",
"    user_data_1,\n",
"    random_state=2,\n",
")"
2021-07-12 14:40:16 +02:00
]
},
{
"cell_type": "markdown",
"id": "14a4abe1",
2021-07-12 14:40:16 +02:00
"metadata": {},
"source": [
"Classification"
]
2021-07-12 14:40:16 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
2021-07-12 14:40:16 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}