normalization Start

master
Ibrahim El Sayed 2021-07-12 14:40:16 +02:00
parent d786a2ffb8
commit c5a2c85788
1 changed files with 380 additions and 0 deletions

View File

@ -0,0 +1,380 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "38f12435",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from glob import glob\n",
"import pandas as pd\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.decomposition import PCA, KernelPCA\n",
"from sklearn.preprocessing import (StandardScaler, \n",
" MinMaxScaler, \n",
" MaxAbsScaler,\n",
" PowerTransformer,\n",
" Binarizer)\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.model_selection import cross_validate\n",
"from sklearn.metrics import classification_report, accuracy_score\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from math import isqrt\n",
"import pickle\n",
"from tqdm import tqdm\n",
"import os\n",
"\n",
"# GPU-related environment variables for this kernel process.\n",
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '2'\n",
"\n",
"def load_data(user_filter=None, data_glob='/opt/iui-datarelease3-sose2021/*.csv'):\n",
"    \"\"\"Read every CSV matched by ``data_glob`` and describe it by its file name.\n",
"\n",
"    The file name is split on '_' into five fields. The first character of\n",
"    field 0 is dropped and the rest is parsed as the integer user id; the\n",
"    prefixes 'Scenario', 'HeightNormalization', 'ArmNormalization' and\n",
"    'Repetition' are stripped from fields 1 to 4.\n",
"\n",
"    Args:\n",
"        user_filter: if not None, only files of this user id are loaded.\n",
"        data_glob: glob pattern selecting the CSV files to read.\n",
"\n",
"    Returns:\n",
"        A list with one dict per file, holding the keys 'filename', 'user',\n",
"        'scenario', 'heightnorm', 'armnorm', 'rep' and 'data' (the DataFrame\n",
"        returned by ``pd.read_csv``).\n",
"    \"\"\"\n",
"    dic_data = []\n",
"\n",
"    # glob() yields files in arbitrary order; sorting keeps positional access\n",
"    # into the returned list reproducible between runs and machines.\n",
"    for path in sorted(glob(data_glob)):\n",
"        filename = os.path.basename(path)\n",
"        fields = filename.split('_')\n",
"        user = int(fields[0][1:])\n",
"        # Compare against None so that a user id of 0 can be selected as well.\n",
"        if user_filter is not None and user != user_filter:\n",
"            continue\n",
"        dic_data.append({\n",
"            'filename': path,\n",
"            'user': user,\n",
"            'scenario': fields[1][len('Scenario'):],\n",
"            'heightnorm': fields[2][len('HeightNormalization'):] == 'True',\n",
"            'armnorm': fields[3][len('ArmNormalization'):] == 'True',\n",
"            # The extension is cut off before the last field is parsed.\n",
"            'rep': int(filename.split('.')[0].split('_')[4][len('Repetition'):]),\n",
"            'data': pd.read_csv(path),\n",
"        })\n",
"    return dic_data\n",
"\n",
"dic_data = load_data()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fa4a164f",
"metadata": {},
"outputs": [],
"source": [
"# Keep the 'Sorting' recordings whose height and arm normalization flags agree.\n",
"fil_dic_data = [\n",
"    rec for rec in dic_data\n",
"    if rec['scenario'] == 'Sorting' and rec['heightnorm'] == rec['armnorm']\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bded63ab",
"metadata": {},
"outputs": [],
"source": [
"# print(fil_dic_data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "bff8a597",
"metadata": {},
"outputs": [],
"source": [
"# One unfitted instance of each preprocessing transformer.\n",
"min_Max, standard, max_Abs, binarizer = (\n",
"    MinMaxScaler(),\n",
"    StandardScaler(),\n",
"    MaxAbsScaler(),\n",
"    Binarizer(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "cbd99d06",
"metadata": {},
"outputs": [],
"source": [
"# Placeholder lists; nothing in this notebook fills or reads them yet.\n",
"strData = []\n",
"floatData = []\n",
"intData = []\n",
"\n",
"# Fail with a clear message instead of a KeyError on the 'data' column below.\n",
"if not fil_dic_data:\n",
"    raise ValueError('no recording matched the scenario/normalization filter')\n",
"\n",
"# One row per recording; the 'data' column holds each recording's DataFrame.\n",
"dataP = pd.DataFrame.from_dict(fil_dic_data)\n",
"\n",
"# Columns removed before scaling (string-valued, per the original author's note).\n",
"STRING_COLUMNS = [\n",
"    'Scenario',\n",
"    'HeightNormalization',\n",
"    'ArmNormalization',\n",
"    'LeftHandTrackingAccuracy',\n",
"    'RightHandTrackingAccuracy',\n",
"]\n",
"\n",
"# Only the first recording is used from here on.\n",
"tempP = dataP['data'].iloc[0].drop(columns=STRING_COLUMNS)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "234f6afe",
"metadata": {},
"outputs": [],
"source": [
"# Rebinds the four transformer names to fresh, unfitted instances.\n",
"min_Max, standard, max_Abs, binarizer = (\n",
"    MinMaxScaler(),\n",
"    StandardScaler(),\n",
"    MaxAbsScaler(),\n",
"    Binarizer(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "04ae81d2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1277\n",
" Unnamed: 0 FrameID participantID Repetition CenterEyeAnchor_pos_X \\\n",
"0 0 0 4 1 0.075814 \n",
"1 1 1 4 1 0.075603 \n",
"2 2 2 4 1 0.075464 \n",
"3 3 3 4 1 0.074985 \n",
"4 4 4 4 1 0.074755 \n",
"... ... ... ... ... ... \n",
"1272 1272 1272 4 1 0.059269 \n",
"1273 1273 1273 4 1 0.058998 \n",
"1274 1274 1274 4 1 0.058876 \n",
"1275 1275 1275 4 1 0.058837 \n",
"1276 1276 1276 4 1 0.058719 \n",
"\n",
" CenterEyeAnchor_pos_Y CenterEyeAnchor_pos_Z CenterEyeAnchor_euler_X \\\n",
"0 1.597426 0.214518 32.14411 \n",
"1 1.596940 0.213788 32.33485 \n",
"2 1.596355 0.213466 32.50544 \n",
"3 1.595917 0.213272 32.65988 \n",
"4 1.595896 0.212891 32.82001 \n",
"... ... ... ... \n",
"1272 1.562853 0.780597 43.60785 \n",
"1273 1.562540 0.780967 43.66858 \n",
"1274 1.562446 0.781218 43.70253 \n",
"1275 1.562278 0.781637 43.72837 \n",
"1276 1.562163 0.781915 43.73775 \n",
"\n",
" CenterEyeAnchor_euler_Y CenterEyeAnchor_euler_Z ... \\\n",
"0 17.57221 355.132000 ... \n",
"1 18.04424 355.151400 ... \n",
"2 18.46541 355.162400 ... \n",
"3 18.85156 355.278000 ... \n",
"4 19.17233 355.326500 ... \n",
"... ... ... ... \n",
"1272 338.90930 6.801289 ... \n",
"1273 338.83770 6.831078 ... \n",
"1274 338.73530 6.853129 ... \n",
"1275 338.65100 6.852663 ... \n",
"1276 338.56400 6.874183 ... \n",
"\n",
" right_Hand_RingTip_euler_X right_Hand_RingTip_euler_Y \\\n",
"0 324.1219 65.17896 \n",
"1 324.1279 65.55900 \n",
"2 323.9291 67.21324 \n",
"3 323.7837 68.33554 \n",
"4 323.6655 69.47017 \n",
"... ... ... \n",
"1272 297.6745 24.27903 \n",
"1273 297.7101 23.77864 \n",
"1274 297.5444 23.05743 \n",
"1275 297.4029 22.36072 \n",
"1276 297.2895 21.69823 \n",
"\n",
" right_Hand_RingTip_euler_Z right_Hand_PinkyTip_pos_X \\\n",
"0 104.17820 0.171209 \n",
"1 107.53830 0.158228 \n",
"2 115.07650 0.156692 \n",
"3 118.63020 0.155604 \n",
"4 122.65200 0.154516 \n",
"... ... ... \n",
"1272 32.62823 0.135538 \n",
"1273 33.12482 0.135347 \n",
"1274 33.56173 0.135177 \n",
"1275 33.99821 0.135037 \n",
"1276 34.43113 0.134919 \n",
"\n",
" right_Hand_PinkyTip_pos_Y right_Hand_PinkyTip_pos_Z \\\n",
"0 1.233351 0.453072 \n",
"1 1.254817 0.449175 \n",
"2 1.242653 0.457048 \n",
"3 1.236654 0.459492 \n",
"4 1.229719 0.461844 \n",
"... ... ... \n",
"1272 1.105558 1.126205 \n",
"1273 1.104719 1.126016 \n",
"1274 1.103936 1.125776 \n",
"1275 1.103254 1.125685 \n",
"1276 1.102556 1.125620 \n",
"\n",
" right_Hand_PinkyTip_euler_X right_Hand_PinkyTip_euler_Y \\\n",
"0 336.9985 58.71567 \n",
"1 337.3785 58.82201 \n",
"2 338.5847 57.22202 \n",
"3 339.3581 55.64667 \n",
"4 339.9531 54.21407 \n",
"... ... ... \n",
"1272 309.9038 24.25421 \n",
"1273 309.9819 23.98718 \n",
"1274 309.9340 23.53447 \n",
"1275 309.9002 23.10301 \n",
"1276 309.8771 22.69119 \n",
"\n",
" right_Hand_PinkyTip_euler_Z Session \n",
"0 89.64570 2 \n",
"1 98.59170 2 \n",
"2 114.27260 2 \n",
"3 121.56910 2 \n",
"4 129.46830 2 \n",
"... ... ... \n",
"1272 27.30666 2 \n",
"1273 28.01053 2 \n",
"1274 28.82001 2 \n",
"1275 29.60149 2 \n",
"1276 30.31748 2 \n",
"\n",
"[1277 rows x 341 columns]\n"
]
}
],
"source": [
"# Row count first, then the frame itself, each on its own line.\n",
"print(len(tempP), tempP, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "ed8fadf5",
"metadata": {},
"outputs": [],
"source": [
"# fit() returns the estimator itself, so data_min_M, data_stan, data_max_A and\n",
"# data_bin are the fitted transformer objects; the *T names hold the\n",
"# transformed arrays. Each transformer is fitted once and transform() reuses\n",
"# that fit rather than fitting a second time on the same data.\n",
"data_min_M = min_Max.fit(tempP)\n",
"data_min_MT = min_Max.transform(tempP)\n",
"\n",
"data_stan = standard.fit(tempP)\n",
"data_stanT = standard.transform(tempP)\n",
"\n",
"data_max_A = max_Abs.fit(tempP)\n",
"data_max_AT = max_Abs.transform(tempP)\n",
"\n",
"# NOTE(review): Binarizer runs with its default threshold - confirm that this\n",
"# is meaningful for position and angle columns.\n",
"data_bin = binarizer.fit(tempP)\n",
"data_binT = binarizer.transform(tempP)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "d65387c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MinMaxScaler()\n",
"[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.62054079e-01\n",
" 3.38612629e-01 0.00000000e+00]\n",
" [7.83699060e-04 7.83699060e-04 0.00000000e+00 ... 1.62350807e-01\n",
" 3.79761585e-01 0.00000000e+00]\n",
" [1.56739812e-03 1.56739812e-03 0.00000000e+00 ... 1.57886238e-01\n",
" 4.51889092e-01 0.00000000e+00]\n",
" ...\n",
" [9.98432602e-01 9.98432602e-01 0.00000000e+00 ... 6.38853882e-02\n",
" 5.88324285e-02 0.00000000e+00]\n",
" [9.99216301e-01 9.99216301e-01 0.00000000e+00 ... 6.26814536e-02\n",
" 6.24270056e-02 0.00000000e+00]\n",
" [1.00000000e+00 1.00000000e+00 0.00000000e+00 ... 6.15323220e-02\n",
" 6.57203480e-02 0.00000000e+00]]\n"
]
}
],
"source": [
"# Fitted scaler repr first, then the scaled array.\n",
"print(data_min_M, data_min_MT, sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "c63c4d67",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cad7f04",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}