iui-group-l-name-zensiert/2-second-project/tdt/DataViz.ipynb

238 lines
16 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 88,
"id": "51e157d2",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from glob import glob\n",
"import pandas as pd\n",
"\n",
"def load_data(user_filter=None):\n",
" dic_data = []\n",
" \n",
" for p in glob('/opt/iui-datarelease3-sose2021/*.csv'):\n",
" path = p\n",
" filename = path.split('/')[-1]\n",
" user = int(filename.split('_')[0][1:])\n",
" if (user_filter):\n",
" if (user != user_filter):\n",
" continue\n",
" scenario = filename.split('_')[1][len('Scenario'):]\n",
" heightnorm = filename.split('_')[2][len('HeightNormalization'):] == 'True'\n",
" armnorm = filename.split('_')[3][len('ArmNormalization'):] == 'True'\n",
" rep = int(filename.split('.')[0].split('_')[4][len('Repetition'):])\n",
" data = pd.read_csv(path)\n",
" dic_data.append(\n",
" {\n",
" 'filename': path,\n",
" 'user': user,\n",
" 'scenario': scenario,\n",
" 'heightnorm': heightnorm,\n",
" 'armnorm': armnorm,\n",
" 'rep': rep,\n",
" 'data': data \n",
" }\n",
" )\n",
" return dic_data\n",
"\n",
"dic_data = load_data()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "45e0dcc6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"384"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dic_data)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "652e33e4",
"metadata": {},
"outputs": [],
"source": [
"fil_dic_data = []\n",
"for d in dic_data:\n",
" if d['scenario'] == 'Sorting':\n",
" if d['heightnorm'] == d['armnorm']:\n",
" fil_dic_data.append(d)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "e77a3dec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/opt/iui-datarelease3-sose2021/P1_ScenarioSorting_HeightNormalizationTrue_ArmNormalizationTrue_Repetition2.csv\n"
]
},
{
"data": {
"text/plain": [
"337"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"index = 1\n",
"entry = fil_dic_data[index]['data']\n",
"print(fil_dic_data[index]['filename'])\n",
"col_of_interst = []\n",
"for col in entry:\n",
" if 'float' in str(entry[col].dtype):\n",
" col_of_interst.append(col)\n",
"len(col_of_interst)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "93e18064",
"metadata": {},
"outputs": [],
"source": [
"len_list = []\n",
"for i in dic_data:\n",
" len_list.append(len(i['data']))"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "67a3c912",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 384.000000\n",
"mean 3053.768229\n",
"std 2195.831831\n",
"min 597.000000\n",
"50% 2395.000000\n",
"90% 5977.000000\n",
"91% 6157.600000\n",
"92% 6239.600000\n",
"93% 6341.490000\n",
"94% 6585.200000\n",
"95% 7561.800000\n",
"96% 8158.000000\n",
"97% 8895.250000\n",
"98% 9942.320000\n",
"99% 10315.120000\n",
"max 19371.000000\n",
"dtype: float64"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len_series = pd.Series(len_list, dtype='int64')\n",
"len_series.describe(percentiles=[x*0.01 for x in range(90,100)])"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "1b473131",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f0a6d98aa30>]"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAgJUlEQVR4nO3deXxV9Z3/8dcne8hOFggQSGRHrYABt1qXKqKdkfbX0cK0v6q1daZTZ7Grjn10ZjrTdlpnOqOtU8eZqrWtC7XWUkVxqbZTHSBh3yGyZIGQkIQshKz3O3/cA15jMAFucu7yfj4e95Gzce/n5CRvTs75nu/XnHOIiEj0S/C7ABERCQ8FuohIjFCgi4jECAW6iEiMUKCLiMSIJL8+uKCgwJWWlvr18SIiUWndunVHnHOFg63zLdBLS0uprKz06+NFRKKSmR041TpdchERiREKdBGRGKFAFxGJEUMGupk9YmYNZrb1FOvNzB4wsyoz22xm88NfpoiIDGU4Z+iPAYvfZ/31wHTvdQfwo7MvS0RETteQge6c+z3Q/D6bLAEed0GrgVwzKw5XgSIiMjzhuIY+EagJma/1lr2Hmd1hZpVmVtnY2BiGjxYRkRNG9aaoc+5h51y5c668sHDQdvEiIjGnP+DY29jBS1sPcf+re9ha1zoinxOOB4vqgJKQ+UneMhGRuBIIOOqOHmdPQzu76jvYfbidXfXtVDV20NMXAMAMxmamcN7EnLB/fjgCfQVwp5k9BVwEtDrnDoXhfUVEfBcIODp6+mjv6qPteO87X7t7aTsenK5tOc6uw+3sOdzOsZ7+k/92Qk4a08dl8cHpBcwYl8XMcVlMK8okPSVxRGodMtDN7EngSqDAzGqBvwOSAZxzDwErgRuAKqATuG1EKhUROQM9fQHaunrfHchdvbR3eYEcsq6ta+B8Lx3dfQw1sFt+RgozxmVxU3lJMLjHZzKtKIuc9OTR2UnPkIHunFs2xHoHfCFsFYmIDKKrt5/mYz00dfRw5Fh38GtHNy2dPe8J5vauYDi3d/XS1Rt43/c1g6zUJLLTk8lKSyY7LYlJeelkFWeR7c1npyeTnZZMVtqJ7ZKC67zp5MTIeEbTt865RCS+BQKO1uO9NB3r5ogXzk0dPTR1dHPkWPBrU0cPTcd6ONLeTXt336Dvk5KY4AVuElne14m56WSnJ50M6NAQzkpLJjs96WRAZ6QkkZBgo7z3I0OBLiJh1dXbz+G2Lg61dnG4rYv61i4a2ruDAX2sh8b24NfmYz30B957LSPBYGxGCvkZqeR7Nw/zM1IoyEwhPzOVgszg8gJv/ZiURMxiI5DPlgJdRIbFueAZdb0X0vWtXdS3dZ0M7xPzRzt73/Nvx6QkngziSXljmFuSe3I+PzOVgozg1/zMFPLGpJAYI2fMo02BLhKn+gOO9q5eWo8HX23H+96Z7uqlpbOHw15InwjrgdejzSA/I5XxOalMyhtDeWkexTnpjMtOY3x2GuNzgq/MVEXNaNB3WSSKdff1vyuMT7TMaD3eS2vnO+E8MLSDze4GvyZ9QkpiAuNyUhmfncb5k3K5NjuVcdlpFOekMz4nOF2UlUZKUmTcEBQFukhE6g84th1sZfXeJg4e7aLt+DuhHBrSQ7XgSE9OJCc9mZz04I3ACblpzCoONqfLTks+uS64/t3zackJujYdZRToIhHAOcfbjcd46+0jvFl1hNV7m2k9HrwWnZWW9E7opiUztTAzOD8m2IIjNIxDQzk7LVlnz3FGgS7io3UHWvj5mgO8VdVEfVsXABNz01k0ZxyXTSvg0qn5FGWn+VylRAsFuogPWjt7+e6qnTyxpprcMclcNrWAS6flc9nUAqbkj9GlDjkjCnSRUfbS1kN8/bltNB/r5rMfLOOua2eQoVYgEgb6KRIZJb39Ab6zciePvLmP8yfm8NhtC0akxz2JXwp0kREWCDjW7m/mX1/eRcX+Fm69tJS/vWG2blhK2CnQRUaAc47th9pYsfEgKzYd5FBrF5mpSdy/dC5L5g46oJfIWVOgi4RJ6/FeNtUcpfJACyu3HKKqoYOkBOOKGYXcc8NsrpldxJgU/crJyNFPl8gZ6OsPsPtwBxtrjrKhuoUNNUepaugAgo/Dl0/J458+eh4fOb+YvIwUn6uVeKFAFzmF/oCjqaM72PGU159J3dHjbK49yubaVjq9kWnyM1KYNzmXj82byLySXM6flENW2ugObCACCnSJUz19ARragyF9opvXEz0GHmo9zuG2bg63ddE3oHvXlMQEZhdncXN5CfMm5zKvJI+SselqNy4RQYEuMa23P8BvdzbwZtWRkMDuoulY93uGFUtPTqTY6x3wonPGMj47zZtPpzgnjXHZaeRnpMTMYAgSexToEpOqmzp5qqKaX6yrpbG9m8zU4Cg243PSOHdCdrBbV69712DvgWlkpyXpTFuimgJdYkIg4Khq7GDtvmZe3HqIN6uaSDC4elYRSxdM5sqZhSRFyLiPIiNFgS5Rqbc/wLaDbVTsa2bNvmYqDzSfHClnUl46X7p2BjeVlzA+Rx1bSfxQoEvU6OsP8NPVB3h1x2HWHzjK8d5gK5PS/DEsmjOOBaVjWVg2lslj1bmVxCcFukSF6qZO/ubpDayvPsqs8VncXD6JhWX5LCjNU/eyIh4FukS0rt5+nq6o4Xsv7SQhwfTovMj7UKBLRGrt7OWnq/fz2Fv7OdLRwyXn5HPfTR9gUt4Yv0sTiVgKdIkorZ29/PD1Pfx8TTWdPf1cMaOQP7viHC45J1/XxUWGoECXiBAIOH65vpZ/fnEnLZ09/PEFE/izD01lzoRsv0sTiRoKdPHdhuoW/umFHaw70ML8ybk8fvtCzp2ggR9ETpcCXXyz41Ab//rybl7dcZiCzBTu+5MP8PH5k/RovcgZUqDLqOvo7uPrv9rCrzcdJDM1iS8vmsFtl5VpXE2Rs6TfIBlVzjnueXYLL2w+yJ9fMZU//9BUcsaoq1mRcBhW5xZmttjMdplZlZndPcj6yWb2upltMLPNZnZD+EuVaNfbH+CB16r4zaaDfPm6mXxt8SyFuUgYDXmGbmaJwIPAtUAtUGFmK5xz20M2+zqw3Dn3IzObA6wESkegXolC/QHHcxvquP+1PVQ3d3L9eeP58w9N9bsskZgznEsuC4Eq59xeADN7ClgChAa6A060L8sBDoazSIleFfubufuXm3m78RjnTsjm0VsXcOXMQrUpFxkBwwn0iUBNyHwtcNGAbf4eeNnM/hLIAK4Z7I3M7A7gDoDJkyefbq0SZTq6+7jzifUkJybw0Kcu5LpzxynIRUZQuDqIXgY85pybBNwA/NTM3vPezrmHnXPlzrnywsLCMH20RKp/f2U3De3d/GDZPBafN15hLjLChhPodUBJyPwkb1mo24HlAM65/wXSgIJwFCjRafXeJn785j6WLZzMvMl5fpcjEheGE+gVwHQzKzOzFGApsGLANtXAhwHMbDbBQG8MZ6ESPY529nDX0xspy8/g3htm+12OSNwYMtCdc33AncAqYAfB1izbzOybZnajt9mXgM+Z2SbgSeBW5wYOwSvx4pu/2U5jezf3L52nh4VERtGwftuccysJNkUMXfaNkOntwGXhLU2iTVtXL8sranh2Qx1/dfU0zp+k/lhERpNOn+Ss9PUH+J+qIzy7vo6Xt9XT3RegfEoeX7h6mt+licQdBbqcEecc//bqHp5YU82Rjm5yxyTziQUlfGzeROaW5KpFi4gPFOhyRn62ppoHXtvD1bOK+MSCEq6aWURKUrhawYrImVCgy2mrae7kOyt3cPn0An58S7nOxkUihE6p5LQ9XVFDd1+Af/74BxTmIhFEgS6npau3n5e21XPhlDwm5qb7XY6IhFCgy7DtPtzOkh++SVVDB8sWlgz9D0RkVOkaugypP+D42eoDfHvlDrLSknj0tgVcNbPI77JEZAAFuryvLbWt3PvcFjbXtnLFjEL+5aYLKMxK9bssERmEAl0G1R9wfGflDh55cx/5mak8sGwef/yBYt0EFYlgCnR5D+ccX39uC0+ureGTF03mq4tnkZOuoeJEIp0CXd6lu6+ff3p
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from matplotlib import pyplot as plt\n",
"l = []\n",
"ptiles = [x*0.01 for x in range(100)]\n",
"for i in ptiles:\n",
" l.append(len_series.quantile(i))\n",
"\n",
"plt.plot(l, ptiles)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7dac09f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"dtype: float64"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}