{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a0c1intro",
   "metadata": {},
   "source": [
    "# Recording overview for `iui-datarelease3-sose2021`\n",
    "\n",
    "This notebook\n",
    "\n",
    "1. loads every recording CSV from the data directory and parses the metadata encoded in each file name,\n",
    "2. selects the `Sorting` recordings whose two normalization flags agree,\n",
    "3. counts the float-typed columns of one example recording, and\n",
    "4. summarises how many rows the recordings contain.\n",
    "\n",
    "All outputs were cleared when the code was revised; use *Restart Kernel and Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0c2imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from glob import glob\n",
    "\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0c3config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds one CSV file per recording.\n",
    "# Set the IUI_DATA_DIR environment variable to point somewhere else.\n",
    "DATA_DIR = os.environ.get('IUI_DATA_DIR', '/opt/iui-datarelease3-sose2021')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0c4loadmd",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "File names are expected to look like\n",
    "`P1_ScenarioSorting_HeightNormalizationTrue_ArmNormalizationTrue_Repetition2.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51e157d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _parse_filename(filename):\n",
    "    \"\"\"Extract the metadata fields encoded in a recording file name.\n",
    "\n",
    "    Expected pattern:\n",
    "    P{user}_Scenario{name}_HeightNormalization{flag}_ArmNormalization{flag}_Repetition{n}.csv\n",
    "\n",
    "    Returns a dict with the keys 'user' (int), 'scenario' (str),\n",
    "    'heightnorm' (bool), 'armnorm' (bool) and 'rep' (int).\n",
    "    Raises ValueError if the name has fewer than five '_'-separated parts\n",
    "    or if the user / repetition part is not numeric.\n",
    "    \"\"\"\n",
    "    stem = os.path.splitext(filename)[0]\n",
    "    user, scenario, heightnorm, armnorm, rep = stem.split('_')[:5]\n",
    "    return {\n",
    "        'user': int(user[1:]),  # drop the leading 'P'\n",
    "        'scenario': scenario[len('Scenario'):],\n",
    "        # Any value other than the literal 'True' counts as False.\n",
    "        'heightnorm': heightnorm[len('HeightNormalization'):] == 'True',\n",
    "        'armnorm': armnorm[len('ArmNormalization'):] == 'True',\n",
    "        'rep': int(rep[len('Repetition'):]),\n",
    "    }\n",
    "\n",
    "\n",
    "def load_data(user_filter=None, data_dir=DATA_DIR):\n",
    "    \"\"\"Load every recording CSV found directly inside data_dir.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    user_filter : int, optional\n",
    "        If given, only recordings of this user number are loaded.\n",
    "    data_dir : str, optional\n",
    "        Directory that is searched for '*.csv' files.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of dict\n",
    "        One dict per recording with the keys 'filename', 'user', 'scenario',\n",
    "        'heightnorm', 'armnorm', 'rep' and 'data' (the CSV as a DataFrame).\n",
    "        The list is ordered by file path.\n",
    "    \"\"\"\n",
    "    dic_data = []\n",
    "    # glob order depends on the file system; sort so that list indices are reproducible.\n",
    "    for path in sorted(glob(os.path.join(data_dir, '*.csv'))):\n",
    "        meta = _parse_filename(os.path.basename(path))\n",
    "        # Compare against None so that a filter value of 0 is not silently ignored.\n",
    "        if user_filter is not None and meta['user'] != user_filter:\n",
    "            continue\n",
    "        dic_data.append({'filename': path, **meta, 'data': pd.read_csv(path)})\n",
    "    return dic_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45e0dcc6",
   "metadata": {},
   "outputs": [],
   "source": [
    "dic_data = load_data()\n",
    "len(dic_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0c5filtermd",
   "metadata": {},
   "source": [
    "## Select the `Sorting` recordings\n",
    "\n",
    "Keep only recordings of the `Sorting` scenario in which height and arm normalization are either both on or both off."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "652e33e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "fil_dic_data = [\n",
    "    d for d in dic_data\n",
    "    if d['scenario'] == 'Sorting' and d['heightnorm'] == d['armnorm']\n",
    "]\n",
    "len(fil_dic_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0c6inspectmd",
   "metadata": {},
   "source": [
    "## Float columns of one example recording"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e77a3dec",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = 1\n",
    "entry = fil_dic_data[index]['data']\n",
    "print(fil_dic_data[index]['filename'])\n",
    "\n",
    "col_of_interest = [\n",
    "    col for col in entry.columns\n",
    "    if pd.api.types.is_float_dtype(entry[col].dtype)\n",
    "]\n",
    "len(col_of_interest)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0c7lengthmd",
   "metadata": {},
   "source": [
    "## Recording lengths\n",
    "\n",
    "The length of a recording is the number of rows in its CSV.\n",
    "\n",
    "An earlier run of this notebook reported 384 recordings with between 597 and 19371 rows each (median 2395, 90th percentile 5977)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67a3c912",
   "metadata": {},
   "outputs": [],
   "source": [
    "len_list = [len(d['data']) for d in dic_data]\n",
    "len_series = pd.Series(len_list, dtype='int64')\n",
    "\n",
    "# p / 100 avoids the rounding noise of p * 0.01 (e.g. 0.5700000000000001).\n",
    "len_series.describe(percentiles=[p / 100 for p in range(90, 100)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b473131",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quantiles 0.00 .. 0.99; the maximum (quantile 1.0) is left out on purpose,\n",
    "# exactly as in the original analysis.\n",
    "ptiles = [p / 100 for p in range(100)]\n",
    "quantile_values = len_series.quantile(ptiles).to_numpy()\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(quantile_values, ptiles)\n",
    "ax.set(\n",
    "    xlabel='recording length (rows)',\n",
    "    ylabel='quantile',\n",
    "    title='Quantiles of the recording length',\n",
    ");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}