{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d5ba35c5", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import pickle\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "d79ca127", "metadata": {}, "outputs": [], "source": [ "delim = ';'\n", "\n", "user_count = 100\n", "\n", "base_path = '/opt/iui-datarelease1-sose2021/'\n", "\n", "Xpickle_file = './X.pickle'\n", "\n", "ypickle_file = './y.pickle'" ] }, { "cell_type": "code", "execution_count": 3, "id": "de3b58c7", "metadata": {}, "outputs": [], "source": [ "def load_pickles():\n", " _p = open(Xpickle_file, 'rb')\n", " X = pickle.load(_p)\n", " _p.close()\n", " \n", " _p = open(ypickle_file, 'rb')\n", " y = pickle.load(_p)\n", " _p.close()\n", " \n", " return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))" ] }, { "cell_type": "code", "execution_count": 4, "id": "76568518", "metadata": {}, "outputs": [], "source": [ "def load_data():\n", " if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n", " return load_pickles()\n", " data = []\n", " label = []\n", " for user in range(0, user_count):\n", " user_path = base_path + str(user) + '/split_letters_csv/'\n", " for file in os.listdir(user_path):\n", " file_name = user_path + file\n", " letter = ''.join(filter(lambda x: x.isalpha(), file))[0]\n", " data.append(pd.read_csv(file_name, delim))\n", " label.append(letter)\n", " return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))" ] }, { "cell_type": "code", "execution_count": 5, "id": "84fade95", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "id": "d62131cf", "metadata": {}, "outputs": [], "source": [ "def save_pickle():\n", "# _p = open(np.asarray(data, dtype=pd.DataFrame), 'wb')\n", " _p = open(Xpickle_file, 'wb')\n", " pickle.dump(X, _p)\n", " _p.close()\n", "\n", "# _p = open(np.asarray(label, dtype=str), 'wb')\n", " _p = open(ypickle_file, 'wb')\n", " pickle.dump(y, _p)\n", " _p.close()" ] }, { "cell_type": "code", "execution_count": 6, "id": "75a7c43a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(13102, 13102)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(X), len(y)" ] }, { "cell_type": "code", "execution_count": 7, "id": "bf0fa338", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 13102.000000\n", "mean 208.304457\n", "std 206.732342\n", "min 42.000000\n", "50% 185.000000\n", "90% 270.000000\n", "91% 276.000000\n", "92% 286.000000\n", "93% 299.000000\n", "94% 312.000000\n", "95% 333.000000\n", "96% 355.000000\n", "97% 388.000000\n", "98% 456.980000\n", "99% 701.940000\n", "max 11073.000000\n", "dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAc20lEQVR4nO3deXRc9Znm8e+r0i7Lkm3Jq7yCV8AY0BgIwUCAtPEEmExIsNMZyIazMd3p5CQDoUNn6NM9k2QO6WSGDnEgoUPSECAkuIMJoWnCEjYLMF6xkVdJXiRLsmRblrXUO3/UtSnLMirbJd1ans85dXTr3p+rHsulx1e3bt2fuTsiIpL+csIOICIiyaFCFxHJECp0EZEMoUIXEckQKnQRkQyRG9YTV1RU+JQpU8J6ehGRtPTGG2/sdffK/raFVuhTpkyhpqYmrKcXEUlLZrb9RNt0yEVEJEOo0EVEMoQKXUQkQwxY6Gb2MzNrNLO1J9huZvYjM6s1s9Vmdn7yY4qIyEAS2UN/AFj4PtuvAaYHt6XAj08/loiInKwBC93dXwBa3mfI9cAvPOZVoNzMxiUroIiIJCYZx9AnAHVx9+uDdccxs6VmVmNmNU1NTUl4ahEROWJIz0N392XAMoDq6mpdt1dEQtcbdXqiUXp6PXaLRumJeuzWGyz3xo0J1vdGne6o0xuN0t3rsfvB+p5epzsajOk9dkxPb5QrZ4/h3InlSf+7JKPQG4CJcfergnUikubcvf9C61Nux4zpZ7m/0uuOL8a+Y6JRegd8rti2I4/ZX7Zjni94jmPGRZ0wpoQYPbwwZQt9OXCrmT0MXAi0ufuuJDyuSNaJRp39h3toP9RNe2c37Yd6gq/dtHfG1nf1RgcuqqPL8SV4orKNf6z4Pc/YbahFcoxIjpGXY+RGcsjNMXIjRm5ODrmRI9tyYl+D+7mRHPJzcyg+Mj7+zwTLkZyco+PzIjnHPMd7jxU3JniO+OfOjc8Ut+2YxzzuOeJyR3LIMTCzQfneDVjoZvYQcDlQYWb1wN8BeQDufi+wAlgE1AIdwGcGJalIGohGnQNdQSH3U8ZHSrrt6PKx2w4c7hlwjzE/El80/ZReznvlcXRMTg4FBbnBtn6K7UgxHi3A+Mc8tsQikRzyTvAckcjxZZvXpzD7K9ujZW1GTs7glF02GLDQ3X3JANsd+ErSEomkEHen5WAXO1o6qGs9RF1LB80HuuLK+Nji3p9AIZcW5DK8KI/SwtjXCeVFzB5XyvDCPIYX5VFWlMfwYFtsXe7RbcMKcomo8OQEQrs4l0iqONTVS31rB3WtHexo7mBHyyHqWjuoa4ndDnb1HjO+JD8SK92gcMeXFzKrsDS4338RlwXrhhWqkGXwqNAlK7g7u9s7WVPfxvpd7Wxv7mBHS+zWtP/wMWOL8iJMHFnEpJHFXDRtFJNGFjNxZDGTRhZTNaKIkgL92Ehq0itTMo67U996iLUNbazd2caahnbWNbTRfLALgByDcWVFTBxZxBUzK5k4ophJo2KlPXFEMRXD8gftTSuRwaRCl7R2pLxX17exumFfrMQb2mk71A1Abo4xfUwpV84ezdkTyjhrfBlzxg2nKD8ScnKR5FOhS1ppbO+MlXf9Pt6ub2NNQxstwZ53fiSHmWNLWXTOOM6eMJyzx5cxc2wphXkqb8kOKnRJaXUtHTy1dhc121pZXd/G7vZOIHbYZMaYUq6aPZq5VeWcW1XOzLGl5OfqitCSvVToknLqWjp4cs0unly9izUNbQBMrSjhwmkjmVtVztyqMs4aP5zifL18ReLpJ0JSwo7mWImvWPNeiZ9bVca3Fs3imrPHMXFkccgJRVKfCl1CoxIXSS4VugypfR1dPFpTz/K3d75X4hPLVeIiSaBClyHx7p79/PzlbTz+Zj2d3VHtiYsMAhW6DKpVdfv4P09v5KXaveTn5vDReRP49CVTmD1ueNjRRDKOCl0GRW/Uuff5zdz9zCZGluTzjb+YyZL5kxhZkh92NJGMpUKXpKtr6eCbj63mlS3NfGTuOP7ho+dQVpQXdiyRjKdCl6Tp7o1y34tb+eGzm4iY8f0b5nLDBVW6LorIEFGhS1KsqW/j64+uYtOeAyw8ayx3XjuH8eVFYccSySoqdDltf1y3m796+C1GFOdz/83VXDl7TNiRRLKSCl1Ombtz34tb+cenNjC3qpz7bqqmsrQg7FgiWUuFLqek+cBhvvnYap59p5GFZ43lBzfO0yVpRUKmQpeT9sb2Fr70yzfZd6ib71w7h5s/MEVvfIqkABW6nJSWg1188ZdvUpwf4YHPzGfOeH1ASCRV6OLRkjB3547frmFfRxf//Jfnq8xFUowKXRL227caeGrtbv7m6hmcNb4s7Dgi0ocKXRKybe9B/u6JdVRPHsEXFpwRdhwR6YcKXQa0qm4fN9z7Mjk5xt2fmEckR2+AiqQiFbq8r+c2NrJ42SsU5Ud4/MsfYNIoXepWJFXpLBc5oe7eKHc8voYpo0r45ecvpGKYPjQkksq0hy4n9MSqnexs6+SbC2eqzEXSgApd+rWqbh93PrGWuVVlXD5jdNhxRCQBKnQ5zra9B/n0z1+nYlgB991UTY7eBBVJCzqGLsd56PUdHDzcwxNfuYTRwwvDjiMiCUpoD93MFprZRjOrNbPb+tk+ycyeM7O3zGy1mS1KflQZKq9va2FuVTmTR5WEHUVETsKAhW5mEeAe4BpgDrDEzOb0Gfa3wCPufh6wGPjnZAeVofHshj28tWMfH5ql4+Yi6SaRPfT5QK27b3H3LuBh4Po+Yxw4cmGPMmBn8iLKUGk52MX/+M0aZo0t5fOXTg07joicpEQKfQJQF3e/PlgX7zvAp8ysHlgB/Pf+HsjMlppZjZnVNDU1nUJcGSzuzrceX0P7oW5+cOM8CnJ1bXORdJOss1yWAA+4exWwCHjQzI57bHdf5u7V7l5dWVmZpKeWZPjdqgb+sG43X//wDGaP01UURdJRIoXeAEyMu18VrIv3OeARAHd/BSgEKpIRUIbGg69sDw61TAs7ioicokQKfSUw3cymmlk+sTc9l/cZswO4EsDMZhMrdB1TSROd3b2sbWjnshmVuvCWSBobsNDdvQe4FXga2EDsbJZ1ZnaXmV0XDPs6cIuZvQ08BHza3X2wQkty/ejZd+nqjXKFzmwRSWsJfbDI3VcQe7Mzft2dccvrgUuSG02GwhvbW7j3+c3cWD2Ri6aNCjuOiJwGffQ/y33vDxsZXVrIt6/t+9ECEUk3KvQstqpuH69tbeHzl05lWIGuAiGS7lToWeypNbvIj+SweP6ksKOISBKo0LPY69taOKeqTHvnIhlChZ6lnlmva7aIZBoVehZqOdjF7Y+vZva44dyiDxKJZAwVeha6/6UtNB/s4gc3nkt+rl4CIplCP81Z5uDhHh58ZTsLzxrLrLG6ZotIJlGhZ5naxgO0d/Zw/bzxYUcRkSRToWeZI9dj0KEWkcyjn+osU7OtBYCxw4tCTiIiyaZCzyJdPVHue3ErF00byZzxOn4ukmlU6Flk+ds72d3eyRcvOyPsKCIyCFToWSIadZa9sJlZY0u5bIZmixLJRCr0LPGnTY1s2nOAL1w2DTNNYiGSiVToWeLe57cwobyIj8zV6YoimUqFniXe2tHKonPGkhfRP7lIptJPdxZRmYtkNv2EZ4G9Bw7T3euUF+eFHUVEBpEKPQsc+TDR+ZNGhJxERAaTCj3DdXb3cvczmxhdWsA5VWVhxxGRQaSpajLcD57ZxKY9B/j5Z/4TBbmRsOOIyCDSHnoGa2zv5Od/3sbHL6jiipmamUgk06nQM9gDL2+jJxrl1g+dGXYUERkCKvQM9taOfcytKmfyqJKwo4jIEFChZ7h8nXsukjX0057Bdrd3UqZzz0Wyhgo9Q+09cJitew9ywWSdey6SLVToGeo3b9QDsGC6LpUrki1U6Bmos7uXn/15K5ecOUozE4lkERV6Bvr+0xvZ036YW6+YHnYUERlCCRW6mS00s41mVmtmt51gzCfMbL2ZrTOzf01uTEnUym0t3P/SVm66eDIXnzEq7DgiMoQG/Oi/mUWAe4CrgXpgpZktd/f1cWOmA7cDl7h7q5npY4kheWFTEwC3XzM75CQiMtQS2UOfD9S6+xZ37wIeBq7vM+YW4B53bwVw98bkxpSTkWNQlK/rtohkm0QKfQJQF3e/PlgXbwYww8z+bGavmtnC/h7IzJaaWY2Z1TQ1NZ1aYnlfO/d1Ul6cH3YMEQlBst4UzQWmA5cDS4Cfmll530Huvszdq929urJSp9MNhje2t+i65yJZKpFCbwAmxt2vCtbFqweWu3u3u28FNhEreBlCNdta2NbcwWUzKsKOIiIhSKTQVwLTzWyqmeUDi4Hlfcb8jtjeOWZWQewQzJbkxZRE3Pv8FkYU5/GxC6rCjiIiIRiw0N29B7gVeBrYADzi7uvM7C4zuy4Y9jTQbGbrgeeAb7h782CFluPVNu7n3zfs4b9dPIXifM1bIpKNEvrJd/cVwIo+6+6MW3bga8FNQrDshS0U5uVw88WTw44iIiHRJ0UzQDTq/HH9HhadM45RwwrCjiMiIVGhZ4Atew+wr6Obi6bqk6Ei2UyFngEa9nUCMK1SMxOJZDMVegaIvYUBZhZyEhEJkwo9A7yyuZlIjjG+vDDsKCISIhV6mmvv7OZXr+3gP58zjnFlRWHHEZEQqdDT3K9e3cGBwz0sXTAt7CgiEjIVeho73BObmejS6RWcPaEs7DgiEjIVehr77ZsNNO0/zBcWnBF2FBFJASr0NOXuLHtxC2dPGM4lZ+r8cxFRoaetQ929bGk6yDVnj9PpiiICqNDTXm6OylxEYlToaWrnvkMAjCjR7EQiEqNCT1Ovb20FoHqyZicSkRgVehpydx6pqWPSyGKmVuj6LSISo0JPQyu3tbKqbh+3XDpVb4iKyFEq9DT0k+c3M7IknxsumDjwYBHJGir0NPPunv08+04jN188haL8SNhxRCSFqNDTzB/X7wHgkxdOCjmJiKQaFXqaeWN7K2dUllBZqqnmRORYKvQ0s6utk6kVw8KOISIpSIWeZtwdndgiIv1RoaeRaNTZ1dbJKH06VET6oUJPI7VNB2g71M0F+nSoiPRDhZ5GHllZRyTH+OD0irCjiEgKUqGniZaDXTz0+g6unau5Q0Wkfyr0NODufPt3a+nqjfLlK84MO46IpCgVehpYsWY3T67Zxd9cPYMZY0rDjiMiKUqFngZe29pMaUGu5g4VkfelQk8TuREjotmJROR9qNBFRDJEQoVuZgvNbKOZ1ZrZbe8z7mNm5mZWnbyI4h52AhFJBwMWuplFgHuAa4A5wBIzm9PPuFLgr4HXkh0ym/VGnRffbWKKZiYSkQEksoc+H6h19y3u3gU8DFzfz7i/B74LdCYxX9Z7et1utjV3cMul08KOIiIpLpFCnwDUxd2vD9YdZWbnAxPd/cn3eyAzW2pmNWZW09TUdNJhs42785PnNzNlVDF/cdbYsOOISIo77TdFzSwHuBv4+kBj3X2Zu1e7e3VlZeXpPnXGe3VLC2/Xt3HLgmk6w0VEBpRIoTcA8ZNXVgXrjigFzgb+ZGbbgIuA5Xpj9PTd+/xmKobl87Hzq8KOIiJpIJFCXwlMN7OpZpYPLAaWH9no7m3uXuHuU9x9CvAqcJ271wxK4iyxcfd+nt/UxKc/MIXCPM0dKiIDG7DQ3b0HuBV4GtgAPOLu68zsLjO7brADZqt3drcD6Ni5iCQsN5FB7r4CWNFn3Z0nGHv56ceSI3J07FxEEqRPioqIZAgVeorSp0NF5GSp0FPUnzY2UpwfYXRpQdhRRCRNqNBTUH1rB/+2ehdL5k+itDAv7DgikiZU6Cno/pe2YsBnPzg17CgikkZU6ClmX0cXD79ex3XzxjOhXHOHikjiVOgp5sFXtnOou5elC3QxLhE5OSr0FHK4p5cHXt7GFTMrmTV2eNhxRCTNqNBTSPOBLpoPdnH1HH06VEROngo9BUX0ryIip0DVISKSIVToKSSqj4eKyGlQoaeQ595pBODM0cNCTiIi6UiFniJ6eqP89MWtnDepnPMnjQg7joikIRV6ivjDut3saOngCwvOwEyXzBWRk6dCTwGxyaC3MK2ihA/PGRN2HBFJUyr0FPDy5mbWNMQmg9aEFiJyqlToKWDZC1uoLC3go+dNCDuKiKQxFXoKqG08wKXTKzQZtIicFhV6CnB3cvRGqIicJhV6yA519dK4/zCVmplIRE6TCj1kb9W10hN1qifr3HMROT0q9JA99HodwwpymT91ZNhRRCTNqdBDVNfSwZOrd/LJCzV3qIicPhV6iO57cQuRHOOzl2juUBE5fSr0kOzr6OLXNXX8l3kTGFtWGHYcEckAKvSQvLy5mc7uKIvnTwo7iohkCBV6SN7Y3kpBbg7nTCgLO4qIZAgVekia9h9mXFkh+bn6JxCR5FCbhCTqrsvkikhSqdBD0NHVw59r9zJdMxOJSBIlVOhmttDMNppZrZnd1s/2r5nZejNbbWbPmtnk5EfNHI/W1NPa0c3SBdPCjiIiGWTAQjezCHAPcA0wB1hiZnP6DHsLqHb3ucBjwPeSHTRTxKaa28IFk0dQPUWfDhWR5ElkD30+UOvuW9y9C3gYuD5+gLs/5+4dwd1XgarkxswcT67ZRX3rIb542RlhRxGRDJNIoU8A6uLu1wfrTuRzwFP9bTCzpWZWY2Y1TU1NiafMEEemmjujsoQrZ40OO46IZJikvilqZp8CqoHv97fd3Ze5e7W7V1dWVibzqdPCK5ubWb+rnS8sOENTzYlI0uUmMKYBmBh3vypYdwwzuwq4A7jM3Q8nJ15m2dx0AIDLZ2Xff2YiMvgS2UNfCUw3s6lmlg8sBpbHDzCz84CfANe5e2PyY2aG3qgDaHYiERkUAxa6u/cAtwJPAxuAR9x9nZndZWbXBcO+DwwDHjWzVWa2/AQPl9X+uH4PY4YXUF6kS+WKSPIlcsgFd18BrOiz7s645auSnCvjvLmjlZc3N/OtRbPIjejzXCKSfGqWIdDZ3cs3H1vNmOEFLNHVFUVkkCS0hy6n54fPvktt4wEe/Nx8zUwkIoNGe+hD4Perd3LV7NFcOl1nt4jI4FGhD7LG9k7qWg5x0bRRYUcRkQynQh9kew90AVA1oijkJCKS6VTog6yrNxos6dxzERlcKvRB5O7c/cwmCvNyOHvC8LDjiEiGU6EPol++up0XNjVxx6LZVI0oDjuOiGQ4Ffog2dx0gH9YsYEFMyr51EWa70NEBp8KfZDc/ps1FORG+P4NczV3qIgMCRX6IFm3s42PnjeBMcMLw44iIllChT4I3J1ed3J1zXMRGUIq9EHw/KYmOrujzB6nM1tEZOio0JPM3bnnuVrGDi/k2nPHhx1HRLKICj3JHnx1Oyu3tfJXV04nP1ffXhEZOmqcJGrYd4h/XLGBy2ZUsmT+xIH/gIhIEqnQk+i5dxrp7I5y57VzdKqiiAw5FXoSvbx5LxXDCphWURJ2FBHJQir0JHlhUxMr1uzmv54/QXvnIhIKFXoS7Ovo4huPvc2Zo4fxtatnhB1HRLKUCj0Jvv3EOpoPdPFPN86jMC8SdhwRyVIq9NP0xKoG/u3tnXz1qumcPaEs7DgiksVU6KdhV9shvv27tZw3qZwvXnZG2HFEJMup0E9RNOp849HVdPc6P/jEPHIj+laKSLjUQqfoF69s46XavfztR2YzRacpikgKUKGfgj9tbOR/PfUOV8ys5JPzJ4UdR0QEgNywA6STwz29fO8PG7n/pa3MHFPKdzV5hYikEBV6gna1HeLz/1LDup3t3HzxZG5fNFunKIpISlGhJ2DdzjY++8BKDh7u5ac3VXP1nDFhRxIROY4K/X1Eo84jNXX8/e/XU1aUx2NfuphZYzVphYikJhX6CWzcvZ87fruGmu2tzJ86kv+75DzNDyoiKS2hQjezhcAPgQhwn7v/7z7bC4BfABcAzcCN7r4tuVFPn7uz/3APrQe7aD7YdczXlrhb88Eu1ja0UVqYy/dumMvHL6jSm58ikvIGLHQziwD3AFcD9cBKM1vu7uvjhn0OaHX3M81sMfBd4MbBCByvuzdKa8exZdy3rI/Z1tFFd6/3+1j5uTmMKslnRHE+o4blc9PFU7j1Q2cysiR/sP8aIiJJkcge+nyg1t23AJjZw8D1QHyhXw98J1h+DPh/Zmbu3n97noZfr9zBj/+0mZaDXbR39pxwXFlRXqygS/KZOLKYc6vKGTksn5HF+YwsOf5WnB/RXriIpLVECn0CUBd3vx648ERj3L3HzNqAUcDe+EFmthRYCjBp0ql9IGdUSQFzq8qPFvGIkvxj9qxHFOczojhPH8UXkawzpG+KuvsyYBlAdXX1Ke29XzVnDFfptEERkeMkshvbAMTPeFwVrOt3jJnlAmXE3hwVEZEhkkihrwSmm9lUM8sHFgPL+4xZDtwcLN8A/MdgHD8XEZETG/CQS3BM/FbgaWKnLf7M3deZ2V1AjbsvB+4HHjSzWqCFWOmLiMgQSugYuruvAFb0WXdn3HIn8PHkRhMRkZOhU0FERDKECl1EJEOo0EVEMoQKXUQkQ1hYZxeaWROwPZQnPzkV9PnEa5pQ7qGVrrkhfbNna+7J7l7Z34bQCj1dmFmNu1eHneNkKffQStfckL7Zlft4OuQiIpIhVOgiIhlChT6wZWEHOEXKPbTSNTekb3bl7kPH0EVEMoT20EVEMoQKXUQkQ2R9oZvZz8ys0czWxq0baWbPmNm7wdcRwXozsx+ZWa2ZrTaz80PKPNHMnjOz9Wa2zsz+Oh1yB1kKzex1M3s7yP4/g/VTzey1IOOvg0s1Y2YFwf3aYPuUELNHzOwtM/t9umQO8mwzszVmtsrMaoJ16fBaKTezx8zsHTPbYGYXp3puM5sZfJ+P3NrN7KtDltvds/oGLADOB9bGrfsecFuwfBvw3WB5EfAUYMBFwGshZR4HnB8slwKbgDmpnjvIYsCwYDkPeC3I9AiwOFh/L/ClYPnLwL3B8mLg1yFm/xrwr8Dvg/spnznIsA2o6LMuHV4r/wJ8PljOB8rTIXdc/giwG5g8VLlD/Qunyg2Y0qfQNwLjguVxwMZg+SfAkv7GhZz/CeDqNMxdDLxJbI7avUBusP5i4Olg+Wng4mA5NxhnIWStAp4FPgT8PvgBTOnMcdn7K/SUfq0Qm/Vsa9/vW6rn7pP1w8CfhzJ31h9yOYEx7r4rWN4NHJnEtL8JsycMZbC+gl/nzyO2p5sWuYNDF6uARuAZYDOwz917giHx+Y6ZgBw4MgH5UPsn4JtANLg/itTPfIQDfzSzNyw2UTuk/mtlKtAE/Dw4zHWfmZWQ+rnjLQYeCpaHJLcKfQAe+28zJc/tNLNhwG+Ar7p7e/y2VM7t7r3uPo/YXu98YFa4id6fmX0EaHT3N8LOcoo+6O7nA9cAXzGzBfEbU/S1kkvsUOiP3f084CCxQxVHpWhuAIL3U64DHu27bTBzq9D7t8fMxgEEXxuD9YlMmD0kzCyPWJn/yt0fD1anfO547r4PeI7Y4Ypyi00wDsfmS4UJyC8BrjOzbcDDxA67/JDUznyUuzcEXxuB3xL7TzTVXyv1QL27vxbcf4xYwad67iOuAd509z3B/SHJrULvX/yk1zcTO0Z9ZP1NwTvTFwFtcb9GDRkzM2LzuG5w97vjNqV0bgAzqzSz8mC5iNix/w3Eiv2GYFjf7KFOQO7ut7t7lbtPIfZr9H+4+1+SwpmPMLMSMys9skzsuO5aUvy14u67gTozmxmsuhJYT4rnjrOE9w63wFDlDvNNg1S4Bd/0XUA3sb2CzxE73vks8C7w78DIYKwB9xA75rsGqA4p8weJ/cq2GlgV3Baleu4gy1zgrSD7WuDOYP004HWgltivqQXB+sLgfm2wfVrIr5fLee8sl5TPHGR8O7itA+4I1qfDa2UeUBO8Vn4HjEiT3CXEfiMri1s3JLn10X8RkQyhQy4iIhlChS4ikiFU6CIiGUKFLiKSIVToIiIZQoUuIpIhVOgiIhni/wPkyoAXyJd+MwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "X_len = np.asarray(list(map(len, X)))\n", "l = []\n", "sq_xlen = pd.Series(X_len)\n", "ptiles = [x*0.01 for x in range(100)]\n", "for i in ptiles:\n", " l.append(sq_xlen.quantile(i))\n", "plt.plot(l, ptiles)\n", "sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])" ] }, { "cell_type": "code", "execution_count": 49, "id": "4ad7d74b", "metadata": {}, "outputs": [], "source": [ "def fill(x, threshold):\n", " fill = threshold - len(x)\n", " xx = x\n", " for i in range(fill):\n", " xx = xx.append(pd.Series(0,index=x.columns,dtype='float64'), ignore_index=True)\n", " return xx" ] }, { "cell_type": "code", "execution_count": 56, "id": "88e14bfd", "metadata": {}, "outputs": [], "source": [ "threshold_p = 0.98\n", "threshold = int(sq_xlen.quantile(threshold_p))\n", "len_mask = np.where(X_len <= threshold)\n", "\n", "X_filter = X[len_mask]\n", "y_filter = y[len_mask]" ] }, { "cell_type": "code", "execution_count": 57, "id": "0cdc1aff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 41min 37s, sys: 1.15 s, total: 41min 38s\n", "Wall time: 41min 59s\n" ] } ], "source": [ "%%time\n", "X_filter = list(map(fill, X_filter, [threshold for i in range(len(X_filter))]))" ] }, { "cell_type": "code", "execution_count": 51, "id": "b94d58d2", "metadata": {}, "outputs": [], "source": [ "def plot_data(data):\n", " fig, axs = plt.subplots(4, 3, figsize=(3*3, 3*4))\n", " t = data['Millis']\n", " axs[0][0].plot(t, data['Acc1 X'])\n", " axs[0][1].plot(t, data['Acc1 Y'])\n", " axs[0][2].plot(t, data['Acc1 Z'])\n", " axs[1][0].plot(t, data['Acc2 X'])\n", " axs[1][1].plot(t, data['Acc2 Y'])\n", " axs[1][2].plot(t, data['Acc2 Z'])\n", " axs[2][0].plot(t, data['Gyro X'])\n", " axs[2][1].plot(t, data['Gyro Y'])\n", " axs[2][2].plot(t, data['Gyro Z'])\n", " axs[3][0].plot(t, data['Mag X'])\n", " axs[3][1].plot(t, data['Mag Y'])\n", " axs[3][2].plot(t, data['Mag Z'])\n", "\n", " for a in axs:\n", " for b in a:\n", " b.plot(t, data['Force'])\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "b4685b66", "metadata": {}, "outputs": [], "source": [ "Xfiltered_pickle_file = './X_filter.pickle'\n", "yfiltered_pickle_file = \"./y_filter.pickle\"" ] }, { "cell_type": "code", "execution_count": 63, "id": "cf5e37be", "metadata": {}, "outputs": [], "source": [ "def save_filtered():\n", " _p = open(Xfiltered_pickle_file, 'wb')\n", " pickle.dump(X_filter, _p)\n", " _p.close()\n", "\n", " _p = open(yfiltered_pickle_file, 'wb')\n", " pickle.dump(y_filter, _p)\n", " _p.close()" ] }, { "cell_type": "code", "execution_count": 64, "id": "42af99a2", "metadata": {}, "outputs": [], "source": [ "save_filtered()" ] }, { "cell_type": "code", "execution_count": 9, "id": "f8ee86b4", "metadata": {}, "outputs": [], "source": [ "def load_filtered_pickles():\n", " _p = open(Xfiltered_pickle_file, 'rb')\n", " X = pickle.load(_p)\n", " _p.close()\n", " \n", " _p = open(yfiltered_pickle_file, 'rb')\n", " y = pickle.load(_p)\n", " _p.close()\n", " \n", " return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))" ] }, { "cell_type": "code", "execution_count": 10, "id": "7ab94e65", "metadata": {}, "outputs": [], "source": [ "XX_filtered, yy_filtered = load_filtered_pickles()" ] }, { "cell_type": "code", "execution_count": 13, "id": "dbb34cbb", "metadata": {}, "outputs": [], "source": [ "# FIRST CELL: set these variables to limit GPU usage.\n", "import os\n", "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' # this is required\n", "os.environ['CUDA_VISIBLE_DEVICES'] = '2' # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal." ] }, { "cell_type": "code", "execution_count": 14, "id": "720dce7b", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder, LabelBinarizer\n", "import tensorflow as tf\n", "\n", "lb = LabelBinarizer()\n", "\n", "yyt_filtered = lb.fit_transform(yy_filtered)\n", "XX_filtered = np.asarray(XX_filtered).astype('float64')\n", "XXX_filtered = np.delete(XX_filtered, 0, 2)" ] }, { "cell_type": "code", "execution_count": 15, "id": "d9505344", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(10271, 456, 14)\n", "(2568, 456, 14)\n", "(10271, 26)\n", "(2568, 26)\n" ] } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(XXX_filtered, yyt_filtered, test_size=0.2, random_state=177013)\n", "\n", "print(X_train.shape)\n", "print(X_test.shape)\n", "print(y_train.shape)\n", "print(y_test.shape)" ] }, { "cell_type": "code", "execution_count": 26, "id": "02954ff2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_3\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_3 (Flatten) (None, 6384) 0 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 456) 2911560 \n", "_________________________________________________________________\n", "dense_7 (Dense) (None, 26) 11882 \n", "=================================================================\n", "Total params: 2,923,442\n", "Trainable params: 2,923,442\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D\n", "\n", "model = Sequential()\n", "\n", "# model.add(Conv1D(32, 3, input_shape = X_train.shape[1:]))\n", "# model.add(Activation('relu'))\n", "# model.add(MaxPooling1D(pool_size=3))\n", "\n", "# model.add(Conv1D(32, 3))\n", "# model.add(Activation('relu'))\n", "# model.add(MaxPooling1D(pool_size=3))\n", "\n", "model.add(Flatten(input_shape=(456,14)))\n", "\n", "model.add(Dense(456, activation='relu'))\n", "\n", "model.add(Dense(26, activation='softmax'))\n", "\n", "model.compile(\n", " optimizer=tf.keras.optimizers.Adam(0.001),\n", " loss=\"categorical_crossentropy\", \n", " metrics=[\"acc\"],\n", ")\n", "\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 27, "id": "03524b14", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((10271, 456, 14), (10271, 26))" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape, y_train.shape" ] }, { "cell_type": "code", "execution_count": 28, "id": "fbaf27c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "161/161 [==============================] - 1s 2ms/step - loss: 8658.2280 - acc: 0.0409\n", "Epoch 2/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 26.3454 - acc: 0.0447\n", "Epoch 3/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 10.9452 - acc: 0.0422\n", "Epoch 4/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.9099 - acc: 0.0437\n", "Epoch 5/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.5461 - acc: 0.0474\n", "Epoch 6/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 4.2588 - acc: 0.0423\n", "Epoch 7/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.3213 - acc: 0.0414\n", "Epoch 8/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.2681 - acc: 0.0451\n", "Epoch 9/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.2482 - acc: 0.0422\n", "Epoch 10/10\n", "161/161 [==============================] - 0s 2ms/step - loss: 3.2693 - acc: 0.0435\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train, y_train, \n", " epochs=10,\n", " batch_size=64,\n", " shuffle=True,\n", " verbose=1\n", " )" ] }, { "cell_type": "code", "execution_count": 30, "id": "d40b45a5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Evaluate on test data\n", "81/81 [==============================] - 0s 2ms/step - loss: 19.9920 - acc: 0.0331\n", "test loss, test acc: [19.992048263549805, 0.033099688589572906]\n", "Generate predictions for 3 samples\n", "predictions shape: (3, 26)\n" ] }, { "data": { "text/plain": [ "(array(['K', 'T', 'U'], dtype='" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, axs = plt.subplots(13,2,figsize=(20, 60), sharey=True)\n", "for i,j in zip(XXX_filtered, yy_filtered):\n", " num = ord(j) - 64\n", " f = i.T[12]\n", " r = int((num-1)/2)%13\n", " c = (num-1)%2\n", " axs[r][c].title.set_text(f'{j}')\n", " axs[r][c].plot(f)" ] }, { "cell_type": "code", "execution_count": null, "id": "5bf78018", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 5 }