iui-group-l-name-zensiert/1-first-project/T_DataNormaization.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5d6412cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import pickle\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fa43325a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Field separator handed to pd.read_csv when the per-letter CSV files are read.\n",
    "delim = ';'\n",
    "\n",
    "# load_data() walks the user directories 0 .. user_count - 1 below base_path.\n",
    "user_count = 100\n",
    "\n",
    "# Root of the data release; load_data() reads '<base_path><user>/split_letters_csv/'.\n",
    "base_path = '/opt/iui-datarelease1-sose2021/'\n",
    "\n",
    "# Pickle cache for the raw samples (X).\n",
    "Xpickle_file = './X.pickle'\n",
    "\n",
    "# Pickle cache for the matching letter labels (y).\n",
    "ypickle_file = './y.pickle'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1ea7c2f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_pickles():\n",
    "    \"\"\"Load the cached samples and labels from Xpickle_file and ypickle_file.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (X, y): X converted with np.asarray(..., dtype=pd.DataFrame),\n",
    "        y converted to a str array.\n",
    "    \"\"\"\n",
    "    # NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.\n",
    "    # The with-blocks close the files even when unpickling raises.\n",
    "    with open(Xpickle_file, 'rb') as x_file:\n",
    "        X = pickle.load(x_file)\n",
    "\n",
    "    with open(ypickle_file, 'rb') as y_file:\n",
    "        y = pickle.load(y_file)\n",
    "\n",
    "    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "91f4642c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    \"\"\"Return (X, y): one DataFrame per letter recording plus its letter label.\n",
    "\n",
    "    When both pickle cache files exist they are loaded via load_pickles().\n",
    "    Otherwise every file in '<base_path><user>/split_letters_csv/' is read\n",
    "    for users 0 .. user_count - 1.\n",
    "\n",
    "    Raises:\n",
    "        OSError: if a user directory is missing or unreadable (from os.listdir).\n",
    "        IndexError: if a file name contains no alphabetic character.\n",
    "    \"\"\"\n",
    "    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
    "        return load_pickles()\n",
    "    data = []\n",
    "    label = []\n",
    "    for user in range(0, user_count):\n",
    "        user_path = base_path + str(user) + '/split_letters_csv/'\n",
    "        # os.listdir returns entries in arbitrary order; sort so the sample order is reproducible.\n",
    "        for file in sorted(os.listdir(user_path)):\n",
    "            file_name = user_path + file\n",
    "            # The label is the first alphabetic character of the file name.\n",
    "            letter = ''.join(filter(lambda ch: ch.isalpha(), file))[0]\n",
    "            # sep is passed by keyword: pandas >= 2.0 no longer accepts it positionally.\n",
    "            data.append(pd.read_csv(file_name, sep=delim))\n",
    "            label.append(letter)\n",
    "    return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a8629dc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every sample and its letter label; load_data() uses the pickle cache when both files exist.\n",
    "X, y = load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "18cd698f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_pickle():\n",
    "    \"\"\"Write the notebook-level X and y to Xpickle_file and ypickle_file.\n",
    "\n",
    "    The with-blocks make sure each file is flushed and closed even when\n",
    "    pickling raises.\n",
    "    \"\"\"\n",
    "    with open(Xpickle_file, 'wb') as x_file:\n",
    "        pickle.dump(X, x_file)\n",
    "\n",
    "    with open(ypickle_file, 'wb') as y_file:\n",
    "        pickle.dump(y, y_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0f505920",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13102, 13102)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: the number of samples and the number of labels should match.\n",
    "len(X), len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4bd9f443",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    13102.000000\n",
       "mean       208.304457\n",
       "std        206.732342\n",
       "min         42.000000\n",
       "50%        185.000000\n",
       "90%        270.000000\n",
       "91%        276.000000\n",
       "92%        286.000000\n",
       "93%        299.000000\n",
       "94%        312.000000\n",
       "95%        333.000000\n",
       "96%        355.000000\n",
       "97%        388.000000\n",
       "98%        456.980000\n",
       "99%        701.940000\n",
       "max      11073.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAc20lEQVR4nO3deXRc9Znm8e+r0i7Lkm3Jq7yCV8AY0BgIwUCAtPEEmExIsNMZyIazMd3p5CQDoUNn6NM9k2QO6WSGDnEgoUPSECAkuIMJoWnCEjYLMF6xkVdJXiRLsmRblrXUO3/UtSnLMirbJd1ans85dXTr3p+rHsulx1e3bt2fuTsiIpL+csIOICIiyaFCFxHJECp0EZEMoUIXEckQKnQRkQyRG9YTV1RU+JQpU8J6ehGRtPTGG2/sdffK/raFVuhTpkyhpqYmrKcXEUlLZrb9RNt0yEVEJEOo0EVEMoQKXUQkQwxY6Gb2MzNrNLO1J9huZvYjM6s1s9Vmdn7yY4qIyEAS2UN/AFj4PtuvAaYHt6XAj08/loiInKwBC93dXwBa3mfI9cAvPOZVoNzMxiUroIiIJCYZx9AnAHVx9+uDdccxs6VmVmNmNU1NTUl4ahEROWJIz0N392XAMoDq6mpdt1dEQtcbdXqiUXp6PXaLRumJeuzWGyz3xo0J1vdGne6o0xuN0t3rsfvB+p5epzsajOk9dkxPb5QrZ4/h3InlSf+7JKPQG4CJcfergnUikubcvf9C61Nux4zpZ7m/0uuOL8a+Y6JRegd8rti2I4/ZX7Zjni94jmPGRZ0wpoQYPbwwZQt9OXCrmT0MXAi0ufuuJDyuSNaJRp39h3toP9RNe2c37Yd6gq/dtHfG1nf1RgcuqqPL8SV4orKNf6z4Pc/YbahFcoxIjpGXY+RGcsjNMXIjRm5ODrmRI9tyYl+D+7mRHPJzcyg+Mj7+zwTLkZyco+PzIjnHPMd7jxU3JniO+OfOjc8Ut+2YxzzuOeJyR3LIMTCzQfneDVjoZvYQcDlQYWb1wN8BeQDufi+wAlgE1AIdwGcGJalIGohGnQNdQSH3U8ZHSrrt6PKx2w4c7hlwjzE/El80/ZReznvlcXRMTg4FBbnBtn6K7UgxHi3A+Mc8tsQikRzyTvAckcjxZZvXpzD7K9ujZW1GTs7glF02GLDQ3X3JANsd+ErSEomkEHen5WAXO1o6qGs9RF1LB80HuuLK+Nji3p9AIZcW5DK8KI/SwtjXCeVFzB5XyvDCPIYX5VFWlMfwYFtsXe7RbcMKcomo8OQEQrs4l0iqONTVS31rB3WtHexo7mBHyyHqWjuoa4ndDnb1HjO+JD8SK92gcMeXFzKrsDS4338RlwXrhhWqkGXwqNAlK7g7u9s7WVPfxvpd7Wxv7mBHS+zWtP/wMWOL8iJMHFnEpJHFXDRtFJNGFjNxZDGTRhZTNaKIkgL92Ehq0itTMo67U996iLUNbazd2caahnbWNbTRfLALgByDcWVFTBxZxBUzK5k4ophJo2KlPXFEMRXD8gftTSuRwaRCl7R2pLxX17exumFfrMQb2mk71A1Abo4xfUwpV84ezdkTyjhrfBlzxg2nKD8ScnKR5FOhS1ppbO+MlXf9Pt6ub2NNQxstwZ53fiSHmWNLWXTOOM6eMJyzx5cxc2wphXkqb8kOKnRJaXUtHTy1dhc121pZXd/G7vZOIHbYZMaYUq6aPZq5VeWcW1XOzLGl5OfqitCSvVToknLqWjp4cs0unly9izUNbQBMrSjhwmkjmVtVztyqMs4aP5zifL18ReLpJ0JSwo7mWImvWPNeiZ9bVca3Fs3imrPHMXFkccgJRVKfCl1CoxIXSS4VugypfR1dPFpTz/K3d75X4hPLVeIiSaBClyHx7p79/PzlbTz+Zj2d3VHtiYsMAhW6DKpVdfv4P09v5KXaveTn5vDReRP49CVTmD1ueNjRRDKOCl0GRW/Uuff5zdz9zCZG
luTzjb+YyZL5kxhZkh92NJGMpUKXpKtr6eCbj63mlS3NfGTuOP7ho+dQVpQXdiyRjKdCl6Tp7o1y34tb+eGzm4iY8f0b5nLDBVW6LorIEFGhS1KsqW/j64+uYtOeAyw8ayx3XjuH8eVFYccSySoqdDltf1y3m796+C1GFOdz/83VXDl7TNiRRLKSCl1Ombtz34tb+cenNjC3qpz7bqqmsrQg7FgiWUuFLqek+cBhvvnYap59p5GFZ43lBzfO0yVpRUKmQpeT9sb2Fr70yzfZd6ib71w7h5s/MEVvfIqkABW6nJSWg1188ZdvUpwf4YHPzGfOeH1ASCRV6OLRkjB3547frmFfRxf//Jfnq8xFUowKXRL227caeGrtbv7m6hmcNb4s7Dgi0ocKXRKybe9B/u6JdVRPHsEXFpwRdhwR6YcKXQa0qm4fN9z7Mjk5xt2fmEckR2+AiqQiFbq8r+c2NrJ42SsU5Ud4/MsfYNIoXepWJFXpLBc5oe7eKHc8voYpo0r45ecvpGKYPjQkksq0hy4n9MSqnexs6+SbC2eqzEXSgApd+rWqbh93PrGWuVVlXD5jdNhxRCQBKnQ5zra9B/n0z1+nYlgB991UTY7eBBVJCzqGLsd56PUdHDzcwxNfuYTRwwvDjiMiCUpoD93MFprZRjOrNbPb+tk+ycyeM7O3zGy1mS1KflQZKq9va2FuVTmTR5WEHUVETsKAhW5mEeAe4BpgDrDEzOb0Gfa3wCPufh6wGPjnZAeVofHshj28tWMfH5ql4+Yi6SaRPfT5QK27b3H3LuBh4Po+Yxw4cmGPMmBn8iLKUGk52MX/+M0aZo0t5fOXTg07joicpEQKfQJQF3e/PlgX7zvAp8ysHlgB/Pf+HsjMlppZjZnVNDU1nUJcGSzuzrceX0P7oW5+cOM8CnJ1bXORdJOss1yWAA+4exWwCHjQzI57bHdf5u7V7l5dWVmZpKeWZPjdqgb+sG43X//wDGaP01UURdJRIoXeAEyMu18VrIv3OeARAHd/BSgEKpIRUIbGg69sDw61TAs7ioicokQKfSUw3cymmlk+sTc9l/cZswO4EsDMZhMrdB1TSROd3b2sbWjnshmVuvCWSBobsNDdvQe4FXga2EDsbJZ1ZnaXmV0XDPs6cIuZvQ08BHza3X2wQkty/ejZd+nqjXKFzmwRSWsJfbDI3VcQe7Mzft2dccvrgUuSG02GwhvbW7j3+c3cWD2Ri6aNCjuOiJwGffQ/y33vDxsZXVrIt6/t+9ECEUk3KvQstqpuH69tbeHzl05lWIGuAiGS7lToWeypNbvIj+SweP6ksKOISBKo0LPY69taOKeqTHvnIhlChZ6lnlmva7aIZBoVehZqOdjF7Y+vZva44dyiDxKJZAwVeha6/6UtNB/s4gc3nkt+rl4CIplCP81Z5uDhHh58ZTsLzxrLrLG6ZotIJlGhZ5naxgO0d/Zw/bzxYUcRkSRToWeZI9dj0KEWkcyjn+osU7OtBYCxw4tCTiIiyaZCzyJdPVHue3ErF00byZzxOn4ukmlU6Flk+ds72d3eyRcvOyPsKCIyCFToWSIadZa9sJlZY0u5bIZmixLJRCr0LPGnTY1s2nOAL1w2DTNNYiGSiVToWeLe57cwobyIj8zV6YoimUqFniXe2tHKonPGkhfRP7lIptJPdxZRmYtkNv2EZ4G9Bw7T3euUF+eFHUVEBpEKPQsc+TDR+ZNGhJxERAaTCj3DdXb3cvczmxhdWsA5VWVhxxGRQaSpajLcD57ZxKY9B/j5Z/4TBbmRsOOIyCDSHnoGa2zv5Od/3sbHL6jiipmamUgk06nQM9gDL2+jJxrl1g+dGXYUERkCKvQM9taOfcytKmfyqJKwo4jIEFChZ7h8nXsukjX0057Bdrd3UqZzz0Wyhgo9Q+09cJitew9ywWSdey6SLVToGeo3b9QDsGC6LpUrki1U6Bmos7uXn/15K5ecOUozE4lkERV6Bvr+0xvZ036YW6+Y
HnYUERlCCRW6mS00s41mVmtmt51gzCfMbL2ZrTOzf01uTEnUym0t3P/SVm66eDIXnzEq7DgiMoQG/Oi/mUWAe4CrgXpgpZk
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Number of rows in every sample.\n",
    "X_len = np.asarray([len(sample) for sample in X])\n",
    "sq_xlen = pd.Series(X_len)\n",
    "\n",
    "# Sample length at each quantile level 0.00, 0.01, ..., 0.99.\n",
    "ptiles = [x*0.01 for x in range(100)]\n",
    "l = [sq_xlen.quantile(level) for level in ptiles]\n",
    "plt.plot(l, ptiles)\n",
    "\n",
    "# Summary statistics with the upper percentiles 90% .. 99% listed one by one.\n",
    "sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "c535003d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fill(x, threshold):\n",
    "    \"\"\"Zero-pad the DataFrame x at the end so that it has threshold rows.\n",
    "\n",
    "    Args:\n",
    "        x: DataFrame holding one sample.\n",
    "        threshold: target number of rows.\n",
    "\n",
    "    Returns:\n",
    "        x itself when it already has threshold rows or more; otherwise a new\n",
    "        DataFrame with the rows of x followed by rows of 0.0 and a fresh\n",
    "        0 .. threshold - 1 index.\n",
    "    \"\"\"\n",
    "    missing = threshold - len(x)\n",
    "    if missing <= 0:\n",
    "        return x\n",
    "    # Build the whole zero block once and concatenate a single time. Appending\n",
    "    # row by row copies the frame on every iteration (quadratic cost), and\n",
    "    # DataFrame.append no longer exists in pandas >= 2.0.\n",
    "    padding = pd.DataFrame(0.0, index=range(missing), columns=x.columns)\n",
    "    return pd.concat([x, padding], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "4ceefb7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the samples whose length does not exceed the 98% length quantile.\n",
    "threshold_p = 0.98\n",
    "# int() truncates the interpolated quantile to a whole number of rows.\n",
    "threshold = int(sq_xlen.quantile(q=threshold_p))\n",
    "len_mask = np.where(X_len <= threshold)\n",
    "\n",
    "X_filter, y_filter = X[len_mask], y[len_mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "47e7c7a4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 41min 37s, sys: 1.15 s, total: 41min 38s\n",
      "Wall time: 41min 59s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Pad every remaining sample with zero rows up to the common length.\n",
    "X_filter = [fill(sample, threshold) for sample in X_filter]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6dbd88b",
   "metadata": {},
   "source": [
    "Q: Is there a way to make this quicker?\n",
    "\n",
    "```python\n",
    "X_filter = list(map(fill, X_filter, [threshold for i in range(len(X_filter))]))\n",
    "```\n",
    "\n",
    "Measured run time of that cell:\n",
    "\n",
    "```\n",
    "CPU times: user 41min 37s, sys: 1.15 s, total: 41min 38s\n",
    "Wall time: 41min 59s\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "5d240071",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_data(data):\n",
    "    \"\"\"Plot the twelve sensor channels of one sample against 'Millis'.\n",
    "\n",
    "    The panels form a 4x3 grid (rows: Acc1, Acc2, Gyro, Mag; columns: X, Y, Z)\n",
    "    and the 'Force' column is drawn on top of every panel.\n",
    "    \"\"\"\n",
    "    channels = (\n",
    "        ('Acc1 X', 'Acc1 Y', 'Acc1 Z'),\n",
    "        ('Acc2 X', 'Acc2 Y', 'Acc2 Z'),\n",
    "        ('Gyro X', 'Gyro Y', 'Gyro Z'),\n",
    "        ('Mag X', 'Mag Y', 'Mag Z'),\n",
    "    )\n",
    "    n_rows, n_cols = len(channels), len(channels[0])\n",
    "    fig, axs = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows))\n",
    "    t = data['Millis']\n",
    "    for axes_row, names in zip(axs, channels):\n",
    "        for panel, name in zip(axes_row, names):\n",
    "            panel.plot(t, data[name])\n",
    "\n",
    "    # Overlay the force signal on every panel.\n",
    "    for panel in axs.flat:\n",
    "        panel.plot(t, data['Force'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "91db361c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pickle cache files for the length-filtered, zero-padded samples and their labels.\n",
    "Xfiltered_pickle_file = './X_filter.pickle'\n",
    "yfiltered_pickle_file = \"./y_filter.pickle\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "c2238568",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_filtered():\n",
    "    \"\"\"Write the notebook-level X_filter and y_filter to their pickle cache files.\n",
    "\n",
    "    The with-blocks make sure each file is flushed and closed even when\n",
    "    pickling raises.\n",
    "    \"\"\"\n",
    "    with open(Xfiltered_pickle_file, 'wb') as x_file:\n",
    "        pickle.dump(X_filter, x_file)\n",
    "\n",
    "    with open(yfiltered_pickle_file, 'wb') as y_file:\n",
    "        pickle.dump(y_filter, y_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "a234a063",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the filtered and padded data so the slow padding step does not have to be repeated.\n",
    "save_filtered()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a0ae2e62",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_filtered_pickles():\n",
    "    \"\"\"Load the filtered samples and labels from their pickle cache files.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (X, y): X converted with np.asarray(..., dtype=pd.DataFrame),\n",
    "        y converted to a str array.\n",
    "    \"\"\"\n",
    "    # NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.\n",
    "    # The with-blocks close the files even when unpickling raises.\n",
    "    with open(Xfiltered_pickle_file, 'rb') as x_file:\n",
    "        X = pickle.load(x_file)\n",
    "\n",
    "    with open(yfiltered_pickle_file, 'rb') as y_file:\n",
    "        y = pickle.load(y_file)\n",
    "\n",
    "    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c030d181",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the filtered data from the pickle cache files written by save_filtered().\n",
    "XX_filtered, yy_filtered = load_filtered_pickles()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "338bddeb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Limit GPU usage. NOTE(review): presumably these variables must be set before TensorFlow\n",
    "# is imported (hence the original remark that this belongs in the first cell) - confirm.\n",
    "import os\n",
    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # this is required\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '2'          # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "57ce2aa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Encode the letter labels as integer class ids.\n",
    "le = LabelEncoder()\n",
    "yyt_filtered = le.fit_transform(yy_filtered)\n",
    "\n",
    "# Stack the padded samples into one float64 array.\n",
    "XX_filtered = np.asarray(XX_filtered).astype('float64')\n",
    "# Drop columns 0 and 14 of the last axis (the same as deleting index 0 and then index 13\n",
    "# of the remainder). NOTE(review): presumably the timestamp and one trailing column - confirm.\n",
    "XXX_filtered = np.delete(XX_filtered, [0, 14], axis=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "deecd898",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10271, 456, 13)\n",
      "(2568, 456, 13)\n",
      "(10271,)\n",
      "(2568,)\n"
     ]
    }
   ],
   "source": [
    "# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    XXX_filtered,\n",
    "    yyt_filtered,\n",
    "    test_size=0.2,\n",
    "    random_state=177013,\n",
    ")\n",
    "\n",
    "# Show the shape of each of the four parts.\n",
    "for part in (X_train, X_test, y_train, y_test):\n",
    "    print(part.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "8fd1a79c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D\n",
    "\n",
    "# Baseline fully connected classifier: flatten each sample, two hidden layers, 26-way softmax.\n",
    "# NOTE(review): the inputs are not normalised; the very large first-epoch loss of the recorded\n",
    "# run suggests scaling the features before training - confirm.\n",
    "model = Sequential()\n",
    "\n",
    "# model.add(Conv1D(32, 3, input_shape = X_train.shape[1:]))\n",
    "# model.add(Activation('relu'))\n",
    "# model.add(MaxPooling1D(pool_size=3))\n",
    "\n",
    "# model.add(Conv1D(32, 3))\n",
    "# model.add(Activation('relu'))\n",
    "# model.add(MaxPooling1D(pool_size=3))\n",
    "\n",
    "# The input shape belongs on the first layer and is taken from the data instead of being hard-coded.\n",
    "model.add(Flatten(input_shape=X_train.shape[1:]))\n",
    "model.add(Dense(456, activation='relu'))\n",
    "\n",
    "# Without a non-linearity this layer would collapse into the following linear layer.\n",
    "model.add(Dense(104, activation='relu'))\n",
    "\n",
    "# One output per class, turned into class probabilities by softmax.\n",
    "model.add(Dense(26))\n",
    "model.add(Activation('softmax'))\n",
    "\n",
    "model.compile(\n",
    "    optimizer=tf.keras.optimizers.Adam(0.001),\n",
    "    # The model outputs probabilities, so the loss must not apply softmax again.\n",
    "    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),\n",
    "    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "0562e920",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['K', 'T', 'U', ..., 'F', 'H', 'G'], dtype='<U1')"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Map the integer class ids of the test split back to their letters.\n",
    "le.inverse_transform(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "056d3b00",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((10271, 456, 13), (10271,))"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shapes of the training inputs and training labels.\n",
    "X_train.shape, y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "3956a9d8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3635.9205 - sparse_categorical_accuracy: 0.0375\n",
      "Epoch 2/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 13.2679 - sparse_categorical_accuracy: 0.0344\n",
      "Epoch 3/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 5.5680 - sparse_categorical_accuracy: 0.0360\n",
      "Epoch 4/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 58.0553 - sparse_categorical_accuracy: 0.0411\n",
      "Epoch 5/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 4.0946 - sparse_categorical_accuracy: 0.0382\n",
      "Epoch 6/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2512 - sparse_categorical_accuracy: 0.0421\n",
      "Epoch 7/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2490 - sparse_categorical_accuracy: 0.0432\n",
      "Epoch 8/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2503 - sparse_categorical_accuracy: 0.0411\n",
      "Epoch 9/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2525 - sparse_categorical_accuracy: 0.0390\n",
      "Epoch 10/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2529 - sparse_categorical_accuracy: 0.0426\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7fac487cd2e0>"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Train for 10 epochs with mini-batches of 32 samples, printing progress per epoch.\n",
    "model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "8c1f64b6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluate on test data\n",
      "81/81 [==============================] - 0s 1ms/step - loss: 11.4346 - sparse_categorical_accuracy: 0.0312\n",
      "test loss, test acc: [11.434555053710938, 0.031152648851275444]\n",
      "Generate predictions for 3 samples\n",
      "predictions shape: (3, 26)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(array(['K', 'T', 'U'], dtype='<U1'), array(['R', 'R', 'R'], dtype='<U1'))"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Loss and accuracy on the held-out test split.\n",
    "print(\"Evaluate on test data\")\n",
    "results = model.evaluate(X_test, y_test, batch_size=32)\n",
    "print(\"test loss, test acc:\", results)\n",
    "\n",
    "# Output of the last layer for the first three test samples.\n",
    "print(\"Generate predictions for 3 samples\")\n",
    "predictions = model.predict(X_test[:3])\n",
    "print(\"predictions shape:\", predictions.shape)\n",
    "# Predicted class id = index of the largest output of each sample.\n",
    "fff = list(map(np.argmax, predictions))\n",
    "\n",
    "# True letters next to the predicted letters.\n",
    "le.inverse_transform(y_test[:3]), le.inverse_transform(fff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "d8b48c43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): presumably shuts the kernel down to release the GPU - confirm.\n",
    "# Cells placed after this one would not run under Run All.\n",
    "exit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63124d15",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
Added rudimentary NN 2021-05-31 15:43:48 +02:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"id": "5d6412cd",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import pandas as pd\n",`
			`"import numpy as np\n",`
			`"import os\n",`
			`"import pickle\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"id": "fa43325a",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"delim = ';'\n",`
			`"\n",`
			`"user_count = 100\n",`
			`"\n",`
			`"base_path = '/opt/iui-datarelease1-sose2021/'\n",`
			`"\n",`
			`"Xpickle_file = './X.pickle'\n",`
			`"\n",`
			`"ypickle_file = './y.pickle'"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"id": "1ea7c2f1",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def load_pickles():\n",`
			`" _p = open(Xpickle_file, 'rb')\n",`
			`" X = pickle.load(_p)\n",`
			`" _p.close()\n",`
			`" \n",`
			`" _p = open(ypickle_file, 'rb')\n",`
			`" y = pickle.load(_p)\n",`
			`" _p.close()\n",`
			`" \n",`
			`" return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"id": "91f4642c",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def load_data():\n",`
			`" if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",`
			`" return load_pickles()\n",`
			`" data = []\n",`
			`" label = []\n",`
			`" for user in range(0, user_count):\n",`
			`" user_path = base_path + str(user) + '/split_letters_csv/'\n",`
			`" for file in os.listdir(user_path):\n",`
			`" file_name = user_path + file\n",`
			`" letter = ''.join(filter(lambda x: x.isalpha(), file))[0]\n",`
			`" data.append(pd.read_csv(file_name, delim))\n",`
			`" label.append(letter)\n",`
			`" return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"id": "a8629dc5",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"X, y = load_data()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 11,`
			`"id": "18cd698f",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def save_pickle():\n",`
			`"# _p = open(np.asarray(data, dtype=pd.DataFrame), 'wb')\n",`
			`" _p = open(Xpickle_file, 'wb')\n",`
			`" pickle.dump(X, _p)\n",`
			`" _p.close()\n",`
			`"\n",`
			`"# _p = open(np.asarray(label, dtype=str), 'wb')\n",`
			`" _p = open(ypickle_file, 'wb')\n",`
			`" pickle.dump(y, _p)\n",`
			`" _p.close()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 6,`
			`"id": "0f505920",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"(13102, 13102)"`
			`]`
			`},`
			`"execution_count": 6,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"len(X), len(y)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 7,`
			`"id": "4bd9f443",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"count 13102.000000\n",`
			`"mean 208.304457\n",`
			`"std 206.732342\n",`
			`"min 42.000000\n",`
			`"50% 185.000000\n",`
			`"90% 270.000000\n",`
			`"91% 276.000000\n",`
			`"92% 286.000000\n",`
			`"93% 299.000000\n",`
			`"94% 312.000000\n",`
			`"95% 333.000000\n",`
			`"96% 355.000000\n",`
			`"97% 388.000000\n",`
			`"98% 456.980000\n",`
			`"99% 701.940000\n",`
			`"max 11073.000000\n",`
			`"dtype: float64"`
			`]`
			`},`
			`"execution_count": 7,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`},`
			`{`
			`"data": {`
			"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAc20lEQVR4nO3deXRc9Znm8e+r0i7Lkm3Jq7yCV8AY0BgIwUCAtPEEmExIsNMZyIazMd3p5CQDoUNn6NM9k2QO6WSGDnEgoUPSECAkuIMJoWnCEjYLMF6xkVdJXiRLsmRblrXUO3/UtSnLMirbJd1ans85dXTr3p+rHsulx1e3bt2fuTsiIpL+csIOICIiyaFCFxHJECp0EZEMoUIXEckQKnQRkQyRG9YTV1RU+JQpU8J6ehGRtPTGG2/sdffK/raFVuhTpkyhpqYmrKcXEUlLZrb9RNt0yEVEJEOo0EVEMoQKXUQkQwxY6Gb2MzNrNLO1J9huZvYjM6s1s9Vmdn7yY4qIyEAS2UN/AFj4PtuvAaYHt6XAj08/loiInKwBC93dXwBa3mfI9cAvPOZVoNzMxiUroIiIJCYZx9AnAHVx9+uDdccxs6VmVmNmNU1NTUl4ahEROWJIz0N392XAMoDq6mpdt1dEQtcbdXqiUXp6PXaLRumJeuzWGyz3xo0J1vdGne6o0xuN0t3rsfvB+p5epzsajOk9dkxPb5QrZ4/h3InlSf+7JKPQG4CJcfergnUikubcvf9C61Nux4zpZ7m/0uuOL8a+Y6JRegd8rti2I4/ZX7Zjni94jmPGRZ0wpoQYPbwwZQt9OXCrmT0MXAi0ufuuJDyuSNaJRp39h3toP9RNe2c37Yd6gq/dtHfG1nf1RgcuqqPL8SV4orKNf6z4Pc/YbahFcoxIjpGXY+RGcsjNMXIjRm5ODrmRI9tyYl+D+7mRHPJzcyg+Mj7+zwTLkZyco+PzIjnHPMd7jxU3JniO+OfOjc8Ut+2YxzzuOeJyR3LIMTCzQfneDVjoZvYQcDlQYWb1wN8BeQDufi+wAlgE1AIdwGcGJalIGohGnQNdQSH3U8ZHSrrt6PKx2w4c7hlwjzE/El80/ZReznvlcXRMTg4FBbnBtn6K7UgxHi3A+Mc8tsQikRzyTvAckcjxZZvXpzD7K9ujZW1GTs7glF02GLDQ3X3JANsd+ErSEomkEHen5WAXO1o6qGs9RF1LB80HuuLK+Nji3p9AIZcW5DK8KI/SwtjXCeVFzB5XyvDCPIYX5VFWlMfwYFtsXe7RbcMKcomo8OQEQrs4l0iqONTVS31rB3WtHexo7mBHyyHqWjuoa4ndDnb1HjO+JD8SK92gcMeXFzKrsDS4338RlwXrhhWqkGXwqNAlK7g7u9s7WVPfxvpd7Wxv7mBHS+zWtP/wMWOL8iJMHFnEpJHFXDRtFJNGFjNxZDGTRhZTNaKIkgL92Ehq0itTMo67U996iLUNbazd2caahnbWNbTRfLALgByDcWVFTBxZxBUzK5k4ophJo2KlPXFEMRXD8gftTSuRwaRCl7R2pLxX17exumFfrMQb2mk71A1Abo4xfUwpV84ezdkTyjhrfBlzxg2nKD8ScnKR5FOhS1ppbO+MlXf9Pt6ub2NNQxstwZ53fiSHmWNLWXTOOM6eMJyzx5cxc2wphXkqb8kOKnRJaXUtHTy1dhc121pZXd/G7vZOIHbYZMaYUq6aPZq5VeWcW1XOzLGl5OfqitCSvVToknLqWjp4cs0unly9izUNbQBMrSjhwmkjmVtVztyqMs4aP5zifL18ReLpJ0JSwo7mWImvWPNeiZ9bVca3Fs3imrPHMXFkccgJRVKfCl1CoxIXSS4VugypfR1dPFpTz/K3d75X4hPLVeIiSaBClyHx7p79/PzlbTz+Zj2d3VHtiYsMAhW6DKpVdfv4P09v5KXaveTn5vDReRP49CVTmD1ueNjRRDKOCl0GRW/Uuff5zdz9zCZGluT
zjb+YyZL5kxhZkh92NJGMpUKXpKtr6eCbj63mlS3NfGTuOP7ho+dQVpQXdiyRjKdCl6Tp7o1y34tb+eGzm4iY8f0b5nLDBVW6LorIEFGhS1KsqW/j64+uYtOeAyw8ayx3XjuH8eVFYccSySoqdDltf1y3m796+C1GFOdz/83VXDl7TNiRRLKSCl1Ombtz34tb+cenNjC3qpz7bqqmsrQg7FgiWUuFLqek+cBhvvnYap59p5GFZ43lBzfO0yVpRUKmQpeT9sb2Fr70yzfZd6ib71w7h5s/MEVvfIqkABW6nJSWg1188ZdvUpwf4YHPzGfOeH1ASCRV6OLRkjB3547frmFfRxf//Jfnq8xFUowKXRL227caeGrtbv7m6hmcNb4s7Dgi0ocKXRKybe9B/u6JdVRPHsEXFpwRdhwR6YcKXQa0qm4fN9z7Mjk5xt2fmEckR2+AiqQiFbq8r+c2NrJ42SsU5Ud4/MsfYNIoXepWJFXpLBc5oe7eKHc8voYpo0r45ecvpGKYPjQkksq0hy4n9MSqnexs6+SbC2eqzEXSgApd+rWqbh93PrGWuVVlXD5jdNhxRCQBKnQ5zra9B/n0z1+nYlgB991UTY7eBBVJCzqGLsd56PUdHDzcwxNfuYTRwwvDjiMiCUpoD93MFprZRjOrNbPb+tk+ycyeM7O3zGy1mS1KflQZKq9va2FuVTmTR5WEHUVETsKAhW5mEeAe4BpgDrDEzOb0Gfa3wCPufh6wGPjnZAeVofHshj28tWMfH5ql4+Yi6SaRPfT5QK27b3H3LuBh4Po+Yxw4cmGPMmBn8iLKUGk52MX/+M0aZo0t5fOXTg07joicpEQKfQJQF3e/PlgX7zvAp8ysHlgB/Pf+HsjMlppZjZnVNDU1nUJcGSzuzrceX0P7oW5+cOM8CnJ1bXORdJOss1yWAA+4exWwCHjQzI57bHdf5u7V7l5dWVmZpKeWZPjdqgb+sG43X//wDGaP01UURdJRIoXeAEyMu18VrIv3OeARAHd/BSgEKpIRUIbGg69sDw61TAs7ioicokQKfSUw3cymmlk+sTc9l/cZswO4EsDMZhMrdB1TSROd3b2sbWjnshmVuvCWSBobsNDdvQe4FXga2EDsbJZ1ZnaXmV0XDPs6cIuZvQ08BHza3X2wQkty/ejZd+nqjXKFzmwRSWsJfbDI3VcQe7Mzft2dccvrgUuSG02GwhvbW7j3+c3cWD2Ri6aNCjuOiJwGffQ/y33vDxsZXVrIt6/t+9ECEUk3KvQstqpuH69tbeHzl05lWIGuAiGS7lToWeypNbvIj+SweP6ksKOISBKo0LPY69taOKeqTHvnIhlChZ6lnlmva7aIZBoVehZqOdjF7Y+vZva44dyiDxKJZAwVeha6/6UtNB/s4gc3nkt+rl4CIplCP81Z5uDhHh58ZTsLzxrLrLG6ZotIJlGhZ5naxgO0d/Zw/bzxYUcRkSRToWeZI9dj0KEWkcyjn+osU7OtBYCxw4tCTiIiyaZCzyJdPVHue3ErF00byZzxOn4ukmlU6Flk+ds72d3eyRcvOyPsKCIyCFToWSIadZa9sJlZY0u5bIZmixLJRCr0LPGnTY1s2nOAL1w2DTNNYiGSiVToWeLe57cwobyIj8zV6YoimUqFniXe2tHKonPGkhfRP7lIptJPdxZRmYtkNv2EZ4G9Bw7T3euUF+eFHUVEBpEKPQsc+TDR+ZNGhJxERAaTCj3DdXb3cvczmxhdWsA5VWVhxxGRQaSpajLcD57ZxKY9B/j5Z/4TBbmRsOOIyCDSHnoGa2zv5Od/3sbHL6jiipmamUgk06nQM9gDL2+jJxrl1g+dGXYUERkCKvQM9taOfcytKmfyqJKwo4jIEFChZ7h8nXsukjX0057Bdrd3UqZzz0Wyhgo9Q+09cJitew9ywWSdey6SLVToGeo3b9QDsGC6LpUrki1U6Bmos7uXn/15K5ecOUozE4lkERV6Bvr+0xvZ036YW6+YHnY
UERlCCRW6mS00s41mVmtmt51gzCfMbL2ZrTOzf01uTEnUym0t3P/SVm66eDIXnzEq7DgiMoQG/Oi/mUWAe4CrgXpgpZk
			`"text/plain": [`
			`"<Figure size 432x288 with 1 Axes>"`
			`]`
			`},`
			`"metadata": {`
			`"needs_background": "light"`
			`},`
			`"output_type": "display_data"`
			`}`
			`],`
			`"source": [`
			`"import matplotlib.pyplot as plt\n",`
			`"\n",`
			`"X_len = np.asarray(list(map(len, X)))\n",`
			`"l = []\n",`
			`"sq_xlen = pd.Series(X_len)\n",`
			`"ptiles = [x*0.01 for x in range(100)]\n",`
			`"for i in ptiles:\n",`
			`" l.append(sq_xlen.quantile(i))\n",`
			`"plt.plot(l, ptiles)\n",`
			`"sq_xlen.describe(percentiles=[x*0.01 for x in range(90,100)])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 49,`
			`"id": "c535003d",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def fill(x, threshold):\n",`
			`" fill = threshold - len(x)\n",`
			`" xx = x\n",`
			`" for i in range(fill):\n",`
			`" xx = xx.append(pd.Series(0,index=x.columns,dtype='float64'), ignore_index=True)\n",`
			`" return xx"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 56,`
			`"id": "4ceefb7e",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"threshold_p = 0.98\n",`
			`"threshold = int(sq_xlen.quantile(threshold_p))\n",`
			`"len_mask = np.where(X_len <= threshold)\n",`
			`"\n",`
			`"X_filter = X[len_mask]\n",`
			`"y_filter = y[len_mask]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 57,`
			`"id": "47e7c7a4",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"CPU times: user 41min 37s, sys: 1.15 s, total: 41min 38s\n",`
			`"Wall time: 41min 59s\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"%%time\n",`
			`"X_filter = list(map(fill, X_filter, [threshold for i in range(len(X_filter))]))"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"id": "d6dbd88b",`
			`"metadata": {},`
			`"source": [`
			`"Q: Is there a way to make this quicker?\n",`
			`"\n",`
			"```python\n",
			`"X_filter = list(map(fill, X_filter, [threshold for i in range(len(X_filter))]))\n",`
			"```\n",
			`"\n",`
			`"CPU times: user 41min 37s, sys: 1.15 s, total: 41min 38s\n",`
			`"Wall time: 41min 59s"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 51,`
			`"id": "5d240071",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def plot_data(data):\n",`
			`" fig, axs = plt.subplots(4, 3, figsize=(33, 34))\n",`
			`" t = data['Millis']\n",`
			`" axs[0][0].plot(t, data['Acc1 X'])\n",`
			`" axs[0][1].plot(t, data['Acc1 Y'])\n",`
			`" axs[0][2].plot(t, data['Acc1 Z'])\n",`
			`" axs[1][0].plot(t, data['Acc2 X'])\n",`
			`" axs[1][1].plot(t, data['Acc2 Y'])\n",`
			`" axs[1][2].plot(t, data['Acc2 Z'])\n",`
			`" axs[2][0].plot(t, data['Gyro X'])\n",`
			`" axs[2][1].plot(t, data['Gyro Y'])\n",`
			`" axs[2][2].plot(t, data['Gyro Z'])\n",`
			`" axs[3][0].plot(t, data['Mag X'])\n",`
			`" axs[3][1].plot(t, data['Mag Y'])\n",`
			`" axs[3][2].plot(t, data['Mag Z'])\n",`
			`"\n",`
			`" for a in axs:\n",`
			`" for b in a:\n",`
			`" b.plot(t, data['Force'])\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 8,`
			`"id": "91db361c",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"Xfiltered_pickle_file = './X_filter.pickle'\n",`
			`"yfiltered_pickle_file = \"./y_filter.pickle\""`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 63,`
			`"id": "c2238568",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def save_filtered():\n",`
			`" _p = open(Xfiltered_pickle_file, 'wb')\n",`
			`" pickle.dump(X_filter, _p)\n",`
			`" _p.close()\n",`
			`"\n",`
			`" _p = open(yfiltered_pickle_file, 'wb')\n",`
			`" pickle.dump(y_filter, _p)\n",`
			`" _p.close()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 64,`
			`"id": "a234a063",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"save_filtered()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 9,`
			`"id": "a0ae2e62",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def load_filtered_pickles():\n",`
			`" _p = open(Xfiltered_pickle_file, 'rb')\n",`
			`" X = pickle.load(_p)\n",`
			`" _p.close()\n",`
			`" \n",`
			`" _p = open(yfiltered_pickle_file, 'rb')\n",`
			`" y = pickle.load(_p)\n",`
			`" _p.close()\n",`
			`" \n",`
			`" return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 10,`
			`"id": "c030d181",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# Reload the filtered samples and labels from their pickle files.\n",`
			`"XX_filtered, yy_filtered = load_filtered_pickles()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 11,`
			`"id": "338bddeb",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# FIRST CELL: set these variables to limit GPU usage.\n",`
			`"import os\n",`
			`"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' # this is required\n",`
			`"os.environ['CUDA_VISIBLE_DEVICES'] = '2' # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal."`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 57,`
			`"id": "57ce2aa7",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"from sklearn.model_selection import train_test_split\n",`
			`"from sklearn.preprocessing import LabelEncoder\n",`
			`"\n",`
			`"le = LabelEncoder()\n",`
			`"yyt_filtered = le.fit_transform(yy_filtered)\n",`
			`"XX_filtered = np.asarray(XX_filtered).astype('float64')\n",`
			`"XXX_filtered = np.delete(np.delete(XX_filtered, 0, 2), 13,2)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 59,`
			`"id": "deecd898",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"(10271, 456, 13)\n",`
			`"(2568, 456, 13)\n",`
			`"(10271,)\n",`
			`"(2568,)\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"X_train, X_test, y_train, y_test = train_test_split(XXX_filtered, yyt_filtered, test_size=0.2, random_state=177013)\n",`
			`"\n",`
			`"print(X_train.shape)\n",`
			`"print(X_test.shape)\n",`
			`"print(y_train.shape)\n",`
			`"print(y_test.shape)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 77,`
			`"id": "8fd1a79c",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import tensorflow as tf\n",`
			`"from tensorflow.keras.models import Sequential\n",`
			`"from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D\n",`
			`"\n",`
			`"model = Sequential()\n",`
			`"\n",`
			`"# model.add(Conv1D(32, 3, input_shape = X_train.shape[1:]))\n",`
			`"# model.add(Activation('relu'))\n",`
			`"# model.add(MaxPooling1D(pool_size=3))\n",`
			`"\n",`
			`"# model.add(Conv1D(32, 3))\n",`
			`"# model.add(Activation('relu'))\n",`
			`"# model.add(MaxPooling1D(pool_size=3))\n",`
			`"\n",`
			`"model.add(Flatten())\n",`
			`"model.add(Dense(456, activation='relu', input_shape=(456,13)))\n",`
			`"\n",`
			`"model.add(Dense(104))\n",`
			`"\n",`
			`"model.add(Dense(26))\n",`
			`"model.add(Activation('sigmoid'))\n",`
			`"\n",`
			`"model.compile(\n",`
			`" optimizer=tf.keras.optimizers.Adam(0.001),\n",`
			`" loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",`
			`" metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],\n",`
			`")"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 78,`
			`"id": "0562e920",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array(['K', 'T', 'U', ..., 'F', 'H', 'G'], dtype='<U1')"`
			`]`
			`},`
			`"execution_count": 78,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"# Decode the encoded test labels back to their original letter strings.\n",`
			`"le.inverse_transform(y_test)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 79,`
			`"id": "056d3b00",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"data": {`
			`"text/plain": [`
			`"((10271, 456, 13), (10271,))"`
			`]`
			`},`
			`"execution_count": 79,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"# Show the shapes of the training samples and the training labels side by side.\n",`
			`"X_train.shape, y_train.shape"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 80,`
			`"id": "3956a9d8",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Epoch 1/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3635.9205 - sparse_categorical_accuracy: 0.0375\n",`
			`"Epoch 2/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 13.2679 - sparse_categorical_accuracy: 0.0344\n",`
			`"Epoch 3/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 5.5680 - sparse_categorical_accuracy: 0.0360\n",`
			`"Epoch 4/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 58.0553 - sparse_categorical_accuracy: 0.0411\n",`
			`"Epoch 5/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 4.0946 - sparse_categorical_accuracy: 0.0382\n",`
			`"Epoch 6/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3.2512 - sparse_categorical_accuracy: 0.0421\n",`
			`"Epoch 7/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3.2490 - sparse_categorical_accuracy: 0.0432\n",`
			`"Epoch 8/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3.2503 - sparse_categorical_accuracy: 0.0411\n",`
			`"Epoch 9/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3.2525 - sparse_categorical_accuracy: 0.0390\n",`
			`"Epoch 10/10\n",`
			`"321/321 [==============================] - 1s 2ms/step - loss: 3.2529 - sparse_categorical_accuracy: 0.0426\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"<tensorflow.python.keras.callbacks.History at 0x7fac487cd2e0>"`
			`]`
			`},`
			`"execution_count": 80,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"model.fit(X_train, y_train, \n",`
			`" epochs=10,\n",`
			`" batch_size=32,\n",`
			`" verbose=1\n",`
			`" )"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 81,`
			`"id": "8c1f64b6",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Evaluate on test data\n",`
			`"81/81 [==============================] - 0s 1ms/step - loss: 11.4346 - sparse_categorical_accuracy: 0.0312\n",`
			`"test loss, test acc: [11.434555053710938, 0.031152648851275444]\n",`
			`"Generate predictions for 3 samples\n",`
			`"predictions shape: (3, 26)\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"(array(['K', 'T', 'U'], dtype='<U1'), array(['R', 'R', 'R'], dtype='<U1'))"`
			`]`
			`},`
			`"execution_count": 81,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			"# Evaluate the model on the test data using `evaluate`\n",
			`"print(\"Evaluate on test data\")\n",`
			`"results = model.evaluate(X_test, y_test, batch_size=32)\n",`
			`"print(\"test loss, test acc:\", results)\n",`
			`"\n",`
			`"# Generate predictions (probabilities -- the output of the last layer)\n",`
			"# on new data using `predict`\n",
			`"print(\"Generate predictions for 3 samples\")\n",`
			`"predictions = model.predict(X_test[:3])\n",`
			`"print(\"predictions shape:\", predictions.shape)\n",`
			`"fff= [np.argmax(i) for i in predictions]\n",`
			`"\n",`
			`"le.inverse_transform(y_test[:3]), le.inverse_transform(fff)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 82,`
			`"id": "d8b48c43",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# NOTE(review): exit() ends the kernel session; presumably meant to release the GPU once the run is done -- confirm.\n",`
			`"exit()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"id": "63124d15",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.8.5"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`