iui-group-l-name-zensiert/1-first-project/jw/T_DataNormaization.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "53f57e68",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import pickle\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b240abfb",
   "metadata": {},
   "outputs": [],
   "source": [
    "delim = ';'\n",
    "\n",
    "user_count = 100\n",
    "\n",
    "base_path = '/opt/iui-datarelease1-sose2021/'\n",
    "\n",
    "Xpickle_file = './X.pickle'\n",
    "\n",
    "ypickle_file = './y.pickle'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f5464e9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_pickles():\n",
    "    _p = open(Xpickle_file, 'rb')\n",
    "    X = pickle.load(_p)\n",
    "    _p.close()\n",
    "        \n",
    "    _p = open(ypickle_file, 'rb')\n",
    "    y = pickle.load(_p)\n",
    "    _p.close()\n",
    "    \n",
    "    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3c51ef28",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
    "        return load_pickles()\n",
    "    data = []\n",
    "    label = []\n",
    "    for user in range(0, user_count):\n",
    "        user_path = base_path + str(user) + '/split_letters_csv/'\n",
    "        for file in os.listdir(user_path):\n",
    "            file_name = user_path + file\n",
    "            letter = ''.join(filter(lambda x: x.isalpha(), file))[0]\n",
    "            data.append(pd.read_csv(file_name, delim))\n",
    "            label.append(letter)\n",
    "    return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "03ce941d",
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7ea0c433",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_pickle():\n",
    "#     _p = open(np.asarray(data, dtype=pd.DataFrame), 'wb')\n",
    "    _p = open(Xpickle_file, 'wb')\n",
    "    pickle.dump(X, _p)\n",
    "    _p.close()\n",
    "\n",
    "#     _p = open(np.asarray(label, dtype=str), 'wb')\n",
    "    _p = open(ypickle_file, 'wb')\n",
    "    pickle.dump(y, _p)\n",
    "    _p.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "210a3d37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13102, 13102)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(X), len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "eadec7ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_data(data):\n",
    "    fig, axs = plt.subplots(4, 3, figsize=(3*3, 3*4))\n",
    "    t = data['Millis']\n",
    "    axs[0][0].plot(t, data['Acc1 X'])\n",
    "    axs[0][1].plot(t, data['Acc1 Y'])\n",
    "    axs[0][2].plot(t, data['Acc1 Z'])\n",
    "    axs[1][0].plot(t, data['Acc2 X'])\n",
    "    axs[1][1].plot(t, data['Acc2 Y'])\n",
    "    axs[1][2].plot(t, data['Acc2 Z'])\n",
    "    axs[2][0].plot(t, data['Gyro X'])\n",
    "    axs[2][1].plot(t, data['Gyro Y'])\n",
    "    axs[2][2].plot(t, data['Gyro Z'])\n",
    "    axs[3][0].plot(t, data['Mag X'])\n",
    "    axs[3][1].plot(t, data['Mag Y'])\n",
    "    axs[3][2].plot(t, data['Mag Z'])\n",
    "\n",
    "    for a in axs:\n",
    "        for b in a:\n",
    "            b.plot(t, data['Force'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ade23f06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# FIRST CELL: set these variables to limit GPU usage.\n",
    "import os\n",
    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # this is required\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '2'          # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check \"gpustat\" in a terminal."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "d5db75fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "le = LabelEncoder()\n",
    "yyt_filtered = le.fit_transform(yy_filtered) # Lables in Zahlenwerte transformiert\n",
    "XX_filtered = np.asarray(XX_filtered).astype('float64')\n",
    "XXX_filtered = np.delete(np.delete(XX_filtered, 0, 2), 13,2) # Drops time col and Millis col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "290be797",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10271, 456, 13)\n",
      "(2568, 456, 13)\n",
      "(10271,)\n",
      "(2568,)\n"
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(XXX_filtered, yyt_filtered, test_size=0.2, random_state=177013)\n",
    "\n",
    "print(X_train.shape)\n",
    "print(X_test.shape)\n",
    "print(y_train.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "cf763407",
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D\n",
    "\n",
    "model = Sequential()\n",
    "\n",
    "# model.add(Conv1D(32, 3, input_shape = X_train.shape[1:]))\n",
    "# model.add(Activation('relu'))\n",
    "# model.add(MaxPooling1D(pool_size=3))\n",
    "\n",
    "# model.add(Conv1D(32, 3))\n",
    "# model.add(Activation('relu'))\n",
    "# model.add(MaxPooling1D(pool_size=3))\n",
    "\n",
    "model.add(Flatten(input_shape = (456,13)))\n",
    "model.add(Dense(456, activation = 'relu'))\n",
    "\n",
    "model.add(Dense(104))\n",
    "\n",
    "model.add(Dense(26))\n",
    "model.add(Activation('sigmoid'))\n",
    "\n",
    "model.compile(\n",
    "    optimizer = tf.keras.optimizers.Adam(0.001),\n",
    "    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),\n",
    "    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "ed97582c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['K', 'T', 'U', ..., 'F', 'H', 'G'], dtype='<U1')"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "le.inverse_transform(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "3f42617d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((10271, 456, 13), (10271,))"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape, y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "95526298",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3635.9205 - sparse_categorical_accuracy: 0.0375\n",
      "Epoch 2/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 13.2679 - sparse_categorical_accuracy: 0.0344\n",
      "Epoch 3/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 5.5680 - sparse_categorical_accuracy: 0.0360\n",
      "Epoch 4/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 58.0553 - sparse_categorical_accuracy: 0.0411\n",
      "Epoch 5/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 4.0946 - sparse_categorical_accuracy: 0.0382\n",
      "Epoch 6/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2512 - sparse_categorical_accuracy: 0.0421\n",
      "Epoch 7/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2490 - sparse_categorical_accuracy: 0.0432\n",
      "Epoch 8/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2503 - sparse_categorical_accuracy: 0.0411\n",
      "Epoch 9/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2525 - sparse_categorical_accuracy: 0.0390\n",
      "Epoch 10/10\n",
      "321/321 [==============================] - 1s 2ms/step - loss: 3.2529 - sparse_categorical_accuracy: 0.0426\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7fac487cd2e0>"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train, y_train, \n",
    "          epochs=10,\n",
    "          batch_size=32,\n",
    "          verbose=1\n",
    "         )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "8f8fedfd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluate on test data\n",
      "81/81 [==============================] - 0s 1ms/step - loss: 11.4346 - sparse_categorical_accuracy: 0.0312\n",
      "test loss, test acc: [11.434555053710938, 0.031152648851275444]\n",
      "Generate predictions for 3 samples\n",
      "predictions shape: (3, 26)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(array(['K', 'T', 'U'], dtype='<U1'), array(['R', 'R', 'R'], dtype='<U1'))"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the model on the test data using `evaluate`\n",
    "print(\"Evaluate on test data\")\n",
    "results = model.evaluate(X_test, y_test, batch_size=32)\n",
    "print(\"test loss, test acc:\", results)\n",
    "\n",
    "# Generate predictions (probabilities -- the output of the last layer)\n",
    "# on new data using `predict`\n",
    "print(\"Generate predictions for 3 samples\")\n",
    "predictions = model.predict(X_test[:3])\n",
    "print(\"predictions shape:\", predictions.shape)\n",
    "fff= [np.argmax(i) for i in predictions]\n",
    "\n",
    "le.inverse_transform(y_test[:3]), le.inverse_transform(fff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "fbfbaf01",
   "metadata": {},
   "outputs": [],
   "source": [
    "exit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07d40334",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}