iui-group-l-name-zensiert/1-first-project/ies/TestNetrwork.ipynb

548 lines
27 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8c784b5a",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pickle\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import matplotlib.pyplot as plt\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder, LabelBinarizer\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, Flatten, BatchNormalization\n",
"\n",
"# GPU-related environment variables for this kernel process.\n",
"# NOTE(review): they are set after `import tensorflow`; this assumes the GPU\n",
"# runtime is not initialised at import time -- confirm.\n",
"os.environ.update({\n",
"    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',\n",
"    'CUDA_VISIBLE_DEVICES': '2',\n",
"})\n",
"\n",
"# Raw data layout: load_data() reads `<base_path><user>/split_letters_csv/`\n",
"# for every user in range(user_count), using `delim` as the CSV separator.\n",
"delim = ';'\n",
"user_count = 100\n",
"base_path = '/opt/iui-datarelease1-sose2021/'\n",
"\n",
"# Pickle caches read by load_pickles(): samples and labels.\n",
"Xpickle_file = './X.pickle'\n",
"ypickle_file = './y.pickle'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7b486d61",
"metadata": {},
"outputs": [],
"source": [
"def load_pickles():\n",
"    \"\"\"Load the cached samples and labels from the two pickle files.\n",
"\n",
"    Reads the module-level paths ``Xpickle_file`` and ``ypickle_file``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(X, y)``: ``X`` converted with ``np.asarray(..., dtype=pd.DataFrame)``\n",
"        and ``y`` converted to a string array.\n",
"    \"\"\"\n",
"    # NOTE(security): pickle.load can execute arbitrary code contained in the\n",
"    # file; only load pickles that were produced by this project.\n",
"    # The `with` blocks close each file even when unpickling raises.\n",
"    with open(Xpickle_file, 'rb') as x_file:\n",
"        X = pickle.load(x_file)\n",
"\n",
"    with open(ypickle_file, 'rb') as y_file:\n",
"        y = pickle.load(y_file)\n",
"\n",
"    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5ea384ea",
"metadata": {},
"outputs": [],
"source": [
"def shorten(npList, thresh=100, leeway=5):\n",
"    \"\"\"Trim a recording to the span where 'Force' exceeds a threshold.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    npList : sliceable table with a 'Force' column\n",
"        The notebook passes pandas DataFrames.\n",
"    thresh : number, default 100\n",
"        A sample is active when its 'Force' value is greater than this.\n",
"    leeway : int, default 5\n",
"        Margin, in rows, kept around the active span.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ``npList[start:stop]`` where ``start = max(first_active - leeway, 0)``\n",
"    and ``stop = last_active + leeway``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    IndexError\n",
"        If no sample exceeds ``thresh``.\n",
"    \"\"\"\n",
"    active = np.where(npList['Force'] > thresh)[0]\n",
"    if active.size == 0:\n",
"        # Same exception type the bare `active[0]` lookup would raise,\n",
"        # but with a message that names the actual problem.\n",
"        raise IndexError(f'no Force sample exceeds the threshold {thresh}')\n",
"\n",
"    start = max(active[0] - leeway, 0)\n",
"    # NOTE(review): the slice end is exclusive, so only `leeway - 1` rows follow\n",
"    # the last active sample (and leeway=0 drops that sample itself). Kept as is\n",
"    # because it determines the sequence lengths used by the later cells.\n",
"    stop = active[-1] + leeway\n",
"    return npList[start:stop]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "09aad3f2",
"metadata": {},
"outputs": [],
"source": [
"def load_data():\n",
"    \"\"\"Return ``(samples, labels)`` for all users.\n",
"\n",
"    When both pickle caches exist they are loaded through ``load_pickles()``.\n",
"    Otherwise every file in ``<base_path><user>/split_letters_csv/`` for\n",
"    ``user`` in ``range(user_count)`` is read with ``pd.read_csv`` and labelled\n",
"    with the first alphabetic character of its file name.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(samples, labels)``: the parsed frames converted with\n",
"        ``np.asarray(..., dtype=pd.DataFrame)`` and the labels as a string array.\n",
"    \"\"\"\n",
"    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):\n",
"        return load_pickles()\n",
"\n",
"    data = []\n",
"    label = []\n",
"    for user in range(user_count):\n",
"        user_path = os.path.join(base_path, str(user), 'split_letters_csv')\n",
"        # os.listdir returns entries in arbitrary order; sorting keeps the\n",
"        # sample order (and thus every later split) reproducible.\n",
"        for file in sorted(os.listdir(user_path)):\n",
"            letter = ''.join(filter(str.isalpha, file))[0]\n",
"            # Pass the separator by keyword: newer pandas versions no longer\n",
"            # accept it as a positional argument.\n",
"            data.append(pd.read_csv(os.path.join(user_path, file), sep=delim))\n",
"            label.append(letter)\n",
"\n",
"    # Always a 2-tuple, so both branches agree and `x, y = load_data()` unpacks.\n",
"    return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "37d66d26",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.76 s, sys: 205 ms, total: 2.97 s\n",
"Wall time: 2.97 s\n"
]
}
],
"source": [
"%%time\n",
"# Load the samples into `x` and the labels into `y`; load_data() uses the\n",
"# pickle caches when both files exist.\n",
"x, y = load_data()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3178395b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.22 s, sys: 2.07 ms, total: 3.22 s\n",
"Wall time: 3.22 s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<timed exec>:1: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n"
]
}
],
"source": [
"%%time\n",
"# Trim every recording with shorten().\n",
"shortened = [shorten(sample) for sample in x]\n",
"\n",
"# The trimmed frames differ in length. np.array(list) on such ragged input\n",
"# emitted a VisibleDeprecationWarning here and raises on newer NumPy, so the\n",
"# object array is allocated explicitly and filled one frame per slot.\n",
"f_data = np.empty(len(shortened), dtype=object)\n",
"for idx, frame in enumerate(shortened):\n",
"    f_data[idx] = frame"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "dcbb85b7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 13102.000000\n",
"mean 61.169058\n",
"std 30.698514\n",
"min 10.000000\n",
"50% 57.000000\n",
"95% 102.000000\n",
"96% 107.000000\n",
"97% 113.000000\n",
"98% 127.000000\n",
"99% 156.000000\n",
"max 1522.000000\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Number of rows of every trimmed recording.\n",
"x_len = np.asarray([len(sample) for sample in f_data])\n",
"sq_xlen = pd.Series(x_len)\n",
"\n",
"# Length quantile at every whole percent from 0% to 99%, plotted as\n",
"# length (x axis) against quantile level (y axis).\n",
"ptiles = [pct * 0.01 for pct in range(100)]\n",
"l = [sq_xlen.quantile(level) for level in ptiles]\n",
"plt.plot(l, ptiles)\n",
"\n",
"# Summary statistics with extra resolution in the upper tail (95%..99%).\n",
"sq_xlen.describe(percentiles=[pct * 0.01 for pct in range(95, 100)])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1878d067",
"metadata": {},
"outputs": [],
"source": [
"# Keep only recordings no longer than the (integer-truncated) 99th-percentile\n",
"# length; the same index mask is applied to samples and labels.\n",
"thresh_p = 0.99\n",
"thresh = int(sq_xlen.quantile(thresh_p))\n",
"len_mask = np.nonzero(x_len <= thresh)\n",
"\n",
"x_filter, y_filter = f_data[len_mask], y[len_mask]\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3a01c1ad",
"metadata": {},
"outputs": [],
"source": [
"# Binarize the letter labels (one column per class).\n",
"lb = LabelBinarizer()\n",
"yt_filter = lb.fit_transform(y_filter)\n",
"\n",
"# Drop the 'Millis' column and actually pad the frames without it. Before,\n",
"# the dropped frames were stored in an unused list and the undropped frames\n",
"# were padded, so 'Millis' still reached the model as a feature.\n",
"# Reading from f_data[len_mask] instead of overwriting x_filter's own input\n",
"# keeps this cell re-runnable.\n",
"x_features = [frame.drop(columns='Millis') for frame in f_data[len_mask]]\n",
"\n",
"# Zero-pad at the end so every sample has the length of the longest one.\n",
"x_filter = pad_sequences(x_features, dtype=float, padding='post')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "634a024c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 34.7 ms, sys: 5.84 ms, total: 40.6 ms\n",
"Wall time: 39.2 ms\n"
]
}
],
"source": [
"%%time\n",
"# Hold out 20% of the samples for testing; the fixed random_state makes the\n",
"# split repeatable.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    x_filter,\n",
"    yt_filter,\n",
"    test_size=0.2,\n",
"    random_state=177013,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0109b9b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten (Flatten) (None, 2340) 0 \n",
"_________________________________________________________________\n",
"batch_normalization (BatchNo (None, 2340) 9360 \n",
"_________________________________________________________________\n",
"dense (Dense) (None, 2200) 5150200 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 1100) 2421100 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 550) 605550 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 225) 123975 \n",
"_________________________________________________________________\n",
"dense_4 (Dense) (None, 26) 5876 \n",
"=================================================================\n",
"Total params: 8,316,061\n",
"Trainable params: 8,311,381\n",
"Non-trainable params: 4,680\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Fully connected classifier: the padded sample is flattened, batch-normalised\n",
"# and passed through four ReLU layers of decreasing width into a 26-way softmax.\n",
"model = Sequential([\n",
"    Flatten(input_shape=x_filter[0].shape),\n",
"    BatchNormalization(),\n",
"    Dense(2200, activation='relu'),\n",
"    Dense(1100, activation='relu'),\n",
"    Dense(550, activation='relu'),\n",
"    Dense(225, activation='relu'),\n",
"    Dense(26, activation='softmax'),\n",
"])\n",
"\n",
"# Categorical cross-entropy matches the binarized labels; accuracy is tracked as 'acc'.\n",
"model.compile(\n",
"    optimizer=tf.keras.optimizers.Adam(0.001),\n",
"    loss='categorical_crossentropy',\n",
"    metrics=['acc'],\n",
")\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "204ed561",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/32\n",
"82/82 [==============================] - 1s 3ms/step - loss: 2.8553 - acc: 0.1745\n",
"Epoch 2/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 1.7793 - acc: 0.4480\n",
"Epoch 3/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 1.2391 - acc: 0.6070\n",
"Epoch 4/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.9623 - acc: 0.7021\n",
"Epoch 5/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.8489 - acc: 0.7336\n",
"Epoch 6/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.5827 - acc: 0.8169\n",
"Epoch 7/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.5208 - acc: 0.8313\n",
"Epoch 8/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.5864 - acc: 0.8147\n",
"Epoch 9/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.4101 - acc: 0.8710\n",
"Epoch 10/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.2856 - acc: 0.9087\n",
"Epoch 11/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.2789 - acc: 0.9126\n",
"Epoch 12/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.3118 - acc: 0.9027\n",
"Epoch 13/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.3337 - acc: 0.9054\n",
"Epoch 14/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.3052 - acc: 0.9049\n",
"Epoch 15/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.2052 - acc: 0.9403\n",
"Epoch 16/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.4292 - acc: 0.8907\n",
"Epoch 17/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1545 - acc: 0.9542\n",
"Epoch 18/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1401 - acc: 0.9575\n",
"Epoch 19/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1907 - acc: 0.9483\n",
"Epoch 20/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.2635 - acc: 0.9303\n",
"Epoch 21/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1116 - acc: 0.9671\n",
"Epoch 22/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.2453 - acc: 0.9317\n",
"Epoch 23/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1090 - acc: 0.9681\n",
"Epoch 24/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1578 - acc: 0.9541\n",
"Epoch 25/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1609 - acc: 0.9570\n",
"Epoch 26/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0801 - acc: 0.9775\n",
"Epoch 27/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.1597 - acc: 0.9615\n",
"Epoch 28/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0695 - acc: 0.9807\n",
"Epoch 29/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0622 - acc: 0.9853\n",
"Epoch 30/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0655 - acc: 0.9841\n",
"Epoch 31/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0383 - acc: 0.9910\n",
"Epoch 32/32\n",
"82/82 [==============================] - 0s 3ms/step - loss: 0.0716 - acc: 0.9792\n",
"CPU times: user 14 s, sys: 3.02 s, total: 17 s\n",
"Wall time: 8.95 s\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7f6dec4bf130>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# Train for 32 epochs on the training split in shuffled batches of 128.\n",
"# NOTE(review): no validation data is passed, so the log reports training\n",
"# metrics only.\n",
"model.fit(\n",
"    x=x_train,\n",
"    y=y_train,\n",
"    batch_size=128,\n",
"    epochs=32,\n",
"    verbose=1,\n",
"    shuffle=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "10a0d074",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluate on test data\n",
"82/82 [==============================] - 0s 2ms/step - loss: 1.7331 - acc: 0.7341\n",
"test loss, test acc: [1.7330855131149292, 0.7341040372848511]\n",
"Generate predictions for 3 samples\n",
"predictions shape: (3, 26)\n"
]
},
{
"data": {
"text/plain": [
"(array(['N', 'U', 'I'], dtype='<U1'), array(['N', 'U', 'I'], dtype='<U1'))"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Loss and accuracy of the trained model on the held-out split.\n",
"print('Evaluate on test data')\n",
"results = model.evaluate(x_test, y_test, batch_size=32)\n",
"print('test loss, test acc:', results)\n",
"\n",
"# Output of the last (softmax) layer for the first three test samples.\n",
"print('Generate predictions for 3 samples')\n",
"predictions = model.predict(x_test[:3])\n",
"print('predictions shape:', predictions.shape)\n",
"\n",
"# Map the binarized targets and the predicted scores back to letters and\n",
"# show them side by side.\n",
"true_letters = lb.inverse_transform(y_test[:3])\n",
"predicted_letters = lb.inverse_transform(predictions)\n",
"true_letters, predicted_letters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63f89de5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a5d0352",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ebc323f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "445a8e54",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "32206e00",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe00f947",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0fbe763",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d18608c3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6a0e538b",
"metadata": {},
"outputs": [],
"source": [
"# Ends the kernel session; every variable defined above is lost afterwards.\n",
"# NOTE(review): presumably here to free the GPU once the run is finished -- confirm.\n",
"exit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "521b2be6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}