In [1]:
import pandas as pd
import numpy as np

In [2]:
delim = ';'

base_path = '/opt/iui-datarelease1-sose2021/'

Xpickle_file = './X.pickle'

ypickle_file = './y.pickle'

In [3]:
THRESH = [70]
LEEWAY = [0]
EPOCH = [20, 30, 50]

DENSE_COUNT = range(1,4)
DENSE_NEURONS = range(600, 2401, 600)

DENSE2_COUNT = range(1,4)
DENSE2_NEURONS = range(600, 2401, 600)

AVG_FROM = 30

threshold_p = 0.99

In [4]:
import pickle

def load_pickles():
    _p = open(Xpickle_file, 'rb')
    X = pickle.load(_p)
    _p.close()
        
    _p = open(ypickle_file, 'rb')
    y = pickle.load(_p)
    _p.close()
    
    return (np.asarray(X, dtype=pd.DataFrame), np.asarray(y, dtype=str))

In [5]:
import os

def load_data():
    if os.path.isfile(Xpickle_file) and os.path.isfile(ypickle_file):
        return load_pickles()
    data = []
    label = []
    for user in range(0, user_count):
        user_path = base_path + str(user) + '/split_letters_csv/'
        for file in os.listdir(user_path):
            file_name = user_path + file
            letter = ''.join(filter(lambda x: x.isalpha(), file))[0]
            data.append(pd.read_csv(file_name, delim))
            label.append(letter)
    return (np.asarray(data, dtype=pd.DataFrame), np.asarray(label, dtype=str), np.asarray(file_name))

In [6]:
def shorten(npList, thresh, leeway):
    temp        = npList['Force']
    
    temps_over_T = np.where(temp > thresh)[0]
    return npList[max(temps_over_T[0]-leeway,0):temps_over_T[-1]+leeway]

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preproc(data, label, thresh, leeway):
    #shorten
    XX = np.array(list(map(shorten, data, [thresh for _ in range(len(data))], [leeway for _ in range(len(data))])),dtype=object)

    #filter
    len_mask = np.where(np.asarray(list(map(len, XX))) <= int(pd.Series(np.asarray(list(map(len, XX)))).quantile(threshold_p)))
    X_filter = XX[len_mask] 
    y_filter = label[len_mask]
    
    #drop millis
    [x.drop(labels='Millis', axis=1) for x in X_filter]

    #pad
    X_filter = pad_sequences(X_filter, dtype=float, padding='post')
   
    return (X_filter, y_filter)

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization

def build_model(dcount, dnons, dcount2, dnons2, X_shape):
    model = Sequential()

    model.add(BatchNormalization(input_shape=X_shape))
    
    model.add(Flatten())

    for i in range(dcount):
        model.add(Dense(dnons, activation='relu'))
        
    for i in range(dcount2):
        model.add(Dense(dnons2, activation='relu'))
        
    model.add(Dense(26, activation='softmax'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss="categorical_crossentropy", 
        metrics=["acc"],
    )

    return model



In [9]:
def get_avg_acc(X_train, y_train, X_test, y_test, epoch, dcount, dnons, dcount2, dnons2):
    accs = []
    for i in range(AVG_FROM):
        model = build_model(dcount, dnons, dcount2, dnons2, X_train[0].shape)
        model.fit(X_train, y_train, 
                  epochs=epoch,
                  batch_size=128,
                  shuffle=True,
                  validation_data=(X_test, y_test),
                  verbose=0,
                 )
        results = model.evaluate(X_test, y_test, batch_size=128, verbose=0)
        accs.append((model,results[1]))
    return np.mean(np.delete(accs,0,1).astype('float64'))


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
import tensorflow as tf
X, y = load_data()
result = pd.DataFrame({'Threshold': pd.Series([], dtype='int'),
                       'Leeway': pd.Series([], dtype='int'),
                       'Epoch': pd.Series([], dtype='int'),
                       'DENSE_COUNT1': pd.Series([], dtype='int'),
                       'DENSE_NEURON1': pd.Series([], dtype='int'),
                       'DENSE_COUNT2': pd.Series([], dtype='int'),
                       'DENSE_NEURON2': pd.Series([], dtype='int'),
                       'Accuracy': pd.Series([], dtype='float')})

In [11]:
# FIRST CELL: set these variables to limit GPU usage.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # this is required
os.environ['CUDA_VISIBLE_DEVICES'] = '1'          # set to '0' for GPU0, '1' for GPU1 or '2' for GPU2. Check "gpustat" in a terminal.

In [12]:
%%time

for t in THRESH:
    for l in LEEWAY:
        for e in EPOCH:
            for dc in DENSE_COUNT:
                for dn in DENSE_NEURONS:
                    for dc2 in DENSE2_COUNT:
                        for dn2 in DENSE2_NEURONS:
                            print(f"Testing with: Threshold: {t}")
                            print(f"              Leeway: {l}")
                            print(f"              Epoch: {e}")
                            print(f"              Dense Count 1: {dc}")
                            print(f"              Dense Neurons 1: {dn}")
                            print(f"              Dense Count 2: {dc2}")
                            print(f"              Dense Neurons 2: {dn2}")
                            Xp, yp = preproc(X, y, t, l)
                            lb = LabelBinarizer()

                            ypt = lb.fit_transform(yp)
                            X_train, X_test, y_train, y_test = train_test_split(Xp, ypt, test_size=0.2, random_state=177013)
                            acc = get_avg_acc(X_train,y_train,X_test, y_test, e, dc,dn,dc2,dn2)
                            result = result.append({'Threshold':     t,
                                                    'Leeway':        l,
                                                    'Epoch':         e,
                                                    'DENSE_COUNT1':  dc,
                                                    'DENSE_NEURON1': dn,
                                                    'DENSE_COUNT2':  dc2,
                                                    'DENSE_NEURON2': dn2,
                                                    'Accuracy':      acc}, ignore_index=True)
                            print(f"Accuracy: {acc*100:.2f}\n\n")
                            result.to_csv('results.csv', header=False)

Testing with: Threshold: 70
              Leeway: 0
              Epoch: 20
              Dense Count 1: 1
              Dense Neurons 1: 600
              Dense Count 2: 1
              Dense Neurons 2: 600
Accuracy: 76.83
Testing with: Threshold: 70
              Leeway: 0
              Epoch: 20
              Dense Count 1: 1
              Dense Neurons 1: 600
              Dense Count 2: 1
              Dense Neurons 2: 1200
Accuracy: 77.67
Testing with: Threshold: 70
              Leeway: 0
              Epoch: 20
              Dense Count 1: 1
              Dense Neurons 1: 600
              Dense Count 2: 1
              Dense Neurons 2: 1800
Accuracy: 77.85
Testing with: Threshold: 70
              Leeway: 0
              Epoch: 20
              Dense Count 1: 1
              Dense Neurons 1: 600
              Dense Count 2: 1
              Dense Neurons 2: 2400
Accuracy: 77.80
Testing with: Threshold: 70
              Leeway: 0
              Epoch: 20
              Dense Count

KeyboardInterrupt: 

In [13]:
result.to_csv('./results.csv')

In [14]:
exit()