Initial commit

Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
2024-12-31 13:36:22 +01:00
commit 931652c494
78 changed files with 46976 additions and 0 deletions
--- a/obu-node/src/.gitkeep
+++ b/obu-node/src/.gitkeep
--- a/obu-node/src/changedb.py
+++ b/obu-node/src/changedb.py
@@ -0,0 +1,8 @@
+import pandas as pd
+
+# Simulates the arrival of a new database in the final version: the CSV is
+# overwritten in place with a random sample of itself that is REMOVED_ROWS
+# rows smaller than the original.
+DATA_PATH = 'C:/Users/Firas/Desktop/docker/data/train_c1.csv'
+REMOVED_ROWS = 50
+
+df = pd.read_csv(DATA_PATH)
+r = len(df) - REMOVED_ROWS
+if r < 0:
+    # DataFrame.sample rejects a negative n; fail with a message that names
+    # the actual cause instead.
+    raise ValueError(
+        f"{DATA_PATH} has only {len(df)} rows; at least {REMOVED_ROWS} are required"
+    )
+sampled = df.sample(n=r)
+sampled.to_csv(DATA_PATH, index=False)
+print(f"Sampled {r} lines and updated it as a new database")
--- a/obu-node/src/check_conn.py
+++ b/obu-node/src/check_conn.py
@@ -0,0 +1,31 @@
+import requests
+import sys
+from time import sleep
+import subprocess
+
+
+def check_connection(ip):
+    """Wait until ``ip`` answers, then launch the configured script.
+
+    POSTs to ``http://{ip}/check_connection`` every 5 seconds until a
+    response arrives. On HTTP 200 the function waits 15 seconds and runs
+    ``main_script`` with ``new_args`` (module-level names assigned in the
+    ``__main__`` section). Any other status code ends the wait without
+    launching the script.
+
+    Args:
+        ip: Host (optionally with port) to probe.
+    """
+    # A loop rather than recursion, so a long outage cannot exhaust the call
+    # stack.
+    while True:
+        try:
+            # Timeout so a peer that accepts the connection but never answers
+            # cannot block the retry loop forever.
+            response = requests.post(f"http://{ip}/check_connection", timeout=10)
+        except requests.exceptions.RequestException:
+            # Only request failures are retried; KeyboardInterrupt and
+            # programming errors propagate instead of being swallowed.
+            sleep(5)
+            continue
+        break
+
+    if response.status_code == 200:
+        print(f"Connetion established with {ip}. The script will run in 15 seconds.")
+        sleep(15)
+        execute_python_file(main_script, *new_args)
+    else:
+        print(
+            f"{ip} answered with status code {response.status_code}; "
+            "the script will not be started."
+        )
+
+
+def execute_python_file(main_script, *args):
+    """Run ``main_script`` as a child Python process and wait for it to finish.
+
+    Args:
+        main_script: Path of the Python file to execute.
+        *args: Command-line arguments forwarded to the script.
+
+    A non-zero exit status of the child is reported on stdout and not
+    re-raised.
+    """
+    # sys.executable keeps the child on the same interpreter (and virtual
+    # environment) as this process; fall back to a PATH lookup if it is unset.
+    cmd = [sys.executable or 'python', main_script, *args]
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error running the script: {e}")
+
+
+if __name__ == "__main__":
+    # Fail with a usage hint instead of a bare IndexError when arguments are
+    # missing.
+    if len(sys.argv) < 3:
+        sys.exit(f"Usage: {sys.argv[0]} <ip[:port]> <script> [script arguments...]")
+    ip = sys.argv[1]  # ip with port to check, for the clients, check the DMLO
+    # main_script and new_args are read as globals by check_connection().
+    main_script = sys.argv[2]
+    new_args = sys.argv[3:]
+    check_connection(ip)
--- a/obu-node/src/client.py
+++ b/obu-node/src/client.py
@@ -0,0 +1,356 @@
+import argparse
+import os
+from pathlib import Path
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+import sys
+import flwr as fl
+import json
+import requests
+from flwr.common import Scalar, Config
+from time import sleep
+from typing import Dict, Union
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+from flask import Flask, request
+import threading
+from time import time_ns
+
+# Make TensorFlow logs less verbose
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+app = Flask(__name__)
+
+
+@app.route("/upload", methods=["POST"])
+def upload():
+    """Receive new datapoints and stage them for the next training round.
+
+    The request body is UTF-8 text with one comma-separated record per line.
+    From each record the fields at index 1 and 2 and the first
+    whitespace-separated token of the field at index 4 are kept, as the
+    columns ``lat``, ``lon`` and ``rtt``. The result is stored in the global
+    ``new_data`` and ``database_changed`` is set so the data is picked up
+    later.
+
+    Returns:
+        A ``(message, status)`` pair: 200 on success, 400 if a line has too
+        few fields or a value that is not a number.
+    """
+    global new_data, database_changed
+    payload = request.data.decode("utf-8")
+    rows = []
+    for line in payload.strip().split("\n"):
+        elements = line.split(",")
+        try:
+            # Parse straight to floats: no text round trip, no stray leading
+            # spaces, and the same numeric dtype as the CSV-loaded dataset.
+            rows.append(
+                (
+                    float(elements[1]),
+                    float(elements[2]),
+                    float(elements[4].split()[0]),
+                )
+            )
+        except (IndexError, ValueError):
+            # Reject the whole upload rather than staging partial data.
+            return f"Malformed datapoint: {line!r}", 400
+    new_data = pd.DataFrame(rows, columns=["lat", "lon", "rtt"])
+    # Set the flag only after new_data is complete.
+    database_changed = True
+    return "Received new datapoints from the network monitoring tool", 200
+
+
+def run_flask():
+    """Serve the Flask app on all interfaces, port 80 (blocks while serving)."""
+    app.run(host="0.0.0.0", port=80)
+
+
+# Run the upload endpoint in the background. daemon=True (instead of the
+# deprecated Thread.setDaemon) lets the process exit without waiting for the
+# server thread.
+flask_thread = threading.Thread(target=run_flask, daemon=True)
+flask_thread.start()
+
+"""
+gpu_id = 0  # Index of the GPU you want to use
+physical_devices = tf.config.list_physical_devices('GPU')
+print(physical_devices)
+tf.config.set_visible_devices(physical_devices[gpu_id], 'GPU')
+tf.config.experimental.set_memory_growth(physical_devices[gpu_id], True)
+"""
+
+# Positional command-line arguments.
+server_ip = sys.argv[1]  # passed to Flower as the server address
+dmlo_ip = sys.argv[2]
+server_ip_kpi = sys.argv[3]  # receives the KPI uploads and the connection check
+client_id = sys.argv[4]  # reported in the client properties and fit results
+
+# Model and windowing settings.
+q_alpha = 0.95  # quantile used by tilted_loss
+n_features = 3  # number of columns per time step
+n_future = 1  # time steps predicted per window
+n_past = 400  # time steps observed per window
+learning_rate_argv = 0.001
+
+# Set by the /upload endpoint, cleared again by reload_data().
+database_changed = False
+
+# Simple workaround to help measure the model upload time
+rounds_involved = 0
+uc6_02_start_obu = 0
+
+data_df = pd.read_csv("../resources/train_c1.csv")
+datapoints = len(data_df)
+
+
+def reload_data(data_df):  # untested change (db01)
+    """Split, scale and window a dataset for training and evaluation.
+
+    The first 70 % of the rows become the training set, the rest the test set.
+    Every column is scaled to [-1, 1] with a MinMaxScaler fitted on the
+    training rows only, then both sets are cut into sliding windows of
+    ``n_past`` observed rows followed by ``n_future`` target rows. Only the
+    column at index 2 is kept as the target. Clears the global
+    ``database_changed`` flag on success.
+
+    Args:
+        data_df: DataFrame with ``n_features`` columns and string column
+            names. It is not modified.
+
+    Returns:
+        ``(X_train, y_train, X_test, y_test, train_df, scalers)`` where
+        ``train_df`` is the scaled training frame and ``scalers`` maps
+        ``"scaler_<column>"`` to the fitted scaler of that column.
+
+    Raises:
+        ValueError: If the training or test part has fewer than
+            ``n_past + n_future`` rows, or the column count does not match
+            ``n_features``.
+    """
+    print("Database is being processed")
+
+    # Positional slicing with explicit copies: np.split on a DataFrame is
+    # deprecated, and writing scaled columns into slices of the caller's frame
+    # is ambiguous (SettingWithCopy).
+    split_at = int(0.70 * len(data_df))
+    train = data_df.iloc[:split_at].copy()
+    test = data_df.iloc[split_at:].copy()
+
+    # Fit one scaler per column on the training rows and scale them in place.
+    scalers = {}
+    for column in train.columns:
+        scaler = MinMaxScaler(feature_range=(-1, 1))
+        scaled = scaler.fit_transform(train[column].values.reshape(-1, 1))
+        train[column] = scaled.ravel()
+        scalers["scaler_" + column] = scaler
+
+    # Scale the test rows with the scalers fitted on the training rows.
+    for column in train.columns:
+        scaled = scalers["scaler_" + column].transform(
+            test[column].values.reshape(-1, 1)
+        )
+        test[column] = scaled.ravel()
+
+    def split_series(series, n_past, n_future):
+        """Return all (past, future) windows of a 2-D array as two arrays."""
+        n_windows = len(series) - n_past - n_future + 1
+        if n_windows <= 0:
+            raise ValueError(
+                f"Need at least {n_past + n_future} rows to build a window, "
+                f"got {len(series)}"
+            )
+        X = np.array([series[start : start + n_past, :] for start in range(n_windows)])
+        y = np.array(
+            [
+                series[start + n_past : start + n_past + n_future, :]
+                for start in range(n_windows)
+            ]
+        )
+        return X, y
+
+    # Creating X_train, y_train, X_test, y_test. The reshapes also verify that
+    # the data really has n_features columns.
+    X_train, y_train = split_series(train.values, n_past, n_future)
+    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
+    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], n_features))
+    X_test, y_test = split_series(test.values, n_past, n_future)
+    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))
+    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], n_features))
+
+    print(X_train.shape)
+    print(y_train.shape)
+    print(X_test.shape)
+    print(y_test.shape)
+
+    # Only the third column is predicted.
+    y_train = y_train[:, :, 2]
+    y_test = y_test[:, :, 2]
+
+    global database_changed
+    database_changed = False
+
+    return X_train, y_train, X_test, y_test, train, scalers
+
+
+class QuantileMetric(tf.keras.metrics.Metric):
+    """Fraction of target values that lie strictly above the prediction.
+
+    Accumulates, over all batches since the last reset, the number of
+    elements with ``y_true > y_pred`` and the number of elements compared.
+    """
+
+    def __init__(self, name="quantile_metric", **kwargs):
+        super().__init__(name=name, **kwargs)
+        # Running count of elements whose target exceeds the prediction.
+        self.quantile_metric = self.add_weight(
+            name="quantile_metric", initializer="zeros"
+        )
+        # Running count of all compared elements.
+        self.quantile_metric_count = self.add_weight(
+            name="quantile_metric_count", initializer="zeros"
+        )
+
+    def update_state(self, y_true, y_pred, sample_weight=None):
+        """Add one batch to the running counts (``sample_weight`` is ignored)."""
+        # Flatten both tensors so the comparison is strictly element-wise.
+        # Squeezing only y_pred turns a (batch, 1) target against a (batch,)
+        # prediction into a (batch, batch) broadcast.
+        quantileCondition = tf.math.greater(
+            tf.reshape(y_true, [-1]), tf.reshape(y_pred, [-1])
+        )
+        qc = tf.math.reduce_sum(tf.cast(quantileCondition, tf.float32))
+        self.quantile_metric.assign_add(qc)
+        self.quantile_metric_count.assign_add(
+            tf.cast(tf.size(quantileCondition), tf.float32)
+        )
+
+    def result(self):
+        """Return the accumulated fraction, or 0 before the first update."""
+        # divide_no_nan avoids NaN when nothing has been counted yet.
+        return tf.math.divide_no_nan(self.quantile_metric, self.quantile_metric_count)
+
+    def reset_state(self):
+        """Clear both running counts."""
+        self.quantile_metric.assign(0.0)
+        self.quantile_metric_count.assign(0.0)
+
+
+def tilted_loss(y_true, y_pred):
+    """Quantile ("pinball") loss for the module-level quantile ``q_alpha``.
+
+    For every element the error ``y_true - y_pred`` is weighted by ``q_alpha``
+    and by ``q_alpha - 1``; the larger of the two products is kept and the
+    mean over all elements is returned.
+    """
+    residual = y_true - y_pred
+    branches = tf.stack([q_alpha * residual, (q_alpha - 1) * residual])
+    per_element = tf.math.reduce_max(branches, axis=0)
+    return tf.reduce_mean(per_element)
+
+
+class LSTMClient(fl.client.NumPyClient):
+    """Flower NumPy client that trains and evaluates the local Keras model.
+
+    Besides the Flower callbacks, ``fit`` reports timing KPIs to the host in
+    ``server_ip_kpi`` and swaps in freshly uploaded data when
+    ``database_changed`` is set.
+    """
+
+    # Seconds to wait for the KPI endpoint, so an unresponsive KPI server
+    # cannot stall a training round indefinitely.
+    _KPI_TIMEOUT = 10
+
+    def __init__(self, best_model, X_train, y_train, X_test, y_test, train_df, scalers):
+        self.best_model = best_model
+        self.X_train, self.y_train = X_train, y_train
+        self.X_test, self.y_test = X_test, y_test
+        self.train_df = train_df
+        self.scalers = scalers
+        self.properties = {"client_id": client_id}
+
+    def _post_kpi(self, number, value):
+        """Best-effort upload of one KPI value; failures are only printed.
+
+        Args:
+            number: Two-digit KPI identifier, e.g. ``"02"``.
+            value: JSON-serializable measurement.
+        """
+        try:
+            response = requests.post(
+                f"http://{server_ip_kpi}/upload_kpi{number}",
+                json={f"kpi{number}": value},
+                timeout=self._KPI_TIMEOUT,
+            )
+            if response.status_code != 200:
+                print(f"Failed to send KPI_{number}. Status code: {response.status_code}")
+        except requests.exceptions.RequestException as e:
+            print(f"Error while sending KPI_{number}: {e}")
+
+    def get_properties(self, config: Config) -> Dict[str, Scalar]:
+        """Return the static client properties (the client id)."""
+        return self.properties
+
+    def get_parameters(self, config):
+        """Get parameters of the local model."""
+        return self.best_model.get_weights()
+
+    def fit(self, parameters, config):
+        """Train parameters on the locally held training set.
+
+        Args:
+            parameters: Global model weights to start from.
+            config: Must contain ``batch_size`` and ``local_epochs``.
+
+        Returns:
+            ``(weights, number of training examples, metrics dict)``.
+        """
+        global uc6_02_start_obu, rounds_involved, kpi_uc6_05
+
+        uc6_01_end = time_ns()  # Time required to download the global model from the agg.node in secs (Target <2s) has another part on the agg.node side
+
+        rounds_involved += 1
+        uc6_02_end = time_ns()  # Time required to upload the model (has another part on the agg.node side, in sec * 1000000000) (Target < 2s)
+        if rounds_involved > 1:
+            # The upload of the previous round ended when this round started.
+            self._post_kpi("02", uc6_02_end - uc6_02_start_obu)
+
+        self._post_kpi("01", uc6_01_end)
+
+        if database_changed:
+            # Replace the data on this instance (not on the module-level
+            # client); on failure keep training with the previous data.
+            try:
+                (
+                    self.X_train,
+                    self.y_train,
+                    self.X_test,
+                    self.y_test,
+                    self.train_df,
+                    self.scalers,
+                ) = reload_data(new_data)
+            except Exception as e:
+                print(f"Error with the new data: {e}")
+
+        uc6_05_start = time_ns()
+
+        # Update local model parameters
+        self.best_model.set_weights(parameters)
+
+        # Get hyperparameters for this round
+        batch_size: int = config["batch_size"]
+        epochs: int = config["local_epochs"]
+
+        # Train the model using hyperparameters from config
+        history = self.best_model.fit(
+            self.X_train, self.y_train, batch_size, epochs, validation_split=0.1
+        )
+
+        # Return updated model parameters and results
+        parameters_prime = self.best_model.get_weights()
+        num_examples_train = len(self.X_train)
+        # Metrics of the first local epoch; "accuracy" carries the MAE.
+        results = {
+            "id": client_id,
+            "loss": history.history["loss"][0],
+            "accuracy": history.history["mean_absolute_error"][0],
+            "val_loss": history.history["val_loss"][0],
+            "val_accuracy": history.history["val_mean_absolute_error"][0],
+        }
+        uc6_05_end = time_ns()
+        kpi_uc6_05 = (
+            (uc6_05_end - uc6_05_start) / 1000000000
+        )  # Time required to finish a training round (inkl. all local epochs) on the OBU side in sec (target <240s)
+        self._post_kpi("05", kpi_uc6_05)
+
+        # Start of the upload measurement reported in the next round.
+        uc6_02_start_obu = time_ns()
+        return parameters_prime, num_examples_train, results
+
+    def evaluate(self, parameters, config):
+        """Evaluate parameters on the locally held test set.
+
+        Also writes the de-scaled predictions to ``prediction_client1.npy``.
+
+        Returns:
+            ``(loss, number of test examples, {"accuracy": MAE})``.
+        """
+
+        # Update local model with global parameters
+        self.best_model.set_weights(parameters)
+
+        # Evaluate global model parameters on the local test data and return results
+        loss, metric, error = self.best_model.evaluate(self.X_test, self.y_test, 32)
+        num_examples_test = len(self.X_test)
+
+        pred = self.best_model.predict(self.X_test)
+        # Copy the prediction into three columns so that each column's scaler
+        # can be inverted; only index 2 (the predicted column) is saved.
+        pred_copies = np.repeat(pred, 3, axis=-1)
+        pred_copies = np.expand_dims(pred_copies, axis=1)
+        for index, i in enumerate(self.train_df.columns):
+            scaler = self.scalers["scaler_" + i]
+            pred_copies[:, :, index] = scaler.inverse_transform(
+                pred_copies[:, :, index]
+            )
+        np.save("prediction_client1.npy", pred_copies[:, :, 2])
+        return loss, num_examples_test, {"accuracy": error}
+
+
+def main() -> None:
+    """Prepare data and model, wait for the aggregation node, start the client.
+
+    Steps: preprocess the initial dataset (reporting the duration as KPI 04),
+    load and compile the stored model, probe the KPI host up to 40 times, then
+    hand control to Flower.
+    """
+    uc6_04_start = time_ns()
+
+    X_train, y_train, X_test, y_test, train_df, scalers = reload_data(data_df)
+
+    uc6_04_end = time_ns()
+    global kpi_uc6_04
+    kpi_uc6_04 = (
+        uc6_04_end - uc6_04_start
+    ) / 1000000000  # Time required to process training data by OBU in sec (Target <60s)
+    try:
+        # Timeout so an unresponsive KPI host cannot block start-up.
+        response = requests.post(
+            f"http://{server_ip_kpi}/upload_kpi04",
+            json={"kpi04": kpi_uc6_04},
+            timeout=10,
+        )
+        if response.status_code != 200:
+            print(f"Failed to send KPI_04. Status code: {response.status_code}")
+    except requests.exceptions.RequestException as e:
+        print(f"Error while sending KPI_04: {e}")
+
+    # compile=False: the custom loss and metric are attached below instead of
+    # being restored from the file.
+    best_model = tf.keras.models.load_model(
+        "../resources/best_model_no_tuner_40.h5", compile=False
+    )
+    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_argv)
+
+    best_model.compile(
+        optimizer=opt,
+        loss=[tilted_loss],
+        metrics=[QuantileMetric(), keras.metrics.MeanAbsoluteError()],
+    )
+
+    global client
+    client = LSTMClient(best_model, X_train, y_train, X_test, y_test, train_df, scalers)
+
+    # Wait for the aggregation node: at most 40 attempts, 5 seconds apart.
+    # The client is started afterwards even if no attempt succeeded.
+    for _ in range(40):
+        try:
+            response = requests.post(
+                f"http://{server_ip_kpi}/check_connection", timeout=10
+            )
+        except requests.exceptions.RequestException:
+            # Only request failures are retried; Ctrl-C still interrupts.
+            print(
+                "\n\n\n\nConnection to the Agg.Node could not be established, trying again in 5 seconds...\n",
+                flush=True,
+            )
+        else:
+            if response.status_code == 200:
+                sleep(5)
+                break
+            print(
+                f"Agg.Node answered with status code {response.status_code}, trying again in 5 seconds...",
+                flush=True,
+            )
+        # Pause after every unsuccessful attempt, including non-200 answers.
+        sleep(5)
+
+    fl.client.start_numpy_client(
+        server_address=server_ip,
+        client=client,
+    )
+
+
+if __name__ == "__main__":
+    main()