Initial commit

Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
This commit is contained in:
Tuan-Dat Tran
2024-12-31 13:36:22 +01:00
commit 931652c494
78 changed files with 46976 additions and 0 deletions

0
obu-node/src/.gitkeep Normal file
View File

8
obu-node/src/changedb.py Normal file
View File

@@ -0,0 +1,8 @@
import pandas as pd
# Script to change the used database to simulate having a new database in the
# final version. The new database is the old one minus 50 elements.
# The CSV is read and then overwritten in place, so both operations share one path.
# NOTE(review): DataFrame.sample() returns the kept rows in random order, so the
# rewritten file is shuffled as well as shortened - confirm that is intended.
DATABASE_CSV = 'C:/Users/Firas/Desktop/docker/data/train_c1.csv'

frame = pd.read_csv(DATABASE_CSV)
r = frame.shape[0] - 50  # number of rows that survive
frame.sample(n=r).to_csv(DATABASE_CSV, index=False)
print(f"Sampled {r} lines and updated it as a new database")

View File

@@ -0,0 +1,31 @@
import requests
import sys
from time import sleep
import subprocess
def check_connection(ip):
    """Wait until ``ip`` answers the connection check, then launch the main script.

    POSTs to ``http://{ip}/check_connection`` every 5 seconds until the
    endpoint replies with HTTP 200. Once it does, waits another 15 seconds and
    runs the module-level ``main_script`` with ``new_args`` through
    ``execute_python_file``.

    Args:
        ip: Host (optionally ``host:port``) of the service to wait for.

    Notes:
        * Retrying is done with a loop rather than recursion, so a long outage
          cannot exhaust the interpreter's recursion limit.
        * Only network errors are retried. ``KeyboardInterrupt`` and errors
          raised while launching the main script propagate instead of silently
          restarting the wait.
        * A non-200 reply is treated like a failed attempt and retried.
    """
    url = f"http://{ip}/check_connection"
    while True:
        try:
            # The timeout keeps a hung endpoint from blocking the retry loop forever.
            response = requests.post(url, timeout=10)
        except requests.exceptions.RequestException:
            sleep(5)
            continue
        if response.status_code == 200:
            break
        sleep(5)

    print(f"Connetion established with {ip}. The script will run in 15 seconds.")
    sleep(15)
    execute_python_file(main_script, *new_args)
def execute_python_file(main_script, *args):
    """Run ``main_script`` as a child Python process and wait for it to finish.

    The child is started with the interpreter that is running this wrapper
    (``sys.executable``), so it uses the same installation and packages even
    when no ``python`` executable is on ``PATH``.

    Args:
        main_script: Path of the Python file to execute.
        *args: Command-line arguments forwarded to the script unchanged.

    Returns:
        None. A non-zero exit status of the child is reported on stdout and
        not re-raised.
    """
    # Fall back to the bare command name only if the interpreter path is unknown.
    interpreter = sys.executable or "python"
    cmd = [interpreter, main_script, *args]
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running the script: {e}")
if __name__ == "__main__":
    # Expected invocation: <ip:port> <main_script> [args forwarded to main_script...]
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <ip:port> <main_script> [script_args...]")
    ip = sys.argv[1]  # ip with port to check, for the clients, check the DMLO
    # main_script and new_args are module-level on purpose: check_connection reads them.
    main_script = sys.argv[2]
    new_args = sys.argv[3:]
    check_connection(ip)

356
obu-node/src/client.py Normal file
View File

@@ -0,0 +1,356 @@
import argparse
import os
from pathlib import Path
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from tensorflow import keras
import sys
import flwr as fl
import json
import requests
from flwr.common import Scalar, Config
from time import sleep
from typing import Dict, Union
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from flask import Flask, request
import threading
from time import time_ns
# Make TensorFlow logs less verbose
# NOTE(review): this runs after `import tensorflow` above; TensorFlow's native
# log level is presumably read when the library loads, so this assignment may
# need to move before that import to take effect - confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Flask application exposing the /upload endpoint defined below.
app = Flask(__name__)
def _parse_datapoints(payload):
    """Turn the raw upload body into a numeric ``lat``/``lon``/``rtt`` DataFrame.

    Each non-blank line is comma-separated. Fields 1 and 2 are taken as
    latitude and longitude, and the first whitespace-separated token of
    field 4 as the RTT.

    Raises:
        IndexError: A line has fewer than five comma-separated fields, or its
            RTT field is blank.
        ValueError: A selected field is not a number, or the payload holds no
            datapoints at all.
    """
    rows = []
    for line in payload.strip().split("\n"):
        if not line.strip():
            continue  # tolerate blank lines inside the payload
        elements = line.split(",")
        rows.append(
            (
                float(elements[1]),
                float(elements[2]),
                float(elements[4].split()[0]),
            )
        )
    if not rows:
        raise ValueError("no datapoints in request body")
    return pd.DataFrame(rows, columns=["lat", "lon", "rtt"])


@app.route("/upload", methods=["POST"])
def upload():
    """Receive new datapoints over HTTP and flag them for the next training round.

    On success the parsed frame is stored in the module-level ``new_data`` and
    ``database_changed`` is set to True. A body that cannot be parsed is
    rejected with HTTP 400 and leaves both globals untouched.
    """
    global new_data, database_changed
    try:
        parsed = _parse_datapoints(request.data.decode("utf-8"))
    except (UnicodeDecodeError, IndexError, ValueError) as e:
        return f"Malformed datapoints: {e}", 400
    # Publish the finished frame in a single assignment so a reader on another
    # thread never sees a half-built value.
    new_data = parsed
    database_changed = True
    return "Received new datapoints from the network monitoring tool", 200
def run_flask():
    """Serve the Flask app on all interfaces, port 80 (blocks the calling thread)."""
    app.run(host="0.0.0.0", port=80)


# Run the HTTP endpoint in a background daemon thread so it does not keep the
# process alive once the main thread exits. `daemon=True` replaces the
# deprecated Thread.setDaemon() call.
flask_thread = threading.Thread(target=run_flask, daemon=True)
flask_thread.start()
"""
gpu_id = 0 # Index of the GPU you want to use
physical_devices = tf.config.list_physical_devices('GPU')
print(physical_devices)
tf.config.set_visible_devices(physical_devices[gpu_id], 'GPU')
tf.config.experimental.set_memory_growth(physical_devices[gpu_id], True)
"""
# Command-line arguments, in positional order:
#   1: Flower server address, 2: DMLO address (not referenced in the code
#   visible here), 3: KPI collector address, 4: identifier of this client.
server_ip, dmlo_ip, server_ip_kpi, client_id = (sys.argv[pos] for pos in (1, 2, 3, 4))

# Quantile targeted by tilted_loss.
q_alpha = 0.95
# Window geometry used by reload_data: columns per timestep, predicted steps,
# and history length per sample.
n_features = 3
n_future = 1
n_past = 400
# Learning rate handed to the Adam optimizer in main().
learning_rate_argv = 0.001
# Set to True by the /upload endpoint, reset by reload_data.
database_changed = False
# Simple workaround to help measure the model upload time
rounds_involved = 0
uc6_02_start_obu = 0

# Initial training database, loaded once when the module is imported.
data_df = pd.read_csv("../resources/train_c1.csv")
datapoints = data_df.shape[0]
def _split_series(series, n_past, n_future):
    """Cut a 2-D array into sliding (past, future) windows, one per start row.

    Returns ``(X, y)`` where ``X[k]`` holds rows ``k .. k + n_past`` and
    ``y[k]`` the following ``n_future`` rows. Starts whose future window would
    run past the end of ``series`` are not emitted.
    """
    n_windows = max(len(series) - n_past - n_future + 1, 0)
    X = [series[start:start + n_past, :] for start in range(n_windows)]
    y = [
        series[start + n_past:start + n_past + n_future, :]
        for start in range(n_windows)
    ]
    return np.array(X), np.array(y)


def reload_data(data_df):  # untested change (db01)
    """Split, scale and window a dataset for training and evaluation.

    The first 70% of the rows become the training set and the rest the test
    set. Every column is scaled to [-1, 1] with a ``MinMaxScaler`` fitted on
    the training rows only; the same scaler is then applied to the test rows.
    Both sets are cut into windows of ``n_past`` input rows followed by
    ``n_future`` target rows, and column index 2 is kept as the target.

    Args:
        data_df: DataFrame with string column names and ``n_features``
            columns. It is not modified; the function works on copies.

    Returns:
        ``(X_train, y_train, X_test, y_test, train_df, scalers)`` where
        ``train_df`` is the scaled training frame and ``scalers`` maps
        ``"scaler_<column>"`` to the fitted scaler of that column.

    Raises:
        ValueError: Either split has fewer than ``n_past + n_future`` rows, so
            no window can be built from it.

    Side effects:
        Resets the module-level ``database_changed`` flag to False on success.
    """
    global database_changed

    print("Database is being processed")
    # data_df = pd.read_csv("data/train_c1.csv") #db01

    # Positional slicing with explicit copies: equivalent to splitting the rows
    # at the 70% mark, without writing scaled values into views of the
    # caller's frame.
    split_at = int(0.70 * len(data_df))
    train = data_df.iloc[:split_at].copy()
    test = data_df.iloc[split_at:].copy()

    min_rows = n_past + n_future
    if len(train) < min_rows or len(test) < min_rows:
        raise ValueError(
            f"Not enough rows to build a window: need at least {min_rows} per "
            f"split, got {len(train)} (train) and {len(test)} (test)"
        )

    # Fit one scaler per column on the training rows and reuse it for the test rows.
    scalers = {}
    for column in train.columns:
        scaler = MinMaxScaler(feature_range=(-1, 1))
        train_values = train[column].to_numpy(dtype=float).reshape(-1, 1)
        test_values = test[column].to_numpy(dtype=float).reshape(-1, 1)
        train[column] = scaler.fit_transform(train_values).ravel()
        test[column] = scaler.transform(test_values).ravel()
        scalers["scaler_" + column] = scaler

    # Creating X_train, y_train, X_test, y_test
    X_train, y_train = _split_series(train.values, n_past, n_future)
    X_test, y_test = _split_series(test.values, n_past, n_future)
    # These reshapes fail if the frame does not have exactly n_features columns.
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], n_features))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))
    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], n_features))
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)

    # Only the third column is predicted.
    y_train = y_train[:, :, 2]
    y_test = y_test[:, :, 2]

    database_changed = False
    return X_train, y_train, X_test, y_test, train, scalers
class QuantileMetric(tf.keras.metrics.Metric):
    """Streaming fraction of targets that lie strictly above the prediction.

    Two scalar weights are accumulated across batches: the number of elements
    where ``y_true > y_pred`` and the total number of compared elements.
    ``result()`` is their ratio.
    """

    def __init__(self, name="quantile_metric", **kwargs):
        super().__init__(name=name, **kwargs)
        self.quantile_metric = self.add_weight(
            name="quantile_metric", initializer="zeros"
        )
        self.quantile_metric_count = self.add_weight(
            name="quantile_metric_count", initializer="zeros"
        )

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch. ``sample_weight`` is accepted but ignored.

        Both tensors are flattened before comparing so that elements are
        matched one-to-one. Squeezing only ``y_pred`` would turn a
        ``(batch, 1)`` target against a ``(batch,)`` prediction into a
        ``(batch, batch)`` broadcast and count every pair.
        Assumes ``y_true`` and ``y_pred`` hold the same number of elements.
        """
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
        above = tf.math.greater(y_true, y_pred)
        self.quantile_metric.assign_add(
            tf.math.reduce_sum(tf.cast(above, tf.float32))
        )
        self.quantile_metric_count.assign_add(
            tf.cast(tf.size(above), tf.float32)
        )

    def result(self):
        """Return the accumulated ratio, or 0 when nothing has been counted yet."""
        return tf.math.divide_no_nan(
            self.quantile_metric, self.quantile_metric_count
        )

    def reset_state(self):
        """Clear both accumulators."""
        self.quantile_metric.assign(0.0)
        self.quantile_metric_count.assign(0.0)
def tilted_loss(y_true, y_pred):
    """Mean pinball (quantile) loss at the module-level quantile ``q_alpha``.

    For each element the error ``e = y_true - y_pred`` is weighted by
    ``q_alpha`` or by ``q_alpha - 1``, whichever product is larger, and the
    results are averaged over all elements.
    """
    residual = y_true - y_pred
    # Stack the two weighted errors and take the element-wise larger one.
    candidates = tf.stack([q_alpha * residual, (q_alpha - 1) * residual])
    return tf.reduce_mean(tf.math.reduce_max(candidates, axis=0))
class LSTMClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates a Keras model on local data.

    Besides the usual fit/evaluate duties the client reports timing KPIs to
    the collector at ``server_ip_kpi`` and swaps in freshly uploaded data
    (module-level ``new_data``) when ``database_changed`` is set.
    """

    # Seconds to wait for the KPI collector, so an unresponsive collector
    # cannot stall a training round.
    _KPI_TIMEOUT_S = 10

    def __init__(self, best_model, X_train, y_train, X_test, y_test, train_df, scalers):
        self.best_model = best_model
        self.X_train, self.y_train = X_train, y_train
        self.X_test, self.y_test = X_test, y_test
        self.train_df = train_df
        self.scalers = scalers
        self.properties = {"client_id": client_id}

    @staticmethod
    def _post_kpi(number, value):
        """Best-effort upload of one KPI value; failures are printed, never raised.

        ``number`` is the two-digit KPI id (e.g. ``"05"``). It selects the
        ``/upload_kpi<number>`` endpoint and the ``kpi<number>`` JSON key.
        """
        try:
            response = requests.post(
                f"http://{server_ip_kpi}/upload_kpi{number}",
                json={f"kpi{number}": value},
                timeout=LSTMClient._KPI_TIMEOUT_S,
            )
            if response.status_code != 200:
                print(f"Failed to send KPI_{number}. Status code: {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"Error while sending KPI_{number}: {e}")

    def get_properties(self, config: Config) -> Dict[str, Scalar]:
        """Return the static client properties (currently only the client id)."""
        return self.properties

    def get_parameters(self, config):
        """Get parameters of the local model."""
        return self.best_model.get_weights()

    def fit(self, parameters, config):
        """Train parameters on the locally held training set.

        Args:
            parameters: Global model weights to start from.
            config: Round configuration; must contain ``batch_size`` and
                ``local_epochs``.

        Returns:
            ``(updated_weights, number_of_training_samples, metrics)``.
        """
        global uc6_02_start_obu, rounds_involved, kpi_uc6_05

        # Time required to download the global model from the agg.node in secs
        # (Target <2s) has another part on the agg.node side
        uc6_01_end = time_ns()
        rounds_involved += 1
        # Time required to upload the model (has another part on the agg.node
        # side, in sec * 1000000000) (Target < 2s)
        uc6_02_end = time_ns()
        if rounds_involved > 1:
            # Measured from the end of the previous fit() to the start of this one.
            self._post_kpi("02", uc6_02_end - uc6_02_start_obu)
        self._post_kpi("01", uc6_01_end)

        if database_changed:
            try:
                # Update this instance, not the module-level `client`.
                (
                    self.X_train,
                    self.y_train,
                    self.X_test,
                    self.y_test,
                    self.train_df,
                    self.scalers,
                ) = reload_data(new_data)
            except Exception as e:
                # Keep training on the previous data if the upload is unusable.
                print(f"Error with the new data: {e}")

        uc6_05_start = time_ns()
        # Update local model parameters
        self.best_model.set_weights(parameters)
        # Get hyperparameters for this round
        batch_size: int = config["batch_size"]
        epochs: int = config["local_epochs"]
        # Train the model using hyperparameters from config
        history = self.best_model.fit(
            self.X_train,
            self.y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=0.1,
        )
        # Return updated model parameters and results
        parameters_prime = self.best_model.get_weights()
        num_examples_train = len(self.X_train)
        # The "accuracy" entries carry the mean absolute error.
        # NOTE(review): index 0 reports the first local epoch, not the last -
        # confirm that is intended when local_epochs > 1.
        results = {
            "id": client_id,
            "loss": history.history["loss"][0],
            "accuracy": history.history["mean_absolute_error"][0],
            "val_loss": history.history["val_loss"][0],
            "val_accuracy": history.history["val_mean_absolute_error"][0],
        }
        uc6_05_end = time_ns()
        # Time required to finish a training round (inkl. all local epochs) on
        # the OBU side in sec (target <240s)
        kpi_uc6_05 = (uc6_05_end - uc6_05_start) / 1000000000
        self._post_kpi("05", kpi_uc6_05)
        # Start of the upload-time measurement that the next fit() call completes.
        uc6_02_start_obu = time_ns()
        return parameters_prime, num_examples_train, results

    def evaluate(self, parameters, config):
        """Evaluate parameters on the locally held test set.

        Also writes the de-normalised predictions of the third column to
        ``prediction_client1.npy``.

        Returns:
            ``(loss, number_of_test_samples, {"accuracy": mean_absolute_error})``.
        """
        # Update local model with global parameters
        self.best_model.set_weights(parameters)
        # Evaluate global model parameters on the local test data and return results
        loss, _, error = self.best_model.evaluate(
            self.X_test, self.y_test, batch_size=32
        )
        num_examples_test = len(self.X_test)
        pred = self.best_model.predict(self.X_test)
        # Replicate the prediction across three columns so each per-column
        # scaler can be inverted on an array of the layout it was fitted on.
        pred_copies = np.repeat(pred, 3, axis=-1)
        pred_copies = np.expand_dims(pred_copies, axis=1)
        for index, column in enumerate(self.train_df.columns):
            scaler = self.scalers["scaler_" + column]
            pred_copies[:, :, index] = scaler.inverse_transform(
                pred_copies[:, :, index]
            )
        np.save("prediction_client1.npy", pred_copies[:, :, 2])
        return loss, num_examples_test, {"accuracy": error}
def main() -> None:
    """Prepare data and model, wait for the aggregation node, start the client.

    Steps:
        1. Pre-process the initial dataset and report the time taken (KPI 04).
        2. Load the stored Keras model and compile it with the quantile loss.
        3. Poll the KPI server's ``/check_connection`` up to 40 times, 5 s apart.
        4. Start the Flower NumPy client against ``server_ip``. This happens
           even if no poll succeeded.
    """
    global kpi_uc6_04, client

    uc6_04_start = time_ns()
    X_train, y_train, X_test, y_test, train_df, scalers = reload_data(data_df)
    uc6_04_end = time_ns()
    # Time required to process training data by OBU in sec (Target <60s)
    kpi_uc6_04 = (uc6_04_end - uc6_04_start) / 1000000000
    try:
        response = requests.post(
            f"http://{server_ip_kpi}/upload_kpi04",
            json={"kpi04": kpi_uc6_04},
            timeout=10,  # do not let an unresponsive collector block start-up
        )
        if response.status_code != 200:
            print(f"Failed to send KPI_04. Status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error while sending KPI_04: {e}")

    # The model is loaded uncompiled because the custom loss and metric are
    # attached here.
    best_model = tf.keras.models.load_model(
        "../resources/best_model_no_tuner_40.h5", compile=False
    )
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_argv)
    best_model.compile(
        optimizer=opt,
        loss=[tilted_loss],
        metrics=[QuantileMetric(), keras.metrics.MeanAbsoluteError()],
    )

    client = LSTMClient(best_model, X_train, y_train, X_test, y_test, train_df, scalers)

    for _ in range(40):
        try:
            response = requests.post(
                f"http://{server_ip_kpi}/check_connection", timeout=10
            )
        except requests.exceptions.RequestException:
            # Only network failures count as "not reachable"; Ctrl-C still works.
            reachable = False
        else:
            reachable = response.status_code == 200
        if reachable:
            sleep(5)
            break
        print(
            "\n\n\n\nConnection to the Agg.Node could not be established, trying again in 5 seconds...\n",
            flush=True,
        )
        # Sleep after a non-200 reply too, so the retries are always spaced out.
        sleep(5)

    fl.client.start_numpy_client(
        server_address=server_ip,
        client=client,
    )


if __name__ == "__main__":
    main()