Initial commit
Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
This commit is contained in:
0
obu-node/src/.gitkeep
Normal file
0
obu-node/src/.gitkeep
Normal file
8
obu-node/src/changedb.py
Normal file
8
obu-node/src/changedb.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import pandas as pd
|
||||
|
||||
# Script that swaps the database in use, to simulate the arrival of a new
# database in the final version: the "new" database is the old one minus
# 50 rows, written back over the same CSV file.
# NOTE(review): DataFrame.sample picks rows at random, so the surviving rows
# are also written back in shuffled order — confirm this is intended.
csv_path = 'C:/Users/Firas/Desktop/docker/data/train_c1.csv'

df = pd.read_csv(csv_path)
r = len(df) - 50  # number of rows that survive
sampled = df.sample(n=r)
sampled.to_csv(csv_path, index=False)

print(f"Sampled {r} lines and updated it as a new database")
|
||||
31
obu-node/src/check_conn.py
Normal file
31
obu-node/src/check_conn.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import requests
|
||||
import sys
|
||||
from time import sleep
|
||||
import subprocess
|
||||
|
||||
|
||||
def check_connection(ip):
    """Wait until ``ip`` answers the connection check, then run the main script.

    Sends ``POST http://{ip}/check_connection`` every 5 seconds until the
    response status is 200. It then waits 15 more seconds and runs
    ``main_script`` with ``new_args`` through ``execute_python_file``; both
    names are module-level globals assigned in the ``__main__`` section.

    Args:
        ip: Host (with port) to poll; interpolated into the URL as-is.
    """
    # Iterative retry: a recursive retry adds one stack frame per failed
    # attempt and eventually dies with RecursionError on a long outage.
    while True:
        try:
            # The timeout keeps a half-open connection from blocking forever;
            # requests' Timeout is a RequestException and is retried below.
            response = requests.post(f"http://{ip}/check_connection", timeout=10)
        except requests.exceptions.RequestException:
            # Only network-level failures are retried. A bare ``except`` would
            # also swallow KeyboardInterrupt and any error raised while
            # launching the main script.
            sleep(5)
            continue
        if response.status_code == 200:
            break
        # The host answered but is not ready yet: keep polling instead of
        # returning silently without ever launching the script.
        sleep(5)

    print(f"Connetion established with {ip}. The script will run in 15 seconds.")
    sleep(15)
    execute_python_file(main_script, *new_args)
|
||||
|
||||
|
||||
def execute_python_file(main_script, *args):
    """Run ``main_script`` in a child Python process and wait for it to end.

    Args:
        main_script: Path of the Python script to run.
        *args: Command-line arguments handed to the script, in order.

    A non-zero exit status of the child is printed and not re-raised.
    """
    # Use the interpreter that is running this file rather than whatever
    # "python" resolves to on PATH (which may be missing or a different
    # installation). sys.executable can be empty, hence the fallback.
    interpreter = sys.executable or 'python'
    cmd = [interpreter, main_script, *args]
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running the script: {e}")
|
||||
|
||||
|
||||
# Usage: <ip:port> <main_script> [arguments forwarded to main_script ...]
if __name__ == "__main__":
    cli_args = sys.argv
    # ip with port to check, for the clients, check the DMLO
    ip = cli_args[1]
    # main_script and new_args are read as globals by check_connection
    main_script = cli_args[2]
    new_args = cli_args[3:]
    check_connection(ip)
|
||||
356
obu-node/src/client.py
Normal file
356
obu-node/src/client.py
Normal file
@@ -0,0 +1,356 @@
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
import sys
|
||||
import flwr as fl
|
||||
import json
|
||||
import requests
|
||||
from flwr.common import Scalar, Config
|
||||
from time import sleep
|
||||
from typing import Dict, Union
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from flask import Flask, request
|
||||
import threading
|
||||
from time import time_ns
|
||||
|
||||
# Reduce TensorFlow's native log output.
# NOTE(review): this is assigned after the `import tensorflow` line above;
# confirm the setting still takes effect at this point.
os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "3"})

# Flask application that receives datapoints pushed to this client.
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/upload", methods=["POST"])
def upload():
    """Receive new datapoints from the network monitoring tool.

    The request body is decoded as UTF-8 text holding one comma-separated
    record per line. From each record, fields 1 and 2 and the first
    whitespace-separated token of field 4 (zero-based) are kept as the
    ``lat``, ``lon`` and ``rtt`` columns of the global ``new_data`` frame.
    ``database_changed`` is then set so the next training round reloads it.

    Returns:
        A ``(message, status)`` tuple: 200 on success, 400 when the body is
        empty, cannot be decoded, or contains a malformed record.
    """
    global new_data, database_changed

    try:
        text = request.data.decode("utf-8")
        rows = []
        for line in text.strip().split("\n"):
            if not line.strip():
                continue  # tolerate blank lines between records
            elements = line.split(",")
            rows.append(
                (elements[1].strip(), elements[2].strip(), elements[4].split()[0])
            )
        if not rows:
            return "No datapoints received", 400
        # Store numbers, not strings: the columns are scaled and fed to the
        # model later, which needs numeric values.
        parsed = pd.DataFrame(rows, columns=["lat", "lon", "rtt"]).apply(
            pd.to_numeric
        )
    except (UnicodeDecodeError, IndexError, ValueError) as e:
        # A short or non-numeric record would otherwise surface as HTTP 500.
        return f"Malformed datapoints: {e}", 400

    # Publish the frame before raising the flag so a reader that sees the
    # flag also sees the matching data.
    new_data = parsed
    database_changed = True
    return "Received new datapoints from the network monitoring tool", 200
|
||||
|
||||
|
||||
def run_flask():
    """Serve the Flask app on every interface, port 80 (blocks the caller)."""
    listen_on = {"host": "0.0.0.0", "port": 80}
    app.run(**listen_on)
|
||||
|
||||
|
||||
# Run the Flask server in a background thread. It is a daemon thread so it
# does not keep the process alive once the main thread has finished.
# ``daemon=True`` replaces the deprecated ``Thread.setDaemon`` call.
flask_thread = threading.Thread(target=run_flask, daemon=True)
flask_thread.start()
|
||||
|
||||
"""
|
||||
gpu_id = 0 # Index of the GPU you want to use
|
||||
physical_devices = tf.config.list_physical_devices('GPU')
|
||||
print(physical_devices)
|
||||
tf.config.set_visible_devices(physical_devices[gpu_id], 'GPU')
|
||||
tf.config.experimental.set_memory_growth(physical_devices[gpu_id], True)
|
||||
"""
|
||||
|
||||
# Positional command-line arguments:
#   <server_ip> <dmlo_ip> <server_ip_kpi> <client_id>
server_ip = sys.argv[1]
dmlo_ip = sys.argv[2]
server_ip_kpi = sys.argv[3]
client_id = sys.argv[4]

# Model / windowing settings.
q_alpha = 0.95  # quantile used by tilted_loss
n_features = 3  # columns per timestep
n_future = 1  # timesteps predicted per window
n_past = 400  # timesteps observed per window
learning_rate_argv = 0.001

# Set by the /upload handler, cleared by reload_data.
database_changed = False

# Simple workaround to help measure the model upload time
rounds_involved = 0
uc6_02_start_obu = 0

# Initial training database, loaded once at import time.
data_df = pd.read_csv("../resources/train_c1.csv")
datapoints = len(data_df)
|
||||
|
||||
|
||||
def _split_series(series, n_past, n_future):
    """Cut a 2-D array into sliding (past, future) windows.

    For every start index that leaves room for ``n_past`` observed rows
    followed by ``n_future`` rows to predict, one pair of slices is produced.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``series`` has fewer
        than ``n_past + n_future`` rows.
    """
    X, y = [], []
    for window_start in range(len(series) - n_past - n_future + 1):
        past_end = window_start + n_past
        future_end = past_end + n_future
        X.append(series[window_start:past_end, :])
        y.append(series[past_end:future_end, :])
    return np.array(X), np.array(y)


def reload_data(data_df):  # untested change (db01)
    """Split, scale and window a dataset for training and evaluation.

    The first 70 % of the rows become the training set, the rest the test
    set. Each column is scaled to [-1, 1] with a ``MinMaxScaler`` fitted on
    the training rows only. Both sets are then cut into windows of ``n_past``
    rows followed by ``n_future`` rows, and the targets keep only column
    index 2. On success the global ``database_changed`` flag is cleared.

    Args:
        data_df: Frame with ``n_features`` columns and string column names.

    Returns:
        ``(X_train, y_train, X_test, y_test, train_df, scalers)`` where
        ``train_df`` is the scaled training frame and ``scalers`` maps
        ``"scaler_<column>"`` to the fitted scaler.

    Raises:
        ValueError: If either split has fewer than ``n_past + n_future`` rows
            and therefore cannot produce a single window.
    """
    global database_changed

    print("Database is being processed")
    # data_df = pd.read_csv("data/train_c1.csv") #db01

    # Positional slicing instead of np.split on a DataFrame (a deprecated
    # path in recent pandas). The explicit copies make the column
    # assignments below act on independent frames rather than on slices of
    # the caller's data.
    split_at = int(0.70 * len(data_df))
    train_df = data_df.iloc[:split_at].copy()
    test_df = data_df.iloc[split_at:].copy()

    min_rows = n_past + n_future
    if len(train_df) < min_rows or len(test_df) < min_rows:
        # Otherwise the reshape further down fails with an opaque
        # "tuple index out of range".
        raise ValueError(
            f"Not enough datapoints: need at least {min_rows} rows in each "
            f"split, got {len(train_df)} (train) and {len(test_df)} (test)"
        )

    # Fit one scaler per column on the training rows and reuse it for the
    # test rows.
    scalers = {}
    for column in train_df.columns:
        scaler = MinMaxScaler(feature_range=(-1, 1))
        train_df[column] = scaler.fit_transform(
            train_df[column].values.reshape(-1, 1)
        ).ravel()
        test_df[column] = scaler.transform(
            test_df[column].values.reshape(-1, 1)
        ).ravel()
        scalers["scaler_" + column] = scaler

    # Creating X_train, y_train, X_test, y_test
    X_train, y_train = _split_series(train_df.values, n_past, n_future)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], n_features))
    X_test, y_test = _split_series(test_df.values, n_past, n_future)
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))
    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], n_features))

    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)

    # Only the column at index 2 is used as the prediction target.
    y_train = y_train[:, :, 2]
    y_test = y_test[:, :, 2]

    database_changed = False

    return X_train, y_train, X_test, y_test, train_df, scalers
|
||||
|
||||
|
||||
class QuantileMetric(tf.keras.metrics.Metric):
    """Fraction of targets that lie strictly above their prediction.

    Two weights are accumulated over batches: the number of samples with
    ``y_true > y_pred`` and the number of samples seen. ``sample_weight`` is
    accepted for API compatibility and ignored.
    """

    def __init__(self, name="quantile_metric", **kwargs):
        super().__init__(name=name, **kwargs)
        self.quantile_metric = self.add_weight(
            name="quantile_metric", initializer="zeros"
        )
        self.quantile_metric_count = self.add_weight(
            name="quantile_metric_count", initializer="zeros"
        )

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch of targets and predictions to the running counts."""
        # Flatten both sides so the comparison is element-wise. Squeezing
        # only y_pred turns a (batch, 1) target against a (batch,) prediction
        # into a (batch, batch) broadcast that compares every target with
        # every prediction.
        y_pred = tf.reshape(y_pred, [-1])
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
        above = tf.cast(tf.math.greater(y_true, y_pred), tf.float32)
        self.quantile_metric.assign_add(tf.math.reduce_sum(above))
        self.quantile_metric_count.assign_add(tf.cast(tf.size(above), tf.float32))

    def result(self):
        """Return the accumulated fraction (0 before any update)."""
        # divide_no_nan avoids a NaN when no sample has been counted yet.
        return tf.math.divide_no_nan(
            self.quantile_metric, self.quantile_metric_count
        )

    def reset_state(self):
        """Clear both running counts."""
        self.quantile_metric.assign(0.0)
        self.quantile_metric_count.assign(0.0)
|
||||
|
||||
|
||||
def tilted_loss(y_true, y_pred):
    """Tilted loss for the module-level quantile ``q_alpha``.

    For each element the larger of ``q * e`` and ``(q - 1) * e`` is taken,
    with ``e = y_true - y_pred``; the mean over all elements is returned.
    """
    residual = y_true - y_pred
    branches = tf.stack([q_alpha * residual, (q_alpha - 1) * residual])
    # Element-wise maximum of the two branches (stacked along axis 0).
    per_element = tf.math.reduce_max(branches, axis=0, keepdims=True)
    return tf.reduce_mean(per_element)
|
||||
|
||||
|
||||
class LSTMClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates the local model.

    Besides the usual ``fit`` / ``evaluate`` round trip, the client reports
    timing KPIs to the KPI server (``server_ip_kpi``) and reloads its training
    data when the module-level ``database_changed`` flag is set.
    """

    # Seconds to wait for the KPI server before giving up on one upload, so
    # an unreachable server cannot stall a training round indefinitely.
    KPI_TIMEOUT = 10

    def __init__(self, best_model, X_train, y_train, X_test, y_test, train_df, scalers):
        self.best_model = best_model
        self.X_train, self.y_train = X_train, y_train
        self.X_test, self.y_test = X_test, y_test
        self.train_df = train_df
        self.scalers = scalers
        self.properties = {"client_id": client_id}

    def _post_kpi(self, number, value):
        """Best-effort upload of one KPI value; failures are only printed.

        Args:
            number: Two-digit KPI number as text (e.g. ``"02"``); it selects
                the ``/upload_kpi<number>`` endpoint and the JSON key.
            value: The measurement to send.
        """
        try:
            response = requests.post(
                f"http://{server_ip_kpi}/upload_kpi{number}",
                json={f"kpi{number}": value},
                timeout=self.KPI_TIMEOUT,
            )
            if response.status_code != 200:
                print(f"Failed to send KPI_{number}. Status code: {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"Error while sending KPI_{number}: {e}")

    def get_properties(self, config: Config) -> Dict[str, Scalar]:
        """Return the client properties (currently only ``client_id``)."""
        return self.properties

    def get_parameters(self, config):
        """Get parameters of the local model."""
        return self.best_model.get_weights()

    def fit(self, parameters, config):
        """Train parameters on the locally held training set.

        Args:
            parameters: Global model weights to start from.
            config: Round configuration; ``batch_size`` and ``local_epochs``
                are read from it.

        Returns:
            ``(weights, num_training_examples, results)`` where ``results``
            holds the first-epoch loss / MAE values and the client id.
        """
        global uc6_02_start_obu, rounds_involved, kpi_uc6_05

        # Time required to download the global model from the agg.node in
        # secs (Target <2s) has another part on the agg.node side
        uc6_01_end = time_ns()

        rounds_involved += 1
        # Time required to upload the model (has another part on the agg.node
        # side, in sec * 1000000000) (Target < 2s)
        uc6_02_end = time_ns()
        if rounds_involved > 1:
            self._post_kpi("02", uc6_02_end - uc6_02_start_obu)

        self._post_kpi("01", uc6_01_end)

        if database_changed:
            try:
                # Update this instance, not the module-level ``client``
                # global, so the method does not depend on main() having
                # published the object under that name.
                (
                    self.X_train,
                    self.y_train,
                    self.X_test,
                    self.y_test,
                    self.train_df,
                    self.scalers,
                ) = reload_data(new_data)
            except Exception as e:
                # Keep training on the previous data if the new one is unusable.
                print(f"Error with the new data: {e}")

        uc6_05_start = time_ns()

        # Update local model parameters
        self.best_model.set_weights(parameters)

        # Get hyperparameters for this round
        batch_size: int = config["batch_size"]
        epochs: int = config["local_epochs"]

        # Train the model using hyperparameters from config
        history = self.best_model.fit(
            self.X_train,
            self.y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=0.1,
        )

        # Return updated model parameters and results
        parameters_prime = self.best_model.get_weights()
        num_examples_train = len(self.X_train)
        results = {
            "id": client_id,
            "loss": history.history["loss"][0],
            # The "accuracy" entries carry the mean absolute error.
            "accuracy": history.history["mean_absolute_error"][0],
            "val_loss": history.history["val_loss"][0],
            "val_accuracy": history.history["val_mean_absolute_error"][0],
        }
        uc6_05_end = time_ns()
        # Time required to finish a training round (inkl. all local epochs)
        # on the OBU side in sec (target <240s)
        kpi_uc6_05 = (uc6_05_end - uc6_05_start) / 1000000000
        self._post_kpi("05", kpi_uc6_05)

        # Start of the upload measurement that the next round completes.
        uc6_02_start_obu = time_ns()
        return parameters_prime, num_examples_train, results

    def evaluate(self, parameters, config):
        """Evaluate parameters on the locally held test set.

        The predictions for the test set are also mapped back through the
        stored scalers and saved to ``prediction_client1.npy``.

        Returns:
            ``(loss, num_test_examples, {"accuracy": mean_absolute_error})``.
        """
        # Update local model with global parameters
        self.best_model.set_weights(parameters)

        # Evaluate global model parameters on the local test data and return results
        loss, _quantile, error = self.best_model.evaluate(
            self.X_test, self.y_test, batch_size=32
        )
        num_examples_test = len(self.X_test)

        pred = self.best_model.predict(self.X_test)
        # Repeat the prediction once per column so every column's scaler can
        # be applied; only index 2 is kept afterwards.
        pred_copies = np.repeat(pred, 3, axis=-1)
        pred_copies = np.expand_dims(pred_copies, axis=1)
        for index, column in enumerate(self.train_df.columns):
            scaler = self.scalers["scaler_" + column]
            pred_copies[:, :, index] = scaler.inverse_transform(
                pred_copies[:, :, index]
            )
        np.save("prediction_client1.npy", pred_copies[:, :, 2])
        return loss, num_examples_test, {"accuracy": error}
|
||||
|
||||
|
||||
def main() -> None:
    """Prepare data and model, wait for the Agg.Node, then start the client.

    Steps: process the initial dataset and report how long that took (KPI 04),
    load and compile the stored model, poll the KPI server's
    ``/check_connection`` endpoint up to 40 times, and finally start the
    Flower client against ``server_ip``.
    """
    global kpi_uc6_04, client

    uc6_04_start = time_ns()
    X_train, y_train, X_test, y_test, train_df, scalers = reload_data(data_df)
    uc6_04_end = time_ns()

    # Time required to process training data by OBU in sec (Target <60s)
    kpi_uc6_04 = (uc6_04_end - uc6_04_start) / 1000000000
    try:
        # Best-effort upload; the timeout keeps an unreachable KPI server
        # from blocking start-up indefinitely.
        response = requests.post(
            f"http://{server_ip_kpi}/upload_kpi04",
            json={"kpi04": kpi_uc6_04},
            timeout=10,
        )
        if response.status_code != 200:
            print(f"Failed to send KPI_04. Status code: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error while sending KPI_04: {e}")

    best_model = tf.keras.models.load_model(
        "../resources/best_model_no_tuner_40.h5", compile=False
    )
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_argv)
    best_model.compile(
        optimizer=opt,
        loss=[tilted_loss],
        metrics=[QuantileMetric(), keras.metrics.MeanAbsoluteError()],
    )

    # Published as a module-level global for code that refers to ``client``.
    client = LSTMClient(best_model, X_train, y_train, X_test, y_test, train_df, scalers)

    # Wait for the Agg.Node: up to 40 attempts, 5 seconds apart. After the
    # last attempt the client is started regardless.
    for _ in range(40):
        try:
            response = requests.post(
                f"http://{server_ip_kpi}/check_connection", timeout=10
            )
            connected = response.status_code == 200
        except requests.exceptions.RequestException:
            # Only network failures count as "not connected"; a bare
            # ``except`` would also swallow KeyboardInterrupt.
            connected = False

        if connected:
            sleep(5)
            break

        # Also reached on a non-200 answer, so such answers are retried
        # after a pause rather than immediately.
        print(
            "\n\n\n\nConnection to the Agg.Node could not be established, trying again in 5 seconds...\n",
            flush=True,
        )
        sleep(5)

    fl.client.start_numpy_client(
        server_address=server_ip,
        client=client,
    )
|
||||
|
||||
|
||||
# Entry point: start the client only when this file is executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user