age_cache_simulation/00_aoi_caching_simulation/06-multi_aoi_simulation.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "71f85f2a-423f-44d2-b80d-da9ac8d3961a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import simpy\n",
    "import random\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from enum import Enum\n",
    "import os\n",
    "import shutil\n",
    "from tqdm import tqdm\n",
    "import math\n",
    "from dataclasses import dataclass, field\n",
    "from typing import List, Union, Dict\n",
    "import math\n",
    "\n",
    "# Constants\n",
    "SEED = 42\n",
    "ACCESS_COUNT_LIMIT = 1000   # Minimum number of requests every object must receive before the simulation stops\n",
    "EXPERIMENT_BASE_DIR = \"./experiments/\"\n",
    "TEMP_BASE_DIR = \"./.aoi_cache/\"\n",
    "\n",
    "ZIPF_CONSTANT = 2      # Shape parameter for the Zipf distribution (controls skewness); must be > 1\n",
    "\n",
    "# Set random seeds\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "\n",
    "os.makedirs(TEMP_BASE_DIR, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d88effd8-d92b-47d1-9e15-527166073e81",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Types of cache\n",
    "class EvictionStrategy(Enum):\n",
    "    # On a miss with a full cache: evict the entry with the smallest last-access time\n",
    "    LRU = 1\n",
    "    # On a miss with a full cache: evict a randomly chosen entry\n",
    "    RANDOM_EVICTION = 2\n",
    "    # Entries leave the cache when their TTL expires; a miss on a full cache is not admitted\n",
    "    TTL = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1d6a3c67-f9a5-4d9c-8ade-e1ca6944867c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One object of the simulated backing database together with its per-object simulation parameters\n",
    "@dataclass\n",
    "class DatabaseObject:\n",
    "    id: int\n",
    "    data: str\n",
    "    # Request rate: times between requests are drawn from an exponential distribution with mean 1/lambda_value\n",
    "    lambda_value: int\n",
    "    # Refresh rate: times between refreshes of a cached copy are exponential with mean 1/mu_value; None means the object is never refreshed\n",
    "    mu_value: Union[float, None]\n",
    "    # Time-to-live added to the current time when the object is cached or hit; None means the object never expires\n",
    "    ttl: Union[float, None]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f40af914-a6c3-4e44-b7de-b3b40a743fb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A cached copy of a DatabaseObject plus the timers the cache uses for ageing, refreshing and expiry\n",
    "@dataclass\n",
    "class CacheObject:\n",
    "    id: int # id of object\n",
    "    data: DatabaseObject # body of object\n",
    "    initial_fetch_timer: float # time at which the object was initially pulled into the cache (object_start_time)\n",
    "    age_timer: float # time at which the object's data was last fetched from the database (initial fetch or latest refresh)\n",
    "    last_access: float # time at which the object was last accessed\n",
    "    next_refresh: Union[float, None] # scheduled time of the next refresh from the database (None if the object has no mu_value)\n",
    "    next_expiry: Union[float, None] # scheduled time for the object to expire (None if the object has no ttl); pushed back on every hit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "00a944e4-842b-49ba-bb36-587d9c12fdf4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base class for all cache types\n",
    "@dataclass\n",
    "class SimulationConfig:\n",
    "    \"\"\"Configuration shared by every cache simulation variant.\n",
    "\n",
    "    Attributes:\n",
    "        db_objects: Number of database objects to create, or the list of\n",
    "            DatabaseObject instances once generate_objects()/from_file() ran.\n",
    "        cache_size: Maximum number of objects the cache may hold.\n",
    "        eviction_strategy: Eviction policy the cache applies.\n",
    "    \"\"\"\n",
    "    db_objects: Union[int, List[DatabaseObject]]\n",
    "    cache_size: int\n",
    "    eviction_strategy: EvictionStrategy\n",
    "\n",
    "    def __post_init__(self):\n",
    "        if not hasattr(self, 'eviction_strategy') or self.eviction_strategy is None:\n",
    "            raise ValueError(\"Eviction strategy must be defined in subclasses.\")\n",
    "\n",
    "    def __repr__(self):\n",
    "        # self.__class__ is the concrete (sub)class, so subclasses inherit a correctly labelled repr\n",
    "        db_object_count = self._object_count()\n",
    "        return f\"[{self.__class__.__name__}] Database Object Count: {db_object_count}, Cache Size: {self.cache_size}, Eviction Strategy: {self.eviction_strategy}\"\n",
    "\n",
    "    def _object_count(self):\n",
    "        \"\"\"Return how many database objects this configuration describes.\"\"\"\n",
    "        return self.db_objects if isinstance(self.db_objects, int) else len(self.db_objects)\n",
    "\n",
    "    def _read_columns(self, path, *column_names):\n",
    "        \"\"\"Load the CSV at ``path`` and return ``(object_count, [column, ...])``.\n",
    "\n",
    "        The object count is taken from ``db_objects`` whether it is still an int\n",
    "        or already a list, so loading a file twice keeps working.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If the file has fewer rows than objects are required.\n",
    "            KeyError: If one of ``column_names`` is missing from the file.\n",
    "        \"\"\"\n",
    "        df = pd.read_csv(path)\n",
    "        object_count = self._object_count()\n",
    "        if len(df) < object_count:\n",
    "            raise ValueError(f\"{path} has {len(df)} rows but {object_count} database objects are required.\")\n",
    "        return object_count, [df[name] for name in column_names]\n",
    "\n",
    "    def generate_objects(self):\n",
    "        \"\"\"Replace an integer ``db_objects`` by that many objects with Zipf-distributed lambdas.\"\"\"\n",
    "        if isinstance(self.db_objects, int):\n",
    "            self.db_objects = [\n",
    "                DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=np.random.zipf(ZIPF_CONSTANT), mu_value=None, ttl=None)\n",
    "                for i in range(self.db_objects)\n",
    "            ]\n",
    "\n",
    "    def from_file(self, path: str, lambda_column_name: str):\n",
    "        \"\"\"Create the database objects with lambdas read row by row from a CSV column.\"\"\"\n",
    "        object_count, (lambdas,) = self._read_columns(path, lambda_column_name)\n",
    "\n",
    "        # .iloc reads by row position, independent of the index labels of the file\n",
    "        self.db_objects = [\n",
    "            DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=lambdas.iloc[i], mu_value=None, ttl=None)\n",
    "            for i in range(object_count)\n",
    "        ]\n",
    "\n",
    "# Specific cache type variants\n",
    "@dataclass\n",
    "class TTLSimulation(SimulationConfig):\n",
    "    \"\"\"Configuration for a cache whose entries leave the cache when their TTL expires.\"\"\"\n",
    "    eviction_strategy: EvictionStrategy = field(default=EvictionStrategy.TTL, init=False)\n",
    "\n",
    "    def __repr__(self):\n",
    "        # Defined explicitly so that @dataclass does not generate a field-based __repr__ for this subclass\n",
    "        return super().__repr__()\n",
    "\n",
    "    def generate_objects(self, fixed_ttl):\n",
    "        \"\"\"Replace an integer ``db_objects`` by objects with Zipf-distributed lambdas and the TTL ``fixed_ttl``.\"\"\"\n",
    "        if isinstance(self.db_objects, int):\n",
    "            self.db_objects = [\n",
    "                DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=np.random.zipf(ZIPF_CONSTANT), mu_value=None, ttl=fixed_ttl)\n",
    "                for i in range(self.db_objects)\n",
    "            ]\n",
    "\n",
    "    def from_file(self, path: str, lambda_column_name: str, ttl_column_name: str):\n",
    "        \"\"\"Create the database objects with lambdas and TTLs read row by row from CSV columns.\"\"\"\n",
    "        object_count, (lambdas, ttls) = self._read_columns(path, lambda_column_name, ttl_column_name)\n",
    "\n",
    "        self.db_objects = [\n",
    "            DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=lambdas.iloc[i], mu_value=None, ttl=ttls.iloc[i])\n",
    "            for i in range(object_count)\n",
    "        ]\n",
    "\n",
    "@dataclass\n",
    "class LRUSimulation(SimulationConfig):\n",
    "    \"\"\"Configuration for a cache that evicts the least recently used entry.\"\"\"\n",
    "    eviction_strategy: EvictionStrategy = field(default=EvictionStrategy.LRU, init=False)\n",
    "\n",
    "    def __repr__(self):\n",
    "        # Defined explicitly so that @dataclass does not generate a field-based __repr__ for this subclass\n",
    "        return super().__repr__()\n",
    "\n",
    "\n",
    "@dataclass\n",
    "class RandomEvictionSimulation(SimulationConfig):\n",
    "    \"\"\"Configuration for a cache that evicts a randomly chosen entry.\"\"\"\n",
    "    eviction_strategy: EvictionStrategy = field(default=EvictionStrategy.RANDOM_EVICTION, init=False)\n",
    "\n",
    "    def __repr__(self):\n",
    "        # Defined explicitly so that @dataclass does not generate a field-based __repr__ for this subclass\n",
    "        return super().__repr__()\n",
    "\n",
    "@dataclass\n",
    "class RefreshSimulation(TTLSimulation):\n",
    "    \"\"\"TTL configuration whose objects additionally carry a refresh rate (mu).\"\"\"\n",
    "\n",
    "    def __repr__(self):\n",
    "        # Defined explicitly so that @dataclass does not generate a field-based __repr__ for this subclass\n",
    "        return super().__repr__()\n",
    "\n",
    "    def generate_objects(self, fixed_ttl, max_refresh_rate):\n",
    "        \"\"\"Like TTLSimulation.generate_objects, with mu drawn uniformly from [1, max_refresh_rate).\"\"\"\n",
    "        if isinstance(self.db_objects, int):\n",
    "            self.db_objects = [\n",
    "                DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=np.random.zipf(ZIPF_CONSTANT), mu_value=np.random.uniform(1, max_refresh_rate), ttl=fixed_ttl)\n",
    "                for i in range(self.db_objects)\n",
    "            ]\n",
    "\n",
    "    def from_file(self, path: str, lambda_column_name: str, ttl_column_name: str, mu_column_name: str):\n",
    "        \"\"\"Create the database objects with lambdas, TTLs and mus read row by row from CSV columns.\"\"\"\n",
    "        object_count, (lambdas, ttls, mus) = self._read_columns(path, lambda_column_name, ttl_column_name, mu_column_name)\n",
    "\n",
    "        self.db_objects = [\n",
    "            DatabaseObject(id=i, data=f\"Generated Object {i}\", lambda_value=lambdas.iloc[i], mu_value=mus.iloc[i], ttl=ttls.iloc[i])\n",
    "            for i in range(object_count)\n",
    "        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5cea042f-e9fc-4a1e-9750-de212ca70601",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Database:\n",
    "    \"\"\"In-memory stand-in for the backing database, keyed by position in the input list.\"\"\"\n",
    "    data: Dict[int, DatabaseObject]\n",
    "\n",
    "    def __init__(self, data: List[DatabaseObject]):\n",
    "        # Key every object by its index in the list it was passed in\n",
    "        self.data = dict(enumerate(data))\n",
    "\n",
    "    def get_object(self, obj_id):\n",
    "        \"\"\"Return the object stored under ``obj_id``, or None if the id is unknown.\"\"\"\n",
    "        return self.data.get(obj_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "499bf543-b2c6-4e4d-afcc-0a6665ce3ae1",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Cache:\n",
    "    \"\"\"Simulated cache in front of the Database that also collects per-object statistics.\n",
    "\n",
    "    The eviction strategy decides what happens on a miss while the cache is full:\n",
    "    LRU and RANDOM_EVICTION make room by evicting an entry, TTL does not admit\n",
    "    the requested object.\n",
    "    \"\"\"\n",
    "    capacity: int\n",
    "    eviction_strategy: EvictionStrategy\n",
    "    cache_size_over_time: List[tuple]  # (simulation time, number of cached objects) samples\n",
    "    storage: Dict[int, CacheObject]\n",
    "    hits: Dict[int, int] # hit counter for each object\n",
    "    misses: Dict[int, int] # miss counter for each object\n",
    "    access_count: Dict[int, int] # access counter for each object (should be hit+miss)\n",
    "    next_request: Dict[int, float] # scheduled time for each object to be requested\n",
    "    cumulative_age: Dict[int, List[float]] # list of ages of each object at the time it was requested (current time - age_timer)\n",
    "    cumulative_cache_time: Dict[int, List[float]] # list of total time of each object spent in cache when it was evicted (current time - initial fetch time)\n",
    "    request_log: Dict[int, List[float]] # list of request timestamps per object (see NOTE in get())\n",
    "\n",
    "    def __init__(self, env, db, simulation_config):\n",
    "        \"\"\"Create an empty cache and draw the first request time of every database object.\"\"\"\n",
    "        self.env = env\n",
    "        self.db = db\n",
    "        self.capacity = simulation_config.cache_size\n",
    "        self.eviction_strategy = simulation_config.eviction_strategy\n",
    "        self.cache_size_over_time = []\n",
    "        self.storage = {}\n",
    "\n",
    "        db_object_count = len(self.db.data)\n",
    "\n",
    "        self.hits = {i: 0 for i in range(db_object_count)}\n",
    "        self.misses = {i: 0 for i in range(db_object_count)}\n",
    "        self.access_count = {i: 0 for i in range(db_object_count)}\n",
    "        # First request of every object: exponential waiting time with mean 1/lambda\n",
    "        self.next_request = {i: np.random.exponential(1/self.db.data[i].lambda_value) for i in range(db_object_count)}\n",
    "        self.cumulative_age = {i: [] for i in range(db_object_count)}\n",
    "        self.cumulative_cache_time = {i: [] for i in range(db_object_count)}\n",
    "        self.request_log = {i: [] for i in range(db_object_count)}\n",
    "\n",
    "    def get(self, obj_id):\n",
    "        \"\"\"Serve a client request for ``obj_id`` and schedule the object's next request.\n",
    "\n",
    "        Returns:\n",
    "            The CacheObject served from or newly inserted into the cache, or None\n",
    "            when the object was not admitted (full TTL cache, or a cache without\n",
    "            any capacity).\n",
    "        \"\"\"\n",
    "        assert len(self.storage) <= self.capacity, f\"Too many objects in cache ({len(self.storage)}).\"\n",
    "\n",
    "        # Schedule next request\n",
    "        next_request = self.env.now + np.random.exponential(1/self.db.data[obj_id].lambda_value)\n",
    "        # NOTE(review): this logs the time of the *next* scheduled request rather than the current\n",
    "        # one, so the first request is missing and the last entry never happens - confirm intent.\n",
    "        self.request_log[obj_id].append(next_request)\n",
    "        self.next_request[obj_id] = next_request\n",
    "        self.access_count[obj_id] += 1\n",
    "\n",
    "        if obj_id in self.storage:\n",
    "            cached = self.storage[obj_id]\n",
    "\n",
    "            # Cache hit: Refresh TTL if TTL-Cache.\n",
    "            # Compare against None explicitly so an expiry time of 0.0 is not mistaken for \"no TTL\".\n",
    "            if cached.next_expiry is not None:\n",
    "                assert self.env.now <= cached.next_expiry, f\"[{self.env.now:.2f}] Cache should never hit on an expired cache entry.\"\n",
    "                cached.next_expiry = self.env.now + self.db.data[obj_id].ttl\n",
    "\n",
    "            # Cache hit: increment hit count and update cumulative age\n",
    "            self.hits[obj_id] += 1\n",
    "            age = self.env.now - cached.age_timer\n",
    "            self.cumulative_age[obj_id].append(age)\n",
    "            cached.last_access = self.env.now\n",
    "\n",
    "            assert len(self.cumulative_age[obj_id]) == self.access_count[obj_id], f\"[{self.env.now:.2f}] Age values collected and object access count do not match.\"\n",
    "            return cached\n",
    "\n",
    "        # Cache miss: increment miss count; a freshly fetched object has age 0\n",
    "        self.misses[obj_id] += 1\n",
    "        self.cumulative_age[obj_id].append(0)\n",
    "\n",
    "        # When full cache: If Non-TTL-Cache: Evict. If TTL-Cache: Don't add to Cache.\n",
    "        if len(self.storage) == self.capacity:\n",
    "            if not self.storage:\n",
    "                # Cache without capacity: nothing to evict and nothing can be stored\n",
    "                return None\n",
    "            if self.eviction_strategy == EvictionStrategy.LRU:\n",
    "                self.evict_oldest()\n",
    "            elif self.eviction_strategy == EvictionStrategy.RANDOM_EVICTION:\n",
    "                self.evict_random()\n",
    "            elif self.eviction_strategy == EvictionStrategy.TTL:\n",
    "                return None\n",
    "\n",
    "        # Cache miss: Construct CacheObject from Database Object\n",
    "        db_object = self.db.get_object(obj_id)\n",
    "        now = self.env.now\n",
    "        next_refresh = (now + np.random.exponential(1/db_object.mu_value)) if db_object.mu_value is not None else None\n",
    "        next_expiry = (now + db_object.ttl) if db_object.ttl is not None else None\n",
    "        cache_object = CacheObject(id=obj_id, data=db_object,\n",
    "                                   initial_fetch_timer=now, age_timer=now,\n",
    "                                   last_access=now, next_refresh=next_refresh, next_expiry=next_expiry\n",
    "                                  )\n",
    "        self.storage[obj_id] = cache_object\n",
    "\n",
    "        assert len(self.cumulative_age[obj_id]) == self.access_count[obj_id], f\"[{self.env.now:.2f}] Age values collected and object access count do not match.\"\n",
    "        return cache_object\n",
    "\n",
    "    def refresh_object(self, obj_id):\n",
    "        \"\"\"Re-fetch a cached object from the database, reset its age and schedule the next refresh.\n",
    "\n",
    "        The expiry time of the entry is left untouched.\n",
    "        \"\"\"\n",
    "        assert obj_id in self.storage, f\"[{self.env.now:.2f}] Refreshed object has to be in cache\"\n",
    "        db_object = self.db.get_object(obj_id)\n",
    "        cached = self.storage[obj_id]\n",
    "\n",
    "        cached.data = db_object\n",
    "        cached.age_timer = self.env.now\n",
    "        cached.next_refresh = self.env.now + np.random.exponential(1/db_object.mu_value)\n",
    "\n",
    "    def _remove(self, obj_id):\n",
    "        \"\"\"Record how long ``obj_id`` stayed in the cache and drop it from storage.\"\"\"\n",
    "        self.cumulative_cache_time[obj_id].append(self.env.now - self.storage[obj_id].initial_fetch_timer)\n",
    "        del self.storage[obj_id]\n",
    "\n",
    "    def evict_oldest(self):\n",
    "        \"\"\"Remove the least recently accessed item from the cache to make space.\"\"\"\n",
    "        assert self.capacity == len(self.storage), f\"[{self.env.now:.2f}] Expecting cache to be at capacity\"\n",
    "        oldest_id = min(self.storage.items(), key=lambda item: item[1].last_access)[0]\n",
    "        self._remove(oldest_id)\n",
    "\n",
    "    def evict_random(self):\n",
    "        \"\"\"Remove a random item from the cache to make space.\"\"\"\n",
    "        assert self.capacity == len(self.storage), f\"[{self.env.now:.2f}] Expecting cache to be at capacity\"\n",
    "        random_id = np.random.choice(list(self.storage.keys()))  # Select a random key from the cache\n",
    "        self._remove(random_id)\n",
    "\n",
    "    def check_expired(self, obj_id):\n",
    "        \"\"\"Remove object if its TTL expired.\"\"\"\n",
    "        assert self.storage, f\"[{self.env.now:.2f}] Expecting cache to be not empty\"\n",
    "        assert self.env.now >= self.storage[obj_id].next_expiry\n",
    "        self._remove(obj_id)\n",
    "\n",
    "    def record_cache_state(self):\n",
    "        \"\"\"Record the current cache state (number of objects in cache) over time.\"\"\"\n",
    "        self.cache_size_over_time.append((self.env.now, len(self.storage)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "687f5634-8edf-4337-b42f-bbb292d47f0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def client_request_process(env, cache, event):\n",
    "    \"\"\"Client process that makes requests for objects from the cache.\n",
    "\n",
    "    Repeatedly picks the earliest pending event (client request, TTL expiry or\n",
    "    refresh), advances the simulation clock to it and dispatches it to the cache.\n",
    "    Once every object was requested at least ACCESS_COUNT_LIMIT times, the time\n",
    "    the remaining entries spent in the cache is recorded, ``event`` is triggered\n",
    "    and the process ends.\n",
    "    \"\"\"\n",
    "    def _as_time(timestamp):\n",
    "        # Entries without TTL / refresh rate carry None: treat that as \"never\"\n",
    "        return float('inf') if timestamp is None else timestamp\n",
    "\n",
    "    last_print = 0\n",
    "    with tqdm(total=ACCESS_COUNT_LIMIT, desc=\"Progress\", leave=True) as pbar:\n",
    "        while True:\n",
    "            request_id, next_request = min(cache.next_request.items(), key=lambda x: x[1])\n",
    "            expiry_id, next_expiry = -1, float('inf')\n",
    "            refresh_id, next_refresh = -1, float('inf')\n",
    "\n",
    "            if cache.storage:\n",
    "                expiry_id = min(cache.storage, key=lambda i: _as_time(cache.storage[i].next_expiry))\n",
    "                next_expiry = _as_time(cache.storage[expiry_id].next_expiry)\n",
    "                refresh_id = min(cache.storage, key=lambda i: _as_time(cache.storage[i].next_refresh))\n",
    "                next_refresh = _as_time(cache.storage[refresh_id].next_refresh)\n",
    "\n",
    "            # On equal timestamps min() keeps the first entry: request, then expiry, then refresh\n",
    "            events = [\n",
    "                (\"request\", request_id, next_request),\n",
    "                (\"expiry\", expiry_id, next_expiry),\n",
    "                (\"refresh\", refresh_id, next_refresh)\n",
    "            ]\n",
    "            event_kind, event_id, event_timestamp = min(events, key=lambda x: x[2])\n",
    "\n",
    "            # The clock can already be a rounding step past the event (tied or near-tied\n",
    "            # events), so never wait a negative amount of time.\n",
    "            target_time = max(event_timestamp, env.now)\n",
    "            wait_time = target_time - env.now\n",
    "            # Round up: env.now + wait_time must not fall short of the event time\n",
    "            while env.now + wait_time < target_time:\n",
    "                wait_time += math.ulp(target_time)\n",
    "\n",
    "            yield env.timeout(wait_time)\n",
    "            if event_kind == \"request\":\n",
    "                assert env.now >= next_request, f\"[{env.now}] Time for request should've been reached for Object {request_id}\"\n",
    "                cache.get(request_id)\n",
    "            elif event_kind == \"expiry\":\n",
    "                assert env.now >= next_expiry, f\"[{env.now}] Time for expiry should've been reached for Object {expiry_id}\"\n",
    "                cache.check_expired(expiry_id)\n",
    "            else:\n",
    "                assert env.now >= next_refresh, f\"[{env.now}] Time for refresh should've been reached for Object {refresh_id}\"\n",
    "                cache.refresh_object(refresh_id)\n",
    "\n",
    "            # For progress bar: redraw at most once per whole unit of simulated time\n",
    "            if int(env.now) != last_print:\n",
    "                last_print = int(env.now)\n",
    "                pbar.n = min(cache.access_count.values())\n",
    "                pbar.refresh()\n",
    "\n",
    "            # Simulation stop condition\n",
    "            finished = all(access_count >= ACCESS_COUNT_LIMIT for access_count in cache.access_count.values())\n",
    "            if finished:\n",
    "                print(f\"Simulation ended after {env.now} seconds.\")\n",
    "                # Account for the objects that are still cached when the simulation ends\n",
    "                for obj_id in cache.storage.keys():\n",
    "                    cache.cumulative_cache_time[obj_id].append(env.now - cache.storage[obj_id].initial_fetch_timer)\n",
    "                pbar.n = ACCESS_COUNT_LIMIT\n",
    "                pbar.refresh()\n",
    "                event.succeed()\n",
    "\n",
    "            cache.record_cache_state()\n",
    "\n",
    "            if finished:\n",
    "                # End the process so no further event is scheduled and the progress bar is closed\n",
    "                break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c8516830-9880-4d9e-a91b-000338baf9d6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "class Simulation:\n",
    "    \"\"\"Wires a SimPy environment, the database and the cache together for one simulation run.\n",
    "\n",
    "    Attributes:\n",
    "        env: The SimPy environment driving the run.\n",
    "        db: Database built from the configuration's objects.\n",
    "        cache: Cache under test.\n",
    "        end_time: Simulation time at which the run finished; None until\n",
    "            run_simulation() completed.\n",
    "    \"\"\"\n",
    "    def __init__(self, simulation_config: Union[TTLSimulation, LRUSimulation, RandomEvictionSimulation, RefreshSimulation]):\n",
    "        \"\"\"Build the simulation components.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If the configuration's objects were not created yet.\n",
    "        \"\"\"\n",
    "        if isinstance(simulation_config.db_objects, int):\n",
    "            raise ValueError(\"simulation_config.db_objects is still an object count; call generate_objects() or from_file() first.\")\n",
    "\n",
    "        # Initialize simulation environment\n",
    "        self.env = simpy.Environment()\n",
    "\n",
    "        # Instantiate components\n",
    "        self.db = Database(simulation_config.db_objects)\n",
    "        self.cache = Cache(self.env, self.db, simulation_config)\n",
    "\n",
    "        # Defined up front so the attribute exists even if the run is interrupted\n",
    "        self.end_time = None\n",
    "\n",
    "    def run_simulation(self):\n",
    "        \"\"\"Run the client process until it triggers the stop event and store the end time.\"\"\"\n",
    "        # Start processes\n",
    "        stop_event = self.env.event()\n",
    "        self.env.process(client_request_process(self.env, self.cache, stop_event))\n",
    "\n",
    "        # Run the simulation\n",
    "        self.env.run(until=stop_event)\n",
    "        self.end_time = self.env.now"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e269b607-16b9-46d0-8a97-7324f2002c72",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate with a Cache that does random evictions, We'll have 100 Database Objects and a Cache Size of 10\n",
    "# We'll generate lambdas from a zipf distribution\n",
    "# config = RandomEvictionSimulation(100, 10)\n",
    "# config.generate_objects()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "33fdc5fd-1f39-4b51-b2c7-6ea6acf2b753",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate with a Cache that does lru, We'll have 100 Database Objects and a Cache Size of 10\n",
    "# Lambdas are read per object from the 'Lambda' column of the input CSV\n",
    "config = LRUSimulation(100, 10)\n",
    "config.from_file('./input/2024-12-14/results.csv', 'Lambda')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6c391bfd-b294-4ff7-8b22-51777368a6b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate with a Cache that does Refreshes with TTL based eviction, We'll have 100 Database Objects and a Cache Size of 10\n",
    "# Lambdas, TTLs and refresh rates (mu) are read per object from the 'Lambda', 'TTL_2' and 'u_opt_2' columns of the input CSV\n",
    "# config = RefreshSimulation(100, 10)\n",
    "# config.from_file(path='./input/2024-12-13/output.csv', lambda_column_name='Lambda', ttl_column_name='TTL_2', mu_column_name='u_opt_2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0a444c9d-53dd-4cab-b8f1-100ad3ab213a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate with a Cache that does TTL based eviction, We'll have 100 Database Objects and a Cache Size of 10\n",
    "# We'll take lambdas from the \"lambda\" column of the file \"../calculated.csv\" and the TTLs for each object from the \"optimal_TTL\" column of the same file.\n",
    "# config = TTLSimulation(100, 10)\n",
    "# config.from_file(\"../calculated.csv\", \"lambda\", \"optimal_TTL\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6ac338bd-2094-41d2-8e92-565d03422b87",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the textual description (repr) of the active configuration in TEMP_BASE_DIR\n",
    "with open(f\"{TEMP_BASE_DIR}/simulation_config.txt\", 'w') as f:\n",
    "    f.write(str(config))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "66f65699-a3c9-48c4-8f1f-b9d7834c026a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Progress:   0%|                                              | 0/1000 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.04482947830142957\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 'error_wait_time' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[8], line 39\u001b[0m, in \u001b[0;36mclient_request_process\u001b[0;34m(env, cache, event)\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[38;5;28mprint\u001b[39m(wait_time)\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43merror_wait_time\u001b[49m)\n\u001b[1;32m     40\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m(env\u001b[38;5;241m.\u001b[39mtimeout(wait_time))\n",
      "\u001b[0;31mNameError\u001b[0m: name 'error_wait_time' is not defined",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "File \u001b[0;32m<timed exec>:2\u001b[0m\n",
      "Cell \u001b[0;32mIn[9], line 17\u001b[0m, in \u001b[0;36mSimulation.run_simulation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv\u001b[38;5;241m.\u001b[39mprocess(client_request_process(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache, stop_event))\n\u001b[1;32m     16\u001b[0m \u001b[38;5;66;03m# Run the simulation\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43muntil\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstop_event\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     18\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mend_time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv\u001b[38;5;241m.\u001b[39mnow\n",
      "File \u001b[0;32m~/.genesis/workspace/python-virtualenv/graphs/lib/python3.12/site-packages/simpy/core.py:246\u001b[0m, in \u001b[0;36mEnvironment.run\u001b[0;34m(self, until)\u001b[0m\n\u001b[1;32m    244\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    245\u001b[0m     \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 246\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    247\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m StopSimulation \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m    248\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m exc\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m]  \u001b[38;5;66;03m# == until.value\u001b[39;00m\n",
      "File \u001b[0;32m~/.genesis/workspace/python-virtualenv/graphs/lib/python3.12/site-packages/simpy/core.py:204\u001b[0m, in \u001b[0;36mEnvironment.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    202\u001b[0m exc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(event\u001b[38;5;241m.\u001b[39m_value)(\u001b[38;5;241m*\u001b[39mevent\u001b[38;5;241m.\u001b[39m_value\u001b[38;5;241m.\u001b[39margs)\n\u001b[1;32m    203\u001b[0m exc\u001b[38;5;241m.\u001b[39m__cause__ \u001b[38;5;241m=\u001b[39m event\u001b[38;5;241m.\u001b[39m_value\n\u001b[0;32m--> 204\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n",
      "\u001b[0;31mNameError\u001b[0m: name 'error_wait_time' is not defined"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "simulation = Simulation(config)\n",
    "simulation.run_simulation()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "6f900c68-1f34-48d1-b346-ef6ea6911fa5",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'Simulation' object has no attribute 'end_time'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[16], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m cache \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39mcache\n\u001b[1;32m      2\u001b[0m db \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39mdb\n\u001b[0;32m----> 3\u001b[0m simulation_end_time \u001b[38;5;241m=\u001b[39m \u001b[43msimulation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend_time\u001b[49m\n\u001b[1;32m      4\u001b[0m database_object_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(db\u001b[38;5;241m.\u001b[39mdata)\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'Simulation' object has no attribute 'end_time'"
     ]
    }
   ],
   "source": [
    "# Expose the components of the simulation under notebook-level names for the evaluation cells below\n",
    "cache = simulation.cache\n",
    "db = simulation.db\n",
    "# end_time is set by run_simulation(); it is only meaningful after the run above completed\n",
    "simulation_end_time = simulation.end_time\n",
    "database_object_count = len(db.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b6f7c1f-ea54-4496-bb9a-370cee2d2751",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "statistics = []\n",
    "# Calculate and print hit rate and average age for each object\n",
    "for obj_id in range(database_object_count):\n",
    "    if cache.access_count[obj_id] == 0:\n",
    "        # Objects that were never requested get no statistics\n",
    "        continue\n",
    "\n",
    "    db_object = db.data[obj_id]\n",
    "    has_ttl = db_object.ttl is not None\n",
    "    expected_hit_rate = None\n",
    "\n",
    "    hit_rate = cache.hits[obj_id] / max(1, cache.access_count[obj_id])\n",
    "    if has_ttl:\n",
    "        # Expected hit rate under a TTL: 1 - exp(-lambda * ttl)\n",
    "        expected_hit_rate = 1-math.exp(-db_object.lambda_value*(db_object.ttl))\n",
    "    # Total time spent in the cache relative to the simulated time\n",
    "    avg_cache_time = sum(cache.cumulative_cache_time[obj_id]) / max(1, simulation_end_time)\n",
    "    avg_age = sum(cache.cumulative_age[obj_id]) / max(len(cache.cumulative_age[obj_id]), 1)\n",
    "    expected_age = hit_rate / (db_object.lambda_value * (1 - pow(hit_rate,2)))\n",
    "\n",
    "    # One report line per object; the expected hit rate only exists for objects with a TTL\n",
    "    report_parts = [f\"Object {obj_id}: Hit Rate = {hit_rate:.2f}\"]\n",
    "    if has_ttl:\n",
    "        report_parts.append(f\"Expected Hit Rate = {expected_hit_rate:.2f}\")\n",
    "    report_parts.append(f\"Average Time spend in Cache: {avg_cache_time:.2f}\")\n",
    "    report_parts.append(f\"Average Age = {avg_age:.2f}\")\n",
    "    report_parts.append(f\"Expected Age = {expected_age:.2f}\")\n",
    "    output = \", \".join(report_parts)\n",
    "    print(output)\n",
    "\n",
    "    # Keys are inserted in the column order of the resulting table\n",
    "    record = {\"obj_id\": obj_id, \"hit_rate\": hit_rate}\n",
    "    if has_ttl:\n",
    "        record[\"expected_hit_rate\"] = expected_hit_rate\n",
    "    record[\"avg_cache_time\"] = avg_cache_time\n",
    "    record[\"avg_age\"] = avg_age\n",
    "    record[\"expected_age\"] = expected_age\n",
    "    statistics.append(record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2d18372-cdba-4151-ae32-5bf45466bf94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per analysed object; written out as-is.\n",
    "stats = pd.DataFrame(statistics)\n",
    "stats.to_csv(f\"{TEMP_BASE_DIR}/hit_age.csv\", index=False)\n",
    "\n",
    "# Summary statistics over all objects (the id column is not a measurement).\n",
    "overall_stats = stats.drop(columns=\"obj_id\").describe()\n",
    "overall_stats.to_csv(f\"{TEMP_BASE_DIR}/overall_hit_age.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be7e67e7-4533-438a-ab65-ca813f48052a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reset both names; the next cell only assigns them when `stats` has an\n",
    "# 'expected_hit_rate' column.\n",
    "expected_hit_rate = expected_hit_rate_delta = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80971714-44f1-47db-9e89-85be7c885bde",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-object counters kept by the cache and the mu/lambda values from the database,\n",
    "# each as a one-column frame indexed by object id.\n",
    "access_count = pd.DataFrame.from_dict(cache.access_count, orient='index', columns=['access_count'])\n",
    "hits = pd.DataFrame.from_dict(cache.hits, orient='index', columns=['hits'])\n",
    "misses = pd.DataFrame.from_dict(cache.misses, orient='index', columns=['misses'])\n",
    "mu = pd.DataFrame.from_dict({obj_id: db.data[obj_id].mu_value for obj_id in range(database_object_count)}, orient='index', columns=['mu'])\n",
    "lmbda = pd.DataFrame.from_dict({obj_id: db.data[obj_id].lambda_value for obj_id in range(database_object_count)}, orient='index', columns=['lambda'])\n",
    "\n",
    "# `stats` only has rows for objects that were requested at least once, so align it\n",
    "# on its own obj_id column instead of assuming one row per database object\n",
    "# (assigning range(database_object_count) as index fails on a length mismatch).\n",
    "stats_by_obj = stats.set_index('obj_id').rename_axis(None)\n",
    "\n",
    "hit_rate = stats_by_obj[['hit_rate']]\n",
    "if 'expected_hit_rate' in stats:\n",
    "    expected_hit_rate = stats_by_obj[['expected_hit_rate']]\n",
    "    expected_hit_rate_delta = (stats_by_obj['hit_rate'] - stats_by_obj['expected_hit_rate']).to_frame('expected_hit_rate_delta')\n",
    "avg_cache_time = stats_by_obj[['avg_cache_time']]\n",
    "cache_time_delta = (stats_by_obj['hit_rate'] - stats_by_obj['avg_cache_time']).to_frame('cache_time_delta')\n",
    "avg_age = stats_by_obj[['avg_age']]\n",
    "\n",
    "# The raw age samples are stored as their string representation, one cell per object.\n",
    "age_samples = {obj_id: str(samples) for obj_id, samples in cache.cumulative_age.items()}\n",
    "ages = pd.DataFrame.from_dict(age_samples, orient='index', columns=['ages'])\n",
    "\n",
    "# Column order of the resulting table; the expected-hit-rate columns are optional.\n",
    "frames = [access_count, hits, misses, mu, lmbda, hit_rate]\n",
    "if 'expected_hit_rate' in stats:\n",
    "    frames += [expected_hit_rate, expected_hit_rate_delta]\n",
    "frames += [avg_cache_time, cache_time_delta, avg_age, ages]\n",
    "\n",
    "# Inner-join everything on the object id index.\n",
    "merged = frames[0]\n",
    "for frame in frames[1:]:\n",
    "    merged = merged.merge(frame, left_index=True, right_index=True)\n",
    "merged.to_csv(f\"{TEMP_BASE_DIR}/details.csv\", index_label=\"obj_id\")\n",
    "merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01f8f9ee-c278-4a22-8562-ba02e77f5ddd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the recorded (time, size) pairs into two parallel sequences.\n",
    "times, cache_sizes = zip(*cache.cache_size_over_time)\n",
    "\n",
    "# Wide figure showing how many objects the cache held over time.\n",
    "fig, ax = plt.subplots(figsize=(30, 5))\n",
    "ax.plot(times, cache_sizes, label=\"Objects in Cache\")\n",
    "ax.set_xlabel(\"Time (s)\")\n",
    "ax.set_ylabel(\"Number of Cached Objects\")\n",
    "ax.set_title(\"Number of Objects in Cache Over Time\")\n",
    "ax.legend()\n",
    "ax.grid(True)\n",
    "fig.savefig(f\"{TEMP_BASE_DIR}/objects_in_cache_over_time.pdf\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f30a0497-9b2e-4ea9-8ebf-6687de19aaa9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count how many database objects share each lambda value.\n",
    "# np.unique returns the distinct values (sorted) together with their counts.\n",
    "lambda_values, lambda_counts = np.unique(\n",
    "    [db_object.lambda_value for db_object in db.data.values()], return_counts=True\n",
    ")\n",
    "\n",
    "# Bar chart: one bar per distinct lambda value.\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.bar(lambda_values, lambda_counts, color='skyblue')\n",
    "ax.set_xlabel('Lambda')\n",
    "ax.set_ylabel('Occurrences')\n",
    "ax.set_title('Occurrence of each lambda in db')\n",
    "fig.savefig(f\"{TEMP_BASE_DIR}/lambda_distribution.pdf\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c192564b-d3c6-40e1-a614-f7a5ee787c4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scatter of each object's lambda against how often it was accessed.\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.scatter(merged['lambda'], merged['access_count'], alpha=0.7, edgecolor='k')\n",
    "ax.set_title('Lambda vs Access Count', fontsize=14)\n",
    "ax.set_xlabel('Lambda', fontsize=12)\n",
    "ax.set_ylabel('Access Count', fontsize=12)\n",
    "ax.grid(alpha=0.3)\n",
    "\n",
    "fig.savefig(f\"{TEMP_BASE_DIR}/lambda_vs_access_count.pdf\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00a12eea-c805-4209-9143-48fa65619873",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count how many database objects share each mu value after rounding to whole\n",
    "# numbers. Objects without a mu (None) are counted as 0.\n",
    "rounded_mu = np.array([\n",
    "    0.0 if db_object.mu_value is None else db_object.mu_value\n",
    "    for db_object in db.data.values()\n",
    "]).round(0)\n",
    "mu_values, mu_counts = np.unique(rounded_mu, return_counts=True)\n",
    "\n",
    "# Bar chart: one bar per distinct rounded mu value.\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.bar(mu_values, mu_counts, color='skyblue')\n",
    "ax.set_xlabel('Mu (rounded)')\n",
    "ax.set_ylabel('Occurrences')\n",
    "ax.set_title('Occurrence of each mu in db (rounded)')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adbfeb40-76bd-4224-ac45-65c7b2b2cb7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_requests(object_id: int):\n",
    "    \"\"\"Plot the time between consecutive requests for one object.\n",
    "\n",
    "    The line is labelled with the object's id, mu and lambda, read from\n",
    "    `db.data` like the other analysis cells do.\n",
    "\n",
    "    Args:\n",
    "        object_id: Key of the object in `cache.request_log` and `db.data`.\n",
    "    \"\"\"\n",
    "    db_object = db.data[object_id]\n",
    "    lmb = db_object.lambda_value\n",
    "    # mu may be None, which cannot be formatted as a float.\n",
    "    mu_label = \"None\" if db_object.mu_value is None else f\"{db_object.mu_value:.2f}\"\n",
    "\n",
    "    # Differences between successive request timestamps.\n",
    "    inter_request_times = np.diff(np.array(cache.request_log[object_id]))\n",
    "    label = f\"{object_id}, mu:{mu_label}, lambda: {lmb:.2f}\"\n",
    "    pd.DataFrame(inter_request_times, columns=[label]).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f550686-3463-4e50-be83-ceafb27512b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_rate(object_id: int):\n",
    "    \"\"\"Print the measured request rate of one object next to its lambda.\n",
    "\n",
    "    The rate is the number of inter-request intervals divided by the time they\n",
    "    span. Averaging 1/interval instead is dominated by the shortest gaps and\n",
    "    divides by zero when two requests share a timestamp.\n",
    "\n",
    "    Args:\n",
    "        object_id: Key of the object in `cache.request_log` and `db.data`.\n",
    "    \"\"\"\n",
    "    # Time between each pair of consecutive requests.\n",
    "    intervals = np.diff(np.array(cache.request_log[object_id]))\n",
    "    elapsed = intervals.sum()\n",
    "\n",
    "    # Fewer than two requests (or no elapsed time) leaves the rate undefined.\n",
    "    average_rate = intervals.size / elapsed if elapsed > 0 else float(\"nan\")\n",
    "    print(\"Average event rate per second:\", average_rate)\n",
    "    print(\"The lambda is: \", db.data[object_id].lambda_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "135f4a26-a666-4fd5-8f71-1f62abd4bb81",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print `config` for reference. It is not defined in this part of the notebook\n",
    "# (presumably the experiment configuration from an earlier cell -- TODO confirm).\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b47990b1-0231-43ac-8bc5-8340abe4a8b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.makedirs(EXPERIMENT_BASE_DIR, exist_ok=True)\n",
    "# folder_name = experiment_name.replace(\" \", \"_\").replace(\"(\", \"\").replace(\")\", \"\").replace(\".\", \"_\")\n",
    "# folder_path = os.path.join(EXPERIMENT_BASE_DIR, folder_name)\n",
    "# os.makedirs(folder_path, exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db83cad4-7cc6-4702-ae3a-d1af30a561d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# file_names = os.listdir(TEMP_BASE_DIR)\n",
    "    \n",
    "# for file_name in file_names:\n",
    "#     shutil.move(os.path.join(TEMP_BASE_DIR, file_name), folder_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphs",
   "language": "python",
   "name": "graphs"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}