{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "71f85f2a-423f-44d2-b80d-da9ac8d3961a", "metadata": {}, "outputs": [], "source": [ "import simpy\n", "import random\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from enum import Enum\n", "import os\n", "import shutil\n", "from tqdm import tqdm\n", "\n", "# Types of cache\n", "class CacheType(Enum):\n", " LRU = 1\n", " RANDOM_EVICTION = 2\n", "\n", "# Constants\n", "SEED = 42\n", "DATABASE_OBJECTS = 100 # Number of objects in the database\n", "ACCESS_COUNT_LIMIT = 1000 # Total time to run the simulation\n", "EXPERIMENT_BASE_DIR = \"./experiments/\"\n", "TEMP_BASE_DIR = \"./.aoi_cache/\"\n", "\n", "ZIPF_CONSTANT = 2 # Shape parameter for the Zipf distribution (controls skewness) Needs to be: 1< \n", "\n", "# Set random seeds\n", "random.seed(SEED)\n", "np.random.seed(SEED)\n", "\n", "# Initialize simulation environment\n", "env = simpy.Environment()\n", "\n", "os.makedirs(TEMP_BASE_DIR, exist_ok=True)" ] }, { "cell_type": "markdown", "id": "9a37d7a3-3e11-4b89-8dce-6091dd38b16f", "metadata": {}, "source": [ "How to set certain parameters for specific scenarios\n", "\n", "\n", "| Name | Cache Capacity | MAX_REFRESH_RATE | cache_type | CACHE_TTL |\n", "| -------------------- | -------------------- | ---------------- | ------------------------- | --------- |\n", "| Default | DATABASE_OBJECTS | 1< | CacheType.LRU | 5 |\n", "| No Refresh | DATABASE_OBJECTS | 0 | CacheType.LRU | 5 |\n", "| Infinite TTL | DATABASE_OBJECTS / 2 | 0 | CacheType.LRU | 0 |\n", "| Random Eviction (RE) | DATABASE_OBJECTS / 2 | 1< | CacheType.RANDOM_EVICTION | 5 |\n", "| RE without Refresh | DATABASE_OBJECTS / 2 | 0 | CacheType.RANDOM_EVICTION | 5 |\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "3d0ab5b1-162a-42c8-80a3-d31f763101f1", "metadata": {}, "outputs": [], "source": [ "# Configuration (Just example, will be overwritten in next block\n", "\n", "CACHE_CAPACITY = DATABASE_OBJECTS # Maximum number of objects the cache can hold\n", "\n", "# MAX_REFRESH_RATE is used as the maximum for a uniform\n", "# distribution for mu.\n", "# If MAX_REFRESH_RATE is 0, we do not do any refreshes.\n", "MAX_REFRESH_RATE = 0\n", "\n", "cache_type = CacheType.LRU\n", "\n", "# CACHE_TTL is used to determin which TTL to set when an\n", "# object is pulled into the cache\n", "# If CACHE_TTL is set to 0, the TTL is infinite\n", "CACHE_TTL = 5\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "3ff299ca-ec65-453b-b167-9a0f7728a207", "metadata": {}, "outputs": [], "source": [ "configurations = {\n", " \"default\": (DATABASE_OBJECTS, 10, CacheType.LRU, 5),\n", " \"No Refresh\": (DATABASE_OBJECTS, 0, CacheType.LRU, 5),\n", " \"Infinite TTL\": (int(DATABASE_OBJECTS / 2), 0, CacheType.LRU, 0),\n", " \"Random Eviction\": (int(DATABASE_OBJECTS / 2), 10, CacheType.RANDOM_EVICTION, 5),\n", " \"RE without Refresh\": (int(DATABASE_OBJECTS / 2), 0, CacheType.RANDOM_EVICTION, 5),\n", " \"No Refresh (0.5s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 0.5),\n", " \"No Refresh (1.0s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 1),\n", " \"No Refresh (2.0s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 2),\n", " \"No Refresh (3.0s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 3),\n", " \"No Refresh (4.0s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 4),\n", " \"No Refresh (5.0s ttl)\": (DATABASE_OBJECTS, 0, CacheType.LRU, 5),\n", "}\n", "\n", "experiment_name = \"No Refresh (1.0s ttl)\"\n", "config = configurations[experiment_name]\n", "\n", "CACHE_CAPACITY = config[0]\n", "MAX_REFRESH_RATE = config[1]\n", "cache_type = config[2]\n", "CACHE_TTL = config[3]\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "5cea042f-e9fc-4a1e-9750-de212ca70601", "metadata": {}, "outputs": [], "source": [ "class Database:\n", " def __init__(self):\n", " # Each object now has a specific refresh rate 'mu'\n", " self.data = {i: f\"Object {i}\" for i in range(1, DATABASE_OBJECTS + 1)}\n", " self.lambda_values = {i: np.random.zipf(ZIPF_CONSTANT) for i in range(1, DATABASE_OBJECTS + 1)} # Request rate 'lambda' for each object\n", " # Refresh rate 'mu' for each object\n", " if MAX_REFRESH_RATE == 0:\n", " self.mu_values = {i: 0 for i in range(1,DATABASE_OBJECTS + 1)} \n", " else:\n", " self.mu_values = {i: np.random.uniform(1, MAX_REFRESH_RATE) for i in range(1, DATABASE_OBJECTS + 1)}\n", " self.next_request = {i: np.random.exponential(1/self.lambda_values[i]) for i in range(1, DATABASE_OBJECTS + 1)}\n", "\n", "\n", " def get_object(self, obj_id):\n", " # print(f\"[{env.now:.2f}] Database: Fetched {self.data.get(obj_id, 'Unknown')} for ID {obj_id}\")\n", " return self.data.get(obj_id, None)" ] }, { "cell_type": "code", "execution_count": 5, "id": "499bf543-b2c6-4e4d-afcc-0a6665ce3ae1", "metadata": {}, "outputs": [], "source": [ "class Cache:\n", " def __init__(self, env, db, cache_type):\n", " self.cache_type = cache_type\n", " self.env = env\n", " self.db = db\n", " self.storage = {} # Dictionary to store cached objects\n", " self.ttl = {} # Dictionary to store TTLs\n", " self.age = {} # Dictionary to store age of each object\n", " self.cache_size_over_time = [] # To record cache state at each interval\n", " self.cache_next_request_over_time = []\n", " self.request_log = {i: [] for i in range(1, DATABASE_OBJECTS + 1)}\n", " self.hits = {i: 0 for i in range(1, DATABASE_OBJECTS + 1)} # Track hits per object\n", " self.misses = {i: 0 for i in range(1, DATABASE_OBJECTS + 1)} # Track misses per object\n", " self.cumulative_age = {i: 0 for i in range(1, DATABASE_OBJECTS + 1)} # Track cumulative age per object\n", " self.access_count = {i: 0 for i in range(1, DATABASE_OBJECTS + 1)} # Track access count per object\n", " self.next_refresh = {} # Track the next refresh time for each cached object\n", " \n", " def get(self, obj_id):\n", " if obj_id in self.storage and \\\n", " (self.ttl[obj_id] > env.now or CACHE_TTL == 0):\n", " # Cache hit: increment hit count and update cumulative age\n", " self.hits[obj_id] += 1\n", " self.cumulative_age[obj_id] += self.age[obj_id]\n", " self.access_count[obj_id] += 1\n", " else:\n", " # Cache miss: increment miss count\n", " self.misses[obj_id] += 1\n", " self.cumulative_age[obj_id] += 0\n", " self.access_count[obj_id] += 1\n", " \n", " # Fetch the object from the database if it’s not in cache\n", " obj = self.db.get_object(obj_id)\n", " \n", " # If the cache is full, evict the oldest object\n", " if len(self.storage) > CACHE_CAPACITY:\n", " if self.cache_type == CacheType.LRU:\n", " self.evict_oldest()\n", " elif self.cache_type == CacheType.RANDOM_EVICTION:\n", " self.evict_random()\n", " \n", " # Add the object to cache, set TTL, reset age, and schedule next refresh\n", " self.storage[obj_id] = obj\n", " if CACHE_TTL != 0:\n", " self.ttl[obj_id] = env.now + CACHE_TTL\n", " else:\n", " self.ttl[obj_id] = 0\n", " self.age[obj_id] = 0\n", " if MAX_REFRESH_RATE != 0:\n", " self.next_refresh[obj_id] = env.now + np.random.exponential(1/self.db.mu_values[obj_id]) # Schedule refresh\n", "\n", " \n", " def evict_oldest(self):\n", " \"\"\"Remove the oldest item from the cache to make space.\"\"\"\n", " oldest_id = max(self.age, key=self.age.get) # Find the oldest item by age\n", " print(f\"[{env.now:.2f}] Cache: Evicting oldest object {oldest_id} to make space at {self.ttl[oldest_id]:.2f}\")\n", " del self.storage[oldest_id]\n", " del self.ttl[oldest_id]\n", " del self.age[oldest_id]\n", "\n", " def evict_random(self):\n", " \"\"\"Remove a random item from the cache to make space.\"\"\"\n", " random_id = np.random.choice(list(self.storage.keys())) # Select a random key from the cache\n", " print(f\"[{env.now:.2f}] Cache: Evicting random object {random_id} to make space at {self.ttl[random_id]:.2f}\")\n", " del self.storage[random_id]\n", " del self.ttl[random_id]\n", " del self.age[random_id]\n", " \n", " def refresh_object(self, obj_id):\n", " \"\"\"Refresh the object from the database to keep it up-to-date. TTL is increased on refresh.\"\"\"\n", " obj = self.db.get_object(obj_id)\n", " self.storage[obj_id] = obj\n", " if CACHE_TTL != 0:\n", " self.ttl[obj_id] = env.now + CACHE_TTL\n", " else:\n", " self.ttl[obj_id] = 0\n", " self.age[obj_id] = 0\n", " # print(f\"[{env.now:.2f}] Cache: Refreshed object {obj_id}\")\n", " \n", " def age_objects(self):\n", " \"\"\"Increment age of each cached object.\"\"\"\n", " for obj_id in list(self.age.keys()):\n", " if CACHE_TTL != 0:\n", " if self.ttl[obj_id] > env.now:\n", " self.age[obj_id] += 1\n", " # print(f\"[{env.now:.2f}] Cache: Object {obj_id} aged to {self.age[obj_id]}\")\n", " else:\n", " # Remove object if its TTL expired\n", " # print(f\"[{env.now:.2f}] Cache: Object {obj_id} expired\")\n", " del self.storage[obj_id]\n", " del self.ttl[obj_id]\n", " del self.age[obj_id]\n", " else:\n", " self.age[obj_id] += 1\n", " \n", " def record_cache_state(self):\n", " \"\"\"Record the current cache state (number of objects in cache) over time.\"\"\"\n", " self.cache_size_over_time.append((env.now, len(self.storage)))\n", " self.cache_next_request_over_time.append((env.now, self.db.next_request.copy()))" ] }, { "cell_type": "code", "execution_count": 6, "id": "7286d498-aa6c-4efb-bb28-fe29736eab64", "metadata": {}, "outputs": [], "source": [ "def age_cache_process(env, cache):\n", " \"\"\"Process that ages cache objects over time, removes expired items, and refreshes based on object-specific intervals.\"\"\"\n", " while True:\n", " cache.age_objects() # Age objects and remove expired ones\n", "\n", "\n", " if MAX_REFRESH_RATE != 0:\n", " # Refresh objects based on their individual refresh intervals\n", " for obj_id in list(cache.storage.keys()):\n", " # Check if it's time to refresh this object based on next_refresh\n", " if env.now >= cache.next_refresh[obj_id]:\n", " cache.refresh_object(obj_id)\n", " # Schedule the next refresh based on the object's mu\n", " cache.next_refresh[obj_id] = env.now + np.random.exponential(1/cache.db.mu_values[obj_id])\n", " \n", " cache.record_cache_state() # Record cache state at each time step\n", " yield env.timeout(1) # Run every second" ] }, { "cell_type": "code", "execution_count": 7, "id": "687f5634-8edf-4337-b42f-bbb292d47f0f", "metadata": {}, "outputs": [], "source": [ "def client_request_process(env, cache, event):\n", " \"\"\"Client process that makes requests for objects from the cache.\"\"\"\n", " last_print = 0\n", " with tqdm(total=ACCESS_COUNT_LIMIT, desc=\"Progress\", leave=True) as pbar:\n", " while True:\n", " obj_id, next_request = min(cache.db.next_request.items(), key=lambda x: x[1])\n", " yield env.timeout(next_request - env.now)\n", " if (int(env.now) % 1) == 0 and int(env.now) != last_print:\n", " last_print = int(env.now)\n", " pbar.n = min(cache.access_count.values())\n", " pbar.refresh()\n", " if env.now >= next_request:\n", " # print(f\"[{env.now:.2f}] Client: Requesting object {obj_id}\")\n", " cache.get(obj_id)\n", " \n", " # print(f\"[{env.now:.2f}] Client: Schedule next request for {obj_id}\")\n", " next_request = env.now + np.random.exponential(1/cache.db.lambda_values[obj_id])\n", " cache.request_log[obj_id].append(next_request)\n", " cache.db.next_request[obj_id] = next_request\n", " if all(access_count >= ACCESS_COUNT_LIMIT for access_count in cache.access_count.values()):\n", " event.succeed()" ] }, { "cell_type": "code", "execution_count": 8, "id": "c8516830-9880-4d9e-a91b-000338baf9d6", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Instantiate components\n", "db = Database()\n", "cache = Cache(env, db, cache_type)\n", "stop_event = env.event()" ] }, { "cell_type": "code", "execution_count": 9, "id": "2ba34b36-9ed5-4996-9600-11dfd25d8e60", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Progress: 99%|██████████████████████████████████████████████████████████████████████████████████████████ | 99/100 [00:00<00:00, 182.10it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 482 ms, sys: 81.4 ms, total: 563 ms\n", "Wall time: 550 ms\n" ] } ], "source": [ "%%time\n", "\n", "# Start processes\n", "env.process(age_cache_process(env, cache))\n", "env.process(client_request_process(env, cache, stop_event))\n", "\n", "# Run the simulation\n", "env.run(until=stop_event)" ] }, { "cell_type": "code", "execution_count": 21, "id": "83f8287c-8dd8-4ce3-a9dd-4d4d6f834dfe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | avg_age | \n", "expected_age | \n", "
|---|---|---|
| 1 | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "NaN | \n", "NaN | \n", "
| 3 | \n", "NaN | \n", "NaN | \n", "
| 4 | \n", "NaN | \n", "NaN | \n", "
| 5 | \n", "NaN | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "
| 96 | \n", "NaN | \n", "NaN | \n", "
| 97 | \n", "NaN | \n", "NaN | \n", "
| 98 | \n", "NaN | \n", "NaN | \n", "
| 99 | \n", "NaN | \n", "NaN | \n", "
| 100 | \n", "NaN | \n", "NaN | \n", "
100 rows × 2 columns
\n", "| \n", " | access_count | \n", "hits | \n", "misses | \n", "mu | \n", "lambda | \n", "hit_rate | \n", "avg_age | \n", "expected_age | \n", "age_delta | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 1 | \n", "122 | \n", "60 | \n", "62 | \n", "0 | \n", "1 | \n", "0.491803 | \n", "0.245902 | \n", "0.120935 | \n", "0.124966 | \n", "
| 2 | \n", "382 | \n", "288 | \n", "94 | \n", "0 | \n", "3 | \n", "0.753927 | \n", "0.397906 | \n", "0.284203 | \n", "0.113703 | \n", "
| 3 | \n", "127 | \n", "61 | \n", "66 | \n", "0 | \n", "1 | \n", "0.480315 | \n", "0.267717 | \n", "0.115351 | \n", "0.152365 | \n", "
| 4 | \n", "113 | \n", "54 | \n", "59 | \n", "0 | \n", "1 | \n", "0.477876 | \n", "0.238938 | \n", "0.114183 | \n", "0.124755 | \n", "
| 5 | \n", "244 | \n", "163 | \n", "81 | \n", "0 | \n", "2 | \n", "0.668033 | \n", "0.282787 | \n", "0.223134 | \n", "0.059653 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 96 | \n", "139 | \n", "73 | \n", "66 | \n", "0 | \n", "1 | \n", "0.525180 | \n", "0.287770 | \n", "0.137907 | \n", "0.149863 | \n", "
| 97 | \n", "127 | \n", "66 | \n", "61 | \n", "0 | \n", "1 | \n", "0.519685 | \n", "0.267717 | \n", "0.135036 | \n", "0.132680 | \n", "
| 98 | \n", "4578 | \n", "4458 | \n", "120 | \n", "0 | \n", "37 | \n", "0.973788 | \n", "0.455221 | \n", "0.474131 | \n", "-0.018911 | \n", "
| 99 | \n", "482 | \n", "382 | \n", "100 | \n", "0 | \n", "4 | \n", "0.792531 | \n", "0.398340 | \n", "0.314053 | \n", "0.084287 | \n", "
| 100 | \n", "249 | \n", "170 | \n", "79 | \n", "0 | \n", "2 | \n", "0.682731 | \n", "0.337349 | \n", "0.233061 | \n", "0.104289 | \n", "
100 rows × 9 columns
\n", "