{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "920665b8-9204-42df-ab59-1b9324387750",
   "metadata": {},
   "outputs": [],
   "source": [
    "import simpy\n",
    "import random\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\n",
    "# Constants\n",
    "SEED = 256             # Seed applied to both `random` and `np.random` below\n",
    "CACHE_TTL = 5          # Cache TTL in seconds (simulation time); added to env.now when an entry is stored or refreshed\n",
    "CACHE_CAPACITY = 100   # Maximum number of objects the cache can hold; also the number of objects the Database creates\n",
    "ACCESS_COUNT_LIMIT = 10_000   # Access-count threshold checked by the client process before it triggers the stop event (a request count, not a duration)\n",
    "EXPORT_NAME = \"./export.csv\"  # Path the merged statistics table is written to\n",
    "\n",
    "ZIPF_CONSTANT = 2      # Shape parameter passed to np.random.zipf (controls skewness); must be greater than 1\n",
    "MAX_REFRESH_RATE = 10  # Upper bound of the uniform draw (lower bound 1) used for each object's 'mu'\n",
    "\n",
    "\n",
    "# Set random seeds so repeated runs draw the same values\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "\n",
    "# Initialize the simulation environment shared by every process below\n",
    "env = simpy.Environment()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5cea042f-e9fc-4a1e-9750-de212ca70601",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Database:\n",
    "    \"\"\"Backing store holding every object together with its randomly drawn timing parameters.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        object_ids = range(1, CACHE_CAPACITY + 1)\n",
    "\n",
    "        # Payload stored for each object id\n",
    "        self.data = {oid: f\"Object {oid}\" for oid in object_ids}\n",
    "        # Per-object 'lambda' (Zipf draw); used below as the scale of the exponential request gaps\n",
    "        self.lambda_values = {oid: np.random.zipf(ZIPF_CONSTANT) for oid in object_ids}\n",
    "        # Per-object 'mu' (uniform draw between 1 and MAX_REFRESH_RATE)\n",
    "        self.mu_values = {oid: np.random.uniform(1, MAX_REFRESH_RATE) for oid in object_ids}\n",
    "        # Time of the first request for each object\n",
    "        self.next_request = {oid: np.random.exponential(self.lambda_values[oid]) for oid in object_ids}\n",
    "\n",
    "    def get_object(self, obj_id):\n",
    "        \"\"\"Return the stored object for obj_id, or None when the id is unknown.\"\"\"\n",
    "        return self.data.get(obj_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "499bf543-b2c6-4e4d-afcc-0a6665ce3ae1",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Cache:\n",
    "    \"\"\"TTL-based cache in front of a Database, with per-object statistics.\n",
    "\n",
    "    Entries expire CACHE_TTL time units after they were fetched or refreshed.\n",
    "    All timestamps are read from the environment passed to the constructor\n",
    "    (self.env), so the class does not rely on a global `env` variable.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env, db):\n",
    "        \"\"\"Create an empty cache.\n",
    "\n",
    "        Args:\n",
    "            env: simulation environment; only its `now` attribute is read.\n",
    "            db: backing store providing get_object(), mu_values and next_request.\n",
    "        \"\"\"\n",
    "        object_ids = range(1, CACHE_CAPACITY + 1)\n",
    "        self.env = env\n",
    "        self.db = db\n",
    "        self.storage = {}       # obj_id -> cached object\n",
    "        self.ttl = {}           # obj_id -> absolute expiry time\n",
    "        self.age = {}           # obj_id -> aging ticks since the last fetch/refresh\n",
    "        self.next_refresh = {}  # obj_id -> time of the next scheduled refresh\n",
    "        self.cache_size_over_time = []          # (time, number of cached objects)\n",
    "        self.cache_next_request_over_time = []  # (time, snapshot of db.next_request)\n",
    "        self.request_log = {i: [] for i in object_ids}\n",
    "        self.hits = {i: 0 for i in object_ids}            # hits per object\n",
    "        self.misses = {i: 0 for i in object_ids}          # misses per object\n",
    "        self.cumulative_age = {i: 0 for i in object_ids}  # sum of entry ages observed at hit time\n",
    "        self.access_count = {i: 0 for i in object_ids}    # hits + misses per object\n",
    "\n",
    "    def get(self, obj_id):\n",
    "        \"\"\"Serve one request for obj_id and update the hit/miss statistics.\n",
    "\n",
    "        A miss fetches the object from the database, stores it with a fresh TTL\n",
    "        and age 0, and draws its next refresh time. Returns None.\n",
    "        \"\"\"\n",
    "        now = self.env.now\n",
    "        self.access_count[obj_id] += 1\n",
    "\n",
    "        if obj_id in self.storage and self.ttl[obj_id] > now:\n",
    "            # Cache hit: record the age the entry had when it was served\n",
    "            self.hits[obj_id] += 1\n",
    "            self.cumulative_age[obj_id] += self.age[obj_id]\n",
    "            return\n",
    "\n",
    "        # Cache miss: the entry is absent, or expired but not yet swept by age_objects()\n",
    "        self.misses[obj_id] += 1\n",
    "        obj = self.db.get_object(obj_id)\n",
    "\n",
    "        # Evict only when the insert needs a new slot; a stale entry for the same\n",
    "        # id is overwritten in place and must not push out another object.\n",
    "        if obj_id not in self.storage and len(self.storage) >= CACHE_CAPACITY:\n",
    "            self.evict_oldest()\n",
    "\n",
    "        self.storage[obj_id] = obj\n",
    "        self.ttl[obj_id] = now + CACHE_TTL\n",
    "        self.age[obj_id] = 0\n",
    "        self.next_refresh[obj_id] = now + np.random.exponential(self.db.mu_values[obj_id])  # Schedule refresh\n",
    "\n",
    "    def _drop(self, obj_id):\n",
    "        \"\"\"Remove every piece of per-entry bookkeeping for obj_id.\"\"\"\n",
    "        del self.storage[obj_id]\n",
    "        del self.ttl[obj_id]\n",
    "        del self.age[obj_id]\n",
    "        # The refresh schedule is meaningless once the entry is gone\n",
    "        self.next_refresh.pop(obj_id, None)\n",
    "\n",
    "    def evict_oldest(self):\n",
    "        \"\"\"Remove the entry with the largest age to make space.\"\"\"\n",
    "        oldest_id = max(self.age, key=self.age.get)\n",
    "        print(f\"[{self.env.now:.2f}] Cache: Evicting object {oldest_id} to make space\")\n",
    "        self._drop(oldest_id)\n",
    "\n",
    "    def refresh_object(self, obj_id):\n",
    "        \"\"\"Re-fetch obj_id from the database, resetting its TTL and age.\"\"\"\n",
    "        self.storage[obj_id] = self.db.get_object(obj_id)\n",
    "        self.ttl[obj_id] = self.env.now + CACHE_TTL\n",
    "        self.age[obj_id] = 0\n",
    "\n",
    "    def age_objects(self):\n",
    "        \"\"\"Increment the age of every live entry and drop entries whose TTL has passed.\"\"\"\n",
    "        now = self.env.now\n",
    "        for obj_id in list(self.age):  # iterate over a copy: _drop() mutates the dict\n",
    "            if self.ttl[obj_id] > now:\n",
    "                self.age[obj_id] += 1\n",
    "            else:\n",
    "                self._drop(obj_id)\n",
    "\n",
    "    def record_cache_state(self):\n",
    "        \"\"\"Append the current cache size and a copy of the request schedule to the history lists.\"\"\"\n",
    "        now = self.env.now\n",
    "        self.cache_size_over_time.append((now, len(self.storage)))\n",
    "        self.cache_next_request_over_time.append((now, self.db.next_request.copy()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7286d498-aa6c-4efb-bb28-fe29736eab64",
   "metadata": {},
   "outputs": [],
   "source": [
    "def age_cache_process(env, cache):\n",
    "    \"\"\"Process that ages cache objects over time, removes expired items, and refreshes based on object-specific intervals.\"\"\"\n",
    "    while True:\n",
    "        cache.age_objects()  # Age objects and remove expired ones\n",
    "\n",
    "        # Refresh objects based on their individual refresh intervals\n",
    "        for obj_id in list(cache.storage.keys()):\n",
    "            # Check if it's time to refresh this object based on next_refresh\n",
    "            if env.now >= cache.next_refresh[obj_id]:\n",
    "                cache.refresh_object(obj_id)\n",
    "                # Schedule the next refresh based on the object's mu\n",
    "                cache.next_refresh[obj_id] = env.now + np.random.exponential(cache.db.mu_values[obj_id])\n",
    "        \n",
    "        cache.record_cache_state()  # Record cache state at each time step\n",
    "        yield env.timeout(1)  # Run every second\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "687f5634-8edf-4337-b42f-bbb292d47f0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def client_request_process(env, cache, event):\n",
    "    \"\"\"Client process that issues requests to the cache in order of their scheduled times.\n",
    "\n",
    "    Args:\n",
    "        env: simulation environment providing `now` and `timeout()`.\n",
    "        cache: Cache whose `db.next_request` holds the next request time per object.\n",
    "        event: event that is succeeded once the stop condition is met.\n",
    "\n",
    "    The process stops after the object with the largest lambda has been accessed\n",
    "    ACCESS_COUNT_LIMIT times. Lambda is used as the scale of the exponential\n",
    "    inter-request time, so that object is the one requested least often.\n",
    "    \"\"\"\n",
    "    # max() over items() yields an (obj_id, lambda) pair; keep only the id so the\n",
    "    # stop check never uses the lambda value as if it were an object id.\n",
    "    slowest_id, _ = max(cache.db.lambda_values.items(), key=lambda item: item[1])\n",
    "    while True:\n",
    "        obj_id, next_request = min(cache.db.next_request.items(), key=lambda item: item[1])\n",
    "        # Clamp at zero: float rounding can leave env.now marginally past the target\n",
    "        yield env.timeout(max(0.0, next_request - env.now))\n",
    "        if env.now >= next_request:\n",
    "            cache.get(obj_id)\n",
    "\n",
    "            # Schedule and log the following request for this object\n",
    "            next_request = env.now + np.random.exponential(cache.db.lambda_values[obj_id])\n",
    "            cache.request_log[obj_id].append(next_request)\n",
    "            cache.db.next_request[obj_id] = next_request\n",
    "        if cache.access_count[slowest_id] >= ACCESS_COUNT_LIMIT:\n",
    "            event.succeed()\n",
    "            return  # an event can only be triggered once, so end the process here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c8516830-9880-4d9e-a91b-000338baf9d6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Build the simulation actors on the shared environment\n",
    "stop_event = env.event()       # triggered by the client process to end the run\n",
    "db = Database()\n",
    "cache = Cache(env=env, db=db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ba34b36-9ed5-4996-9600-11dfd25d8e60",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "# Register both simulation processes, aging first and client second\n",
    "for process_gen in (\n",
    "    age_cache_process(env, cache),\n",
    "    client_request_process(env, cache, stop_event),\n",
    "):\n",
    "    env.process(process_gen)\n",
    "\n",
    "# Advance the simulation until the client process triggers stop_event\n",
    "env.run(until=stop_event)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b6f7c1f-ea54-4496-bb9a-370cee2d2751",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Report hit rate and mean age-at-hit per object, skipping objects that were never requested\n",
    "for obj_id in range(1, CACHE_CAPACITY + 1):\n",
    "    n_access = cache.access_count[obj_id]\n",
    "    if n_access == 0:\n",
    "        continue\n",
    "    n_hits = cache.hits[obj_id]\n",
    "    hit_rate = n_hits / max(1, n_access)\n",
    "    avg_age = cache.cumulative_age[obj_id] / max(1, n_hits)  # averaged over hits only\n",
    "    print(f\"Object {obj_id}: Hit Rate = {hit_rate:.2f}, Average Age = {avg_age:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01f8f9ee-c278-4a22-8562-ba02e77f5ddd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the recorded (time, size) samples into two parallel sequences\n",
    "times, cache_sizes = zip(*cache.cache_size_over_time)\n",
    "\n",
    "# Draw the cache occupancy over simulation time\n",
    "fig, ax = plt.subplots(figsize=(30, 5))\n",
    "ax.plot(times, cache_sizes, label=\"Objects in Cache\")\n",
    "ax.set_xlabel(\"Time (s)\")\n",
    "ax.set_ylabel(\"Number of Cached Objects\")\n",
    "ax.set_title(\"Number of Objects in Cache Over Time\")\n",
    "ax.legend()\n",
    "ax.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80971714-44f1-47db-9e89-85be7c885bde",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One single-column frame per statistic, each indexed by object id\n",
    "access_count = pd.DataFrame.from_dict(cache.access_count, orient='index', columns=['access_count'])\n",
    "hits = pd.DataFrame.from_dict(cache.hits, orient='index', columns=['hits'])\n",
    "misses = pd.DataFrame.from_dict(cache.misses, orient='index', columns=['misses'])\n",
    "mu = pd.DataFrame.from_dict(db.mu_values, orient='index', columns=['mu'])\n",
    "lmbda = pd.DataFrame.from_dict(db.lambda_values, orient='index', columns=['lambda'])\n",
    "\n",
    "# Hit rate in percent, computed from the indexed columns so it keeps the\n",
    "# object-id index. A frame built from bare NumPy arrays would get a fresh\n",
    "# 0-based index, and an index-based merge would then pair every hit rate\n",
    "# with the wrong object and drop a row.\n",
    "hit_rate = (hits['hits'] / access_count['access_count'] * 100).round(2).to_frame('hit_rate')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa36397f-9f09-4467-9e77-8a6b6b1bc691",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join all statistic frames on their index, one after another, then export\n",
    "merged = access_count\n",
    "for frame in (hits, misses, mu, lmbda, hit_rate):\n",
    "    merged = merged.merge(frame, left_index=True, right_index=True)\n",
    "\n",
    "merged.to_csv(EXPORT_NAME)\n",
    "merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f30a0497-9b2e-4ea9-8ebf-6687de19aaa9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "# Tally how many objects share each lambda value\n",
    "count = Counter(db.lambda_values.values())\n",
    "\n",
    "# Unique values and their tallies, in matching order\n",
    "x = list(count)\n",
    "y = [count[value] for value in x]\n",
    "\n",
    "# Bar chart of the tallies\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.bar(x, y, color='skyblue')\n",
    "ax.set_xlabel('Number')\n",
    "ax.set_ylabel('Occurrences')\n",
    "ax.set_title('Occurance of each lambda in db')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00a12eea-c805-4209-9143-48fa65619873",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "# Round every mu to the nearest integer and tally the rounded values\n",
    "rounded_mu = np.round(np.array(list(db.mu_values.values())), 0)\n",
    "count = Counter(rounded_mu)\n",
    "\n",
    "# Unique rounded values and their tallies, in matching order\n",
    "x = list(count)\n",
    "y = [count[value] for value in x]\n",
    "\n",
    "# Bar chart of the tallies\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.bar(x, y, color='skyblue')\n",
    "ax.set_xlabel('Number')\n",
    "ax.set_ylabel('Occurrences')\n",
    "ax.set_title('Occurance of each mu in db (rounded)')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adbfeb40-76bd-4224-ac45-65c7b2b2cb7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_requests(object_id: int):\n",
    "    \"\"\"Plot the gaps between consecutive logged request times of one object.\"\"\"\n",
    "    mu_value = db.mu_values[object_id]\n",
    "    lambda_value = db.lambda_values[object_id]\n",
    "    logged_times = np.array(cache.request_log[object_id])\n",
    "    # Difference between each logged time and the one before it\n",
    "    gaps = np.diff(logged_times)\n",
    "    column_label = f\"{object_id}, mu:{mu_value:.2f}, lambda: {lambda_value:.2f}\"\n",
    "    pd.DataFrame(gaps, columns=[column_label]).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f550686-3463-4e50-be83-ceafb27512b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_rate(object_id: int):\n",
    "    \"\"\"Print the measured request rate of one object next to its lambda.\n",
    "\n",
    "    The rate is estimated as 1 / (mean gap between consecutive logged request\n",
    "    times). Nothing is returned; the result is printed.\n",
    "    \"\"\"\n",
    "    # Gaps between consecutive logged request times\n",
    "    intervals = np.diff(np.array(cache.request_log[object_id]))\n",
    "\n",
    "    if intervals.size == 0:\n",
    "        # Fewer than two log entries: no gap to measure\n",
    "        print(\"Average event rate per second: n/a (fewer than two logged requests)\")\n",
    "    else:\n",
    "        # events / elapsed time == 1 / mean gap. Averaging 1/gap per interval instead\n",
    "        # is dominated by the shortest gaps and grossly overstates the rate.\n",
    "        average_rate = 1 / np.mean(intervals)\n",
    "        print(\"Average event rate per second:\", average_rate)\n",
    "\n",
    "    # The printed value comes from lambda_values, so label it as lambda\n",
    "    print(\"The lambda is: \", db.lambda_values[object_id])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f246dc61-f131-4d17-80c9-ccff1c4fec64",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the measured request rate and plot the inter-request gaps for objects 1-9.\n",
    "# A plain loop is used because both helpers return None; collecting their results\n",
    "# in a list would only display a list of (None, None) pairs as cell output.\n",
    "for object_id in range(1, 10):\n",
    "    print_rate(object_id)\n",
    "    plot_requests(object_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2d18372-cdba-4151-ae32-5bf45466bf94",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphs",
   "language": "python",
   "name": "graphs"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}