age_cache_simulation/aoi_cache_simulation.ipynb

422 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "920665b8-9204-42df-ab59-1b9324387750",
"metadata": {},
"outputs": [],
"source": [
"import simpy\n",
"import random\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"# Constants\n",
"SEED = 256\n",
"CACHE_TTL = 5  # Cache TTL in seconds\n",
"CACHE_CAPACITY = 100  # Maximum number of objects the cache can hold\n",
"ACCESS_COUNT_LIMIT = 10_000  # Per-object access count that ends the simulation (not a time limit)\n",
"EXPORT_NAME = \"./export.csv\"\n",
"\n",
"ZIPF_CONSTANT = 2  # Shape parameter of the Zipf distribution (controls skewness); must be > 1\n",
"MAX_REFRESH_RATE = 10  # Upper bound of the uniform range each object's 'mu' is drawn from\n",
"\n",
"\n",
"# Seed both random number generators so runs are reproducible\n",
"random.seed(SEED)\n",
"np.random.seed(SEED)\n",
"\n",
"# Initialize simulation environment\n",
"env = simpy.Environment()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5cea042f-e9fc-4a1e-9750-de212ca70601",
"metadata": {},
"outputs": [],
"source": [
"class Database:\n",
"    \"\"\"Backing store: one entry per object id plus its request and refresh parameters.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        object_ids = range(1, CACHE_CAPACITY + 1)\n",
"        self.data = {i: f\"Object {i}\" for i in object_ids}\n",
"        # Per-object request parameter 'lambda', drawn from a Zipf distribution\n",
"        self.lambda_values = {i: np.random.zipf(ZIPF_CONSTANT) for i in object_ids}\n",
"        # Per-object refresh parameter 'mu', drawn uniformly from [1, MAX_REFRESH_RATE)\n",
"        self.mu_values = {i: np.random.uniform(1, MAX_REFRESH_RATE) for i in object_ids}\n",
"        # Time of the first request for every object.\n",
"        # NOTE(review): lambda is handed to np.random.exponential as its scale argument,\n",
"        # i.e. it acts as a mean interval rather than a rate -- confirm this is intended.\n",
"        self.next_request = {i: np.random.exponential(self.lambda_values[i]) for i in object_ids}\n",
"\n",
"    def get_object(self, obj_id):\n",
"        \"\"\"Return the stored object for obj_id, or None when the id is unknown.\"\"\"\n",
"        return self.data.get(obj_id)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "499bf543-b2c6-4e4d-afcc-0a6665ce3ae1",
"metadata": {},
"outputs": [],
"source": [
"class Cache:\n",
"    \"\"\"TTL cache in front of a Database that records per-object hits, misses and age.\n",
"\n",
"    All timestamps are read from the simulation environment passed to the constructor.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, env, db):\n",
"        self.env = env\n",
"        self.db = db\n",
"        self.storage = {}  # Cached objects by id\n",
"        self.ttl = {}  # Absolute expiry time of each cached object\n",
"        self.age = {}  # Age of each cached object, in aging ticks\n",
"        self.next_refresh = {}  # Next scheduled refresh time of each cached object\n",
"        self.cache_size_over_time = []  # (time, number of cached objects) samples\n",
"        self.cache_next_request_over_time = []  # (time, copy of db.next_request) samples\n",
"        object_ids = range(1, CACHE_CAPACITY + 1)\n",
"        self.request_log = {i: [] for i in object_ids}\n",
"        self.hits = {i: 0 for i in object_ids}  # Hits per object\n",
"        self.misses = {i: 0 for i in object_ids}  # Misses per object\n",
"        self.cumulative_age = {i: 0 for i in object_ids}  # Sum of ages observed on hits\n",
"        self.access_count = {i: 0 for i in object_ids}  # Total accesses per object\n",
"\n",
"    def get(self, obj_id):\n",
"        \"\"\"Serve one request for obj_id and update the hit/miss statistics.\n",
"\n",
"        A request is a hit only if the object is stored and its TTL has not passed.\n",
"        On a miss the object is loaded from the database, given a fresh TTL and age 0,\n",
"        and its next refresh is scheduled.\n",
"        \"\"\"\n",
"        now = self.env.now\n",
"        self.access_count[obj_id] += 1\n",
"\n",
"        if obj_id in self.storage and self.ttl[obj_id] > now:\n",
"            self.hits[obj_id] += 1\n",
"            self.cumulative_age[obj_id] += self.age[obj_id]\n",
"            return\n",
"\n",
"        self.misses[obj_id] += 1\n",
"        obj = self.db.get_object(obj_id)\n",
"\n",
"        # Evict only when a new key is added: re-loading an expired entry that is\n",
"        # still stored reuses its slot and must not push out another object.\n",
"        if obj_id not in self.storage and len(self.storage) >= CACHE_CAPACITY:\n",
"            self.evict_oldest()\n",
"\n",
"        self.storage[obj_id] = obj\n",
"        self.ttl[obj_id] = now + CACHE_TTL\n",
"        self.age[obj_id] = 0\n",
"        self.next_refresh[obj_id] = now + np.random.exponential(self.db.mu_values[obj_id])\n",
"\n",
"    def _discard(self, obj_id):\n",
"        \"\"\"Drop all per-object cache state for obj_id, including its refresh schedule.\"\"\"\n",
"        del self.storage[obj_id]\n",
"        del self.ttl[obj_id]\n",
"        del self.age[obj_id]\n",
"        self.next_refresh.pop(obj_id, None)\n",
"\n",
"    def evict_oldest(self):\n",
"        \"\"\"Remove the cached object with the greatest age to make space.\"\"\"\n",
"        oldest_id = max(self.age, key=self.age.get)\n",
"        print(f\"[{self.env.now:.2f}] Cache: Evicting object {oldest_id} to make space\")\n",
"        self._discard(oldest_id)\n",
"\n",
"    def refresh_object(self, obj_id):\n",
"        \"\"\"Re-fetch obj_id from the database, restarting its TTL and resetting its age.\"\"\"\n",
"        self.storage[obj_id] = self.db.get_object(obj_id)\n",
"        self.ttl[obj_id] = self.env.now + CACHE_TTL\n",
"        self.age[obj_id] = 0\n",
"\n",
"    def age_objects(self):\n",
"        \"\"\"Increase the age of every unexpired entry by one and drop expired entries.\"\"\"\n",
"        now = self.env.now\n",
"        # Iterate over a snapshot because expired entries are removed while looping\n",
"        for obj_id in list(self.age):\n",
"            if self.ttl[obj_id] > now:\n",
"                self.age[obj_id] += 1\n",
"            else:\n",
"                self._discard(obj_id)\n",
"\n",
"    def record_cache_state(self):\n",
"        \"\"\"Append the current cache size and a copy of the request schedule to the logs.\"\"\"\n",
"        now = self.env.now\n",
"        self.cache_size_over_time.append((now, len(self.storage)))\n",
"        self.cache_next_request_over_time.append((now, self.db.next_request.copy()))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7286d498-aa6c-4efb-bb28-fe29736eab64",
"metadata": {},
"outputs": [],
"source": [
"def age_cache_process(env, cache):\n",
"    \"\"\"Once per simulated second: age and expire entries, refresh due ones, sample the cache state.\"\"\"\n",
"    while True:\n",
"        cache.age_objects()\n",
"\n",
"        # Collect the objects whose scheduled refresh time has been reached\n",
"        due_ids = [oid for oid in list(cache.storage.keys()) if env.now >= cache.next_refresh[oid]]\n",
"        for oid in due_ids:\n",
"            cache.refresh_object(oid)\n",
"            # Draw the following refresh time from the object's own mu\n",
"            cache.next_refresh[oid] = env.now + np.random.exponential(cache.db.mu_values[oid])\n",
"\n",
"        cache.record_cache_state()\n",
"        yield env.timeout(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "687f5634-8edf-4337-b42f-bbb292d47f0f",
"metadata": {},
"outputs": [],
"source": [
"def client_request_process(env, cache, event):\n",
"    \"\"\"Client process that requests each object at its scheduled time.\n",
"\n",
"    Repeatedly picks the object with the earliest pending request, waits until that\n",
"    time, issues the request through the cache and schedules the object's next request.\n",
"    Triggers `event` and stops once the object with the largest lambda value has been\n",
"    accessed ACCESS_COUNT_LIMIT times.\n",
"    \"\"\"\n",
"    lambda_values = cache.db.lambda_values\n",
"    # Keep only the object id. Taking max() over .items() yields an (id, lambda) pair,\n",
"    # and iterating over that pair would treat the lambda value as an object id too.\n",
"    slowest_obj_id = max(lambda_values, key=lambda_values.get)\n",
"\n",
"    while True:\n",
"        obj_id, next_request = min(cache.db.next_request.items(), key=lambda x: x[1])\n",
"        # Clamp at zero: rounding can leave the due time marginally in the past,\n",
"        # and a negative delay is rejected by simpy.\n",
"        yield env.timeout(max(0.0, next_request - env.now))\n",
"        if env.now >= next_request:\n",
"            cache.get(obj_id)\n",
"\n",
"            next_request = env.now + np.random.exponential(lambda_values[obj_id])\n",
"            cache.request_log[obj_id].append(next_request)\n",
"            cache.db.next_request[obj_id] = next_request\n",
"\n",
"            if cache.access_count[slowest_obj_id] >= ACCESS_COUNT_LIMIT:\n",
"                event.succeed()\n",
"                # Stop here; an event cannot be triggered a second time\n",
"                return"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c8516830-9880-4d9e-a91b-000338baf9d6",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Build the database, the event that ends the run, and the cache in front of the database\n",
"db = Database()\n",
"stop_event = env.event()\n",
"cache = Cache(env, db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ba34b36-9ed5-4996-9600-11dfd25d8e60",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# Register the cache-aging process first, then the client process\n",
"for process in (\n",
"    age_cache_process(env, cache),\n",
"    client_request_process(env, cache, stop_event),\n",
"):\n",
"    env.process(process)\n",
"\n",
"# Advance the simulation until the stop event fires\n",
"env.run(until=stop_event)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b6f7c1f-ea54-4496-bb9a-370cee2d2751",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Report hit rate and mean age at hit time per object; objects never requested are skipped\n",
"for obj_id in range(1, CACHE_CAPACITY + 1):\n",
"    accesses = cache.access_count[obj_id]\n",
"    if accesses == 0:\n",
"        continue\n",
"    hit_count = cache.hits[obj_id]\n",
"    hit_rate = hit_count / max(1, accesses)\n",
"    avg_age = cache.cumulative_age[obj_id] / max(1, hit_count)  # Averaged over hits only\n",
"    print(f\"Object {obj_id}: Hit Rate = {hit_rate:.2f}, Average Age = {avg_age:.2f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01f8f9ee-c278-4a22-8562-ba02e77f5ddd",
"metadata": {},
"outputs": [],
"source": [
"# Split the recorded (time, size) samples into two parallel sequences\n",
"times, cache_sizes = zip(*cache.cache_size_over_time)\n",
"\n",
"# Draw the number of cached objects against simulation time\n",
"fig, ax = plt.subplots(figsize=(30, 5))\n",
"ax.plot(times, cache_sizes, label=\"Objects in Cache\")\n",
"ax.set_xlabel(\"Time (s)\")\n",
"ax.set_ylabel(\"Number of Cached Objects\")\n",
"ax.set_title(\"Number of Objects in Cache Over Time\")\n",
"ax.legend()\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80971714-44f1-47db-9e89-85be7c885bde",
"metadata": {},
"outputs": [],
"source": [
"# Per-object statistics and parameters, each indexed by object id\n",
"access_count = pd.DataFrame.from_dict(cache.access_count, orient='index', columns=['access_count'])\n",
"hits = pd.DataFrame.from_dict(cache.hits, orient='index', columns=['hits'])\n",
"misses = pd.DataFrame.from_dict(cache.misses, orient='index', columns=['misses'])\n",
"mu = pd.DataFrame.from_dict(db.mu_values, orient='index', columns=['mu'])\n",
"lmbda = pd.DataFrame.from_dict(db.lambda_values, orient='index', columns=['lambda'])\n",
"\n",
"# Hit rate in percent, rounded to two decimals. It is computed from the id-indexed\n",
"# columns so the result keeps the object-id index; a frame built from a bare NumPy\n",
"# array would get a 0-based RangeIndex and be off by one object in any index-based join.\n",
"# Objects that were never accessed yield NaN.\n",
"hit_rate = (hits['hits'] / access_count['access_count'] * 100).round(2).to_frame('hit_rate')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa36397f-9f09-4467-9e77-8a6b6b1bc691",
"metadata": {},
"outputs": [],
"source": [
"# Join all per-object frames on their index, one after another, then export the table\n",
"merged = access_count\n",
"for frame in (hits, misses, mu, lmbda, hit_rate):\n",
"    merged = merged.merge(frame, left_index=True, right_index=True)\n",
"merged.to_csv(EXPORT_NAME)\n",
"merged"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f30a0497-9b2e-4ea9-8ebf-6687de19aaa9",
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"# Tally how many objects share each lambda value\n",
"count = Counter(db.lambda_values.values())\n",
"\n",
"x = list(count.keys())  # Distinct lambda values\n",
"y = list(count.values())  # Number of objects having each value\n",
"\n",
"# Bar chart of the tally\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(x, y, color='skyblue')\n",
"plt.xlabel('Number')\n",
"plt.ylabel('Occurrences')\n",
"plt.title('Occurrence of each lambda in db')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00a12eea-c805-4209-9143-48fa65619873",
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"# Tally how many objects share each mu value after rounding to the nearest integer\n",
"count = Counter(np.array(list(db.mu_values.values())).round(0))\n",
"\n",
"x = list(count.keys())  # Distinct rounded mu values\n",
"y = list(count.values())  # Number of objects having each value\n",
"\n",
"# Bar chart of the tally\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(x, y, color='skyblue')\n",
"plt.xlabel('Number')\n",
"plt.ylabel('Occurrences')\n",
"plt.title('Occurrence of each mu in db (rounded)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adbfeb40-76bd-4224-ac45-65c7b2b2cb7b",
"metadata": {},
"outputs": [],
"source": [
"def plot_requests(object_id: int):\n",
"    \"\"\"Plot the gaps between consecutive logged request times of one object.\"\"\"\n",
"    mu = db.mu_values[object_id]\n",
"    lmb = db.lambda_values[object_id]\n",
"    gaps = np.diff(np.array(cache.request_log[object_id]))\n",
"    series_label = f\"{object_id}, mu:{mu:.2f}, lambda: {lmb:.2f}\"\n",
"    pd.DataFrame(gaps, columns=[series_label]).plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f550686-3463-4e50-be83-ceafb27512b0",
"metadata": {},
"outputs": [],
"source": [
"def print_rate(object_id: int):\n",
"    \"\"\"Print the empirical request rate of an object next to its configured lambda.\n",
"\n",
"    The rate is estimated from the gaps between consecutive entries of the object's\n",
"    request log as 1 / mean(gap). NaN is printed when fewer than two entries exist.\n",
"    \"\"\"\n",
"    intervals = np.diff(np.array(cache.request_log[object_id]))\n",
"\n",
"    if intervals.size == 0:\n",
"        # No gap available, so no rate can be estimated\n",
"        average_rate = float(\"nan\")\n",
"    else:\n",
"        # Events per unit time. Averaging 1/gap per interval instead would be\n",
"        # dominated by the shortest gaps and does not estimate the rate.\n",
"        average_rate = 1 / np.mean(intervals)\n",
"\n",
"    print(\"Average event rate per second:\", average_rate)\n",
"    # The value shown is the object's lambda (not its mu)\n",
"    print(\"The lambda is: \", db.lambda_values[object_id])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f246dc61-f131-4d17-80c9-ccff1c4fec64",
"metadata": {},
"outputs": [],
"source": [
"# Print the estimated rate and plot the inter-request gaps for objects 1 through 9.\n",
"# A plain loop is used because both calls are made only for their side effects.\n",
"for object_id in range(1, 10):\n",
"    print_rate(object_id)\n",
"    plot_requests(object_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2d18372-cdba-4151-ae32-5bf45466bf94",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "graphs",
"language": "python",
"name": "graphs"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}