age_cache_simulation/aoi_cache_simulation.ipynb

422 lines
16 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "920665b8-9204-42df-ab59-1b9324387750",
"metadata": {},
"outputs": [],
"source": [
"import simpy\n",
"import random\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"# Constants\n",
"SEED = 256\n",
"CACHE_TTL = 5 # Cache TTL in seconds\n",
"CACHE_CAPACITY = 100 # Maximum number of objects the cache can hold\n",
"ACCESS_COUNT_LIMIT = 10_000 # Number of accesses the least-requested object must reach before the simulation stops\n",
"EXPORT_NAME = \"./export.csv\"\n",
"\n",
"ZIPF_CONSTANT = 2 # Shape parameter for the Zipf distribution (controls skewness); must be greater than 1\n",
"MAX_REFRESH_RATE = 10\n",
"\n",
"\n",
"# Set random seeds\n",
"random.seed(SEED)\n",
"np.random.seed(SEED)\n",
"\n",
"# Initialize simulation environment\n",
"env = simpy.Environment()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5cea042f-e9fc-4a1e-9750-de212ca70601",
"metadata": {},
"outputs": [],
"source": [
"class Database:\n",
"    \"\"\"Backing store that holds every object together with its randomly drawn parameters.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        object_ids = range(1, CACHE_CAPACITY + 1)\n",
"        # Payload returned for each object ID\n",
"        self.data = {obj_id: f\"Object {obj_id}\" for obj_id in object_ids}\n",
"        # 'lambda' per object, drawn from a Zipf distribution\n",
"        self.lambda_values = {obj_id: np.random.zipf(ZIPF_CONSTANT) for obj_id in object_ids}\n",
"        # 'mu' per object, drawn uniformly between 1 and MAX_REFRESH_RATE\n",
"        self.mu_values = {obj_id: np.random.uniform(1, MAX_REFRESH_RATE) for obj_id in object_ids}\n",
"        # Time of each object's first request; lambda is passed as the exponential scale\n",
"        self.next_request = {\n",
"            obj_id: np.random.exponential(lam) for obj_id, lam in self.lambda_values.items()\n",
"        }\n",
"\n",
"    def get_object(self, obj_id):\n",
"        \"\"\"Return the payload stored for obj_id, or None if the ID is unknown.\"\"\"\n",
"        return self.data.get(obj_id)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "499bf543-b2c6-4e4d-afcc-0a6665ce3ae1",
"metadata": {},
"outputs": [],
"source": [
"class Cache:\n",
"    \"\"\"TTL-bounded cache in front of a Database that tracks per-object age and hit statistics.\n",
"\n",
"    Entries are inserted on a miss, expire CACHE_TTL after their last insert or\n",
"    refresh, and carry an integer age that age_objects() increments on each call.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, env, db):\n",
"        self.env = env\n",
"        self.db = db\n",
"        object_ids = range(1, CACHE_CAPACITY + 1)\n",
"        self.storage = {}  # obj_id -> cached payload\n",
"        self.ttl = {}  # obj_id -> simulation time at which the entry expires\n",
"        self.age = {}  # obj_id -> age_objects() ticks since the last insert/refresh\n",
"        self.next_refresh = {}  # obj_id -> simulation time of the next scheduled refresh\n",
"        self.cache_size_over_time = []  # (time, number of cached objects) samples\n",
"        self.cache_next_request_over_time = []  # (time, copy of db.next_request) samples\n",
"        self.request_log = {i: [] for i in object_ids}  # scheduled request times per object\n",
"        self.hits = {i: 0 for i in object_ids}  # hits per object\n",
"        self.misses = {i: 0 for i in object_ids}  # misses per object\n",
"        self.cumulative_age = {i: 0 for i in object_ids}  # sum of the entry age seen on each hit\n",
"        self.access_count = {i: 0 for i in object_ids}  # hits + misses per object\n",
"\n",
"    def get(self, obj_id):\n",
"        \"\"\"Serve a request for obj_id, loading it from the database on a miss.\n",
"\n",
"        Returns the payload that is in the cache after the call (None when the\n",
"        database does not know obj_id). Raises KeyError for IDs outside the\n",
"        range the statistics dictionaries were created for.\n",
"        \"\"\"\n",
"        now = self.env.now\n",
"        self.access_count[obj_id] += 1\n",
"\n",
"        if obj_id in self.storage and self.ttl[obj_id] > now:\n",
"            # Hit: the age at request time feeds the average-age statistic\n",
"            self.hits[obj_id] += 1\n",
"            self.cumulative_age[obj_id] += self.age[obj_id]\n",
"            return self.storage[obj_id]\n",
"\n",
"        self.misses[obj_id] += 1\n",
"        obj = self.db.get_object(obj_id)\n",
"\n",
"        # Make room only when a new slot is needed; an expired entry for the\n",
"        # same ID is simply overwritten in place.\n",
"        if obj_id not in self.storage and len(self.storage) >= CACHE_CAPACITY:\n",
"            self.evict_oldest()\n",
"\n",
"        self.storage[obj_id] = obj\n",
"        self.ttl[obj_id] = now + CACHE_TTL\n",
"        self.age[obj_id] = 0\n",
"        # mu is passed as the scale (mean) of the exponential gap until the next refresh\n",
"        self.next_refresh[obj_id] = now + np.random.exponential(self.db.mu_values[obj_id])\n",
"        return obj\n",
"\n",
"    def _remove(self, obj_id):\n",
"        \"\"\"Drop all per-entry bookkeeping for obj_id; hit/miss statistics are kept.\"\"\"\n",
"        del self.storage[obj_id]\n",
"        del self.ttl[obj_id]\n",
"        del self.age[obj_id]\n",
"        # The refresh schedule belongs to the entry, so it must not outlive it\n",
"        self.next_refresh.pop(obj_id, None)\n",
"\n",
"    def evict_oldest(self):\n",
"        \"\"\"Remove the entry with the largest age to make space; no-op on an empty cache.\"\"\"\n",
"        if not self.age:\n",
"            return\n",
"        oldest_id = max(self.age, key=self.age.get)\n",
"        print(f\"[{self.env.now:.2f}] Cache: Evicting object {oldest_id} to make space\")\n",
"        self._remove(oldest_id)\n",
"\n",
"    def refresh_object(self, obj_id):\n",
"        \"\"\"Re-fetch obj_id from the database and reset its TTL and age.\"\"\"\n",
"        self.storage[obj_id] = self.db.get_object(obj_id)\n",
"        self.ttl[obj_id] = self.env.now + CACHE_TTL\n",
"        self.age[obj_id] = 0\n",
"\n",
"    def age_objects(self):\n",
"        \"\"\"Increment the age of every live entry and drop entries whose TTL has passed.\"\"\"\n",
"        now = self.env.now\n",
"        # Iterate over a snapshot because expired entries are deleted along the way\n",
"        for obj_id in list(self.age):\n",
"            if self.ttl[obj_id] > now:\n",
"                self.age[obj_id] += 1\n",
"            else:\n",
"                self._remove(obj_id)\n",
"\n",
"    def record_cache_state(self):\n",
"        \"\"\"Append the current cache size and a copy of the pending request times to the history.\"\"\"\n",
"        now = self.env.now\n",
"        self.cache_size_over_time.append((now, len(self.storage)))\n",
"        self.cache_next_request_over_time.append((now, self.db.next_request.copy()))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7286d498-aa6c-4efb-bb28-fe29736eab64",
"metadata": {},
"outputs": [],
"source": [
"def age_cache_process(env, cache):\n",
"    \"\"\"Once per simulated second: age and expire entries, refresh the due ones, record the state.\"\"\"\n",
"    while True:\n",
"        # Age every entry first; expired entries are dropped in this call\n",
"        cache.age_objects()\n",
"\n",
"        # Refresh each remaining entry whose scheduled refresh time has arrived\n",
"        for cached_id in list(cache.storage):\n",
"            if cache.next_refresh[cached_id] > env.now:\n",
"                continue\n",
"            cache.refresh_object(cached_id)\n",
"            # Draw the following refresh time with the object's mu as exponential scale\n",
"            refresh_gap = np.random.exponential(cache.db.mu_values[cached_id])\n",
"            cache.next_refresh[cached_id] = env.now + refresh_gap\n",
"\n",
"        cache.record_cache_state()\n",
"        yield env.timeout(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "687f5634-8edf-4337-b42f-bbb292d47f0f",
"metadata": {},
"outputs": [],
"source": [
"def client_request_process(env, cache, event):\n",
"    \"\"\"Client process that issues requests in the order of their scheduled times.\n",
"\n",
"    Repeatedly picks the object with the earliest pending request time, waits\n",
"    until then, requests it from the cache and schedules that object's next\n",
"    request. Triggers `event` and ends once the object with the largest lambda\n",
"    value has been accessed ACCESS_COUNT_LIMIT times.\n",
"    \"\"\"\n",
"    lambda_values = cache.db.lambda_values\n",
"    # lambda is used as the exponential scale, so the largest value belongs to the\n",
"    # object with the longest mean gap between requests. Only its ID is kept: the\n",
"    # stop test must not look up the lambda value itself in access_count.\n",
"    slowest_obj_id = max(lambda_values, key=lambda_values.get)\n",
"\n",
"    while True:\n",
"        obj_id, next_request = min(cache.db.next_request.items(), key=lambda item: item[1])\n",
"        yield env.timeout(next_request - env.now)\n",
"        if env.now < next_request:\n",
"            # Clock has not reached the target (e.g. float rounding); pick again\n",
"            continue\n",
"\n",
"        cache.get(obj_id)\n",
"\n",
"        # Schedule and log this object's next request\n",
"        next_request = env.now + np.random.exponential(lambda_values[obj_id])\n",
"        cache.request_log[obj_id].append(next_request)\n",
"        cache.db.next_request[obj_id] = next_request\n",
"\n",
"        if cache.access_count[slowest_obj_id] >= ACCESS_COUNT_LIMIT:\n",
"            # An event can only be triggered once, so end the process after signalling\n",
"            event.succeed()\n",
"            return"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c8516830-9880-4d9e-a91b-000338baf9d6",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Instantiate components\n",
"db = Database()\n",
"cache = Cache(env, db)\n",
"stop_event = env.event()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ba34b36-9ed5-4996-9600-11dfd25d8e60",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# Start processes\n",
"env.process(age_cache_process(env, cache))\n",
"env.process(client_request_process(env, cache, stop_event))\n",
"\n",
"# Run the simulation\n",
"env.run(until=stop_event)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b6f7c1f-ea54-4496-bb9a-370cee2d2751",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Report hit rate and mean age-at-hit for every object that was requested at least once\n",
"for obj_id in range(1, CACHE_CAPACITY + 1):\n",
"    accesses = cache.access_count[obj_id]\n",
"    if accesses == 0:\n",
"        continue\n",
"    hit_total = cache.hits[obj_id]\n",
"    hit_rate = hit_total / accesses\n",
"    # Divide by at least 1 so an object without hits reports an average age of 0\n",
"    avg_age = cache.cumulative_age[obj_id] / max(1, hit_total)\n",
"    print(f\"Object {obj_id}: Hit Rate = {hit_rate:.2f}, Average Age = {avg_age:.2f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01f8f9ee-c278-4a22-8562-ba02e77f5ddd",
"metadata": {},
"outputs": [],
"source": [
"# Extract recorded data for plotting\n",
"times, cache_sizes = zip(*cache.cache_size_over_time)\n",
"\n",
"# Plot the cache size over time\n",
"plt.figure(figsize=(30, 5))\n",
"plt.plot(times, cache_sizes, label=\"Objects in Cache\")\n",
"plt.xlabel(\"Time (s)\")\n",
"plt.ylabel(\"Number of Cached Objects\")\n",
"plt.title(\"Number of Objects in Cache Over Time\")\n",
"plt.legend()\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80971714-44f1-47db-9e89-85be7c885bde",
"metadata": {},
"outputs": [],
"source": [
"# Per-object statistics and parameters as single-column frames indexed by object ID\n",
"access_count = pd.DataFrame.from_dict(cache.access_count, orient='index', columns=['access_count'])\n",
"hits = pd.DataFrame.from_dict(cache.hits, orient='index', columns=['hits'])\n",
"misses = pd.DataFrame.from_dict(cache.misses, orient='index', columns=['misses'])\n",
"mu = pd.DataFrame.from_dict(db.mu_values, orient='index', columns=['mu'])\n",
"lmbda = pd.DataFrame.from_dict(db.lambda_values, orient='index', columns=['lambda'])\n",
"# Hit rate in percent. The object-ID index is reused so that an index-based merge\n",
"# pairs each rate with its own object instead of a default 0-based position.\n",
"hit_rate = pd.DataFrame(\n",
"    np.round((hits.to_numpy() / access_count.to_numpy()) * 100, 2),\n",
"    index=hits.index,\n",
"    columns=['hit_rate'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa36397f-9f09-4467-9e77-8a6b6b1bc691",
"metadata": {},
"outputs": [],
"source": [
"# Combine all per-object frames on their index, export the table and display it\n",
"merged = access_count\n",
"for frame in (hits, misses, mu, lmbda, hit_rate):\n",
"    merged = merged.merge(frame, left_index=True, right_index=True)\n",
"merged.to_csv(EXPORT_NAME)\n",
"merged"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f30a0497-9b2e-4ea9-8ebf-6687de19aaa9",
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"# Tally how many objects share each lambda value\n",
"count = Counter(db.lambda_values.values())\n",
"\n",
"# Bar positions are the distinct values, bar heights their frequencies\n",
"x = list(count)\n",
"y = list(count.values())\n",
"\n",
"# Bar chart of the lambda distribution\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(x, y, color='skyblue')\n",
"plt.xlabel('Number')\n",
"plt.ylabel('Occurrences')\n",
"plt.title('Occurance of each lambda in db')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00a12eea-c805-4209-9143-48fa65619873",
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"# Tally how many objects share each mu value after rounding to whole numbers\n",
"mu_array = np.array(list(db.mu_values.values()))\n",
"count = Counter(mu_array.round(0))\n",
"\n",
"# Bar positions are the distinct rounded values, bar heights their frequencies\n",
"x = list(count)\n",
"y = list(count.values())\n",
"\n",
"# Bar chart of the rounded mu distribution\n",
"plt.figure(figsize=(8, 6))\n",
"plt.bar(x, y, color='skyblue')\n",
"plt.xlabel('Number')\n",
"plt.ylabel('Occurrences')\n",
"plt.title('Occurance of each mu in db (rounded)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adbfeb40-76bd-4224-ac45-65c7b2b2cb7b",
"metadata": {},
"outputs": [],
"source": [
"def plot_requests(object_id: int):\n",
"    \"\"\"Plot the gaps between consecutive logged request times of one object.\"\"\"\n",
"    # The legend label carries the object's ID and its mu/lambda parameters\n",
"    series_label = f\"{object_id}, mu:{db.mu_values[object_id]:.2f}, lambda: {db.lambda_values[object_id]:.2f}\"\n",
"    request_times = np.array(cache.request_log[object_id])\n",
"    gaps = np.diff(request_times)\n",
"    pd.DataFrame(gaps, columns=[series_label]).plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f550686-3463-4e50-be83-ceafb27512b0",
"metadata": {},
"outputs": [],
"source": [
"def print_rate(object_id: int):\n",
"    \"\"\"Print the observed mean request rate of one object next to its lambda value.\n",
"\n",
"    The rate is 1 / mean(gap between consecutive logged request times); it is\n",
"    reported as NaN when fewer than two request times were logged.\n",
"    \"\"\"\n",
"    intervals = np.diff(np.array(cache.request_log[object_id]))\n",
"\n",
"    if intervals.size == 0:\n",
"        # No gap to measure\n",
"        average_rate = float(\"nan\")\n",
"    else:\n",
"        # Events per second is the inverse of the mean gap. Averaging 1/gap instead\n",
"        # would be dominated by the shortest gaps and is undefined for a zero gap.\n",
"        average_rate = 1 / np.mean(intervals)\n",
"\n",
"    print(\"Average event rate per second:\", average_rate)\n",
"    # The value printed here is the object's lambda, so label it as such\n",
"    print(\"The lambda is: \", db.lambda_values[object_id])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f246dc61-f131-4d17-80c9-ccff1c4fec64",
"metadata": {},
"outputs": [],
"source": [
"# Print the request rate and plot the request gaps for objects 1 through 9\n",
"for i in range(1, 10):\n",
"    print_rate(i)\n",
"    plot_requests(i)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2d18372-cdba-4151-ae32-5bf45466bf94",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "graphs",
"language": "python",
"name": "graphs"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}