feat(objective optimization): Perform gridsearch as single-core and multi-core to find optimal parameters to minimize objective

Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
2024-12-03 14:19:45 +01:00
parent 799f7b78d4
commit 272f722f23
5 changed files with 558 additions and 566 deletions
--- a/00_aoi_caching_simulation/.gitignore
+++ b/00_aoi_caching_simulation/.gitignore
@@ -1 +1,2 @@
 .ipynb_checkpoints/
 *.csv
--- a/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb
+++ b/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb
--- a/01_nb_cncf_optimization/01-objective_gridsearch.ipynb
+++ b/01_nb_cncf_optimization/01-objective_gridsearch.ipynb
@@ -0,0 +1,287 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import random\n",
    "import pandas as pd\n",
    "import itertools\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape parameter handed to np.random.zipf when lambda values are sampled.\n",
    "ZIPF_CONSTANT = 2\n",
    "\n",
    "# One seed for both generators in use: the `random` module and NumPy's global RNG.\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):\n",
    "    \"\"\"Return (db_object_count * c_f - cache_sz * c_delta) / sum(1 / lambda_vals), floored at 0.\n",
    "\n",
    "    If the sum of reciprocals is exactly 0, the inputs are printed for\n",
    "    inspection and the division is still carried out afterwards.\n",
    "    \"\"\"\n",
    "    budget_gap = db_object_count * c_f - cache_sz * c_delta\n",
    "    reciprocal_sum = np.sum(1.0 / lambda_vals)\n",
    "    if reciprocal_sum == 0:\n",
    "        print(\"sum(1.0/lambda_vals) == 0\")\n",
    "        print(db_object_count, c_f, cache_sz, c_delta, lambda_vals)\n",
    "    unclamped_eta = budget_gap / reciprocal_sum\n",
    "    return max(0, unclamped_eta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def h_i_star(c_f, eta, lambda_vals, c_delta):\n",
    "    \"\"\"Evaluate (c_f - eta / lambda_vals) / c_delta element-wise.\n",
    "\n",
    "    The value is returned exactly as computed; nothing in this function\n",
    "    restricts it to the interval [0, 1].\n",
    "    \"\"\"\n",
    "    return (c_f - (eta / lambda_vals)) / c_delta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    \"\"\"Return the index of the hit rate that lies furthest outside [0, 1].\n",
    "\n",
    "    The violation of an entry is -h for h < 0, h - 1 for h > 1 and 0 otherwise.\n",
    "    If the largest violation below 0 strictly exceeds the largest one above 1,\n",
    "    the index of that entry is returned; otherwise the index of the largest\n",
    "    violation above 1 (index 0 when nothing is out of range).\n",
    "\n",
    "    Parameters:\n",
    "    - hitrates (numpy array): Candidate hit rates, one per object.\n",
    "\n",
    "    Returns:\n",
    "    - index: Position of the selected entry in hitrates.\n",
    "\n",
    "    NOTE: the next cell defines a function with the same name; that later\n",
    "    definition is the one in effect after running the notebook top to bottom.\n",
    "    \"\"\"\n",
    "    # Per-entry violations; entries inside the interval contribute 0, which also\n",
    "    # keeps np.max/np.argmax well defined when one side has no violation at all.\n",
    "    below_zero = np.where(hitrates < 0, -hitrates, 0.0)\n",
    "    above_one = np.where(hitrates > 1, hitrates - 1, 0.0)\n",
    "    if np.max(below_zero) > np.max(above_one):\n",
    "        return np.argmax(below_zero)\n",
    "    return np.argmax(above_one)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9d774304-ae68-43b3-a76a-e970c06c5236",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    \"\"\"Return the index of the out-of-range hit rate with the largest score.\n",
    "\n",
    "    An entry below 0 or above 1 is scored with the larger of its absolute\n",
    "    distances to 0 and to 1; every other entry is scored -inf. When nothing\n",
    "    is out of range all scores are -inf and np.argmax yields index 0.\n",
    "    \"\"\"\n",
    "    distance_to_zero = np.abs(hitrates - 0)\n",
    "    distance_to_one = np.abs(hitrates - 1)\n",
    "    violates_bounds = (hitrates < 0) | (hitrates > 1)\n",
    "    scores = np.where(violates_bounds, np.maximum(distance_to_zero, distance_to_one), -np.inf)\n",
    "    return np.argmax(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def optimize_hitrates(db_object_count, cache_size, c_f, c_delta, lambda_vals):\n",
    "    \"\"\"Iteratively compute one hit rate per object so that all of them lie in [0, 1].\n",
    "\n",
    "    Every pass evaluates eta_star and h_i_star on the objects that are still\n",
    "    free. If a free hit rate falls outside [0, 1], the entry reported by\n",
    "    get_index_of_furthest_hitrate_from_boundary is pinned to 0 or 1, removed\n",
    "    from the free set, and its pinned value is deducted from the remaining\n",
    "    cache size. Once every object is pinned while cache is still left, the\n",
    "    objects pinned at 0 are released and optimized again.\n",
    "\n",
    "    Parameters:\n",
    "    - db_object_count (int): Number of objects; length of the result.\n",
    "    - cache_size (float): Cache budget to distribute.\n",
    "    - c_f (float): Cost coefficient forwarded to eta_star and h_i_star.\n",
    "    - c_delta (float): Cost coefficient forwarded to eta_star and h_i_star.\n",
    "    - lambda_vals (numpy array): One value per object, indexed by object.\n",
    "\n",
    "    Returns:\n",
    "    - optimized_hitrates (numpy array): Hit rate per object.\n",
    "    \"\"\"\n",
    "    optimized_hitrates = np.zeros(db_object_count)\n",
    "    current_db_object_count = db_object_count\n",
    "    current_cache_size = cache_size\n",
    "\n",
    "    differenc_set = np.arange(db_object_count)  # indices of the objects that are still free\n",
    "    fix_i = []  # indices of the objects pinned to 0 or 1\n",
    "    while True:\n",
    "        if current_db_object_count == 0:\n",
    "            if current_cache_size > 0:\n",
    "                # Re-optimize the objects pinned at 0 with the cache that is left.\n",
    "                differenc_set = np.where(optimized_hitrates == 0)[0]\n",
    "                if len(differenc_set) == 0:\n",
    "                    # Nothing is pinned at 0, so there is no object left to give the\n",
    "                    # remaining cache to; without this exit the loop never ends.\n",
    "                    break\n",
    "                fix_i = np.setdiff1d(np.arange(db_object_count), differenc_set).tolist()\n",
    "                current_db_object_count = len(differenc_set)\n",
    "                continue\n",
    "            else:\n",
    "                # Stop optimization.\n",
    "                optimized_hitrates[differenc_set] = 0\n",
    "                break\n",
    "\n",
    "        eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n",
    "        optimized_hitrates[differenc_set] = h_i_star(c_f, eta, lambda_vals[differenc_set], c_delta)\n",
    "\n",
    "        if eta < 0:\n",
    "            # NOTE(review): eta_star as defined in this notebook floors its result\n",
    "            # at 0, so this branch looks unreachable - confirm the intent.\n",
    "            current_cache_size = current_db_object_count * c_f / c_delta  # Adjust cache size for next iteration\n",
    "            continue\n",
    "\n",
    "        free_hitrates = optimized_hitrates[differenc_set]\n",
    "        if not np.any((free_hitrates < 0) | (free_hitrates > 1)):\n",
    "            # All values optimized.\n",
    "            break\n",
    "\n",
    "        max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n",
    "        fix_i.append(max_outbound_index)\n",
    "        differenc_set = np.setdiff1d(np.arange(db_object_count), fix_i)\n",
    "\n",
    "        # Pin the worst offender to the boundary it violated.\n",
    "        optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n",
    "\n",
    "        current_db_object_count -= 1\n",
    "        current_cache_size -= optimized_hitrates[max_outbound_index]\n",
    "    return optimized_hitrates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b6bf3329-3a63-4807-ab8b-8a54f824f47e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def objective_function(optimized_hitrates, c_f, c_delta, lambda_vals):\n",
    "    \"\"\"Sum over all objects of lambda * (1 - h) * c_f + 0.5 * h**2 * c_delta.\"\"\"\n",
    "    linear_term = lambda_vals * (1 - optimized_hitrates) * c_f\n",
    "    quadratic_term = 0.5 * np.power(optimized_hitrates, 2) * c_delta\n",
    "    return np.sum(linear_term + quadratic_term)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Perform grid search\n",
    "def grid_search(db_object_counts, cache_sizes, c_f_values, c_delta_values):\n",
    "    \"\"\"Evaluate every parameter combination and return the smallest objective found.\n",
    "\n",
    "    Combinations with db_object_count < cache_size are skipped. For each remaining\n",
    "    combination a fresh lambda vector is drawn with np.random.zipf(ZIPF_CONSTANT),\n",
    "    optimize_hitrates is run on it and objective_function scores the result.\n",
    "\n",
    "    Parameters:\n",
    "    - db_object_counts, cache_sizes, c_f_values, c_delta_values: Sized iterables\n",
    "      holding the candidate values of each parameter.\n",
    "\n",
    "    Returns:\n",
    "    - (best_objective, best_params): The minimum objective and the\n",
    "      (db_object_count, cache_size, c_f, c_delta) tuple that produced it;\n",
    "      (inf, None) if every combination was skipped.\n",
    "    \"\"\"\n",
    "    combinations = itertools.product(db_object_counts, cache_sizes, c_f_values, c_delta_values)\n",
    "    total_combinations = len(db_object_counts) * len(cache_sizes) * len(c_f_values) * len(c_delta_values)\n",
    "\n",
    "    best_objective = float('inf')\n",
    "    best_params = None\n",
    "\n",
    "    # Iterate through all combinations of parameters\n",
    "    for db_object_count, cache_size, c_f, c_delta in tqdm(combinations, total=total_combinations, desc=\"Grid Search Progress\"):\n",
    "        if db_object_count < cache_size:\n",
    "            continue\n",
    "\n",
    "        # One vectorized draw instead of one np.random.zipf call per object.\n",
    "        # NOTE: the sample is re-drawn for every combination, so each\n",
    "        # combination is scored on its own random lambda values.\n",
    "        lambda_vals = np.random.zipf(ZIPF_CONSTANT, size=db_object_count)\n",
    "\n",
    "        optimized_hitrates = optimize_hitrates(db_object_count, cache_size, c_f, c_delta, lambda_vals)\n",
    "        objective = objective_function(optimized_hitrates, c_f, c_delta, lambda_vals)\n",
    "\n",
    "        # Track the best (minimum) objective and corresponding parameters\n",
    "        if objective < best_objective:\n",
    "            best_objective = objective\n",
    "            best_params = (db_object_count, cache_size, c_f, c_delta)\n",
    "\n",
    "    return best_objective, best_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a271b52d-1f24-4670-ae3f-af5dd9096a2f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Grid Search Progress: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64152/64152 [12:27<00:00, 85.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 12min 16s, sys: 11.5 s, total: 12min 28s\n",
      "Wall time: 12min 27s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Define the grid search space\n",
    "test_ratios = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])\n",
    "db_object_count_values = np.round(np.array([10, 15, 30, 100, 200, 500]))\n",
    "cache_size_values = np.unique(np.round(np.array([db_object_count_values * i for i in test_ratios]).flatten()))\n",
    "c_f_values = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])\n",
    "c_delta_values = np.unique(np.array([c_f_values * i for i in test_ratios]).flatten())\n",
    "\n",
    "best_objective, best_params = grid_search(db_object_count_values, cache_size_values, c_f_values, c_delta_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b2f625d0-ebe0-4a5d-92ff-7de03942ef51",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.05000000000000002, (10, 10.0, 1.5, 0.010000000000000002))"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_objective, best_params "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86a23d02-6f14-4d4d-ad8a-39084ea69151",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphs",
   "language": "python",
   "name": "graphs"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/01_nb_cncf_optimization/02-objective_multi-core_gridsearch.ipynb
+++ b/01_nb_cncf_optimization/02-objective_multi-core_gridsearch.ipynb
@@ -0,0 +1,270 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import random\n",
    "import pandas as pd\n",
    "import itertools\n",
    "from joblib import Parallel, delayed\n",
    "import os.path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape parameter handed to np.random.zipf when lambda values are sampled.\n",
    "ZIPF_CONSTANT = 2\n",
    "\n",
    "# One seed for both generators in use: the `random` module and NumPy's global RNG.\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):\n",
    "    \"\"\"Return (db_object_count * c_f - cache_sz * c_delta) / sum(1 / lambda_vals), floored at 0.\n",
    "\n",
    "    If the sum of reciprocals is exactly 0, the inputs are printed for\n",
    "    inspection and the division is still carried out afterwards.\n",
    "    \"\"\"\n",
    "    budget_gap = db_object_count * c_f - cache_sz * c_delta\n",
    "    reciprocal_sum = np.sum(1.0 / lambda_vals)\n",
    "    if reciprocal_sum == 0:\n",
    "        print(\"sum(1.0/lambda_vals) == 0\")\n",
    "        print(db_object_count, c_f, cache_sz, c_delta, lambda_vals)\n",
    "    unclamped_eta = budget_gap / reciprocal_sum\n",
    "    return max(0, unclamped_eta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def h_i_star(c_f, eta, lambda_vals, c_delta):\n",
    "    \"\"\"Evaluate (c_f - eta / lambda_vals) / c_delta element-wise.\n",
    "\n",
    "    The value is returned exactly as computed; nothing in this function\n",
    "    restricts it to the interval [0, 1].\n",
    "    \"\"\"\n",
    "    return (c_f - (eta / lambda_vals)) / c_delta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    \"\"\"Return the index of the hit rate that lies furthest outside [0, 1].\n",
    "\n",
    "    The violation of an entry is -h for h < 0, h - 1 for h > 1 and 0 otherwise.\n",
    "    If the largest violation below 0 strictly exceeds the largest one above 1,\n",
    "    the index of that entry is returned; otherwise the index of the largest\n",
    "    violation above 1 (index 0 when nothing is out of range).\n",
    "\n",
    "    Parameters:\n",
    "    - hitrates (numpy array): Candidate hit rates, one per object.\n",
    "\n",
    "    Returns:\n",
    "    - index: Position of the selected entry in hitrates.\n",
    "\n",
    "    NOTE: the next cell defines a function with the same name; that later\n",
    "    definition is the one in effect after running the notebook top to bottom.\n",
    "    \"\"\"\n",
    "    # Per-entry violations; entries inside the interval contribute 0, which also\n",
    "    # keeps np.max/np.argmax well defined when one side has no violation at all.\n",
    "    below_zero = np.where(hitrates < 0, -hitrates, 0.0)\n",
    "    above_one = np.where(hitrates > 1, hitrates - 1, 0.0)\n",
    "    if np.max(below_zero) > np.max(above_one):\n",
    "        return np.argmax(below_zero)\n",
    "    return np.argmax(above_one)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9d774304-ae68-43b3-a76a-e970c06c5236",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    \"\"\"Return the index of the out-of-range hit rate with the largest score.\n",
    "\n",
    "    An entry below 0 or above 1 is scored with the larger of its absolute\n",
    "    distances to 0 and to 1; every other entry is scored -inf. When nothing\n",
    "    is out of range all scores are -inf and np.argmax yields index 0.\n",
    "    \"\"\"\n",
    "    distance_to_zero = np.abs(hitrates - 0)\n",
    "    distance_to_one = np.abs(hitrates - 1)\n",
    "    violates_bounds = (hitrates < 0) | (hitrates > 1)\n",
    "    scores = np.where(violates_bounds, np.maximum(distance_to_zero, distance_to_one), -np.inf)\n",
    "    return np.argmax(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def optimize_hitrates(db_object_count, cache_size, c_f, c_delta, lambda_vals):\n",
    "    \"\"\"Iteratively compute one hit rate per object so that all of them lie in [0, 1].\n",
    "\n",
    "    Every pass evaluates eta_star and h_i_star on the objects that are still\n",
    "    free. If a free hit rate falls outside [0, 1], the entry reported by\n",
    "    get_index_of_furthest_hitrate_from_boundary is pinned to 0 or 1, removed\n",
    "    from the free set, and its pinned value is deducted from the remaining\n",
    "    cache size. Once every object is pinned while cache is still left, the\n",
    "    objects pinned at 0 are released and optimized again.\n",
    "\n",
    "    Parameters:\n",
    "    - db_object_count (int): Number of objects; length of the result.\n",
    "    - cache_size (float): Cache budget to distribute.\n",
    "    - c_f (float): Cost coefficient forwarded to eta_star and h_i_star.\n",
    "    - c_delta (float): Cost coefficient forwarded to eta_star and h_i_star.\n",
    "    - lambda_vals (numpy array): One value per object, indexed by object.\n",
    "\n",
    "    Returns:\n",
    "    - optimized_hitrates (numpy array): Hit rate per object.\n",
    "    \"\"\"\n",
    "    optimized_hitrates = np.zeros(db_object_count)\n",
    "    current_db_object_count = db_object_count\n",
    "    current_cache_size = cache_size\n",
    "\n",
    "    differenc_set = np.arange(db_object_count)  # indices of the objects that are still free\n",
    "    fix_i = []  # indices of the objects pinned to 0 or 1\n",
    "    while True:\n",
    "        if current_db_object_count == 0:\n",
    "            if current_cache_size > 0:\n",
    "                # Re-optimize the objects pinned at 0 with the cache that is left.\n",
    "                differenc_set = np.where(optimized_hitrates == 0)[0]\n",
    "                if len(differenc_set) == 0:\n",
    "                    # Nothing is pinned at 0, so there is no object left to give the\n",
    "                    # remaining cache to; without this exit the loop never ends.\n",
    "                    break\n",
    "                fix_i = np.setdiff1d(np.arange(db_object_count), differenc_set).tolist()\n",
    "                current_db_object_count = len(differenc_set)\n",
    "                continue\n",
    "            else:\n",
    "                # Stop optimization.\n",
    "                optimized_hitrates[differenc_set] = 0\n",
    "                break\n",
    "\n",
    "        eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n",
    "        optimized_hitrates[differenc_set] = h_i_star(c_f, eta, lambda_vals[differenc_set], c_delta)\n",
    "\n",
    "        if eta < 0:\n",
    "            # NOTE(review): eta_star as defined in this notebook floors its result\n",
    "            # at 0, so this branch looks unreachable - confirm the intent.\n",
    "            current_cache_size = current_db_object_count * c_f / c_delta  # Adjust cache size for next iteration\n",
    "            continue\n",
    "\n",
    "        free_hitrates = optimized_hitrates[differenc_set]\n",
    "        if not np.any((free_hitrates < 0) | (free_hitrates > 1)):\n",
    "            # All values optimized.\n",
    "            break\n",
    "\n",
    "        max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n",
    "        fix_i.append(max_outbound_index)\n",
    "        differenc_set = np.setdiff1d(np.arange(db_object_count), fix_i)\n",
    "\n",
    "        # Pin the worst offender to the boundary it violated.\n",
    "        optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n",
    "\n",
    "        current_db_object_count -= 1\n",
    "        current_cache_size -= optimized_hitrates[max_outbound_index]\n",
    "    return optimized_hitrates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b6bf3329-3a63-4807-ab8b-8a54f824f47e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def objective_function(optimized_hitrates, c_f, c_delta, lambda_vals):\n",
    "    \"\"\"Sum over all objects of lambda * (1 - h) * c_f + 0.5 * h**2 * c_delta.\"\"\"\n",
    "    linear_term = lambda_vals * (1 - optimized_hitrates) * c_f\n",
    "    quadratic_term = 0.5 * np.power(optimized_hitrates, 2) * c_delta\n",
    "    return np.sum(linear_term + quadratic_term)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bd4536e9-273b-4f49-b06c-2f00605e0f7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the task to be parallelized\n",
    "def grid_search_task(db_object_count, cache_size, c_f, c_delta, db_object_counts, cache_sizes, c_f_values, c_delta_values, seed=None):\n",
    "    \"\"\"Score one parameter combination.\n",
    "\n",
    "    Parameters:\n",
    "    - db_object_count, cache_size, c_f, c_delta: The combination to evaluate.\n",
    "    - db_object_counts, cache_sizes, c_f_values, c_delta_values: Not used by the\n",
    "      task; kept so that existing calls keep working.\n",
    "    - seed (optional): Seed for a task-local np.random.default_rng. With None the\n",
    "      lambda values are drawn from the process-global np.random state.\n",
    "\n",
    "    Returns:\n",
    "    - None if db_object_count < cache_size, otherwise the tuple\n",
    "      (objective, db_object_count, cache_size, c_f, c_delta).\n",
    "    \"\"\"\n",
    "    if db_object_count < cache_size:\n",
    "        return None  # Skip this combination if db_object_count < cache_size\n",
    "\n",
    "    # Generate lambda_vals\n",
    "    if seed is None:\n",
    "        lambda_vals = np.random.zipf(ZIPF_CONSTANT, size=db_object_count)\n",
    "    else:\n",
    "        lambda_vals = np.random.default_rng(seed).zipf(ZIPF_CONSTANT, size=db_object_count)\n",
    "\n",
    "    # Call the optimization function\n",
    "    optimized_hitrates = optimize_hitrates(db_object_count, cache_size, c_f, c_delta, lambda_vals)\n",
    "\n",
    "    # Compute the objective function\n",
    "    objective = objective_function(optimized_hitrates, c_f, c_delta, lambda_vals)\n",
    "\n",
    "    return (objective, db_object_count, cache_size, c_f, c_delta)\n",
    "\n",
    "# Perform grid search with parallelization\n",
    "def grid_search(db_object_counts, cache_sizes, c_f_values, c_delta_values):\n",
    "    \"\"\"Run grid_search_task for every parameter combination via joblib.\n",
    "\n",
    "    Worker processes do not share the notebook's seeded global NumPy RNG, so\n",
    "    each task is handed its own seed [SEED, task_index]. The sampled lambda\n",
    "    values then depend only on the task, not on which worker runs it.\n",
    "\n",
    "    Returns:\n",
    "    - df (pandas DataFrame): One row per evaluated combination with the columns\n",
    "      Objective, db_object_count, cache_size, c_f (Miss Cost) and\n",
    "      c_delta (Refresh Cost). Skipped combinations are left out.\n",
    "    \"\"\"\n",
    "    combinations = itertools.product(db_object_counts, cache_sizes, c_f_values, c_delta_values)\n",
    "\n",
    "    # Use Parallel from joblib to parallelize the grid search\n",
    "    task_results = Parallel(n_jobs=-1, verbose=1)(\n",
    "        delayed(grid_search_task)(db_object_count, cache_size, c_f, c_delta, db_object_counts, cache_sizes, c_f_values, c_delta_values, seed=[SEED, task_index])\n",
    "        for task_index, (db_object_count, cache_size, c_f, c_delta) in enumerate(combinations)\n",
    "    )\n",
    "\n",
    "    # Collect valid results (skipped combinations come back as None)\n",
    "    results = [result for result in task_results if result is not None]\n",
    "\n",
    "    # Convert the results into a pandas DataFrame\n",
    "    df = pd.DataFrame(results, columns=[\"Objective\", \"db_object_count\", \"cache_size\", \"c_f (Miss Cost)\", \"c_delta (Refresh Cost)\"])\n",
    "\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a92c6772-6609-41a8-a3d1-4d640b69a864",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7.96 ms, sys: 21.6 ms, total: 29.6 ms\n",
      "Wall time: 27 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Grid search space: cache sizes and c_delta values are built by scaling the\n",
    "# object counts and the c_f values with every entry of test_ratios.\n",
    "test_ratios = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])\n",
    "db_object_count_values = np.round(np.array([10, 15, 30, 100, 200, 500]))\n",
    "cache_size_values = np.unique(np.round(np.array([db_object_count_values * ratio for ratio in test_ratios]).flatten()))\n",
    "c_f_values = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])\n",
    "c_delta_values = np.unique(np.array([c_f_values * ratio for ratio in test_ratios]).flatten())\n",
    "\n",
    "objective_result_file = \"./objective_grid-search_multi-core.csv\"\n",
    "\n",
    "# Reuse the CSV from an earlier run when it is present; otherwise run the\n",
    "# grid search and store its result.\n",
    "if os.path.isfile(objective_result_file):\n",
    "    results_df = pd.read_csv(objective_result_file)\n",
    "else:\n",
    "    results_df = grid_search(db_object_count_values, cache_size_values, c_f_values, c_delta_values)\n",
    "    results_df.to_csv(objective_result_file,index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45d7f86f-edee-4fc5-835f-1e311ab2e411",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphs",
   "language": "python",
   "name": "graphs"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/01_nb_cncf_optimization/nb_cost_optimization.ipynb
+++ b/01_nb_cncf_optimization/nb_cost_optimization.ipynb
@@ -1,566 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import random\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954",
   "metadata": {},
   "outputs": [],
   "source": [
    "DATABASE_OBJECT_COUNT = 100\n",
    "CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n",
    "ZIPF_CONSTANT = 2\n",
    "\n",
    "CACHE_MISS_COST = 2\n",
    "CACHE_REFRESH_COST = 1\n",
    "\n",
    "SEED = 42\n",
    "np.random.seed(SEED)\n",
    "random.seed(SEED)\n",
    "\n",
    "LAMBDA_VALUES = np.array([np.random.zipf(ZIPF_CONSTANT) for i in np.arange(1, DATABASE_OBJECT_COUNT + 1,1)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9cc83cf6-5c78-4f0d-b7cb-08cdb80c362e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LAMBDA_VALUES = np.array([0.03, 0.04,0.05,0.06,0.07,1,1.1,1.2,1.3,1.4,1.5])\n",
    "# DATABASE_OBJECT_COUNT = len(LAMBDA_VALUES)\n",
    "# CACHE_SIZE = 4.4\n",
    "# CACHE_MISS_COST = 7\n",
    "# CACHE_REFRESH_COST = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "3dc07233-0b56-4fee-a93b-212836c18b42",
   "metadata": {},
   "outputs": [],
   "source": [
    "db_object_count = DATABASE_OBJECT_COUNT\n",
    "cache_sz = CACHE_SIZE\n",
    "\n",
    "lambda_vals = LAMBDA_VALUES\n",
    "c_f = CACHE_MISS_COST\n",
    "c_delta = CACHE_REFRESH_COST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):\n",
    "    num = (db_object_count * c_f - cache_sz * c_delta)\n",
    "    denom = np.sum(1.0/lambda_vals)\n",
    "    return max(0, num/denom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def h_i_star(c_f, eta, lambda_vals, c_delta):\n",
    "    optimized_hitrate = (c_f - (eta/lambda_vals)) / c_delta\n",
    "    return optimized_hitrate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    lower_bound_violation =  hitrates[(hitrates < 0)]\n",
    "    upper_bound_violation = hitrates[(hitrates > 1)]\n",
    "    smallest_delta = np.abs(np.min(lower_bound_violation))\n",
    "    biggest_delta = np.max(upper_bound_violation) - 1\n",
    "    if smallest_delta > biggest_delta:\n",
    "        print(smallest_delta)\n",
    "        index = np.where(hitrates == np.min(local_hitrates))[0][0]\n",
    "        return index\n",
    "    else:\n",
    "        \n",
    "        index = np.where(hitrates == np.max(local_hitrates))[0][0]\n",
    "        return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9d774304-ae68-43b3-a76a-e970c06c5236",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
    "    outside_bounds = (hitrates < 0) | (hitrates > 1)\n",
    "    distances = np.where(outside_bounds, np.maximum(np.abs(hitrates - 0), np.abs(hitrates - 1)), -np.inf)\n",
    "    index = np.argmax(distances)\n",
    "    return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "19678083-15e1-439b-be8c-42033d501644",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1,  3,  1,  1,  2,  1,  5,  1,  1,  1,  2,  1,  1,  1,  2,  2,  1,\n",
       "        1,  3,  1,  1,  1,  1,  2,  1,  1,  1,  5,  1,  1,  1,  4,  1,  4,\n",
       "        1,  1,  1,  3,  8,  1,  4,  4,  2,  1,  1,  1, 10,  1,  1,  1,  5,\n",
       "        9,  1,  1,  1,  1,  1, 17,  2,  1, 26,  1,  1,  2,  1, 10,  1, 69,\n",
       "        1,  1,  2,  1,  1,  1,  3,  2,  2,  3, 15,  1,  1,  5,  2,  1,  1,\n",
       "        2,  1,  2,  1,  1,  2,  2,  3,  1,  2,  1,  1, 37,  4,  2])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lambda_vals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.1159070575516945\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([-0.11590706,  1.29469765, -0.11590706, -0.11590706,  0.94204647,\n",
       "       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  0.94204647,\n",
       "       -0.11590706, -0.11590706, -0.11590706,  0.94204647,  0.94204647,\n",
       "       -0.11590706, -0.11590706,  1.29469765, -0.11590706, -0.11590706,\n",
       "       -0.11590706, -0.11590706,  0.94204647, -0.11590706, -0.11590706,\n",
       "       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  1.47102324,\n",
       "       -0.11590706,  1.47102324, -0.11590706, -0.11590706, -0.11590706,\n",
       "        1.29469765,  1.73551162, -0.11590706,  1.47102324,  1.47102324,\n",
       "        0.94204647, -0.11590706, -0.11590706, -0.11590706,  1.78840929,\n",
       "       -0.11590706, -0.11590706, -0.11590706,  1.76489922, -0.11590706,\n",
       "       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  1.87553488,\n",
       "        0.94204647, -0.11590706,  1.91861896, -0.11590706, -0.11590706,\n",
       "        0.94204647, -0.11590706,  1.78840929, -0.11590706,  1.96933468,\n",
       "       -0.11590706, -0.11590706,  0.94204647, -0.11590706, -0.11590706,\n",
       "       -0.11590706,  1.29469765,  0.94204647,  0.94204647,  1.29469765,\n",
       "        1.85893953, -0.11590706, -0.11590706,  0.94204647, -0.11590706,\n",
       "       -0.11590706,  0.94204647, -0.11590706,  0.94204647, -0.11590706,\n",
       "       -0.11590706,  0.94204647,  0.94204647,  1.29469765, -0.11590706,\n",
       "        0.94204647, -0.11590706, -0.11590706,  1.94281332,  1.47102324,\n",
       "        0.94204647])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals[lambda_vals != lambda_vals[6]])\n",
    "print(eta)\n",
    "optimized_hitrates = (c_f - eta / lambda_vals[lambda_vals != lambda_vals[6]]) / c_delta\n",
    "optimized_hitrates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "05b17074-719f-4bca-8434-2aaee26094d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>96.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.437500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.726101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-0.115907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.115907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-0.115907</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.942046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.969335</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               0\n",
       "count  96.000000\n",
       "mean    0.437500\n",
       "std     0.726101\n",
       "min    -0.115907\n",
       "25%    -0.115907\n",
       "50%    -0.115907\n",
       "75%     0.942046\n",
       "max     1.969335"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(optimized_hitrates).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimized: 67 1.97 // [ 1.79077042 -0.09229584  1.         -0.09229584 -0.09229584]\n",
      "Optimized: 97 1.94 // [-0.07876743 -0.07876743  1.          1.48030814  0.96061628]\n",
      "Optimized: 60 1.92 // [ 0.96720258 -0.06559484  1.         -0.06559484 -0.06559484]\n",
      "Optimized: 57 1.88 // [-0.05274002 -0.05274002  1.          0.97362999 -0.05274002]\n",
      "Optimized: 78 1.86 // [ 0.97977406  1.31984937  1.         -0.04045188 -0.04045188]\n",
      "Optimized: 46 1.80 // [-0.02836604 -0.02836604  1.         -0.02836604 -0.02836604]\n",
      "Optimized: 65 1.80 // [ 0.99140044 -0.01719911  1.         -0.01719911  1.        ]\n",
      "Optimized: 51 1.78 // [-0.00600086  1.59879983  1.         -0.00600086 -0.00600086]\n",
      "Optimized: 38 1.75 // [0.00491746 1.33497249 1.         0.00491746 1.50122936]\n",
      "Optimized: 6 1.60 // [1.00774103 0.01548205 1.         0.01548205 0.01548205]\n",
      "Optimized: 27 1.60 // [0.02399435 0.02399435 1.         0.02399435 0.02399435]\n",
      "Optimized: 50 1.61 // [0.03255485 0.03255485 1.         1.         0.03255485]\n",
      "Optimized: 81 1.61 // [0.04116395 0.04116395 1.         1.02058197 0.04116395]\n",
      "Optimized: 31 1.51 // [0.04982206 0.04982206 1.         0.04982206 1.51245552]\n",
      "Optimized: 33 1.51 // [1.         0.05714286 1.         0.05714286 0.05714286]\n",
      "Optimized: 40 1.52 // [1.         0.06451613 1.         1.51612903 1.03225806]\n",
      "Optimized: 41 1.52 // [0.07194245 1.         1.         1.03597122 0.07194245]\n",
      "Optimized: 98 1.52 // [0.07942238 1.         1.         1.03971119]\n",
      "Optimized: 1 1.36 // []\n",
      "Optimized: 18 1.36 // [0.09223301 0.09223301 1.         0.09223301 0.09223301]\n",
      "Optimized: 37 1.37 // [0.09756098 0.09756098 1.         1.         0.09756098]\n",
      "Optimized: 74 1.37 // [0.10294118 0.10294118 1.         1.05147059 1.05147059]\n",
      "Optimized: 77 1.37 // [1.05418719 1.05418719 1.         1.         0.10837438]\n",
      "Optimized: 92 1.37 // [1.05693069 1.05693069 1.         0.11386139 1.05693069]\n",
      "Optimized: 4 1.06 // [0.11940299 0.11940299 1.         0.11940299 1.        ]\n",
      "Optimized: 10 1.06 // [0.12030075 0.12030075 1.         0.12030075 0.12030075]\n",
      "Optimized: 14 1.06 // [0.12121212 0.12121212 1.         1.06060606 0.12121212]\n",
      "Optimized: 15 1.06 // [0.1221374 1.        1.        0.1221374 0.1221374]\n",
      "Optimized: 23 1.06 // [0.12307692 0.12307692 1.         0.12307692 0.12307692]\n",
      "Optimized: 42 1.06 // [1.         1.         1.         0.12403101 0.12403101]\n",
      "Optimized: 58 1.06 // [0.125 1.    1.    0.125 1.   ]\n",
      "Optimized: 63 1.06 // [0.12598425 0.12598425 1.         0.12598425 1.        ]\n",
      "Optimized: 70 1.06 // [0.12698413 0.12698413 1.         0.12698413 0.12698413]\n",
      "Optimized: 75 1.06 // [0.128 1.    1.    1.064 1.   ]\n",
      "Optimized: 76 1.06 // [1. 1. 1. 1. 1.]\n",
      "Optimized: 82 1.07 // [0.1300813 1.        1.        0.1300813 0.1300813]\n",
      "Optimized: 85 1.07 // [0.13114754 0.13114754 1.         0.13114754 1.06557377]\n",
      "Optimized: 87 1.07 // [1.        0.1322314 1.        0.1322314 0.1322314]\n",
      "Optimized: 90 1.07 // [0.13333333 0.13333333 1.         1.06666667 1.        ]\n",
      "Optimized: 91 1.07 // [0.13445378 1.         1.         1.         0.13445378]\n",
      "Optimized: 94 1.07 // [1.         0.13559322 1.         0.13559322 0.13559322]\n",
      "Optimized: 99 1.07 // [1. 1. 1.]\n",
      "All values optimized.\n"
     ]
    }
   ],
   "source": [
    "# Iteratively compute the optimal hit probability of every object.\n",
    "#\n",
    "# Inputs (read from earlier cells of this notebook):\n",
    "# - lambda_vals (numpy array): Request rates for each item.\n",
    "# - CACHE_SIZE: Total cache size.\n",
    "# - c_f: Fetching linear cost (cache miss cost).\n",
    "# - c_delta: Age linear cost.\n",
    "#\n",
    "# Result:\n",
    "# - optimized_hitrates (numpy array): Optimal hit probabilities for each item.\n",
    "#\n",
    "# Each pass computes eta via eta_star for the objects still in the pool\n",
    "# (differenc_set) and derives their hit rates from it. If any of them falls\n",
    "# outside [0, 1], the index picked by get_index_of_furthest_hitrate_from_boundary\n",
    "# (defined in an earlier cell) is pinned to 0 or 1, removed from the pool, and\n",
    "# the loop runs again with the reduced object count and cache size.\n",
    "optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)\n",
    "current_db_object_count = DATABASE_OBJECT_COUNT\n",
    "current_cache_size = CACHE_SIZE\n",
    "\n",
    "differenc_set = np.arange(DATABASE_OBJECT_COUNT)  # indices still being optimized\n",
    "fix_i = []  # indices whose hit rate is already pinned to 0 or 1\n",
    "\n",
    "while True:\n",
    "    if current_db_object_count == 0:\n",
    "        print(\"No objects left to optimize.\")\n",
    "        if current_cache_size > 0:\n",
    "            print(\"Add obj with optimized hitrate 0 and add them to optimization pool for re-optimization.\")\n",
    "            # Redistribute unused cache size among items with zero hit probability\n",
    "            differenc_set = np.where(optimized_hitrates == 0)[0]\n",
    "            fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set).tolist()\n",
    "            current_db_object_count = len(differenc_set)\n",
    "            if current_db_object_count == 0:\n",
    "                # No object has hit rate 0, so nothing can absorb the leftover cache.\n",
    "                # Without this exit the loop would re-enter this branch forever.\n",
    "                print(\"No objects with hitrate 0 left; stopping with unused cache.\")\n",
    "                break\n",
    "            continue\n",
    "        else:\n",
    "            # Reset whatever is still in the pool\n",
    "            optimized_hitrates[differenc_set] = 0\n",
    "            break\n",
    "    # Compute Lagrangian multiplier and optimal hit probabilities\n",
    "    pool_lambdas = lambda_vals[differenc_set]\n",
    "    eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, pool_lambdas)\n",
    "    optimized_hitrates[differenc_set] = (c_f - eta / pool_lambdas) / c_delta\n",
    "    if eta < 0:\n",
    "        # NOTE(review): eta_star as defined near the top of this notebook clamps its\n",
    "        # result to >= 0, so this branch looks unreachable - confirm before relying on it.\n",
    "        print(\"eta was negative.\")\n",
    "        current_cache_size = current_db_object_count * c_f / c_delta  # Adjust cache size for next iteration\n",
    "        continue\n",
    "\n",
    "    # Done once every hit rate in the pool lies inside [0, 1]\n",
    "    pool_hitrates = optimized_hitrates[differenc_set]\n",
    "    if not np.any((pool_hitrates < 0) | (pool_hitrates > 1)):\n",
    "        print(\"All values optimized.\")\n",
    "        break\n",
    "\n",
    "    # Pin one out-of-range object to its boundary and take it out of the pool\n",
    "    max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n",
    "    fix_i.append(max_outbound_index)\n",
    "    differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n",
    "\n",
    "    old_hitrate = optimized_hitrates[max_outbound_index]\n",
    "    optimized_hitrates[max_outbound_index] = (1 if old_hitrate > 1 else 0)\n",
    "\n",
    "    # Clamp the window start: a negative start would wrap around and print an empty slice\n",
    "    window_start = max(0, max_outbound_index - 2)\n",
    "    print(f\"Optimized: {max_outbound_index} {old_hitrate:.2f} // {optimized_hitrates[window_start:max_outbound_index + 3]}\")\n",
    "\n",
    "    # The pinned object no longer counts, and a hit rate of 1 uses one cache slot\n",
    "    current_db_object_count -= 1\n",
    "    current_cache_size -= optimized_hitrates[max_outbound_index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f559ee7a-be2f-4076-b01c-f08950ad5a88",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.13793103, 1.        , 0.13793103, 0.13793103, 1.        ,\n",
       "       0.13793103, 1.        , 0.13793103, 0.13793103, 0.13793103,\n",
       "       1.        , 0.13793103, 0.13793103, 0.13793103, 1.        ,\n",
       "       1.        , 0.13793103, 0.13793103, 1.        , 0.13793103,\n",
       "       0.13793103, 0.13793103, 0.13793103, 1.        , 0.13793103,\n",
       "       0.13793103, 0.13793103, 1.        , 0.13793103, 0.13793103,\n",
       "       0.13793103, 1.        , 0.13793103, 1.        , 0.13793103,\n",
       "       0.13793103, 0.13793103, 1.        , 1.        , 0.13793103,\n",
       "       1.        , 1.        , 1.        , 0.13793103, 0.13793103,\n",
       "       0.13793103, 1.        , 0.13793103, 0.13793103, 0.13793103,\n",
       "       1.        , 1.        , 0.13793103, 0.13793103, 0.13793103,\n",
       "       0.13793103, 0.13793103, 1.        , 1.        , 0.13793103,\n",
       "       1.        , 0.13793103, 0.13793103, 1.        , 0.13793103,\n",
       "       1.        , 0.13793103, 1.        , 0.13793103, 0.13793103,\n",
       "       1.        , 0.13793103, 0.13793103, 0.13793103, 1.        ,\n",
       "       1.        , 1.        , 1.        , 1.        , 0.13793103,\n",
       "       0.13793103, 1.        , 1.        , 0.13793103, 0.13793103,\n",
       "       1.        , 0.13793103, 1.        , 0.13793103, 0.13793103,\n",
       "       1.        , 1.        , 1.        , 0.13793103, 1.        ,\n",
       "       0.13793103, 0.13793103, 1.        , 1.        , 1.        ])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "optimized_hitrates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8b2d3cea-1cc0-476e-92bf-2ac4344a9b1b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.427625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.137931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.137931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.137931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                0\n",
       "count  100.000000\n",
       "mean     0.500000\n",
       "std      0.427625\n",
       "min      0.137931\n",
       "25%      0.137931\n",
       "50%      0.137931\n",
       "75%      1.000000\n",
       "max      1.000000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(optimized_hitrates).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphs",
   "language": "python",
   "name": "graphs"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
`@@ -1 +1,2 @@`
	`.ipynb_checkpoints/`	`.ipynb_checkpoints/`
		`*.csv`