From 799f7b78d4eb17a5b06da3366ec477138eb40735 Mon Sep 17 00:00:00 2001 From: Tuan-Dat Tran Date: Tue, 3 Dec 2024 10:12:07 +0100 Subject: [PATCH] fix(h_i_opt calculation): Fix for calculation of optimized hitrate: - Assignment of current_cache_size instead of decrease Signed-off-by: Tuan-Dat Tran --- .../gen_nb_cost_optimization-checkpoint.ipynb | 473 ------------------ .../nb_cost_optimization-checkpoint.ipynb | 391 +++++++++++++-- .../gen_nb_cost_optimization.ipynb | 473 ------------------ .../nb_cost_optimization.ipynb | 391 +++++++++++++-- 4 files changed, 706 insertions(+), 1022 deletions(-) delete mode 100644 01_nb_cncf_optimization/.ipynb_checkpoints/gen_nb_cost_optimization-checkpoint.ipynb delete mode 100644 01_nb_cncf_optimization/gen_nb_cost_optimization.ipynb diff --git a/01_nb_cncf_optimization/.ipynb_checkpoints/gen_nb_cost_optimization-checkpoint.ipynb b/01_nb_cncf_optimization/.ipynb_checkpoints/gen_nb_cost_optimization-checkpoint.ipynb deleted file mode 100644 index 4675328..0000000 --- a/01_nb_cncf_optimization/.ipynb_checkpoints/gen_nb_cost_optimization-checkpoint.ipynb +++ /dev/null @@ -1,473 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954", - "metadata": {}, - "outputs": [], - "source": [ - "DATABASE_OBJECT_COUNT = 100\n", - "CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n", - "ZIPF_CONSTANT = 2\n", - "\n", - "CACHE_MISS_COST = 2\n", - "CACHE_REFRESH_COST = 1\n", - "\n", - "SEED = 42\n", - "np.random.seed(SEED)\n", - "random.seed(SEED)\n", - "\n", - "LAMBDA_VALUES = np.array([np.random.zipf(ZIPF_CONSTANT) for i in np.arange(1, DATABASE_OBJECT_COUNT + 1,1)])" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9cc83cf6-5c78-4f0d-b7cb-08cdb80c362e", - "metadata": {}, - "outputs": [], - "source": [ - "# LAMBDA_VALUES = np.array([0.03, 0.04,0.05,0.06,0.07,1,1.1,1.2,1.3,1.4,1.5])\n", - "# DATABASE_OBJECT_COUNT = len(LAMBDA_VALUES)\n", - "# CACHE_SIZE = 4.4\n", - "# CACHE_MISS_COST = 7\n", - "# CACHE_REFRESH_COST = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3dc07233-0b56-4fee-a93b-212836c18b42", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "\n", - "lambda_vals = LAMBDA_VALUES\n", - "c_f = CACHE_MISS_COST\n", - "c_delta = CACHE_REFRESH_COST" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef", - "metadata": {}, - "outputs": [], - "source": [ - "def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):\n", - " num = (db_object_count * c_f - cache_sz * c_delta)\n", - " denom = np.sum(1.0/lambda_vals)\n", - " return max(0, num/denom)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8", - "metadata": {}, - "outputs": [], - "source": [ - "def h_i_star(c_f, eta, lambda_vals, c_delta):\n", - " optimized_hitrate = (c_f - (eta/lambda_vals)) / c_delta\n", - " return optimized_hitrate" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6", - "metadata": {}, - "outputs": [], - "source": [ - "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", - " local_hitrates = hitrates[(hitrates < 0) | (hitrates > 1)]\n", - " smallest_delta = np.abs(np.min(local_hitrates))\n", - " biggest_delta = np.max(local_hitrates) - 1\n", - " if smallest_delta > biggest_delta:\n", - " index = np.where(hitrates == np.min(local_hitrates))[0][0]\n", - " return index\n", - " else:\n", - " index = np.where(hitrates == np.max(local_hitrates))[0][0]\n", - " return index" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "55b251f8-97ca-49a8-9ec6-be77dc1e49b2", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "Perform theoretical optimization to compute optimal hit probabilities.\n", - "\n", - "Parameters:\n", - "- lambda_vals (numpy array): Request rates for each item.\n", - "- B (float): Total cache size.\n", - "- c_f (float): Fetching linear cost (cache miss cost).\n", - "- c_delta (float): Age linear cost.\n", - "\n", - "Returns:\n", - "- h_opt (numpy array): Optimal hit probabilities for each item.\n", - "\"\"\"\n", - "optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)\n", - "differenc_set = np.arange(DATABASE_OBJECT_COUNT)\n", - "fix_i = []\n", - "current_db_objects = DATABASE_OBJECT_COUNT\n", - "current_cache_size = CACHE_SIZE\n", - "\n", - "while True:\n", - " if current_db_objects == 0:\n", - " # Handle special case: no items left to optimize\n", - " if current_cache_size > 0:\n", - " # Redistribute unused cache size among items with zero hit probability\n", - " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", - " current_db_objects = len(differenc_set)\n", - " continue\n", - " else:\n", - " optimized_hitrates[differenc_set] = 0\n", - " break\n", - " # Compute Lagrangian multiplier and optimal hit probabilities\n", - " mu = max(0, (current_db_objects * c_f - current_cache_size * c_delta) / np.sum(1.0 / lambda_vals[differenc_set]))\n", - " eta = eta_star(current_db_objects, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", - " assert(mu == eta)\n", - " optimized_hitrates[differenc_set] = (c_f - mu / lambda_vals[differenc_set]) / c_delta\n", - " # print(optimized_hitrates)\n", - " # Handle the case where mu < 0\n", - " if mu < 0:\n", - " current_cache_size = current_db_objects * c_f / c_delta # Adjust cache size for next iteration\n", - " continue\n", - " # Check for constraint violations\n", - " larger_i = np.where(optimized_hitrates > 1)[0] # h > 1\n", - " smaller_i = np.where(optimized_hitrates < 0)[0] # h < 0\n", - " # If no violations, optimization is complete\n", - " break_con = len(smaller_i) == 0 and len(larger_i) == 0\n", - " break_con1 = len((optimized_hitrates[differenc_set])[((optimized_hitrates[differenc_set]) < 0) | ((optimized_hitrates[differenc_set])> 1)]) == 0\n", - " assert(break_con == break_con1)\n", - " if break_con:\n", - " break\n", - " # Find the furthest violating item\n", - " min_viol, min_viol_i = (0, -1)\n", - " if len(smaller_i) > 0:\n", - " min_viol_i = np.argmin(optimized_hitrates)\n", - " min_viol = optimized_hitrates[min_viol_i]\n", - " max_viol, max_viol_i = (0, -1)\n", - " if len(larger_i) > 0:\n", - " larger = optimized_hitrates - 1\n", - " max_viol_i = np.argmax(larger)\n", - " max_viol = larger[max_viol_i]\n", - " # Compare the furthest violations and adjust accordingly\n", - " viol_i = min_viol_i\n", - " min_viol_flag = True # True if furthest is from the left boundary\n", - " if max_viol > abs(min_viol):\n", - " viol_i = max_viol_i\n", - " min_viol_flag = False \n", - " index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " if viol_i != index:\n", - " print(optimized_hitrates[viol_i])\n", - " print(optimized_hitrates[index])\n", - " assert(viol_i == index)\n", - " if min_viol_flag:\n", - " optimized_hitrates[viol_i] = 0\n", - " else:\n", - " optimized_hitrates[viol_i] = min(1, current_cache_size)\n", - "\n", - " # Update parameters for next iteration\n", - " current_cache_size =- optimized_hitrates[viol_i]\n", - " fix_i.append(viol_i)\n", - " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_objects = DATABASE_OBJECT_COUNT - len(fix_i)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "efa16eaf-a10b-4927-99cd-190e2ffe1d1e", - "metadata": {}, - "outputs": [], - "source": [ - "a = optimized_hitrates\n", - "b = differenc_set" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All values optimized.\n" - ] - } - ], - "source": [ - "\"\"\"\n", - "Perform theoretical optimization to compute optimal hit probabilities.\n", - "\n", - "Parameters:\n", - "- lambda_vals (numpy array): Request rates for each item.\n", - "- B (float): Total cache size.\n", - "- c_f (float): Fetching linear cost (cache miss cost).\n", - "- c_delta (float): Age linear cost.\n", - "\n", - "Returns:\n", - "- h_opt (numpy array): Optimal hit probabilities for each item.\n", - "\"\"\"\n", - "optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)\n", - "differenc_set = np.arange(DATABASE_OBJECT_COUNT)\n", - "fix_i = []\n", - "current_db_objects = DATABASE_OBJECT_COUNT\n", - "current_cache_size = CACHE_SIZE\n", - "\n", - "while True:\n", - " if current_db_objects == 0:\n", - " # Handle special case: no items left to optimize\n", - " if current_cache_size > 0:\n", - " # Redistribute unused cache size among items with zero hit probability\n", - " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", - " current_db_objects = len(differenc_set)\n", - " continue\n", - " else:\n", - " optimized_hitrates[differenc_set] = 0\n", - " break\n", - " # Compute Lagrangian multiplier and optimal hit probabilities\n", - " eta = eta_star(current_db_objects, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", - " optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta\n", - "\n", - " if mu < 0:\n", - " current_cache_size = current_db_objects * c_f / c_delta # Adjust cache size for next iteration\n", - " continue\n", - " \n", - " if len((optimized_hitrates[differenc_set])[((optimized_hitrates[differenc_set]) < 0) | ((optimized_hitrates[differenc_set])> 1)]) == 0:\n", - " print(\"All values optimized.\")\n", - " break\n", - " max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", - "\n", - " current_cache_size =- optimized_hitrates[max_outbound_index]\n", - " fix_i.append(max_outbound_index)\n", - " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_objects = DATABASE_OBJECT_COUNT - len(fix_i)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "4f64253f-b389-4be9-b403-08027d480121", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,\n", - " 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "optimized_hitrates" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "17d818db-ec88-4c26-92af-6d74862525d9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0.43902439, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0.43902439, 0. , 0. ,\n", - " 0. , 0.04878049, 0. , 0.04878049, 0. ,\n", - " 0. , 0. , 0. , -0. , 0. ,\n", - " 0.04878049, 0.04878049, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0.43902439, 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " -0. , 0. , 0. , 0. , 0. ,\n", - " -0. , 0. , 1. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , -0. , 0. ,\n", - " 0. , 0.43902439, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0.04878049, 0. ])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "791b3f96-527a-489e-970e-c92ec950177f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 6, 27, 31, 33, 40, 41, 50, 81, 98])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "b" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c22fa973-432a-4c05-89bf-2a6ea82ae3d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 6, 27, 50, 81])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "differenc_set" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "898e1266-5aaa-46f4-ac0f-c7807ac2b6bb", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "loop_lambda = lambda_vals\n", - "\n", - "non_optimized_values = np.arange(db_object_count)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8cc9b8a9-f7ae-48fc-adfb-ac4b7a4998f1", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "loop_lambda = lambda_vals\n", - "\n", - "optimized_hitrate = np.zeros(db_object_count)\n", - "non_optimized_values = np.arange(db_object_count)\n", - "optimized_value = {}\n", - "\n", - "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, loop_lambda[non_optimized_values])\n", - "optimized_hitrate[non_optimized_values] = h_i_star(c_f, eta, loop_lambda[non_optimized_values], c_delta)\n", - "\n", - "max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrate)\n", - "optimized_value[max_outbound_index] = (1 if optimized_hitrate[max_outbound_index] > 1 else 0)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "cbcf3592-fcf2-4f54-a3cd-761097c12972", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{67: 1}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "optimized_value" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db732331-1d09-45b7-915c-73daa270b5e2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "graphs", - "language": "python", - "name": "graphs" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb b/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb index 725cd1d..88de62d 100644 --- a/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb +++ b/01_nb_cncf_optimization/.ipynb_checkpoints/nb_cost_optimization-checkpoint.ipynb @@ -9,7 +9,8 @@ "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import random" + "import random\n", + "import pandas as pd" ] }, { @@ -23,7 +24,7 @@ "CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n", "ZIPF_CONSTANT = 2\n", "\n", - "CACHE_MISS_COST = 1\n", + "CACHE_MISS_COST = 2\n", "CACHE_REFRESH_COST = 1\n", "\n", "SEED = 42\n", @@ -95,62 +96,201 @@ "outputs": [], "source": [ "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", - " local_hitrates = hitrates[(hitrates < 0) | (hitrates > 1)]\n", - " smallest_delta = np.abs(np.min(local_hitrates))\n", - " biggest_delta = np.max(local_hitrates) - 1\n", + " lower_bound_violation = hitrates[(hitrates < 0)]\n", + " upper_bound_violation = hitrates[(hitrates > 1)]\n", + " smallest_delta = np.abs(np.min(lower_bound_violation))\n", + " biggest_delta = np.max(upper_bound_violation) - 1\n", " if smallest_delta > biggest_delta:\n", + " print(smallest_delta)\n", " index = np.where(hitrates == np.min(local_hitrates))[0][0]\n", " return index\n", " else:\n", + " \n", " index = np.where(hitrates == np.max(local_hitrates))[0][0]\n", " return index" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf", + "execution_count": 8, + "id": "9d774304-ae68-43b3-a76a-e970c06c5236", + "metadata": {}, + "outputs": [], + "source": [ + "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", + " outside_bounds = (hitrates < 0) | (hitrates > 1)\n", + " distances = np.where(outside_bounds, np.maximum(np.abs(hitrates - 0), np.abs(hitrates - 1)), -np.inf)\n", + " index = np.argmax(distances)\n", + " return index" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "19678083-15e1-439b-be8c-42033d501644", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([0.30256805, 0.76752268, 0.30256805, 0.30256805, 0.65128403,\n", - " 0.30256805, 0.86051361, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.65128403,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.76752268, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.86051361, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.82564201, 0.30256805, 0.82564201, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.76752268, 0.91282101, 0.30256805,\n", - " 0.82564201, 0.82564201, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.93025681, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.86051361, 0.92250756, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.95897459, 0.65128403, 0.30256805,\n", - " 0.97317569, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n", - " 0.93025681, 0.30256805, 0.98989229, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.76752268,\n", - " 0.65128403, 0.65128403, 0.76752268, 0.95350454, 0.30256805,\n", - " 0.30256805, 0.86051361, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.65128403, 0.76752268, 0.30256805, 0.65128403,\n", - " 0.30256805, 0.30256805, 0.98115049, 0.82564201, 0.65128403])" + "array([ 1, 3, 1, 1, 2, 1, 5, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1,\n", + " 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 5, 1, 1, 1, 4, 1, 4,\n", + " 1, 1, 1, 3, 8, 1, 4, 4, 2, 1, 1, 1, 10, 1, 1, 1, 5,\n", + " 9, 1, 1, 1, 1, 1, 17, 2, 1, 26, 1, 1, 2, 1, 10, 1, 69,\n", + " 1, 1, 2, 1, 1, 1, 3, 2, 2, 3, 15, 1, 1, 5, 2, 1, 1,\n", + " 2, 1, 2, 1, 1, 2, 2, 3, 1, 2, 1, 1, 37, 4, 2])" ] }, - "execution_count": 14, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals)\n", - "optimized_hitrates = (c_f - eta / lambda_vals) / c_delta\n", + "lambda_vals" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.1159070575516945\n" + ] + }, + { + "data": { + "text/plain": [ + "array([-0.11590706, 1.29469765, -0.11590706, -0.11590706, 0.94204647,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 0.94204647,\n", + " -0.11590706, -0.11590706, -0.11590706, 0.94204647, 0.94204647,\n", + " -0.11590706, -0.11590706, 1.29469765, -0.11590706, -0.11590706,\n", + " -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.47102324,\n", + " -0.11590706, 1.47102324, -0.11590706, -0.11590706, -0.11590706,\n", + " 1.29469765, 1.73551162, -0.11590706, 1.47102324, 1.47102324,\n", + " 0.94204647, -0.11590706, -0.11590706, -0.11590706, 1.78840929,\n", + " -0.11590706, -0.11590706, -0.11590706, 1.76489922, -0.11590706,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.87553488,\n", + " 0.94204647, -0.11590706, 1.91861896, -0.11590706, -0.11590706,\n", + " 0.94204647, -0.11590706, 1.78840929, -0.11590706, 1.96933468,\n", + " -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n", + " -0.11590706, 1.29469765, 0.94204647, 0.94204647, 1.29469765,\n", + " 1.85893953, -0.11590706, -0.11590706, 0.94204647, -0.11590706,\n", + " -0.11590706, 0.94204647, -0.11590706, 0.94204647, -0.11590706,\n", + " -0.11590706, 0.94204647, 0.94204647, 1.29469765, -0.11590706,\n", + " 0.94204647, -0.11590706, -0.11590706, 1.94281332, 1.47102324,\n", + " 0.94204647])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals[lambda_vals != lambda_vals[6]])\n", + "print(eta)\n", + "optimized_hitrates = (c_f - eta / lambda_vals[lambda_vals != lambda_vals[6]]) / c_delta\n", "optimized_hitrates" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, + "id": "05b17074-719f-4bca-8434-2aaee26094d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
count96.000000
mean0.437500
std0.726101
min-0.115907
25%-0.115907
50%-0.115907
75%0.942046
max1.969335
\n", + "
" + ], + "text/plain": [ + " 0\n", + "count 96.000000\n", + "mean 0.437500\n", + "std 0.726101\n", + "min -0.115907\n", + "25% -0.115907\n", + "50% -0.115907\n", + "75% 0.942046\n", + "max 1.969335" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(optimized_hitrates).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c", "metadata": { "scrolled": true @@ -160,6 +300,48 @@ "name": "stdout", "output_type": "stream", "text": [ + "Optimized: 67 1.97 // [ 1.79077042 -0.09229584 1. -0.09229584 -0.09229584]\n", + "Optimized: 97 1.94 // [-0.07876743 -0.07876743 1. 1.48030814 0.96061628]\n", + "Optimized: 60 1.92 // [ 0.96720258 -0.06559484 1. -0.06559484 -0.06559484]\n", + "Optimized: 57 1.88 // [-0.05274002 -0.05274002 1. 0.97362999 -0.05274002]\n", + "Optimized: 78 1.86 // [ 0.97977406 1.31984937 1. -0.04045188 -0.04045188]\n", + "Optimized: 46 1.80 // [-0.02836604 -0.02836604 1. -0.02836604 -0.02836604]\n", + "Optimized: 65 1.80 // [ 0.99140044 -0.01719911 1. -0.01719911 1. ]\n", + "Optimized: 51 1.78 // [-0.00600086 1.59879983 1. -0.00600086 -0.00600086]\n", + "Optimized: 38 1.75 // [0.00491746 1.33497249 1. 0.00491746 1.50122936]\n", + "Optimized: 6 1.60 // [1.00774103 0.01548205 1. 0.01548205 0.01548205]\n", + "Optimized: 27 1.60 // [0.02399435 0.02399435 1. 0.02399435 0.02399435]\n", + "Optimized: 50 1.61 // [0.03255485 0.03255485 1. 1. 0.03255485]\n", + "Optimized: 81 1.61 // [0.04116395 0.04116395 1. 1.02058197 0.04116395]\n", + "Optimized: 31 1.51 // [0.04982206 0.04982206 1. 0.04982206 1.51245552]\n", + "Optimized: 33 1.51 // [1. 0.05714286 1. 0.05714286 0.05714286]\n", + "Optimized: 40 1.52 // [1. 0.06451613 1. 1.51612903 1.03225806]\n", + "Optimized: 41 1.52 // [0.07194245 1. 1. 1.03597122 0.07194245]\n", + "Optimized: 98 1.52 // [0.07942238 1. 1. 1.03971119]\n", + "Optimized: 1 1.36 // []\n", + "Optimized: 18 1.36 // [0.09223301 0.09223301 1. 0.09223301 0.09223301]\n", + "Optimized: 37 1.37 // [0.09756098 0.09756098 1. 1. 0.09756098]\n", + "Optimized: 74 1.37 // [0.10294118 0.10294118 1. 1.05147059 1.05147059]\n", + "Optimized: 77 1.37 // [1.05418719 1.05418719 1. 1. 0.10837438]\n", + "Optimized: 92 1.37 // [1.05693069 1.05693069 1. 0.11386139 1.05693069]\n", + "Optimized: 4 1.06 // [0.11940299 0.11940299 1. 0.11940299 1. ]\n", + "Optimized: 10 1.06 // [0.12030075 0.12030075 1. 0.12030075 0.12030075]\n", + "Optimized: 14 1.06 // [0.12121212 0.12121212 1. 1.06060606 0.12121212]\n", + "Optimized: 15 1.06 // [0.1221374 1. 1. 0.1221374 0.1221374]\n", + "Optimized: 23 1.06 // [0.12307692 0.12307692 1. 0.12307692 0.12307692]\n", + "Optimized: 42 1.06 // [1. 1. 1. 0.12403101 0.12403101]\n", + "Optimized: 58 1.06 // [0.125 1. 1. 0.125 1. ]\n", + "Optimized: 63 1.06 // [0.12598425 0.12598425 1. 0.12598425 1. ]\n", + "Optimized: 70 1.06 // [0.12698413 0.12698413 1. 0.12698413 0.12698413]\n", + "Optimized: 75 1.06 // [0.128 1. 1. 1.064 1. ]\n", + "Optimized: 76 1.06 // [1. 1. 1. 1. 1.]\n", + "Optimized: 82 1.07 // [0.1300813 1. 1. 0.1300813 0.1300813]\n", + "Optimized: 85 1.07 // [0.13114754 0.13114754 1. 0.13114754 1.06557377]\n", + "Optimized: 87 1.07 // [1. 0.1322314 1. 0.1322314 0.1322314]\n", + "Optimized: 90 1.07 // [0.13333333 0.13333333 1. 1.06666667 1. ]\n", + "Optimized: 91 1.07 // [0.13445378 1. 1. 1. 0.13445378]\n", + "Optimized: 94 1.07 // [1. 0.13559322 1. 0.13559322 0.13559322]\n", + "Optimized: 99 1.07 // [1. 1. 1.]\n", "All values optimized.\n" ] } @@ -188,18 +370,19 @@ " if current_db_object_count == 0:\n", " print(\"No objects left to optimize.\")\n", " if current_cache_size > 0:\n", + " print(\"Add obj with optimized hitrate 0 and add them to optimization pool for re-optimization.\")\n", " # Redistribute unused cache size among items with zero hit probability\n", " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", + " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set).tolist()\n", " current_db_object_count = len(differenc_set)\n", " continue\n", " else:\n", + " \"Reset\"\n", " optimized_hitrates[differenc_set] = 0\n", " break\n", " # Compute Lagrangian multiplier and optimal hit probabilities\n", " eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", " optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta\n", - "\n", " if eta < 0:\n", " print(\"eta was negative.\")\n", " current_cache_size = current_db_object_count * c_f / c_delta # Adjust cache size for next iteration\n", @@ -210,18 +393,150 @@ " break\n", " \n", " max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", - "\n", - " current_cache_size =- optimized_hitrates[max_outbound_index]\n", " fix_i.append(max_outbound_index)\n", " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_object_count -= 1" + "\n", + " old_hitrate = optimized_hitrates[max_outbound_index]\n", + " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", + " \n", + " print(f\"Optimized: {max_outbound_index} {old_hitrate:.2f} // {optimized_hitrates[max_outbound_index-2:max_outbound_index+3]}\")\n", + " \n", + " current_db_object_count -= 1\n", + " current_cache_size -= optimized_hitrates[max_outbound_index]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f559ee7a-be2f-4076-b01c-f08950ad5a88", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.13793103, 1. , 0.13793103, 0.13793103, 1. ,\n", + " 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n", + " 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 0.13793103, 1. , 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n", + " 1. , 1. , 1. , 0.13793103, 0.13793103,\n", + " 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 1. , 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n", + " 1. , 1. , 1. , 1. , 0.13793103,\n", + " 0.13793103, 1. , 1. , 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 1. , 1. , 1. , 0.13793103, 1. ,\n", + " 0.13793103, 0.13793103, 1. , 1. , 1. ])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_hitrates" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8b2d3cea-1cc0-476e-92bf-2ac4344a9b1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
count100.000000
mean0.500000
std0.427625
min0.137931
25%0.137931
50%0.137931
75%1.000000
max1.000000
\n", + "
" + ], + "text/plain": [ + " 0\n", + "count 100.000000\n", + "mean 0.500000\n", + "std 0.427625\n", + "min 0.137931\n", + "25% 0.137931\n", + "50% 0.137931\n", + "75% 1.000000\n", + "max 1.000000" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(optimized_hitrates).describe()" ] }, { "cell_type": "code", "execution_count": null, - "id": "11682b36-e705-4bd9-9d75-79012791d1ee", + "id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab", "metadata": {}, "outputs": [], "source": [] diff --git a/01_nb_cncf_optimization/gen_nb_cost_optimization.ipynb b/01_nb_cncf_optimization/gen_nb_cost_optimization.ipynb deleted file mode 100644 index 4675328..0000000 --- a/01_nb_cncf_optimization/gen_nb_cost_optimization.ipynb +++ /dev/null @@ -1,473 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import random" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954", - "metadata": {}, - "outputs": [], - "source": [ - "DATABASE_OBJECT_COUNT = 100\n", - "CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n", - "ZIPF_CONSTANT = 2\n", - "\n", - "CACHE_MISS_COST = 2\n", - "CACHE_REFRESH_COST = 1\n", - "\n", - "SEED = 42\n", - "np.random.seed(SEED)\n", - "random.seed(SEED)\n", - "\n", - "LAMBDA_VALUES = np.array([np.random.zipf(ZIPF_CONSTANT) for i in np.arange(1, DATABASE_OBJECT_COUNT + 1,1)])" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9cc83cf6-5c78-4f0d-b7cb-08cdb80c362e", - "metadata": {}, - "outputs": [], - "source": [ - "# LAMBDA_VALUES = np.array([0.03, 0.04,0.05,0.06,0.07,1,1.1,1.2,1.3,1.4,1.5])\n", - "# DATABASE_OBJECT_COUNT = len(LAMBDA_VALUES)\n", - "# CACHE_SIZE = 4.4\n", - "# CACHE_MISS_COST = 7\n", - "# CACHE_REFRESH_COST = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3dc07233-0b56-4fee-a93b-212836c18b42", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "\n", - "lambda_vals = LAMBDA_VALUES\n", - "c_f = CACHE_MISS_COST\n", - "c_delta = CACHE_REFRESH_COST" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef", - "metadata": {}, - "outputs": [], - "source": [ - "def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):\n", - " num = (db_object_count * c_f - cache_sz * c_delta)\n", - " denom = np.sum(1.0/lambda_vals)\n", - " return max(0, num/denom)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8", - "metadata": {}, - "outputs": [], - "source": [ - "def h_i_star(c_f, eta, lambda_vals, c_delta):\n", - " optimized_hitrate = (c_f - (eta/lambda_vals)) / c_delta\n", - " return optimized_hitrate" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6", - "metadata": {}, - "outputs": [], - "source": [ - "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", - " local_hitrates = hitrates[(hitrates < 0) | (hitrates > 1)]\n", - " smallest_delta = np.abs(np.min(local_hitrates))\n", - " biggest_delta = np.max(local_hitrates) - 1\n", - " if smallest_delta > biggest_delta:\n", - " index = np.where(hitrates == np.min(local_hitrates))[0][0]\n", - " return index\n", - " else:\n", - " index = np.where(hitrates == np.max(local_hitrates))[0][0]\n", - " return index" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "55b251f8-97ca-49a8-9ec6-be77dc1e49b2", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "\"\"\"\n", - "Perform theoretical optimization to compute optimal hit probabilities.\n", - "\n", - "Parameters:\n", - "- lambda_vals (numpy array): Request rates for each item.\n", - "- B (float): Total cache size.\n", - "- c_f (float): Fetching linear cost (cache miss cost).\n", - "- c_delta (float): Age linear cost.\n", - "\n", - "Returns:\n", - "- h_opt (numpy array): Optimal hit probabilities for each item.\n", - "\"\"\"\n", - "optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)\n", - "differenc_set = np.arange(DATABASE_OBJECT_COUNT)\n", - "fix_i = []\n", - "current_db_objects = DATABASE_OBJECT_COUNT\n", - "current_cache_size = CACHE_SIZE\n", - "\n", - "while True:\n", - " if current_db_objects == 0:\n", - " # Handle special case: no items left to optimize\n", - " if current_cache_size > 0:\n", - " # Redistribute unused cache size among items with zero hit probability\n", - " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", - " current_db_objects = len(differenc_set)\n", - " continue\n", - " else:\n", - " optimized_hitrates[differenc_set] = 0\n", - " break\n", - " # Compute Lagrangian multiplier and optimal hit probabilities\n", - " mu = max(0, (current_db_objects * c_f - current_cache_size * c_delta) / np.sum(1.0 / lambda_vals[differenc_set]))\n", - " eta = eta_star(current_db_objects, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", - " assert(mu == eta)\n", - " optimized_hitrates[differenc_set] = (c_f - mu / lambda_vals[differenc_set]) / c_delta\n", - " # print(optimized_hitrates)\n", - " # Handle the case where mu < 0\n", - " if mu < 0:\n", - " current_cache_size = current_db_objects * c_f / c_delta # Adjust cache size for next iteration\n", - " continue\n", - " # Check for constraint violations\n", - " larger_i = np.where(optimized_hitrates > 1)[0] # h > 1\n", - " smaller_i = np.where(optimized_hitrates < 0)[0] # h < 0\n", - " # If no violations, optimization is complete\n", - " break_con = len(smaller_i) == 0 and len(larger_i) == 0\n", - " break_con1 = len((optimized_hitrates[differenc_set])[((optimized_hitrates[differenc_set]) < 0) | ((optimized_hitrates[differenc_set])> 1)]) == 0\n", - " assert(break_con == break_con1)\n", - " if break_con:\n", - " break\n", - " # Find the furthest violating item\n", - " min_viol, min_viol_i = (0, -1)\n", - " if len(smaller_i) > 0:\n", - " min_viol_i = np.argmin(optimized_hitrates)\n", - " min_viol = optimized_hitrates[min_viol_i]\n", - " max_viol, max_viol_i = (0, -1)\n", - " if len(larger_i) > 0:\n", - " larger = optimized_hitrates - 1\n", - " max_viol_i = np.argmax(larger)\n", - " max_viol = larger[max_viol_i]\n", - " # Compare the furthest violations and adjust accordingly\n", - " viol_i = min_viol_i\n", - " min_viol_flag = True # True if furthest is from the left boundary\n", - " if max_viol > abs(min_viol):\n", - " viol_i = max_viol_i\n", - " min_viol_flag = False \n", - " index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " if viol_i != index:\n", - " print(optimized_hitrates[viol_i])\n", - " print(optimized_hitrates[index])\n", - " assert(viol_i == index)\n", - " if min_viol_flag:\n", - " optimized_hitrates[viol_i] = 0\n", - " else:\n", - " optimized_hitrates[viol_i] = min(1, current_cache_size)\n", - "\n", - " # Update parameters for next iteration\n", - " current_cache_size =- optimized_hitrates[viol_i]\n", - " fix_i.append(viol_i)\n", - " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_objects = DATABASE_OBJECT_COUNT - len(fix_i)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "efa16eaf-a10b-4927-99cd-190e2ffe1d1e", - "metadata": {}, - "outputs": [], - "source": [ - "a = optimized_hitrates\n", - "b = differenc_set" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All values optimized.\n" - ] - } - ], - "source": [ - "\"\"\"\n", - "Perform theoretical optimization to compute optimal hit probabilities.\n", - "\n", - "Parameters:\n", - "- lambda_vals (numpy array): Request rates for each item.\n", - "- B (float): Total cache size.\n", - "- c_f (float): Fetching linear cost (cache miss cost).\n", - "- c_delta (float): Age linear cost.\n", - "\n", - "Returns:\n", - "- h_opt (numpy array): Optimal hit probabilities for each item.\n", - "\"\"\"\n", - "optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)\n", - "differenc_set = np.arange(DATABASE_OBJECT_COUNT)\n", - "fix_i = []\n", - "current_db_objects = DATABASE_OBJECT_COUNT\n", - "current_cache_size = CACHE_SIZE\n", - "\n", - "while True:\n", - " if current_db_objects == 0:\n", - " # Handle special case: no items left to optimize\n", - " if current_cache_size > 0:\n", - " # Redistribute unused cache size among items with zero hit probability\n", - " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", - " current_db_objects = len(differenc_set)\n", - " continue\n", - " else:\n", - " optimized_hitrates[differenc_set] = 0\n", - " break\n", - " # Compute Lagrangian multiplier and optimal hit probabilities\n", - " eta = eta_star(current_db_objects, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", - " optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta\n", - "\n", - " if mu < 0:\n", - " current_cache_size = current_db_objects * c_f / c_delta # Adjust cache size for next iteration\n", - " continue\n", - " \n", - " if len((optimized_hitrates[differenc_set])[((optimized_hitrates[differenc_set]) < 0) | ((optimized_hitrates[differenc_set])> 1)]) == 0:\n", - " print(\"All values optimized.\")\n", - " break\n", - " max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", - "\n", - " current_cache_size =- optimized_hitrates[max_outbound_index]\n", - " fix_i.append(max_outbound_index)\n", - " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_objects = DATABASE_OBJECT_COUNT - len(fix_i)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "4f64253f-b389-4be9-b403-08027d480121", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,\n", - " 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,\n", - " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "optimized_hitrates" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "17d818db-ec88-4c26-92af-6d74862525d9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0.43902439, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0.43902439, 0. , 0. ,\n", - " 0. , 0.04878049, 0. , 0.04878049, 0. ,\n", - " 0. , 0. , 0. , -0. , 0. ,\n", - " 0.04878049, 0.04878049, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0.43902439, 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " -0. , 0. , 0. , 0. , 0. ,\n", - " -0. , 0. , 1. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , -0. , 0. ,\n", - " 0. , 0.43902439, 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0. , 0. ,\n", - " 0. , 0. , 0. , 0.04878049, 0. ])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "791b3f96-527a-489e-970e-c92ec950177f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 6, 27, 31, 33, 40, 41, 50, 81, 98])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "b" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c22fa973-432a-4c05-89bf-2a6ea82ae3d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 6, 27, 50, 81])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "differenc_set" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "898e1266-5aaa-46f4-ac0f-c7807ac2b6bb", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "loop_lambda = lambda_vals\n", - "\n", - "non_optimized_values = np.arange(db_object_count)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "8cc9b8a9-f7ae-48fc-adfb-ac4b7a4998f1", - "metadata": {}, - "outputs": [], - "source": [ - "db_object_count = DATABASE_OBJECT_COUNT\n", - "cache_sz = CACHE_SIZE\n", - "loop_lambda = lambda_vals\n", - "\n", - "optimized_hitrate = np.zeros(db_object_count)\n", - "non_optimized_values = np.arange(db_object_count)\n", - "optimized_value = {}\n", - "\n", - "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, loop_lambda[non_optimized_values])\n", - "optimized_hitrate[non_optimized_values] = h_i_star(c_f, eta, loop_lambda[non_optimized_values], c_delta)\n", - "\n", - "max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrate)\n", - "optimized_value[max_outbound_index] = (1 if optimized_hitrate[max_outbound_index] > 1 else 0)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "cbcf3592-fcf2-4f54-a3cd-761097c12972", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{67: 1}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "optimized_value" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db732331-1d09-45b7-915c-73daa270b5e2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "graphs", - "language": "python", - "name": "graphs" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/01_nb_cncf_optimization/nb_cost_optimization.ipynb b/01_nb_cncf_optimization/nb_cost_optimization.ipynb index 725cd1d..88de62d 100644 --- a/01_nb_cncf_optimization/nb_cost_optimization.ipynb +++ b/01_nb_cncf_optimization/nb_cost_optimization.ipynb @@ -9,7 +9,8 @@ "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import random" + "import random\n", + "import pandas as pd" ] }, { @@ -23,7 +24,7 @@ "CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n", "ZIPF_CONSTANT = 2\n", "\n", - "CACHE_MISS_COST = 1\n", + "CACHE_MISS_COST = 2\n", "CACHE_REFRESH_COST = 1\n", "\n", "SEED = 42\n", @@ -95,62 +96,201 @@ "outputs": [], "source": [ "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", - " local_hitrates = hitrates[(hitrates < 0) | (hitrates > 1)]\n", - " smallest_delta = np.abs(np.min(local_hitrates))\n", - " biggest_delta = np.max(local_hitrates) - 1\n", + " lower_bound_violation = hitrates[(hitrates < 0)]\n", + " upper_bound_violation = hitrates[(hitrates > 1)]\n", + " smallest_delta = np.abs(np.min(lower_bound_violation))\n", + " biggest_delta = np.max(upper_bound_violation) - 1\n", " if smallest_delta > biggest_delta:\n", + " print(smallest_delta)\n", " index = np.where(hitrates == np.min(local_hitrates))[0][0]\n", " return index\n", " else:\n", + " \n", " index = np.where(hitrates == np.max(local_hitrates))[0][0]\n", " return index" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf", + "execution_count": 8, + "id": "9d774304-ae68-43b3-a76a-e970c06c5236", + "metadata": {}, + "outputs": [], + "source": [ + "def get_index_of_furthest_hitrate_from_boundary(hitrates):\n", + " outside_bounds = (hitrates < 0) | (hitrates > 1)\n", + " distances = np.where(outside_bounds, np.maximum(np.abs(hitrates - 0), np.abs(hitrates - 1)), -np.inf)\n", + " index = np.argmax(distances)\n", + " return index" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "19678083-15e1-439b-be8c-42033d501644", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([0.30256805, 0.76752268, 0.30256805, 0.30256805, 0.65128403,\n", - " 0.30256805, 0.86051361, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.65128403,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.76752268, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.86051361, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.82564201, 0.30256805, 0.82564201, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.76752268, 0.91282101, 0.30256805,\n", - " 0.82564201, 0.82564201, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.93025681, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.86051361, 0.92250756, 0.30256805, 0.30256805, 0.30256805,\n", - " 0.30256805, 0.30256805, 0.95897459, 0.65128403, 0.30256805,\n", - " 0.97317569, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n", - " 0.93025681, 0.30256805, 0.98989229, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.76752268,\n", - " 0.65128403, 0.65128403, 0.76752268, 0.95350454, 0.30256805,\n", - " 0.30256805, 0.86051361, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.30256805, 0.65128403, 0.30256805, 0.30256805,\n", - " 0.65128403, 0.65128403, 0.76752268, 0.30256805, 0.65128403,\n", - " 0.30256805, 0.30256805, 0.98115049, 0.82564201, 0.65128403])" + "array([ 1, 3, 1, 1, 2, 1, 5, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1,\n", + " 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 5, 1, 1, 1, 4, 1, 4,\n", + " 1, 1, 1, 3, 8, 1, 4, 4, 2, 1, 1, 1, 10, 1, 1, 1, 5,\n", + " 9, 1, 1, 1, 1, 1, 17, 2, 1, 26, 1, 1, 2, 1, 10, 1, 69,\n", + " 1, 1, 2, 1, 1, 1, 3, 2, 2, 3, 15, 1, 1, 5, 2, 1, 1,\n", + " 2, 1, 2, 1, 1, 2, 2, 3, 1, 2, 1, 1, 37, 4, 2])" ] }, - "execution_count": 14, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals)\n", - "optimized_hitrates = (c_f - eta / lambda_vals) / c_delta\n", + "lambda_vals" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.1159070575516945\n" + ] + }, + { + "data": { + "text/plain": [ + "array([-0.11590706, 1.29469765, -0.11590706, -0.11590706, 0.94204647,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 0.94204647,\n", + " -0.11590706, -0.11590706, -0.11590706, 0.94204647, 0.94204647,\n", + " -0.11590706, -0.11590706, 1.29469765, -0.11590706, -0.11590706,\n", + " -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.47102324,\n", + " -0.11590706, 1.47102324, -0.11590706, -0.11590706, -0.11590706,\n", + " 1.29469765, 1.73551162, -0.11590706, 1.47102324, 1.47102324,\n", + " 0.94204647, -0.11590706, -0.11590706, -0.11590706, 1.78840929,\n", + " -0.11590706, -0.11590706, -0.11590706, 1.76489922, -0.11590706,\n", + " -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.87553488,\n", + " 0.94204647, -0.11590706, 1.91861896, -0.11590706, -0.11590706,\n", + " 0.94204647, -0.11590706, 1.78840929, -0.11590706, 1.96933468,\n", + " -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n", + " -0.11590706, 1.29469765, 0.94204647, 0.94204647, 1.29469765,\n", + " 1.85893953, -0.11590706, -0.11590706, 0.94204647, -0.11590706,\n", + " -0.11590706, 0.94204647, -0.11590706, 0.94204647, -0.11590706,\n", + " -0.11590706, 0.94204647, 0.94204647, 1.29469765, -0.11590706,\n", + " 0.94204647, -0.11590706, -0.11590706, 1.94281332, 1.47102324,\n", + " 0.94204647])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals[lambda_vals != lambda_vals[6]])\n", + "print(eta)\n", + "optimized_hitrates = (c_f - eta / lambda_vals[lambda_vals != lambda_vals[6]]) / c_delta\n", "optimized_hitrates" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, + "id": "05b17074-719f-4bca-8434-2aaee26094d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
count96.000000
mean0.437500
std0.726101
min-0.115907
25%-0.115907
50%-0.115907
75%0.942046
max1.969335
\n", + "
" + ], + "text/plain": [ + " 0\n", + "count 96.000000\n", + "mean 0.437500\n", + "std 0.726101\n", + "min -0.115907\n", + "25% -0.115907\n", + "50% -0.115907\n", + "75% 0.942046\n", + "max 1.969335" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(optimized_hitrates).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c", "metadata": { "scrolled": true @@ -160,6 +300,48 @@ "name": "stdout", "output_type": "stream", "text": [ + "Optimized: 67 1.97 // [ 1.79077042 -0.09229584 1. -0.09229584 -0.09229584]\n", + "Optimized: 97 1.94 // [-0.07876743 -0.07876743 1. 1.48030814 0.96061628]\n", + "Optimized: 60 1.92 // [ 0.96720258 -0.06559484 1. -0.06559484 -0.06559484]\n", + "Optimized: 57 1.88 // [-0.05274002 -0.05274002 1. 0.97362999 -0.05274002]\n", + "Optimized: 78 1.86 // [ 0.97977406 1.31984937 1. -0.04045188 -0.04045188]\n", + "Optimized: 46 1.80 // [-0.02836604 -0.02836604 1. -0.02836604 -0.02836604]\n", + "Optimized: 65 1.80 // [ 0.99140044 -0.01719911 1. -0.01719911 1. ]\n", + "Optimized: 51 1.78 // [-0.00600086 1.59879983 1. -0.00600086 -0.00600086]\n", + "Optimized: 38 1.75 // [0.00491746 1.33497249 1. 0.00491746 1.50122936]\n", + "Optimized: 6 1.60 // [1.00774103 0.01548205 1. 0.01548205 0.01548205]\n", + "Optimized: 27 1.60 // [0.02399435 0.02399435 1. 0.02399435 0.02399435]\n", + "Optimized: 50 1.61 // [0.03255485 0.03255485 1. 1. 0.03255485]\n", + "Optimized: 81 1.61 // [0.04116395 0.04116395 1. 1.02058197 0.04116395]\n", + "Optimized: 31 1.51 // [0.04982206 0.04982206 1. 0.04982206 1.51245552]\n", + "Optimized: 33 1.51 // [1. 0.05714286 1. 0.05714286 0.05714286]\n", + "Optimized: 40 1.52 // [1. 0.06451613 1. 1.51612903 1.03225806]\n", + "Optimized: 41 1.52 // [0.07194245 1. 1. 1.03597122 0.07194245]\n", + "Optimized: 98 1.52 // [0.07942238 1. 1. 1.03971119]\n", + "Optimized: 1 1.36 // []\n", + "Optimized: 18 1.36 // [0.09223301 0.09223301 1. 0.09223301 0.09223301]\n", + "Optimized: 37 1.37 // [0.09756098 0.09756098 1. 1. 0.09756098]\n", + "Optimized: 74 1.37 // [0.10294118 0.10294118 1. 1.05147059 1.05147059]\n", + "Optimized: 77 1.37 // [1.05418719 1.05418719 1. 1. 0.10837438]\n", + "Optimized: 92 1.37 // [1.05693069 1.05693069 1. 0.11386139 1.05693069]\n", + "Optimized: 4 1.06 // [0.11940299 0.11940299 1. 0.11940299 1. ]\n", + "Optimized: 10 1.06 // [0.12030075 0.12030075 1. 0.12030075 0.12030075]\n", + "Optimized: 14 1.06 // [0.12121212 0.12121212 1. 1.06060606 0.12121212]\n", + "Optimized: 15 1.06 // [0.1221374 1. 1. 0.1221374 0.1221374]\n", + "Optimized: 23 1.06 // [0.12307692 0.12307692 1. 0.12307692 0.12307692]\n", + "Optimized: 42 1.06 // [1. 1. 1. 0.12403101 0.12403101]\n", + "Optimized: 58 1.06 // [0.125 1. 1. 0.125 1. ]\n", + "Optimized: 63 1.06 // [0.12598425 0.12598425 1. 0.12598425 1. ]\n", + "Optimized: 70 1.06 // [0.12698413 0.12698413 1. 0.12698413 0.12698413]\n", + "Optimized: 75 1.06 // [0.128 1. 1. 1.064 1. ]\n", + "Optimized: 76 1.06 // [1. 1. 1. 1. 1.]\n", + "Optimized: 82 1.07 // [0.1300813 1. 1. 0.1300813 0.1300813]\n", + "Optimized: 85 1.07 // [0.13114754 0.13114754 1. 0.13114754 1.06557377]\n", + "Optimized: 87 1.07 // [1. 0.1322314 1. 0.1322314 0.1322314]\n", + "Optimized: 90 1.07 // [0.13333333 0.13333333 1. 1.06666667 1. ]\n", + "Optimized: 91 1.07 // [0.13445378 1. 1. 1. 0.13445378]\n", + "Optimized: 94 1.07 // [1. 0.13559322 1. 0.13559322 0.13559322]\n", + "Optimized: 99 1.07 // [1. 1. 1.]\n", "All values optimized.\n" ] } @@ -188,18 +370,19 @@ " if current_db_object_count == 0:\n", " print(\"No objects left to optimize.\")\n", " if current_cache_size > 0:\n", + " print(\"Add obj with optimized hitrate 0 and add them to optimization pool for re-optimization.\")\n", " # Redistribute unused cache size among items with zero hit probability\n", " differenc_set = np.where(optimized_hitrates == 0)[0]\n", - " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n", + " fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set).tolist()\n", " current_db_object_count = len(differenc_set)\n", " continue\n", " else:\n", + " \"Reset\"\n", " optimized_hitrates[differenc_set] = 0\n", " break\n", " # Compute Lagrangian multiplier and optimal hit probabilities\n", " eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n", " optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta\n", - "\n", " if eta < 0:\n", " print(\"eta was negative.\")\n", " current_cache_size = current_db_object_count * c_f / c_delta # Adjust cache size for next iteration\n", @@ -210,18 +393,150 @@ " break\n", " \n", " max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n", - " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", - "\n", - " current_cache_size =- optimized_hitrates[max_outbound_index]\n", " fix_i.append(max_outbound_index)\n", " differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n", - " current_db_object_count -= 1" + "\n", + " old_hitrate = optimized_hitrates[max_outbound_index]\n", + " optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n", + " \n", + " print(f\"Optimized: {max_outbound_index} {old_hitrate:.2f} // {optimized_hitrates[max_outbound_index-2:max_outbound_index+3]}\")\n", + " \n", + " current_db_object_count -= 1\n", + " current_cache_size -= optimized_hitrates[max_outbound_index]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f559ee7a-be2f-4076-b01c-f08950ad5a88", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.13793103, 1. , 0.13793103, 0.13793103, 1. ,\n", + " 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n", + " 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 0.13793103, 1. , 0.13793103, 1. , 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n", + " 1. , 1. , 1. , 0.13793103, 0.13793103,\n", + " 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 1. , 1. , 0.13793103, 0.13793103, 0.13793103,\n", + " 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n", + " 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n", + " 1. , 1. , 1. , 1. , 0.13793103,\n", + " 0.13793103, 1. , 1. , 0.13793103, 0.13793103,\n", + " 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n", + " 1. , 1. , 1. , 0.13793103, 1. ,\n", + " 0.13793103, 0.13793103, 1. , 1. , 1. ])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_hitrates" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8b2d3cea-1cc0-476e-92bf-2ac4344a9b1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
count100.000000
mean0.500000
std0.427625
min0.137931
25%0.137931
50%0.137931
75%1.000000
max1.000000
\n", + "
" + ], + "text/plain": [ + " 0\n", + "count 100.000000\n", + "mean 0.500000\n", + "std 0.427625\n", + "min 0.137931\n", + "25% 0.137931\n", + "50% 0.137931\n", + "75% 1.000000\n", + "max 1.000000" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(optimized_hitrates).describe()" ] }, { "cell_type": "code", "execution_count": null, - "id": "11682b36-e705-4bd9-9d75-79012791d1ee", + "id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab", "metadata": {}, "outputs": [], "source": []