fix(h_i_opt calculation): Fix for calculation of optimized hitrate:

- Decrement current_cache_size (`-=`) instead of overwriting it with the negated hit rate (`=-`)

Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
This commit is contained in:
Tuan-Dat Tran
2024-12-03 10:12:07 +01:00
parent 4ea5505130
commit 799f7b78d4
4 changed files with 706 additions and 1022 deletions

View File

@@ -9,7 +9,8 @@
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import random"
"import random\n",
"import pandas as pd"
]
},
{
@@ -23,7 +24,7 @@
"CACHE_SIZE = DATABASE_OBJECT_COUNT/2\n",
"ZIPF_CONSTANT = 2\n",
"\n",
"CACHE_MISS_COST = 1\n",
"CACHE_MISS_COST = 2\n",
"CACHE_REFRESH_COST = 1\n",
"\n",
"SEED = 42\n",
@@ -95,62 +96,201 @@
"outputs": [],
"source": [
"def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
" local_hitrates = hitrates[(hitrates < 0) | (hitrates > 1)]\n",
" smallest_delta = np.abs(np.min(local_hitrates))\n",
" biggest_delta = np.max(local_hitrates) - 1\n",
" lower_bound_violation = hitrates[(hitrates < 0)]\n",
" upper_bound_violation = hitrates[(hitrates > 1)]\n",
" smallest_delta = np.abs(np.min(lower_bound_violation))\n",
" biggest_delta = np.max(upper_bound_violation) - 1\n",
" if smallest_delta > biggest_delta:\n",
" print(smallest_delta)\n",
" index = np.where(hitrates == np.min(local_hitrates))[0][0]\n",
" return index\n",
" else:\n",
" \n",
" index = np.where(hitrates == np.max(local_hitrates))[0][0]\n",
" return index"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf",
"execution_count": 8,
"id": "9d774304-ae68-43b3-a76a-e970c06c5236",
"metadata": {},
"outputs": [],
"source": [
"def get_index_of_furthest_hitrate_from_boundary(hitrates):\n",
"    \"\"\"Return the index of the hit rate that lies furthest outside [0, 1].\n",
"\n",
"    Entries inside [0, 1] are scored -inf so they can never be selected; if no\n",
"    entry violates the bounds, every score is -inf and np.argmax returns 0.\n",
"    \"\"\"\n",
"    violates_bounds = (hitrates < 0) | (hitrates > 1)\n",
"    # For a violating value the larger of the two distances is its overshoot plus\n",
"    # one; the constant offset does not change which entry wins the argmax.\n",
"    distance_to_far_bound = np.maximum(np.abs(hitrates - 0), np.abs(hitrates - 1))\n",
"    scores = np.where(violates_bounds, distance_to_far_bound, -np.inf)\n",
"    return np.argmax(scores)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "19678083-15e1-439b-be8c-42033d501644",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.30256805, 0.76752268, 0.30256805, 0.30256805, 0.65128403,\n",
" 0.30256805, 0.86051361, 0.30256805, 0.30256805, 0.30256805,\n",
" 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.65128403,\n",
" 0.65128403, 0.30256805, 0.30256805, 0.76752268, 0.30256805,\n",
" 0.30256805, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n",
" 0.30256805, 0.30256805, 0.86051361, 0.30256805, 0.30256805,\n",
" 0.30256805, 0.82564201, 0.30256805, 0.82564201, 0.30256805,\n",
" 0.30256805, 0.30256805, 0.76752268, 0.91282101, 0.30256805,\n",
" 0.82564201, 0.82564201, 0.65128403, 0.30256805, 0.30256805,\n",
" 0.30256805, 0.93025681, 0.30256805, 0.30256805, 0.30256805,\n",
" 0.86051361, 0.92250756, 0.30256805, 0.30256805, 0.30256805,\n",
" 0.30256805, 0.30256805, 0.95897459, 0.65128403, 0.30256805,\n",
" 0.97317569, 0.30256805, 0.30256805, 0.65128403, 0.30256805,\n",
" 0.93025681, 0.30256805, 0.98989229, 0.30256805, 0.30256805,\n",
" 0.65128403, 0.30256805, 0.30256805, 0.30256805, 0.76752268,\n",
" 0.65128403, 0.65128403, 0.76752268, 0.95350454, 0.30256805,\n",
" 0.30256805, 0.86051361, 0.65128403, 0.30256805, 0.30256805,\n",
" 0.65128403, 0.30256805, 0.65128403, 0.30256805, 0.30256805,\n",
" 0.65128403, 0.65128403, 0.76752268, 0.30256805, 0.65128403,\n",
" 0.30256805, 0.30256805, 0.98115049, 0.82564201, 0.65128403])"
"array([ 1, 3, 1, 1, 2, 1, 5, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1,\n",
" 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 5, 1, 1, 1, 4, 1, 4,\n",
" 1, 1, 1, 3, 8, 1, 4, 4, 2, 1, 1, 1, 10, 1, 1, 1, 5,\n",
" 9, 1, 1, 1, 1, 1, 17, 2, 1, 26, 1, 1, 2, 1, 10, 1, 69,\n",
" 1, 1, 2, 1, 1, 1, 3, 2, 2, 3, 15, 1, 1, 5, 2, 1, 1,\n",
" 2, 1, 2, 1, 1, 2, 2, 3, 1, 2, 1, 1, 37, 4, 2])"
]
},
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals)\n",
"optimized_hitrates = (c_f - eta / lambda_vals) / c_delta\n",
"lambda_vals"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ccd4b95d-1cdd-4c99-a22e-4b31338993cf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.1159070575516945\n"
]
},
{
"data": {
"text/plain": [
"array([-0.11590706, 1.29469765, -0.11590706, -0.11590706, 0.94204647,\n",
" -0.11590706, -0.11590706, -0.11590706, -0.11590706, 0.94204647,\n",
" -0.11590706, -0.11590706, -0.11590706, 0.94204647, 0.94204647,\n",
" -0.11590706, -0.11590706, 1.29469765, -0.11590706, -0.11590706,\n",
" -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n",
" -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.47102324,\n",
" -0.11590706, 1.47102324, -0.11590706, -0.11590706, -0.11590706,\n",
" 1.29469765, 1.73551162, -0.11590706, 1.47102324, 1.47102324,\n",
" 0.94204647, -0.11590706, -0.11590706, -0.11590706, 1.78840929,\n",
" -0.11590706, -0.11590706, -0.11590706, 1.76489922, -0.11590706,\n",
" -0.11590706, -0.11590706, -0.11590706, -0.11590706, 1.87553488,\n",
" 0.94204647, -0.11590706, 1.91861896, -0.11590706, -0.11590706,\n",
" 0.94204647, -0.11590706, 1.78840929, -0.11590706, 1.96933468,\n",
" -0.11590706, -0.11590706, 0.94204647, -0.11590706, -0.11590706,\n",
" -0.11590706, 1.29469765, 0.94204647, 0.94204647, 1.29469765,\n",
" 1.85893953, -0.11590706, -0.11590706, 0.94204647, -0.11590706,\n",
" -0.11590706, 0.94204647, -0.11590706, 0.94204647, -0.11590706,\n",
" -0.11590706, 0.94204647, 0.94204647, 1.29469765, -0.11590706,\n",
" 0.94204647, -0.11590706, -0.11590706, 1.94281332, 1.47102324,\n",
" 0.94204647])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals[lambda_vals != lambda_vals[6]])\n",
"print(eta)\n",
"optimized_hitrates = (c_f - eta / lambda_vals[lambda_vals != lambda_vals[6]]) / c_delta\n",
"optimized_hitrates"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"id": "05b17074-719f-4bca-8434-2aaee26094d0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>96.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.437500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.726101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>-0.115907</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>-0.115907</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>-0.115907</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.942046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.969335</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"count 96.000000\n",
"mean 0.437500\n",
"std 0.726101\n",
"min -0.115907\n",
"25% -0.115907\n",
"50% -0.115907\n",
"75% 0.942046\n",
"max 1.969335"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(optimized_hitrates).describe()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c",
"metadata": {
"scrolled": true
@@ -160,6 +300,48 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Optimized: 67 1.97 // [ 1.79077042 -0.09229584 1. -0.09229584 -0.09229584]\n",
"Optimized: 97 1.94 // [-0.07876743 -0.07876743 1. 1.48030814 0.96061628]\n",
"Optimized: 60 1.92 // [ 0.96720258 -0.06559484 1. -0.06559484 -0.06559484]\n",
"Optimized: 57 1.88 // [-0.05274002 -0.05274002 1. 0.97362999 -0.05274002]\n",
"Optimized: 78 1.86 // [ 0.97977406 1.31984937 1. -0.04045188 -0.04045188]\n",
"Optimized: 46 1.80 // [-0.02836604 -0.02836604 1. -0.02836604 -0.02836604]\n",
"Optimized: 65 1.80 // [ 0.99140044 -0.01719911 1. -0.01719911 1. ]\n",
"Optimized: 51 1.78 // [-0.00600086 1.59879983 1. -0.00600086 -0.00600086]\n",
"Optimized: 38 1.75 // [0.00491746 1.33497249 1. 0.00491746 1.50122936]\n",
"Optimized: 6 1.60 // [1.00774103 0.01548205 1. 0.01548205 0.01548205]\n",
"Optimized: 27 1.60 // [0.02399435 0.02399435 1. 0.02399435 0.02399435]\n",
"Optimized: 50 1.61 // [0.03255485 0.03255485 1. 1. 0.03255485]\n",
"Optimized: 81 1.61 // [0.04116395 0.04116395 1. 1.02058197 0.04116395]\n",
"Optimized: 31 1.51 // [0.04982206 0.04982206 1. 0.04982206 1.51245552]\n",
"Optimized: 33 1.51 // [1. 0.05714286 1. 0.05714286 0.05714286]\n",
"Optimized: 40 1.52 // [1. 0.06451613 1. 1.51612903 1.03225806]\n",
"Optimized: 41 1.52 // [0.07194245 1. 1. 1.03597122 0.07194245]\n",
"Optimized: 98 1.52 // [0.07942238 1. 1. 1.03971119]\n",
"Optimized: 1 1.36 // []\n",
"Optimized: 18 1.36 // [0.09223301 0.09223301 1. 0.09223301 0.09223301]\n",
"Optimized: 37 1.37 // [0.09756098 0.09756098 1. 1. 0.09756098]\n",
"Optimized: 74 1.37 // [0.10294118 0.10294118 1. 1.05147059 1.05147059]\n",
"Optimized: 77 1.37 // [1.05418719 1.05418719 1. 1. 0.10837438]\n",
"Optimized: 92 1.37 // [1.05693069 1.05693069 1. 0.11386139 1.05693069]\n",
"Optimized: 4 1.06 // [0.11940299 0.11940299 1. 0.11940299 1. ]\n",
"Optimized: 10 1.06 // [0.12030075 0.12030075 1. 0.12030075 0.12030075]\n",
"Optimized: 14 1.06 // [0.12121212 0.12121212 1. 1.06060606 0.12121212]\n",
"Optimized: 15 1.06 // [0.1221374 1. 1. 0.1221374 0.1221374]\n",
"Optimized: 23 1.06 // [0.12307692 0.12307692 1. 0.12307692 0.12307692]\n",
"Optimized: 42 1.06 // [1. 1. 1. 0.12403101 0.12403101]\n",
"Optimized: 58 1.06 // [0.125 1. 1. 0.125 1. ]\n",
"Optimized: 63 1.06 // [0.12598425 0.12598425 1. 0.12598425 1. ]\n",
"Optimized: 70 1.06 // [0.12698413 0.12698413 1. 0.12698413 0.12698413]\n",
"Optimized: 75 1.06 // [0.128 1. 1. 1.064 1. ]\n",
"Optimized: 76 1.06 // [1. 1. 1. 1. 1.]\n",
"Optimized: 82 1.07 // [0.1300813 1. 1. 0.1300813 0.1300813]\n",
"Optimized: 85 1.07 // [0.13114754 0.13114754 1. 0.13114754 1.06557377]\n",
"Optimized: 87 1.07 // [1. 0.1322314 1. 0.1322314 0.1322314]\n",
"Optimized: 90 1.07 // [0.13333333 0.13333333 1. 1.06666667 1. ]\n",
"Optimized: 91 1.07 // [0.13445378 1. 1. 1. 0.13445378]\n",
"Optimized: 94 1.07 // [1. 0.13559322 1. 0.13559322 0.13559322]\n",
"Optimized: 99 1.07 // [1. 1. 1.]\n",
"All values optimized.\n"
]
}
@@ -188,18 +370,19 @@
" if current_db_object_count == 0:\n",
" print(\"No objects left to optimize.\")\n",
" if current_cache_size > 0:\n",
" print(\"Add obj with optimized hitrate 0 and add them to optimization pool for re-optimization.\")\n",
" # Redistribute unused cache size among items with zero hit probability\n",
" differenc_set = np.where(optimized_hitrates == 0)[0]\n",
" fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set)\n",
" fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set).tolist()\n",
" current_db_object_count = len(differenc_set)\n",
" continue\n",
" else:\n",
" \"Reset\"\n",
" optimized_hitrates[differenc_set] = 0\n",
" break\n",
" # Compute Lagrangian multiplier and optimal hit probabilities\n",
" eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])\n",
" optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta\n",
"\n",
" if eta < 0:\n",
" print(\"eta was negative.\")\n",
" current_cache_size = current_db_object_count * c_f / c_delta # Adjust cache size for next iteration\n",
@@ -210,18 +393,150 @@
" break\n",
" \n",
" max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)\n",
" optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n",
"\n",
" current_cache_size =- optimized_hitrates[max_outbound_index]\n",
" fix_i.append(max_outbound_index)\n",
" differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)\n",
" current_db_object_count -= 1"
"\n",
" old_hitrate = optimized_hitrates[max_outbound_index]\n",
" optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)\n",
" \n",
" print(f\"Optimized: {max_outbound_index} {old_hitrate:.2f} // {optimized_hitrates[max_outbound_index-2:max_outbound_index+3]}\")\n",
" \n",
" current_db_object_count -= 1\n",
" current_cache_size -= optimized_hitrates[max_outbound_index]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f559ee7a-be2f-4076-b01c-f08950ad5a88",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.13793103, 1. , 0.13793103, 0.13793103, 1. ,\n",
" 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n",
" 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n",
" 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n",
" 0.13793103, 0.13793103, 0.13793103, 1. , 0.13793103,\n",
" 0.13793103, 0.13793103, 1. , 0.13793103, 0.13793103,\n",
" 0.13793103, 1. , 0.13793103, 1. , 0.13793103,\n",
" 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n",
" 1. , 1. , 1. , 0.13793103, 0.13793103,\n",
" 0.13793103, 1. , 0.13793103, 0.13793103, 0.13793103,\n",
" 1. , 1. , 0.13793103, 0.13793103, 0.13793103,\n",
" 0.13793103, 0.13793103, 1. , 1. , 0.13793103,\n",
" 1. , 0.13793103, 0.13793103, 1. , 0.13793103,\n",
" 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n",
" 1. , 0.13793103, 0.13793103, 0.13793103, 1. ,\n",
" 1. , 1. , 1. , 1. , 0.13793103,\n",
" 0.13793103, 1. , 1. , 0.13793103, 0.13793103,\n",
" 1. , 0.13793103, 1. , 0.13793103, 0.13793103,\n",
" 1. , 1. , 1. , 0.13793103, 1. ,\n",
" 0.13793103, 0.13793103, 1. , 1. , 1. ])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimized_hitrates"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "8b2d3cea-1cc0-476e-92bf-2ac4344a9b1b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.427625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.137931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.137931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.137931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"count 100.000000\n",
"mean 0.500000\n",
"std 0.427625\n",
"min 0.137931\n",
"25% 0.137931\n",
"50% 0.137931\n",
"75% 1.000000\n",
"max 1.000000"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(optimized_hitrates).describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11682b36-e705-4bd9-9d75-79012791d1ee",
"id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab",
"metadata": {},
"outputs": [],
"source": []