272f722f23
Signed-off-by: Tuan-Dat Tran <tuan-dat.tran@tudattr.dev>
288 lines
10 KiB
Plaintext
288 lines
10 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "ab5cd7d1-1a57-46fc-8282-dae0a6cc2944",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"import random\n",
|
|
"import pandas as pd\n",
|
|
"import itertools\n",
|
|
"from tqdm import tqdm"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "3d1ad0b9-f6a8-4e98-84aa-6e02e4279954",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
# Tunable constants for the whole notebook.
SEED = 42            # seed shared by both random number generators below
ZIPF_CONSTANT = 2    # parameter handed to np.random.zipf when request rates are drawn

# Seed Python's and NumPy's global generators so a fresh run repeats the same draws.
random.seed(SEED)
np.random.seed(SEED)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "5a27d416-8f98-4814-af9e-6c6bef95f4ef",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):
    """Return the multiplier ``eta`` for one set of objects, clamped at zero.

    The unclamped value is::

        (db_object_count * c_f - cache_sz * c_delta) / sum(1 / lambda_vals)

    Parameters
    ----------
    db_object_count : number
        Count that scales ``c_f`` in the numerator.
    c_f : number
        Cost factor multiplied by ``db_object_count``.
    cache_sz : number
        Cache budget that scales ``c_delta`` in the numerator.
    c_delta : number
        Cost factor multiplied by ``cache_sz``.
    lambda_vals : numpy.ndarray
        Per-object rates; their reciprocals are summed for the denominator.

    Returns
    -------
    number
        ``0`` if the quotient is not greater than zero, otherwise the quotient.
    """
    budget_gap = db_object_count * c_f - cache_sz * c_delta
    inverse_rate_total = np.sum(1.0 / lambda_vals)

    if inverse_rate_total == 0:
        # Diagnostic output only: the division below is still carried out.
        print("sum(1.0/lambda_vals) == 0")
        print(db_object_count, c_f, cache_sz, c_delta, lambda_vals)

    unclamped = budget_gap / inverse_rate_total
    return max(0, unclamped)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "6276a9ce-f839-4fe6-90f2-2195cf065fc8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
def h_i_star(c_f, eta, lambda_vals, c_delta):
    """Evaluate ``(c_f - eta / lambda_vals) / c_delta`` element-wise.

    Parameters
    ----------
    c_f : number
        Value the per-object term is subtracted from.
    eta : number
        Multiplier divided by each entry of ``lambda_vals``.
    lambda_vals : numpy.ndarray
        Per-object rates.
    c_delta : number
        Divisor applied to the difference.

    Returns
    -------
    numpy.ndarray
        One value per entry of ``lambda_vals``. The result is not clipped,
        so entries may fall outside ``[0, 1]``.
    """
    per_object_term = eta / lambda_vals
    return (c_f - per_object_term) / c_delta
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "dcd31a8c-6864-4b9a-8bb3-998f0c32baf6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
def get_index_of_furthest_hitrate_from_boundary(hitrates):
    """Return the index of the hit rate lying furthest outside ``[0, 1]``.

    The amount by which the smallest value falls below 0 is compared with the
    amount by which the largest value exceeds 1. If the lower excess is
    strictly larger, the index of the minimum is returned; otherwise the
    index of the maximum is returned (this includes ties and the case where
    nothing is out of bounds).

    Note: a later cell in this notebook defines a function with the same
    name, which replaces this one when the cells are run top to bottom.

    Parameters
    ----------
    hitrates : array-like of numbers
        Candidate hit rates; must contain at least one element.

    Returns
    -------
    numpy integer
        Position of the first occurrence of the selected extreme value.

    Raises
    ------
    ValueError
        If ``hitrates`` is empty (raised by ``np.min``).
    """
    hitrates = np.asarray(hitrates)

    # Work on the global extremes so that a missing violation on one side
    # contributes an excess of 0 instead of reducing over an empty array.
    lower_excess = max(0, -np.min(hitrates))
    upper_excess = max(0, np.max(hitrates) - 1)

    if lower_excess > upper_excess:
        return np.argmin(hitrates)
    return np.argmax(hitrates)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "9d774304-ae68-43b3-a76a-e970c06c5236",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
def get_index_of_furthest_hitrate_from_boundary(hitrates):
    """Return the index of the out-of-range hit rate with the largest score.

    Entries below 0 or above 1 are scored by their distance to the farther of
    the two boundaries 0 and 1; all other entries are scored ``-inf``. The
    position of the highest score is returned, which is index 0 when no entry
    is out of range.

    Parameters
    ----------
    hitrates : numpy.ndarray
        Candidate hit rates.

    Returns
    -------
    numpy integer
        Index of the first maximum of the scores.
    """
    too_low = hitrates < 0
    too_high = hitrates > 1
    violates = np.logical_or(too_low, too_high)

    gap_to_zero = np.abs(hitrates)
    gap_to_one = np.abs(hitrates - 1)
    far_boundary_gap = np.maximum(gap_to_zero, gap_to_one)

    # In-range entries must never win the argmax below.
    scores = np.where(violates, far_boundary_gap, -np.inf)
    return np.argmax(scores)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "0e21c26f-058a-4e56-a5ad-1c47bf28656c",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
def optimize_hitrates(db_object_count, cache_size, c_f, c_delta, lambda_vals):
    """Iteratively compute per-object hit rates and pin those outside ``[0, 1]``.

    Each pass computes ``eta`` via ``eta_star`` for the objects that are still
    free and assigns them ``h_i_star(...)``. If every free object is within
    ``[0, 1]`` the loop ends. Otherwise the object picked by
    ``get_index_of_furthest_hitrate_from_boundary`` is pinned to 1 (if above
    1) or 0 (otherwise), the pinned value is subtracted from the remaining
    cache budget, and the pass repeats on the remaining objects.

    Once every object is pinned: if cache budget is left (> 0), the objects
    currently at hit rate 0 are released and optimized again with that
    budget; if no budget is left, the loop ends.

    Parameters
    ----------
    db_object_count : int
        Number of objects; also the length of the returned array.
    cache_size : number
        Total cache budget to distribute.
    c_f, c_delta : number
        Cost factors forwarded to ``eta_star`` and ``h_i_star``.
    lambda_vals : numpy.ndarray
        Per-object rates, indexed by object position.

    Returns
    -------
    numpy.ndarray
        Array of length ``db_object_count`` with the resulting hit rates.
    """
    optimized_hitrates = np.zeros(db_object_count)
    current_db_object_count = db_object_count
    current_cache_size = cache_size

    # Objects whose hit rate is still free / objects already pinned to 0 or 1.
    free_indices = np.arange(db_object_count)
    fixed_indices = []

    while True:
        if current_db_object_count == 0:
            if current_cache_size > 0:
                # Budget is left over: release the objects sitting at 0.
                free_indices = np.where(optimized_hitrates == 0)[0]
                if len(free_indices) == 0:
                    # Nothing sits at 0, so no object can absorb the leftover
                    # budget. Without this exit the loop would never end.
                    break
                fixed_indices = np.setdiff1d(
                    np.arange(db_object_count), free_indices
                ).tolist()
                current_db_object_count = len(free_indices)
                continue
            else:
                optimized_hitrates[free_indices] = 0
                break

        free_lambdas = lambda_vals[free_indices]
        eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, free_lambdas)
        optimized_hitrates[free_indices] = h_i_star(c_f, eta, free_lambdas, c_delta)

        if eta < 0:
            # NOTE(review): eta_star as defined in this notebook clamps its
            # result at 0, so this branch is not expected to run; kept in case
            # eta_star is changed.
            current_cache_size = current_db_object_count * c_f / c_delta  # Adjust cache size for next iteration
            continue

        free_hitrates = optimized_hitrates[free_indices]
        if not np.any((free_hitrates < 0) | (free_hitrates > 1)):
            # Every free object is within [0, 1]: done.
            break

        max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)
        fixed_indices.append(max_outbound_index)
        free_indices = np.setdiff1d(np.arange(db_object_count), fixed_indices)

        # Pin the worst violator to the boundary on its side.
        optimized_hitrates[max_outbound_index] = (
            1 if optimized_hitrates[max_outbound_index] > 1 else 0
        )

        current_db_object_count -= 1
        current_cache_size -= optimized_hitrates[max_outbound_index]
    return optimized_hitrates
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "b6bf3329-3a63-4807-ab8b-8a54f824f47e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
def objective_function(optimized_hitrates, c_f, c_delta, lambda_vals):
    """Sum the per-object cost for a given hit-rate vector.

    For every object the cost is::

        lambda * (1 - h) * c_f + 0.5 * h**2 * c_delta

    Parameters
    ----------
    optimized_hitrates : numpy.ndarray
        Hit rate ``h`` per object.
    c_f : number
        Factor applied to the ``lambda * (1 - h)`` term.
    c_delta : number
        Factor applied to the ``0.5 * h**2`` term.
    lambda_vals : numpy.ndarray
        Rate ``lambda`` per object.

    Returns
    -------
    number
        Total cost over all objects.
    """
    linear_term = lambda_vals * (1 - optimized_hitrates) * c_f
    quadratic_term = 0.5 * np.power(optimized_hitrates, 2) * c_delta
    return np.sum(linear_term + quadratic_term)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "7a998837-72b8-4039-95a5-ca8d9c8e65ab",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Perform grid search\n",
|
|
def grid_search(db_object_counts, cache_sizes, c_f_values, c_delta_values):
    """Evaluate every parameter combination and keep the lowest objective.

    For each ``(db_object_count, cache_size, c_f, c_delta)`` combination with
    ``db_object_count >= cache_size``, fresh rates are drawn from
    ``np.random.zipf(ZIPF_CONSTANT)``, hit rates are computed with
    ``optimize_hitrates`` and scored with ``objective_function``.
    Combinations with ``db_object_count < cache_size`` are skipped but still
    count towards the progress-bar total.

    Parameters
    ----------
    db_object_counts, cache_sizes, c_f_values, c_delta_values : sequence
        Candidate values for each parameter; each must support ``len``.

    Returns
    -------
    tuple
        ``(best_objective, best_params)``. ``best_objective`` stays
        ``float('inf')`` and ``best_params`` stays ``None`` if no combination
        was evaluated.
    """
    combination_total = (
        len(db_object_counts) * len(cache_sizes) * len(c_f_values) * len(c_delta_values)
    )
    combinations = itertools.product(
        db_object_counts, cache_sizes, c_f_values, c_delta_values
    )

    best_objective = float('inf')
    best_params = None

    for params in tqdm(combinations, total=combination_total, desc="Grid Search Progress"):
        db_object_count, cache_size, c_f, c_delta = params
        if db_object_count < cache_size:
            continue

        # One scalar draw per object, in object order.
        draws = [
            np.random.zipf(ZIPF_CONSTANT)
            for _ in np.arange(1, db_object_count + 1, 1)
        ]
        lambda_vals = np.array(draws)

        optimized_hitrates = optimize_hitrates(
            db_object_count, cache_size, c_f, c_delta, lambda_vals
        )
        objective = objective_function(optimized_hitrates, c_f, c_delta, lambda_vals)

        # Keep the smallest objective seen so far together with its parameters.
        if objective < best_objective:
            best_objective = objective
            best_params = (db_object_count, cache_size, c_f, c_delta)

    return best_objective, best_params
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "a271b52d-1f24-4670-ae3f-af5dd9096a2f",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Grid Search Progress: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64152/64152 [12:27<00:00, 85.87it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 12min 16s, sys: 11.5 s, total: 12min 28s\n",
|
|
"Wall time: 12min 27s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"\n",
|
|
# Grid search space.
# Cache sizes are every object count scaled by every ratio (rounded, de-duplicated);
# c_delta candidates are every c_f value scaled by every ratio (de-duplicated).
test_ratios = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])
c_f_values = np.array([0.1, 0.2, 0.5, 0.7, 1, 1.5, 2, 5, 10])
db_object_count_values = np.round(np.array([10, 15, 30, 100, 200, 500]))

cache_size_values = np.unique(
    np.round(np.outer(test_ratios, db_object_count_values).flatten())
)
c_delta_values = np.unique(np.outer(test_ratios, c_f_values).flatten())

best_objective, best_params = grid_search(
    db_object_count_values,
    cache_size_values,
    c_f_values,
    c_delta_values,
)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "b2f625d0-ebe0-4a5d-92ff-7de03942ef51",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(0.05000000000000002, (10, 10.0, 1.5, 0.010000000000000002))"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"best_objective, best_params "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "86a23d02-6f14-4d4d-ad8a-39084ea69151",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "graphs",
|
|
"language": "python",
|
|
"name": "graphs"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|