In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd

In [2]:
DATABASE_OBJECT_COUNT = 100
CACHE_SIZE = DATABASE_OBJECT_COUNT/2
ZIPF_CONSTANT = 2

CACHE_MISS_COST = 2
CACHE_REFRESH_COST = 1

SEED = 42
np.random.seed(SEED)
random.seed(SEED)

LAMBDA_VALUES = np.array([np.random.zipf(ZIPF_CONSTANT) for i in np.arange(1, DATABASE_OBJECT_COUNT + 1,1)])

In [3]:
# LAMBDA_VALUES = np.array([0.03, 0.04,0.05,0.06,0.07,1,1.1,1.2,1.3,1.4,1.5])
# DATABASE_OBJECT_COUNT = len(LAMBDA_VALUES)
# CACHE_SIZE = 4.4
# CACHE_MISS_COST = 7
# CACHE_REFRESH_COST = 1

In [4]:
db_object_count = DATABASE_OBJECT_COUNT
cache_sz = CACHE_SIZE

lambda_vals = LAMBDA_VALUES
c_f = CACHE_MISS_COST
c_delta = CACHE_REFRESH_COST

In [5]:
def eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals):
    num = (db_object_count * c_f - cache_sz * c_delta)
    denom = np.sum(1.0/lambda_vals)
    return max(0, num/denom)

In [6]:
def h_i_star(c_f, eta, lambda_vals, c_delta):
    optimized_hitrate = (c_f - (eta/lambda_vals)) / c_delta
    return optimized_hitrate

In [7]:
def get_index_of_furthest_hitrate_from_boundary(hitrates):
    lower_bound_violation =  hitrates[(hitrates < 0)]
    upper_bound_violation = hitrates[(hitrates > 1)]
    smallest_delta = np.abs(np.min(lower_bound_violation))
    biggest_delta = np.max(upper_bound_violation) - 1
    if smallest_delta > biggest_delta:
        print(smallest_delta)
        index = np.where(hitrates == np.min(local_hitrates))[0][0]
        return index
    else:
        
        index = np.where(hitrates == np.max(local_hitrates))[0][0]
        return index

In [8]:
def get_index_of_furthest_hitrate_from_boundary(hitrates):
    outside_bounds = (hitrates < 0) | (hitrates > 1)
    distances = np.where(outside_bounds, np.maximum(np.abs(hitrates - 0), np.abs(hitrates - 1)), -np.inf)
    index = np.argmax(distances)
    return index

In [9]:
lambda_vals

array([ 1,  3,  1,  1,  2,  1,  5,  1,  1,  1,  2,  1,  1,  1,  2,  2,  1,
        1,  3,  1,  1,  1,  1,  2,  1,  1,  1,  5,  1,  1,  1,  4,  1,  4,
        1,  1,  1,  3,  8,  1,  4,  4,  2,  1,  1,  1, 10,  1,  1,  1,  5,
        9,  1,  1,  1,  1,  1, 17,  2,  1, 26,  1,  1,  2,  1, 10,  1, 69,
        1,  1,  2,  1,  1,  1,  3,  2,  2,  3, 15,  1,  1,  5,  2,  1,  1,
        2,  1,  2,  1,  1,  2,  2,  3,  1,  2,  1,  1, 37,  4,  2])

In [10]:
eta = eta_star(db_object_count, c_f, cache_sz, c_delta, lambda_vals[lambda_vals != lambda_vals[6]])
print(eta)
optimized_hitrates = (c_f - eta / lambda_vals[lambda_vals != lambda_vals[6]]) / c_delta
optimized_hitrates

2.1159070575516945


array([-0.11590706,  1.29469765, -0.11590706, -0.11590706,  0.94204647,
       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  0.94204647,
       -0.11590706, -0.11590706, -0.11590706,  0.94204647,  0.94204647,
       -0.11590706, -0.11590706,  1.29469765, -0.11590706, -0.11590706,
       -0.11590706, -0.11590706,  0.94204647, -0.11590706, -0.11590706,
       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  1.47102324,
       -0.11590706,  1.47102324, -0.11590706, -0.11590706, -0.11590706,
        1.29469765,  1.73551162, -0.11590706,  1.47102324,  1.47102324,
        0.94204647, -0.11590706, -0.11590706, -0.11590706,  1.78840929,
       -0.11590706, -0.11590706, -0.11590706,  1.76489922, -0.11590706,
       -0.11590706, -0.11590706, -0.11590706, -0.11590706,  1.87553488,
        0.94204647, -0.11590706,  1.91861896, -0.11590706, -0.11590706,
        0.94204647, -0.11590706,  1.78840929, -0.11590706,  1.96933468,
       -0.11590706, -0.11590706,  0.94204647, -0.11590706, -0.11

In [11]:
pd.DataFrame(optimized_hitrates).describe()

Unnamed: 0,0
count,96.0
mean,0.4375
std,0.726101
min,-0.115907
25%,-0.115907
50%,-0.115907
75%,0.942046
max,1.969335


In [12]:
"""
Perform theoretical optimization to compute optimal hit probabilities.

Parameters:
- lambda_vals (numpy array): Request rates for each item.
- B (float): Total cache size.
- c_f (float): Fetching linear cost (cache miss cost).
- c_delta (float): Age linear cost.

Returns:
- h_opt (numpy array): Optimal hit probabilities for each item.
"""
optimized_hitrates = np.zeros(DATABASE_OBJECT_COUNT)
current_db_object_count = DATABASE_OBJECT_COUNT
current_cache_size = CACHE_SIZE

differenc_set = np.arange(DATABASE_OBJECT_COUNT)
fix_i = []

while True:
    if current_db_object_count == 0:
        print("No objects left to optimize.")
        if current_cache_size > 0:
            print("Add obj with optimized hitrate 0 and add them to optimization pool for re-optimization.")
            # Redistribute unused cache size among items with zero hit probability
            differenc_set = np.where(optimized_hitrates == 0)[0]
            fix_i = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), differenc_set).tolist()
            current_db_object_count = len(differenc_set)
            continue
        else:
            "Reset"
            optimized_hitrates[differenc_set] = 0
            break
    # Compute Lagrangian multiplier and optimal hit probabilities
    eta = eta_star(current_db_object_count, c_f, current_cache_size, c_delta, lambda_vals[differenc_set])
    optimized_hitrates[differenc_set] = (c_f - eta / lambda_vals[differenc_set]) / c_delta
    if eta < 0:
        print("eta was negative.")
        current_cache_size = current_db_object_count * c_f / c_delta  # Adjust cache size for next iteration
        continue
    
    if len((optimized_hitrates[differenc_set])[((optimized_hitrates[differenc_set]) < 0) | ((optimized_hitrates[differenc_set])> 1)]) == 0:
        print("All values optimized.")
        break
    
    max_outbound_index = get_index_of_furthest_hitrate_from_boundary(optimized_hitrates)
    fix_i.append(max_outbound_index)
    differenc_set = np.setdiff1d(np.arange(DATABASE_OBJECT_COUNT), fix_i)

    old_hitrate = optimized_hitrates[max_outbound_index]
    optimized_hitrates[max_outbound_index] = (1 if optimized_hitrates[max_outbound_index] > 1 else 0)
    
    print(f"Optimized: {max_outbound_index} {old_hitrate:.2f} // {optimized_hitrates[max_outbound_index-2:max_outbound_index+3]}")
    
    current_db_object_count -= 1
    current_cache_size -= optimized_hitrates[max_outbound_index]

Optimized: 67 1.97 // [ 1.79077042 -0.09229584  1.         -0.09229584 -0.09229584]
Optimized: 97 1.94 // [-0.07876743 -0.07876743  1.          1.48030814  0.96061628]
Optimized: 60 1.92 // [ 0.96720258 -0.06559484  1.         -0.06559484 -0.06559484]
Optimized: 57 1.88 // [-0.05274002 -0.05274002  1.          0.97362999 -0.05274002]
Optimized: 78 1.86 // [ 0.97977406  1.31984937  1.         -0.04045188 -0.04045188]
Optimized: 46 1.80 // [-0.02836604 -0.02836604  1.         -0.02836604 -0.02836604]
Optimized: 65 1.80 // [ 0.99140044 -0.01719911  1.         -0.01719911  1.        ]
Optimized: 51 1.78 // [-0.00600086  1.59879983  1.         -0.00600086 -0.00600086]
Optimized: 38 1.75 // [0.00491746 1.33497249 1.         0.00491746 1.50122936]
Optimized: 6 1.60 // [1.00774103 0.01548205 1.         0.01548205 0.01548205]
Optimized: 27 1.60 // [0.02399435 0.02399435 1.         0.02399435 0.02399435]
Optimized: 50 1.61 // [0.03255485 0.03255485 1.         1.         0.03255485]
Optimized: 81

In [13]:
optimized_hitrates

array([0.13793103, 1.        , 0.13793103, 0.13793103, 1.        ,
       0.13793103, 1.        , 0.13793103, 0.13793103, 0.13793103,
       1.        , 0.13793103, 0.13793103, 0.13793103, 1.        ,
       1.        , 0.13793103, 0.13793103, 1.        , 0.13793103,
       0.13793103, 0.13793103, 0.13793103, 1.        , 0.13793103,
       0.13793103, 0.13793103, 1.        , 0.13793103, 0.13793103,
       0.13793103, 1.        , 0.13793103, 1.        , 0.13793103,
       0.13793103, 0.13793103, 1.        , 1.        , 0.13793103,
       1.        , 1.        , 1.        , 0.13793103, 0.13793103,
       0.13793103, 1.        , 0.13793103, 0.13793103, 0.13793103,
       1.        , 1.        , 0.13793103, 0.13793103, 0.13793103,
       0.13793103, 0.13793103, 1.        , 1.        , 0.13793103,
       1.        , 0.13793103, 0.13793103, 1.        , 0.13793103,
       1.        , 0.13793103, 1.        , 0.13793103, 0.13793103,
       1.        , 0.13793103, 0.13793103, 0.13793103, 1.     

In [14]:
pd.DataFrame(optimized_hitrates).describe()

Unnamed: 0,0
count,100.0
mean,0.5
std,0.427625
min,0.137931
25%,0.137931
50%,0.137931
75%,1.0
max,1.0
