Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: CI

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        # v2 runs on the deprecated Node 12 runtime; v4 is the current release.
        uses: actions/checkout@v4

      - name: Set up Python
        # v2 is deprecated for the same reason; v5 is current.
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set PYTHONPATH
        # Make the repository root importable (the `src` package) for pytest.
        run: echo "PYTHONPATH=$PWD" >> $GITHUB_ENV

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install uv
          uv sync --frozen

      - name: Run pre-commit
        run: uv run pre-commit run --all-files

      - name: Run tests
        run: uv run pytest
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Python bytecode caches
__pycache__
# Local virtual environment (managed by uv)
.venv
# Editor settings
.vscode
# Tool caches
.pytest_cache
.ruff_cache
# Local interpreter pin
.python-version
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
repos:
  # Generic hygiene hooks: whitespace, EOF newlines, YAML syntax, large files.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files

  # Ruff: lint (with autofix) and format, replacing flake8/black.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.11.8
    hooks:
      - id: ruff
        args:
          - --fix
      - id: ruff-format
Binary file added LT vs TP 2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added TP vs N 1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added TP vs N 2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 0 additions & 15 deletions client.py

This file was deleted.

6 changes: 0 additions & 6 deletions main.py

This file was deleted.

11 changes: 10 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,14 @@ description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    # NOTE(review): `crypto` looks unintended — the visible code imports only
    # `cryptography.fernet`, and the PyPI `crypto` package is unmaintained.
    # Confirm it is actually imported anywhere before keeping it.
    "crypto>=1.4.1",
    "cryptography>=45.0.2",
    "matplotlib>=3.10.3",
    "pydantic>=2.11.4",
    # NOTE(review): ruff is a development tool; consider moving it to the
    # dev dependency group below, matching pre-commit/pytest.
    "ruff>=0.11.9",
]

[dependency-groups]
dev = [
    "pre-commit>=4.2.0",
    "pytest>=8.3.5",
]
9 changes: 0 additions & 9 deletions server.py

This file was deleted.

Empty file added src/__init__.py
Empty file.
59 changes: 59 additions & 0 deletions src/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import time

import matplotlib.pyplot as plt

from src.client import Client
from src.server import Server


def benchmark_throughput_vs_db_size(db_sizes):
    """Measure ORAM request throughput and latency across database sizes.

    For each size N, a fresh Server/Client pair is created and a budget of
    roughly 4000 requests (store + retrieve, then retrieve + delete, for
    every block, repeated) is timed end to end.

    Args:
        db_sizes: Iterable of block counts (N) to benchmark.

    Returns:
        A ``(throughputs, latencies)`` tuple of lists, one entry per element
        of ``db_sizes``: requests/second and seconds/request respectively.
    """
    throughputs = []
    latencies = []
    for num_blocks in db_sizes:
        server = Server(num_blocks=num_blocks)
        client = Client(num_blocks=num_blocks)
        client._initialize_server_tree(server)

        # BUGFIX: for num_blocks > 1000 the old `1000 // num_blocks` gave 0
        # repeats, producing zero requests and a ZeroDivisionError below.
        repeats = max(1, 1000 // num_blocks)

        # perf_counter() is monotonic and meant for interval timing,
        # unlike time.time() which can jump with wall-clock adjustments.
        start = time.perf_counter()
        for _ in range(repeats):
            # Phase 1: two requests per block (store, then read back).
            for i in range(num_blocks):
                client.store_data(server, i, f"data_{i}")
                client.retrieve_data(server, i)

            # Phase 2: two more requests per block (read, then delete).
            for i in range(num_blocks):
                client.retrieve_data(server, i)
                client.delete_data(server, i)
        end = time.perf_counter()

        delta = end - start
        total_requests = repeats * 4 * num_blocks  # 4 requests per block per repeat
        throughput = total_requests / delta
        latency = delta / total_requests
        throughputs.append(throughput)
        latencies.append(latency)
        print(f"{delta=}")
        print(f"N={num_blocks}: {throughput:.2f} req/sec")

    return throughputs, latencies


if __name__ == "__main__":
    sizes = [10, 50, 100, 200, 500, 1000]
    tput, lat = benchmark_throughput_vs_db_size(sizes)

    # Throughput as a function of the database size N.
    plt.figure()
    plt.plot(sizes, tput, marker="o")
    plt.xlabel("N (DB size)")
    plt.ylabel("Throughput (requests/sec)")
    plt.title("Throughput vs. DB Size")
    plt.grid(True)
    plt.show()

    # Latency (converted to milliseconds) against the achieved throughput.
    latencies_ms = [1000 * value for value in lat]
    plt.figure()
    plt.plot(tput, latencies_ms, marker="o")
    plt.xlabel("Throughput (requests/sec)")
    plt.ylabel("Latency (ms/request)")
    plt.title("Latency vs. Throughput")
    plt.grid(True)
    plt.show()
201 changes: 201 additions & 0 deletions src/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
import logging
import math
import random
from typing import List

from cryptography.fernet import Fernet
from pydantic import BaseModel

from src.server import Server

# Configure root logging once at import time.
# NOTE(review): calling basicConfig in a library module affects the whole
# process; consider moving this to the application entry point.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)


class Block(BaseModel):
    """A single ORAM data block as stored in a tree bucket."""

    # id == -1 marks a dummy (empty) slot; real blocks carry a non-negative id
    # (see the `block.id != -1` check when filling the stash).
    id: int = -1
    # Payload; "xxxx" is the filler written into dummy blocks.
    data: str = "xxxx"


class Bucket(BaseModel):
    """A fixed-size node of the ORAM tree holding a list of block slots."""

    blocks: List[Block]

    def __init__(
        self, num_blocks: int = 4, blocks: List[Block] | None = None, **data
    ) -> None:
        """Build a bucket from explicit blocks, or pad it with dummies.

        Args:
            num_blocks: Slot count used when no blocks are supplied.
            blocks: Pre-existing blocks to wrap; when omitted the bucket is
                filled with ``num_blocks`` dummy blocks.
        """
        # BUGFIX: the previous `if blocks:` also treated an explicitly passed
        # empty list as absent and silently padded with dummies; test for
        # None so only a genuinely omitted argument triggers the default.
        if blocks is not None:
            super().__init__(blocks=blocks, **data)
        else:
            super().__init__(blocks=[Block() for _ in range(num_blocks)], **data)


class Client:
    """Path-ORAM style client.

    Holds everything the server must not learn: the stash of in-flight
    blocks, the block-id -> leaf position map, and the Fernet key used to
    encrypt every block before it is shipped to the server.
    """

    def __init__(self, num_blocks: int = 100, blocks_per_bucket: int = 4) -> None:
        """
        Args:
            num_blocks: Logical capacity of the database (block count).
            blocks_per_bucket: Number of block slots per tree bucket.
        """
        self._logger = logging.getLogger(__name__)
        self._num_blocks = num_blocks
        self._num_blocks_per_bucket = blocks_per_bucket
        # NOTE(review): round() can undersize the leaf level for sizes just
        # above a power of two; math.ceil would guarantee >= num_blocks
        # leaves — confirm the intended capacity model before changing.
        self._tree_height = round(math.log2(num_blocks))
        self._stash: dict[int, Block] = {}  # blocks currently held client-side
        self._position_map: dict[int, int] = {}  # block id -> assigned leaf index
        self._key = Fernet.generate_key()
        self._cipher = Fernet(self._key)

    def _remap_block(self, block_id: int) -> None:
        """Assign ``block_id`` a fresh uniformly-random leaf in the position map."""
        # NOTE(review): `random` is not a CSPRNG; ORAM security arguments
        # normally assume cryptographic randomness (`secrets`) — confirm.
        new_position = random.randint(0, int(2**self._tree_height) - 1)
        self._position_map[block_id] = new_position
        self._logger.debug(f"Block {block_id} remapped to position {new_position}.")

    def store_data(self, server: Server, id: int, data: str) -> None:
        """Write ``data`` under block ``id``, reading and rewriting one tree path."""
        self._logger.info(f"Storing data for block {id}.")
        leaf_index = self._position_map.get(id)
        self._remap_block(id)
        # BUGFIX: the previous `if not leaf_index` also matched leaf index 0,
        # so existing blocks mapped to the first leaf were treated as new and
        # a stale copy was left on their old path. Only fall back to the
        # freshly assigned position for genuinely new ids.
        if leaf_index is None:  # new block
            leaf_index = self._position_map[id]
        self._logger.debug(f"Leaf index for block {id}: {leaf_index}.")
        self._fetch_decrypt_and_update_stash(leaf_index, server)

        # write new data to stash
        self._stash[id] = Block(id=id, data=data)
        self._logger.debug(f"Stash updated with block {id}.")

        self._build_encrypt_and_set_path(leaf_index, server)
        self._logger.info(f"Data for block {id} stored successfully.")

    def retrieve_data(self, server: Server, id: int) -> str | None:
        """Return the data stored under block ``id``, or None if unknown."""
        self._logger.info(f"Retrieving data for block {id}.")
        leaf_index = self._position_map.get(id)
        if leaf_index is None:
            self._logger.warning(f"Block {id} not found.")
            return None
        # Remap before the read so the block's new position is already in
        # force when the old path is rewritten below.
        self._remap_block(id)
        self._fetch_decrypt_and_update_stash(leaf_index, server)

        block = self._stash.get(id)

        self._build_encrypt_and_set_path(leaf_index, server)
        if block is None:
            # BUGFIX: previously this raised AttributeError on `block.data`.
            # A mapped block absent from both its path and the stash means
            # inconsistent state; surface it instead of crashing.
            self._logger.warning(f"Block {id} missing from fetched path and stash.")
            return None
        self._logger.info(f"Data for block {id} retrieved successfully.")
        return block.data

    def delete_data(self, server: Server, id: int, data=None) -> None:
        """Remove block ``id`` from the stash, position map and server path."""
        self._logger.info(f"Deleting data for block {id}.")
        leaf_index = self._position_map.get(id)
        if leaf_index is None:
            self._logger.warning(f"Block {id} not found.")
            return None
        self._fetch_decrypt_and_update_stash(leaf_index, server)

        # remove block from stash and position map; pop() tolerates a block
        # that is mapped but was never materialized in the stash (the old
        # `del` raised KeyError in that case)
        self._stash.pop(id, None)
        del self._position_map[id]
        self._logger.debug(f"Block {id} removed from stash.")

        self._build_encrypt_and_set_path(leaf_index, server)
        self._logger.info(f"Data for block {id} deleted successfully.")

    def _update_stash(self, path: List[Bucket]) -> None:
        """Absorb every real (non-dummy) block found on ``path`` into the stash."""
        self._logger.debug("Updating stash with fetched path.")
        for bucket in path:
            for block in bucket.blocks:
                if block.id != -1:  # not a dummy block
                    self._stash[block.id] = block

    def _build_new_path(self, leaf_index: int) -> List[Bucket]:
        """
        Constructs a new path from the leaf node up to the root, filling each bucket along
        the path with blocks from the stash that are reachable from the current node in the path.
        For example:
                    0
                   / \\
                  1   2
                 / \\ / \\
                3  4 5  6
        When we build the path for leaf index 3, we will first fill the bucket of node 3 with
        all the blocks that are mapped to node 3 because it is a leaf.
        Then, we go to the next node in the path -> 1.
        We will fill node 1's bucket with all the blocks that are mapped to leaves that are
        reachable from node 1, which are 3 and 4.
        Finally, we will fill the bucket of the root node with all the blocks that are left in
        the stash, because every leaf is reachable from the root.

        Args:
            leaf_index (int): The index of the leaf node for which the path is being built
        Returns:
            List[Bucket]: A list of Bucket objects representing the path from the leaf to
            the root, with each bucket filled with as many appropriate blocks from the
            stash as possible
        Side Effects:
            Removes blocks from the stash that are placed into the path buckets.
        """
        self._logger.debug(f"Building new path for leaf index {leaf_index}.")
        path = [
            Bucket(self._num_blocks_per_bucket) for _ in range(self._tree_height + 1)
        ]

        # iterate over the tree levels from leaf to root
        for level in range(self._tree_height, -1, -1):
            reachable_leaves = self._calculate_reachable_leaves(leaf_index, level)
            # path[0] is the leaf bucket, path[-1] the root bucket
            bucket_index = self._tree_height - level
            num_written_blocks = 0
            # snapshot keys to avoid mutating the dict while iterating it
            block_ids = list(self._stash.keys())
            for block_id in block_ids:
                if num_written_blocks >= self._num_blocks_per_bucket:
                    break  # bucket is full
                if self._position_map.get(block_id) in reachable_leaves:
                    path[bucket_index].blocks[num_written_blocks] = self._stash[
                        block_id
                    ]
                    del self._stash[block_id]
                    num_written_blocks += 1
        return path

    def _calculate_reachable_leaves(self, leaf_index: int, level: int) -> List[int]:
        """Return the leaf indices reachable from the path node at ``level``.

        ``level`` counts from 0 (root) to ``self._tree_height`` (leaf); the
        node considered is the ancestor of ``leaf_index`` at that level.
        """
        binary = format(leaf_index, f"0{self._tree_height}b")
        # get first level bits (path so far)
        path_bits = binary[:level]
        # compute base index: decimal of path_bits * 2^(L-level)
        base = (
            int(path_bits, 2) * (1 << (self._tree_height - level)) if path_bits else 0
        )
        # number of reachable leaves: 2^(L-level)
        num_leaves = 1 << (self._tree_height - level)
        # list of reachable leaves
        return list(range(base, base + num_leaves))

    def _decrypt_and_parse_path(self, path: List[List[bytes]]) -> List[Bucket]:
        """Decrypt a server path of ciphertext buckets into Bucket objects."""
        new_path = []
        for bucket in path:
            blocks = [
                Block.model_validate_json(self._cipher.decrypt(data).decode())
                for data in bucket
            ]
            new_path.append(Bucket(blocks=blocks))
        return new_path

    def _unparse_and_encrypt_path(self, path: List[Bucket]) -> List[List[bytes]]:
        """Serialize and encrypt Bucket objects into the server's wire format."""
        server_path = []
        for bucket in path:
            bucket_data = [
                self._cipher.encrypt(block.model_dump_json().encode())
                for block in bucket.blocks
            ]
            server_path.append(bucket_data)
        return server_path

    def _initialize_server_tree(self, server: Server) -> None:
        """Fill the server's tree (2^(h+1) - 1 nodes) with encrypted dummy buckets."""
        dummy_elements = [
            Bucket(self._num_blocks_per_bucket)
            for _ in range(int(2 ** (self._tree_height + 1) - 1))
        ]
        dummy_elements = self._unparse_and_encrypt_path(dummy_elements)
        server.initialize_tree(dummy_elements)

    def _fetch_decrypt_and_update_stash(self, leaf_index: int, server: Server) -> None:
        """Read the path to ``leaf_index`` from the server and stash its blocks."""
        path = server.get_path(leaf_index)
        path = self._decrypt_and_parse_path(path)
        # BUGFIX: this previously passed the *builtin* `id` function as a
        # stray logging argument; `_update_stash` no longer takes it.
        self._update_stash(path)

    def _build_encrypt_and_set_path(self, leaf_index: int, server: Server) -> None:
        """Rebuild, encrypt and write back the path for ``leaf_index``."""
        path = self._build_new_path(leaf_index)
        path = self._unparse_and_encrypt_path(path)
        server.set_path(path, leaf_index)
Loading