From b8ed86b251d40541bb75692e5a699dc5ae5a5705 Mon Sep 17 00:00:00 2001 From: GabrielPalmar Date: Tue, 19 Aug 2025 13:01:44 -0500 Subject: [PATCH 1/4] fix(k8s): Added more time for liveness probe --- Dockerfile | 6 +++-- app/readiness.py | 65 ++++++++++++++++++++++++++++++++++------------ k8s/deployment.yml | 2 +- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index be8306d..692769d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.13.6-alpine@sha256:f196fd275fdad7287ccb4b0a85c2e402bb8c794d205cf6158909041c1ee9f38d +FROM python:3.13.6-alpine@sha256:af1fd7a973d8adc761ee6b9d362b99329b39eb096ea3c53b8838f99bd187011e RUN addgroup -S appgroup && adduser -S -G appgroup appuser @@ -17,7 +17,9 @@ ENV FLASK_APP=app.main.py:app \ CACHE_TTL=300 \ MINIO_PORT=9000 \ MINIO_ACCESS_KEY=minioadmin \ - MINIO_SECRET_KEY=minioadmin + MINIO_SECRET_KEY=minioadmin \ + REDIS_HOST=redis \ + MINIO_HOST=minio USER appuser diff --git a/app/readiness.py b/app/readiness.py index 86167cd..26eb8a7 100644 --- a/app/readiness.py +++ b/app/readiness.py @@ -1,4 +1,6 @@ '''Module to check the readiness of the stored information''' +import requests +import redis from app.opensense import get_temperature from app.config import create_redis_client @@ -9,30 +11,59 @@ def check_caching(): if not REDIS_AVAILABLE: return True - cache_key = "temperature_data" - ttl = redis_client.ttl(cache_key) + try: + cache_key = "temperature_data" + ttl = redis_client.ttl(cache_key) - if ttl in (-2, -1): - return True + if ttl in (-2, -1): + return True - return False + return False + except redis.RedisError as e: + print(f"Redis error while checking cache: {e}") + return True def reachable_boxes(): - '''Check if 50% + 1 of boxes are reachable''' - _, sensor_stats = get_temperature() - total = sensor_stats["total_sensors"] - null_count = sensor_stats["null_count"] - if total > 0 and null_count > (total * 0.5): - print("Warning: More than 50% of sensors are unreachable") + '''Check if more than 50% of sensor boxes are reachable''' + try: + _, sensor_stats = get_temperature() + total_boxes = sensor_stats.get('total', 0) + reachable = sensor_stats.get('reachable', 0) + + if total_boxes == 0: + return 400 + + percentage = (reachable / total_boxes) * 100 + + if percentage > 50: + return 200 + else: + return 400 + except requests.exceptions.RequestException as e: + # Handle network-related errors from the API call + print(f"Network error checking reachable boxes: {e}") + return 200 + except redis.RedisError as e: + # Handle Redis-related errors + print(f"Redis error checking reachable boxes: {e}") + return 200 + except (ValueError, TypeError, KeyError) as e: + # Handle data parsing errors + print(f"Data error checking reachable boxes: {e}") return 400 - return 200 def readiness_check(): '''Combined readiness check for the /readyz endpoint''' - boxes_status = reachable_boxes() - cache_is_old = check_caching() + try: + boxes_status = reachable_boxes() + cache_is_old = check_caching() - if boxes_status == 400 and cache_is_old: - return 503 + # Only fail if BOTH conditions are bad + if boxes_status == 400 and cache_is_old: + return 503 - return 200 + return 200 + except redis.RedisError as e: + # If Redis is completely unavailable, still allow the service to be ready + print(f"Redis error during readiness check: {e}") + return 200 diff --git a/k8s/deployment.yml b/k8s/deployment.yml index f5cee57..82fd257 100644 --- a/k8s/deployment.yml +++ b/k8s/deployment.yml @@ -58,7 +58,7 @@ spec: port: 5000 initialDelaySeconds: 30 periodSeconds: 10 - timeoutSeconds: 5 + timeoutSeconds: 300 failureThreshold: 3 volumeMounts: - name: tmp-volume From 352c9425299b44014c8fdf5004374d422aaaa65f Mon Sep 17 00:00:00 2001 From: GabrielPalmar Date: Tue, 19 Aug 2025 13:09:51 -0500 Subject: [PATCH 2/4] fix(ci): Fixed PyLint --- app/readiness.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/readiness.py b/app/readiness.py index 26eb8a7..e62200c 100644 --- a/app/readiness.py +++ b/app/readiness.py @@ -37,8 +37,8 @@ def reachable_boxes(): if percentage > 50: return 200 - else: - return 400 + return 400 + except requests.exceptions.RequestException as e: # Handle network-related errors from the API call print(f"Network error checking reachable boxes: {e}") From e54c8ee0921869ad5f9e596493766ad24cddbb98 Mon Sep 17 00:00:00 2001 From: GabrielPalmar Date: Tue, 19 Aug 2025 15:09:02 -0500 Subject: [PATCH 3/4] fix(ci): Avoiding real calls --- app/readiness.py | 16 +++++++++------- tests/test_modules.py | 10 +++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/app/readiness.py b/app/readiness.py index e62200c..a21807c 100644 --- a/app/readiness.py +++ b/app/readiness.py @@ -27,17 +27,19 @@ def reachable_boxes(): '''Check if more than 50% of sensor boxes are reachable''' try: _, sensor_stats = get_temperature() - total_boxes = sensor_stats.get('total', 0) - reachable = sensor_stats.get('reachable', 0) + total_boxes = sensor_stats.get('total_sensors', 0) + unreachable = sensor_stats.get('null_count', 0) + # No sensors configured => treat as healthy if total_boxes == 0: - return 400 + return 200 - percentage = (reachable / total_boxes) * 100 + percentage_unreachable = (unreachable / total_boxes) * 100 - if percentage > 50: - return 200 - return 400 + # Fail only if strictly more than 50% are unreachable + if percentage_unreachable > 50: + return 400 + return 200 except requests.exceptions.RequestException as e: # Handle network-related errors from the API call diff --git a/tests/test_modules.py b/tests/test_modules.py index 43e655d..6635c57 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -32,8 +32,10 @@ def test_version_endpoint(self): def test_temperature_endpoint(self): """Test temperature endpoint returns 200 or 500""" - response = self.client.get('/temperature') - self.assertIn(response.status_code, [200, 500]) + with mock.patch('app.opensense.requests.get', + return_value=MockOpenSenseResponse(20)): + response = self.client.get('/temperature') + self.assertIn(response.status_code, [200, 500]) def test_metrics_endpoint(self): """Test metrics endpoint returns 200""" @@ -94,7 +96,9 @@ class TestOpenSense(unittest.TestCase): def test_get_temperature_returns_tuple(self): """Test that opensense.get_temperature returns a tuple with correct format""" - result, stats = opensense.get_temperature() + with mock.patch('app.opensense.requests.get', + return_value=MockOpenSenseResponse(20)): + result, stats = opensense.get_temperature() self.assertIsInstance(result, str) self.assertIsInstance(stats, dict) self.assertIn('total_sensors', stats) From 5caf11bcf1eaab7583aba46d94f831b15a34897e Mon Sep 17 00:00:00 2001 From: GabrielPalmar Date: Tue, 19 Aug 2025 15:23:18 -0500 Subject: [PATCH 4/4] fix(ci): Adding tests for readiness --- tests/test_modules.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/test_modules.py b/tests/test_modules.py index 6635c57..9521b83 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -2,6 +2,8 @@ import re import unittest import unittest.mock as mock +import requests # added +import redis # added from minio.error import S3Error, InvalidResponseError from app.storage import store_temperature_data from app.main import app @@ -297,6 +299,17 @@ def test_check_caching_fresh_cache(self): result = readiness.check_caching() self.assertFalse(result) # Cache is fresh + def test_check_caching_redis_error(self): + """TTL raises RedisError -> treated as old cache (True)""" + mock_redis_client = mock.MagicMock() + mock_redis_client.ttl.side_effect = redis.RedisError("boom") + + with mock.patch('app.readiness.REDIS_AVAILABLE', True), \ + mock.patch('app.readiness.redis_client', mock_redis_client), \ + mock.patch('builtins.print'): + result = readiness.check_caching() + self.assertTrue(result) + def test_reachable_boxes_healthy(self): """Test reachable_boxes when most sensors are working""" mock_stats = {"total_sensors": 100, "null_count": 10} @@ -328,6 +341,36 @@ def test_reachable_boxes_edge_cases(self): return_value=("temp", {"total_sensors": 100, "null_count": 50})): self.assertEqual(readiness.reachable_boxes(), 200) + def test_reachable_boxes_network_error(self): + """requests exceptions -> treated as healthy (200)""" + with mock.patch('app.readiness.get_temperature', + side_effect=requests.exceptions.RequestException("net")), \ + mock.patch('builtins.print'): + self.assertEqual(readiness.reachable_boxes(), 200) + + def test_reachable_boxes_redis_error(self): + """Redis errors inside get_temperature -> treated as healthy (200)""" + with mock.patch('app.readiness.get_temperature', + side_effect=redis.RedisError("redis down")), \ + mock.patch('builtins.print'): + self.assertEqual(readiness.reachable_boxes(), 200) + + def test_reachable_boxes_data_error(self): + """Data parsing error -> returns 400""" + # Cause a TypeError during percentage calculation + bad_stats = {"total_sensors": 2, "null_count": "x"} + with mock.patch('app.readiness.get_temperature', + return_value=("temp", bad_stats)), \ + mock.patch('builtins.print'): + self.assertEqual(readiness.reachable_boxes(), 400) + + def test_readiness_check_redis_error_top_level(self): + """Top-level Redis error in readiness_check -> returns 200""" + with mock.patch('app.readiness.check_caching', + side_effect=redis.RedisError("ttl failed")), \ + mock.patch('builtins.print'): + self.assertEqual(readiness.readiness_check(), 200) + def test_readiness_check_all_good(self): """Test readiness_check when everything is healthy""" with mock.patch('app.readiness.check_caching', return_value=False), \