From fb1372420ac4d025efa9fdf5a2522dab57360e5e Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:22:08 +0300 Subject: [PATCH 1/8] Make pylint happy, cosmetic fixes Signed-off-by: Denys Fedoryshchenko --- api/maintenance.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/api/maintenance.py b/api/maintenance.py index fe098534..46a4a523 100644 --- a/api/maintenance.py +++ b/api/maintenance.py @@ -1,14 +1,34 @@ -from pymongo import MongoClient +""" +This module provides maintenance utilities for the KernelCI API, including +functions to purge old nodes from the database and manage MongoDB connections. +""" import datetime import os +from pymongo import MongoClient def purge_ids(db, collection, ids): + """ + Delete documents from the specified collection in the database by their IDs. + + Args: + db: The MongoDB database instance. + collection (str): The name of the collection to purge from. + ids (list): List of document IDs to delete. + """ print("Purging", len(ids), "from", collection) db[collection].delete_many({"_id": {"$in": ids}}) def connect_to_db(): + """ + Connect to the MongoDB database using the MONGO_SERVICE environment variable. + + Returns: + db: The 'kernelci' MongoDB database instance. + Raises: + ValueError: If the MONGO_SERVICE environment variable is not set. + """ mongo_service = os.environ["MONGO_SERVICE"] if not mongo_service: raise ValueError("MONGO_SERVICE environment variable is not set") @@ -18,6 +38,13 @@ def connect_to_db(): async def purge_old_nodes(age_days=180): + """ + Purge nodes from the 'nodes' collection that are older than the specified number of days. + + Args: + age_days (int, optional): The age in days to use as the threshold for deletion. + Defaults to 180. 
+ """ date_end = datetime.datetime.today() - datetime.timedelta(days=age_days) db = connect_to_db() nodes = db["nodes"].find({"created": {"$lt": date_end}}) From b530d82a1b10a4147a75441a975dc462c677d0e2 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:23:04 +0300 Subject: [PATCH 2/8] Make pylint happy, cosmetic fixes in main.py Signed-off-by: Denys Fedoryshchenko --- api/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/main.py b/api/main.py index 5c9c03f7..9f9ef6f5 100644 --- a/api/main.py +++ b/api/main.py @@ -29,10 +29,9 @@ from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse, PlainTextResponse, FileResponse from fastapi.security import OAuth2PasswordRequestForm -from fastapi_pagination import add_pagination, pagination_ctx +from fastapi_pagination import add_pagination from fastapi_versioning import VersionedFastAPI from bson import ObjectId, errors -from pymongo.errors import DuplicateKeyError from fastapi_users import FastAPIUsers from beanie import PydanticObjectId from pydantic import BaseModel From c6fb8629ef4414afc7a1e563db12149cc05fc000 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:24:24 +0300 Subject: [PATCH 3/8] Add myself to authors and update copyright line Signed-off-by: Denys Fedoryshchenko --- api/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/main.py b/api/main.py index 9f9ef6f5..8378f475 100644 --- a/api/main.py +++ b/api/main.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: LGPL-2.1-or-later # -# Copyright (C) 2021-2023 Collabora Limited +# Copyright (C) 2021-2025 Collabora Limited # Author: Guillaume Tucker # Author: Jeny Sadadia +# Author: Denys Fedoryshchenko # pylint: disable=unused-argument,global-statement,too-many-lines From afdb6b02349dedd8234817f89e8eb10bd55e0e72 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:25:21 +0300 Subject: [PATCH 4/8] Add 
copyright to maintenance.py Signed-off-by: Denys Fedoryshchenko --- api/maintenance.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/maintenance.py b/api/maintenance.py index 46a4a523..33cc1c43 100644 --- a/api/maintenance.py +++ b/api/maintenance.py @@ -1,3 +1,10 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# Copyright (C) 2021-2025 Collabora Limited +# Author: Guillaume Tucker +# Author: Jeny Sadadia +# Author: Denys Fedoryshchenko + """ This module provides maintenance utilities for the KernelCI API, including functions to purge old nodes from the database and manage MongoDB connections. From 7b56c3b1232fb63d869f3787da82bf6644a2824e Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:31:29 +0300 Subject: [PATCH 5/8] (main.py) Add exception for timeout in get_nodes_fast The request already sets a timeout, but the resulting asyncio.TimeoutError was never handled. Signed-off-by: Denys Fedoryshchenko --- api/main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/main.py b/api/main.py index 8378f475..e25000ae 100644 --- a/api/main.py +++ b/api/main.py @@ -556,6 +556,11 @@ async def get_nodes_fast(request: Request): timeout=15 ) return resp + except asyncio.TimeoutError: + raise HTTPException( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + detail="Timeout while fetching nodes" + ) except KeyError as error: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, From 9d0893aa496f1d129e6b940f78e5b27647951384 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:37:52 +0300 Subject: [PATCH 6/8] (maintenance) Make pycodestyle happy Signed-off-by: Denys Fedoryshchenko --- api/maintenance.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/api/maintenance.py b/api/maintenance.py index 33cc1c43..1b3b0af2 100644 --- a/api/maintenance.py +++ b/api/maintenance.py @@ -16,7 +16,8 @@ def purge_ids(db, collection, ids): """ - Delete documents from the specified collection in the database by their 
IDs. + Delete documents from the specified collection in the database + by their IDs. Args: db: The MongoDB database instance. @@ -24,12 +25,15 @@ def purge_ids(db, collection, ids): ids (list): List of document IDs to delete. """ print("Purging", len(ids), "from", collection) - db[collection].delete_many({"_id": {"$in": ids}}) + db[collection].delete_many({ + "_id": {"$in": ids} + }) def connect_to_db(): """ - Connect to the MongoDB database using the MONGO_SERVICE environment variable. + Connect to the MongoDB database using the MONGO_SERVICE environment + variable. Returns: db: The 'kernelci' MongoDB database instance. @@ -46,15 +50,19 @@ def connect_to_db(): async def purge_old_nodes(age_days=180): """ - Purge nodes from the 'nodes' collection that are older than the specified number of days. + Purge nodes from the 'nodes' collection that are older than the + specified number of days. Args: - age_days (int, optional): The age in days to use as the threshold for deletion. + age_days (int, optional): The age in days to use as the + threshold for deletion. Defaults to 180. 
""" date_end = datetime.datetime.today() - datetime.timedelta(days=age_days) db = connect_to_db() - nodes = db["nodes"].find({"created": {"$lt": date_end}}) + nodes = db["nodes"].find({ + "created": {"$lt": date_end} + }) # We need to delete node in chunks of 1000, # to not block the main thread for too long del_batch = [] From dc135925e390536cc789b1e43e6b5c027b764f44 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 10:43:09 +0300 Subject: [PATCH 7/8] (main) Add limit on recursive nodes, to reduce memory usage Signed-off-by: Denys Fedoryshchenko --- api/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/main.py b/api/main.py index e25000ae..d4a67036 100644 --- a/api/main.py +++ b/api/main.py @@ -429,6 +429,13 @@ async def get_events(request: Request): query_params['data.result'] = result if limit: query_params['limit'] = int(limit) + # limit recursive to 1000 + if recursive and (not limit or int(limit) > 1000): + # generate error + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Recursive limit is too large, max is 1000" + ) resp = await db.find_by_attributes_nonpaginated(EventHistory, query_params) resp_list = [] for item in resp: From 9ac853bee18170a3a619ac14b2bde6052fee6a85 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 30 May 2025 13:23:25 +0300 Subject: [PATCH 8/8] (main) Again, fix pylint complaint about exception api/main.py:567:8: W0707: Consider explicitly re-raising using 'except Exception as exc' and 'raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail='Timeout while fetching nodes') from exc' (raise-missing-from) Signed-off-by: Denys Fedoryshchenko --- api/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/main.py b/api/main.py index d4a67036..a1596c75 100644 --- a/api/main.py +++ b/api/main.py @@ -563,11 +563,11 @@ async def get_nodes_fast(request: Request): timeout=15 ) return resp - except asyncio.TimeoutError: + 
except asyncio.TimeoutError as error: raise HTTPException( status_code=status.HTTP_504_GATEWAY_TIMEOUT, - detail="Timeout while fetching nodes" - ) + detail=f"Timeout while fetching nodes: {str(error)}" + ) from error except KeyError as error: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND,