diff --git a/agents/cluster/MLSClusterAgent.py b/agents/cluster/MLSClusterAgent.py
index 0ca9e8d..f011fc9 100644
--- a/agents/cluster/MLSClusterAgent.py
+++ b/agents/cluster/MLSClusterAgent.py
@@ -187,7 +187,7 @@ async def fluidity_message_listener(self):
             case MessageEvents.APP_UPDATED.value:
                 logger.debug(f"Application was updated")
                 await self.application_controller.on_application_updated(data)
-            case MessageEvents.COMPONENT_PLACED.value: # DEPRACATED
+            case MessageEvents.COMPONENT_PLACED.value: # DEPRECATED
                 # logger.debug(f"Application component placed")
                 # logger.debug(f"Data: {data}")
                 # Update internal structure
diff --git a/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsApplication.yaml b/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsApplication.yaml
index 8f6bc29..354b79c 100644
--- a/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsApplication.yaml
+++ b/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsApplication.yaml
@@ -66,7 +66,7 @@ spec:
               enum:
                 - volos
                 - athens
-                - render
+                - rende
                 - milan
                 - lille
                 - delft
diff --git a/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsDatacenter.yaml b/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsDatacenter.yaml
index dec8863..1c33429 100644
--- a/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsDatacenter.yaml
+++ b/agents/cluster/fluidity/manifests/templates/full_descriptions/MLSysOpsDatacenter.yaml
@@ -69,7 +69,7 @@ spec:
               enum:
                 - volos
                 - athens
-                - render
+                - rende
                 - milan
                 - lille
                 - delft
diff --git a/agents/cluster/main.py b/agents/cluster/main.py
index b00c3c9..ce6a8c4 100644
--- a/agents/cluster/main.py
+++ b/agents/cluster/main.py
@@ -47,7 +47,7 @@ async def main():
         await asyncio.gather(agent_task)
 
     except asyncio.CancelledError:
-        logger.info("Agent stoped. Performing cleanup...")
+        logger.info("Agent stopped. Performing cleanup...")
         if agent:
             await agent.stop()  # Stop the agent during cleanup
 
diff --git a/agents/continuum/templates/MLSysOpsApplication.yaml b/agents/continuum/templates/MLSysOpsApplication.yaml
index 8f6bc29..354b79c 100644
--- a/agents/continuum/templates/MLSysOpsApplication.yaml
+++ b/agents/continuum/templates/MLSysOpsApplication.yaml
@@ -66,7 +66,7 @@ spec:
               enum:
                 - volos
                 - athens
-                - render
+                - rende
                 - milan
                 - lille
                 - delft
diff --git a/agents/continuum/templates/MLSysOpsDatacenter.yaml b/agents/continuum/templates/MLSysOpsDatacenter.yaml
index dec8863..1c33429 100644
--- a/agents/continuum/templates/MLSysOpsDatacenter.yaml
+++ b/agents/continuum/templates/MLSysOpsDatacenter.yaml
@@ -69,7 +69,7 @@ spec:
               enum:
                 - volos
                 - athens
-                - render
+                - rende
                 - milan
                 - lille
                 - delft
diff --git a/agents/deployments/README.md b/agents/deployments/README.md
index 33850c4..8f503ba 100644
--- a/agents/deployments/README.md
+++ b/agents/deployments/README.md
@@ -22,7 +22,7 @@ To ensure the correct bootstrap, the agents should start in the following order:
 
 All the deployments take place in a Kubernetes cluster, in separate namespace 'mlsysops-framework'. All the third-party services,
-as well as the Continuum agent are deployed in the managament cluster, the same that is installed in karmada host.
+as well as the Continuum agent, are deployed in the management cluster, the same one where the Karmada host is installed.
 
 # System descriptions preparation
 Before the installation process takes place, system descriptions for every layer must be prepared.
 
@@ -34,7 +34,7 @@ For example, a machine at the node level, with hostname `node-1`, should have a
 the directory `nodes/`.
 * **Continuum** level descriptions, require one single file, that declare the continuumID and the clusters that we
 allow MLSysOps to manage.
-* **Cluster** level descritptions, require a file for each cluster registered in Karmada. It contains the clusterID and a list of node hostnames, that MLSysOps is allowed to manage.
+* **Cluster** level descriptions require a file for each cluster registered in Karmada. Each file contains the clusterID and a list of node hostnames that MLSysOps is allowed to manage.
 * **Node** level descriptions, contain the detailed information about the node resources. Example [here](descriptions/nodes/node-1.yaml).
 
 # Automated Deployment
@@ -119,7 +119,7 @@ The files are in repo/tests/application.
 
 Update the test_CR and test_MLSysOps_description, with the node names of the cluster and the clusterID.
 
-apply the CR or the descirption via the MLS CLI:
+Apply the CR or the description via the MLS CLI:
 
 `kubectl apply -f tests/application/test_CR.yaml`
 `mls.py apps deploy-app --path tests/application/test_MLSysOps_descirption.yaml`
diff --git a/agents/deployments/deploy.py b/agents/deployments/deploy.py
index c35ede7..d1fb462 100644
--- a/agents/deployments/deploy.py
+++ b/agents/deployments/deploy.py
@@ -230,7 +230,7 @@ def update_custom_object(self, name, yaml_content):
                 body=yaml_content,
             )
         except ApiException as e:
-            print(f"Failed to apply kind '{yaml_content['kind']}' to Kuberentes API: {e}")
+            print(f"Failed to apply kind '{yaml_content['kind']}' to Kubernetes API: {e}")
 
 
     def create_or_update(self,resource_yaml):
diff --git a/agents/mlsysops/controllers/libs/otel_pods.py b/agents/mlsysops/controllers/libs/otel_pods.py
index 8524761..9683108 100644
--- a/agents/mlsysops/controllers/libs/otel_pods.py
+++ b/agents/mlsysops/controllers/libs/otel_pods.py
@@ -72,7 +72,7 @@ def set_node_dict(v1: client.CoreV1Api) -> None:
     node_list_dict = []
     initial_list = []
     http_response = v1.list_node()  # http GET , returns a V1NodeList object
-    # Note, the responce is not an ordinary list , it contains V1Node objects
+    # Note: the response is not an ordinary list; it contains V1Node objects
     item_list = http_response.items
     for item in item_list:
         # item represents a node dictionary , item : V1Node
diff --git a/agents/node/main.py b/agents/node/main.py
index 37fc511..d86dc3e 100644
--- a/agents/node/main.py
+++ b/agents/node/main.py
@@ -84,7 +84,7 @@ async def main():
         await asyncio.gather(agent_task)
 
     except asyncio.CancelledError:
-        logger.info("Agent stoped. Performing cleanup...")
+        logger.info("Agent stopped. Performing cleanup...")
         if agent:
             await agent.stop()  # Stop the agent during cleanup
     except Exception as e:
diff --git a/docs/design/agents.md b/docs/design/agents.md
index 775d85b..8e02d75 100755
--- a/docs/design/agents.md
+++ b/docs/design/agents.md
@@ -8,9 +8,9 @@ policy developers, whereas the Southbound API is primarily intended for system a
 
 
 
-The agent follows MAP (Monitor-Analyze-Plan-Execute) paradigm, which was proposed in 2003 [55] to manage autonomic
+The agent follows the MAPE (Monitor-Analyze-Plan-Execute) paradigm, which was proposed in 2003 [55] to manage autonomic
 systems given high-level objectives from the system administrators, by using the same notion for the main configuration
-tasks, depicted as MAP Tasks in Figure 32. The programming language of choice is Python, and leverages SPADE Python
+tasks, depicted as MAPE Tasks in Figure 32. The programming language of choice is Python, and the agent leverages the SPADE Python
 multi-agent framework [56] to form a network of agents that can communicate through XMPP protocol and a set of defined
 messages, providing any necessary functionality from internal tasks that are called behaviours. To achieve seamless
 operation between the various sub-modules, the agent implements a set of controllers that are responsible for managing
diff --git a/docs/design/architecture.md b/docs/design/architecture.md
index c94a298..e8fc40d 100755
--- a/docs/design/architecture.md
+++ b/docs/design/architecture.md
@@ -7,7 +7,7 @@ access to telemetry.
 instructions.
 - The Continuum Agent sits at the top level, interfacing with external stakeholders (via northbound APIs), receiving
 high-level intents and application descriptors, and coordinating decision-making across slices.
-Each layer operates a Monitor–Analyze–Plan–Execute (MAP) control loop, enabling autonomous adaptation based on local
+Each layer operates a Monitor–Analyze–Plan–Execute (MAPE) control loop, enabling autonomous adaptation based on local
 and global telemetry, system optimization targets, and ML-driven policies. Importantly, this architecture separates
 management logic from resource control, allowing for modular evolution and system introspection.
diff --git a/docs/design/controllers.md b/docs/design/controllers.md
index f306647..f262a32 100644
--- a/docs/design/controllers.md
+++ b/docs/design/controllers.md
@@ -1,4 +1,4 @@
-Controllers are responsible for coordinating all internal components of the framework, including the MAP tasks, SPADE,
+Controllers are responsible for coordinating all internal components of the framework, including the MAPE tasks, SPADE,
 Policy and Mechanism Plugins, and the Northbound and Southbound APIs.
 
 - **Application Controller**: Manages the lifecycle of the Analyze loop for each application submitted to the system. When a
diff --git a/docs/design/mape.md b/docs/design/mape.md
index af376dd..9690136 100755
--- a/docs/design/mape.md
+++ b/docs/design/mape.md
@@ -1,6 +1,6 @@
 The primary responsibility of the agents is to manage the system’s assets—entities and components that can be configured
 and/or must be monitored. Typical assets include application components, available configuration mechanisms, and the
-telemetry system. The MAP tasks continuously monitor the state of these assets, analyze their condition, determine
+telemetry system. The MAPE tasks continuously monitor the state of these assets, analyze their condition, determine
 whether a new configuration plan is required, and, if so, create and execute the plan using the mechanism plugins.
 The Analyze and Plan tasks invoke the logic implemented in the policy plugins, whereas the Execution task uses the
 mechanism plugins.
diff --git a/docs/design/plugins/mechanism_plugins.md b/docs/design/plugins/mechanism_plugins.md
index 621dd63..91bb874 100755
--- a/docs/design/plugins/mechanism_plugins.md
+++ b/docs/design/plugins/mechanism_plugins.md
@@ -35,7 +35,12 @@ def apply(command: dict[str, any]):
     """
         "frequency" : "min" | "max" | "1000000 Hz"
     }
     """
-    # The rest of the code ommited
+    # The rest of the code omitted
     cpufreq.set_frequencies(command['frequency'])   # .....
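+    # Note: the following behaviour is an assumption for illustration, since
+    # the full implementation is omitted above. The command dict is built by
+    # the agent's Execute task from a policy plan; "min"/"max" select the
+    # lowest/highest frequency the CPU driver supports, while a literal value
+    # such as "1000000 Hz" requests that frequency directly.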
 ```
diff --git a/docs/design/plugins/plugin_system.md b/docs/design/plugins/plugin_system.md
index d1da207..3e0e32f 100755
--- a/docs/design/plugins/plugin_system.md
+++ b/docs/design/plugins/plugin_system.md
@@ -11,7 +11,7 @@ have been developed, up to the time of writing of this document
 
 ## Execution Flow
 
-Figure X illustrates the execution flow of the MAP tasks and the integration of both policy and mechanism plugins. The
+Figure X illustrates the execution flow of the MAPE tasks and the integration of both policy and mechanism plugins. The
 Monitor task runs periodically at all times, regardless of whether an application has been submitted, collecting
 telemetry data and updating the local state. When a new application is submitted to the system, a separate Analyze task
 thread is launched, which uses the analyze method of the corresponding policy plugin. Based on the result, the analysis
diff --git a/docs/design/plugins/policy_plugins.md b/docs/design/plugins/policy_plugins.md
index 02c85fc..8cd26ca 100755
--- a/docs/design/plugins/policy_plugins.md
+++ b/docs/design/plugins/policy_plugins.md
@@ -1,7 +1,7 @@
 # Policy Plugins
 
 Policy plugins are the components responsible for determining if a new adaptation is required and generating new
-configuration plans. They follow the MAP paradigm, specifically implementing the Analyze and Plan tasks. A policy
+configuration plans. They follow the MAPE paradigm, specifically implementing the Analyze and Plan tasks. A policy
 plugin is implemented as a Python module, which may import and use any external libraries, and must define three
 specific functions: (i) initialize, (ii) analyze (async), and (iii) plan (async). Each method requires specific arguments and
 must return defined outputs. Each method accepts a common argument, context, which can be used to maintain state between different
diff --git a/docs/index.md b/docs/index.md
index 0c3ab5c..e5fce37 100755
--- a/docs/index.md
+++ b/docs/index.md
@@ -25,7 +25,7 @@ In essence, the framework operates as an abstraction middleware between the part
 * ML Connector service, for easy ML Model management, deployment, retraining, and explainability.
 * Node level management.
 * Deploy using different container runtimes.
-* Resource contrainted devices management (Far-Edge devices).
+* Resource-constrained device management (Far-Edge devices).
 * Storage service managed by the framework.
 
 ## Use cases
diff --git a/docs/installation.md b/docs/installation.md
index f850a09..2664776 100755
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -32,7 +32,7 @@ To ensure the correct bootstrap, the agents should start in the following order:
 
 All the deployments take place in a Kubernetes cluster, in separate namespace 'mlsysops-framework'. All the third-party services,
-as well as the Continuum agent are deployed in the managament cluster, the same that is installed in karmada host.
+as well as the Continuum agent, are deployed in the management cluster, the same one where the Karmada host is installed.
 
 
 # System descriptions preparation
diff --git a/docs/mlconnector/Installation.md b/docs/mlconnector/Installation.md
index 0c0d937..eb5ebd4 100644
--- a/docs/mlconnector/Installation.md
+++ b/docs/mlconnector/Installation.md
@@ -24,7 +24,7 @@ The MLConnector dynamically creates and stores docker images for inference appli
 - `DOCKER_PASSWORD`: Your Docker registry password
 
 ### 2. AWS (File Storage)
-The MLConnector uses an external storage service, S3 to store it's data including training data and other files. You will need to setup and S3 bucket, or S3 compatible service to complete this setup. After, please provide the following details. If you do not have access to S3 bucket, or S3 compatible service, please contact us and we can help setup a temporarly one.
+The MLConnector uses an external storage service, S3, to store its data, including training data and other files. You will need to set up an S3 bucket or an S3-compatible service to complete this setup. Afterwards, please provide the following details. If you do not have access to an S3 bucket or an S3-compatible service, please contact us and we can help set up a temporary one.
 - `AWS_ACCESS_URL`: AWS S3 endpoint URL
 - `AWS_ACCESS_KEY_ID`: AWS access key ID
 - `AWS_SECRET_ACCESS_KEY`: AWS secret access key
diff --git a/docs/mlconnector/Overview.md b/docs/mlconnector/Overview.md
index 2a92a85..d165ce8 100644
--- a/docs/mlconnector/Overview.md
+++ b/docs/mlconnector/Overview.md
@@ -1,5 +1,5 @@
 ## MLConnector
-This section describes the ML API (MLConnector) design. It is based on Flask REST. This is the bridge between all MYSysOps operations and ML assisted operations. It allow for flexible and decoupled way to train, deploy, and monitor all ML operations within the MYSysOps continuum. It also offers surpport for drift detectin and explainability. Below the flow diagram.
+This section describes the design of the ML API (MLConnector), a Flask-based REST service. It is the bridge between all MLSysOps operations and ML-assisted operations. It allows for a flexible and decoupled way to train, deploy, and monitor all ML operations within the MLSysOps continuum. It also offers support for drift detection and explainability. The flow diagram is shown below.