diff --git a/cloudformation/patching/ami-patching.yaml b/cloudformation/patching/ami-patching.yaml
new file mode 100644
index 0000000000..bf7d3dd9b0
--- /dev/null
+++ b/cloudformation/patching/ami-patching.yaml
@@ -0,0 +1,302 @@
+AWSTemplateFormatVersion: 2010-09-09
+Description: >-
+  AWS ParallelCluster AMI patching used for tests
+
+Parameters:
+  ParentImage:  # also handed to the AmiHelper custom resource as SourceAmi
+    Description: The ParallelCluster AMI to patch.
+    Type: String
+  InstanceType:
+    Description: Instance type used by Image Builder to build the patched AMI.
+    Type: String
+  SubnetId:
+    Description: Subnet (with outbound internet access) where the build instance runs.
+    Type: AWS::EC2::Subnet::Id
+  VpcId:
+    Description: VPC of the build subnet (used for the build instance security group).
+    Type: AWS::EC2::VPC::Id
+  PatchScriptS3Uri:  # substituted into the build component and into the build role policy
+    Description: S3 URI (s3://bucket/key) of the patching script to run on the build instance.
+    Type: String
+
+Resources:
+
+  # ===========================================================================
+  # AMI helper
+  #
+  # On create it looks up the source AMI and returns its name
+  # (used as the prefix of the patched AMI name).
+  # On stack delete it deregisters the patched AMI built by this stack
+  # and deletes its backing snapshots.
+  # ===========================================================================
+
+  AmiHelper:
+    Type: AWS::CloudFormation::CustomResource
+    Properties:
+      ServiceToken: !GetAtt AmiHelperFunction.Arn  # Lambda that serves this resource
+      SourceAmi: !Ref ParentImage  # described by the handler on every non-Delete request
+      StackName: !Ref AWS::StackName  # tag value the handler filters AMIs on at Delete
+
+  AmiHelperFunction:
+    Type: AWS::Lambda::Function
+    Properties:
+      Handler: index.handler
+      Runtime: python3.12
+      Timeout: 60
+      Role: !GetAtt AmiHelperRole.Arn
+      Code:
+        ZipFile: |
+          import json, urllib.request, boto3
+          ec2 = boto3.client("ec2")
+
+          def respond(event, status, data=None, reason=None):
+              # PUT the custom-resource result to the pre-signed ResponseURL. StackId,
+              # RequestId, LogicalResourceId and PhysicalResourceId are always sent;
+              # Reason (caller-supplied text, capped at 512 chars to keep the response
+              # small) only on FAILED, and Data only when there is something to return.
+              body = {
+                  "Status": status,
+                  "PhysicalResourceId": event.get("PhysicalResourceId", "ami-patching-helper"),
+                  "StackId": event["StackId"],
+                  "RequestId": event["RequestId"],
+                  "LogicalResourceId": event["LogicalResourceId"],
+              }
+              if status == "FAILED":
+                  body["Reason"] = (reason or "See CloudWatch Logs")[:512]
+              if data:
+                  body["Data"] = data
+              payload = json.dumps(body).encode()
+              req = urllib.request.Request(
+                  event["ResponseURL"], data=payload, method="PUT",
+                  headers={"content-type": "", "content-length": str(len(payload))})
+              urllib.request.urlopen(req, timeout=10)
+
+          def cleanup(stack_name):
+              # Deregister the patched AMI(s) built by this stack and delete their
+              # snapshots. The snapshots are tagged first so DeleteSnapshot is allowed
+              # by the (tag-scoped) IAM policy.
+              if not stack_name:
+                  return
+              images = ec2.describe_images(Owners=["self"], Filters=[
+                  {"Name": "tag:parallelcluster:ami-patching-stack", "Values": [stack_name]}]).get("Images", [])
+              for img in images:
+                  snaps = [m["Ebs"]["SnapshotId"] for m in img.get("BlockDeviceMappings", [])
+                           if m.get("Ebs", {}).get("SnapshotId")]
+                  if snaps:
+                      ec2.create_tags(Resources=snaps, Tags=[
+                          {"Key": "parallelcluster:ami-patching-stack", "Value": stack_name}])
+                  ec2.deregister_image(ImageId=img["ImageId"])
+                  for snap in snaps:
+                      ec2.delete_snapshot(SnapshotId=snap)
+
+          def handler(event, context):
+              try:
+                  p = event.get("ResourceProperties", {})
+                  if event["RequestType"] == "Delete":
+                      cleanup(p.get("StackName"))
+                      return respond(event, "SUCCESS")
+                  src = p["SourceAmi"]
+                  image = ec2.describe_images(ImageIds=[src])["Images"][0]
+                  # The distributed AMI name is "<SourceName>-patched-<buildDate>" and AMI
+                  # names are capped at 128 chars. Image Builder renders buildDate as
+                  # "YYYY-MM-DD'T'HH-MM-SS'Z'" (20 chars); with the "-patched-" separator
+                  # (9 chars) the suffix is up to 29 chars, so truncate the source name to
+                  # 88 (128 - 40) to stay safely within the limit.
+                  name = image.get("Name", src)[:88]
+                  return respond(event, "SUCCESS", {"SourceName": name})
+              except Exception as e:
+                  print("Error: %s" % e)
+                  return respond(event, "FAILED", reason="See CloudWatch Logs. Error: %s" % e)
+
+  AmiHelperRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: 2012-10-17
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: !Sub lambda.${AWS::URLSuffix}
+            Action: sts:AssumeRole
+      ManagedPolicyArns:
+        - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
+      Policies:
+        - PolicyName: ami-helper
+          PolicyDocument:
+            Version: 2012-10-17
+            Statement:
+              # ec2 Describe* actions do not support resource-level permissions.
+              - Effect: Allow
+                Action: ec2:DescribeImages
+                Resource: "*"
+              - Effect: Allow
+                Action: ec2:CreateTags  # snapshots only, and only when the request sets this stack's tag
+                Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:snapshot/*
+                Condition:
+                  StringEquals:
+                    aws:RequestTag/parallelcluster:ami-patching-stack: !Ref AWS::StackName
+              - Effect: Allow
+                Action: ec2:DeregisterImage  # only images already tagged with this stack's name
+                Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}::image/*
+                Condition:
+                  StringEquals:
+                    aws:ResourceTag/parallelcluster:ami-patching-stack: !Ref AWS::StackName
+              - Effect: Allow
+                Action: ec2:DeleteSnapshot  # only snapshots already tagged with this stack's name
+                Resource: !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:snapshot/*
+                Condition:
+                  StringEquals:
+                    aws:ResourceTag/parallelcluster:ami-patching-stack: !Ref AWS::StackName
+
+  # ===========================================================================
+  # Image Builder
+  #
+  # Builds the patched AMI: the build instance downloads and runs the patching
+  # script, reboots, executes the AMI cleanup and creates the new AMI.
+  # ===========================================================================
+
+  PatchedImage:
+    Type: AWS::ImageBuilder::Image
+    DependsOn: RecipeLogGroup  # the log group is created before the image build is started
+    Properties:
+      ImageRecipeArn: !Ref PatchImageRecipe
+      InfrastructureConfigurationArn: !Ref PatchInfrastructureConfiguration
+      DistributionConfigurationArn: !Ref PatchDistributionConfiguration
+      ImageTestsConfiguration:
+        ImageTestsEnabled: false  # no Image Builder test stage for this image
+
+  PatchImageRecipe:
+    Type: AWS::ImageBuilder::ImageRecipe
+    Properties:
+      Name: !Sub pcluster-ami-patching-recipe-${AWS::StackName}  # RecipeLogGroup's name embeds this value
+      Version: 1.0.0
+      ParentImage: !Ref ParentImage
+      Components:
+        - ComponentArn: !Ref PatchComponent  # the only component: the patching steps
+
+  PatchComponent:  # build phase: pre-checks, patch, reboot, post-checks, AMI cleanup
+    Type: AWS::ImageBuilder::Component
+    Properties:
+      Name: !Sub pcluster-ami-patching-${AWS::StackName}
+      Platform: Linux
+      Version: 1.0.0
+      Description: Apply OS security patches (kernel bump allowed) to the parent image.
+      Data: !Sub |
+        name: PatchNodeSecurityUpdates
+        description: Apply OS security patches to the parent image, allowing kernel bumps.
+        schemaVersion: 1.0
+        phases:
+          - name: build
+            steps:
+              - name: PrePatchingChecks
+                action: ExecuteBash
+                inputs:
+                  commands:
+                    - echo "Active kernel:"
+                    - uname -r
+                    - echo "Active kernel modules:"
+                    - lsmod
+              - name: ApplyPatches
+                action: ExecuteBash
+                inputs:
+                  commands:
+                    - aws s3 cp "${PatchScriptS3Uri}" /usr/local/sbin/patch_node.sh
+                    - sudo chown root:root /usr/local/sbin/patch_node.sh
+                    - sudo chmod 0744 /usr/local/sbin/patch_node.sh
+                    - sudo /usr/local/sbin/patch_node.sh
+              - name: Reboot
+                action: Reboot
+              - name: PostRebootChecks
+                action: ExecuteBash
+                inputs:
+                  commands:
+                    - echo "Active kernel:"
+                    - uname -r
+                    - echo "Active kernel modules:"
+                    - lsmod
+              - name: Cleanup
+                action: ExecuteBash
+                inputs:
+                  commands:
+                    - /usr/local/sbin/ami_cleanup.sh
+
+  RecipeLogGroup:
+    Type: AWS::Logs::LogGroup
+    DeletionPolicy: Retain  # the log group is kept when the stack is deleted
+    UpdateReplacePolicy: Retain  # ...and when an update would replace it
+    Properties:
+      LogGroupName: !Sub /aws/imagebuilder/pcluster-ami-patching-recipe-${AWS::StackName}  # suffix = recipe name
+      RetentionInDays: 7
+
+  PatchInfrastructureConfiguration:
+    Type: AWS::ImageBuilder::InfrastructureConfiguration
+    Properties:
+      Name: !Sub pcluster-ami-patching-config-${AWS::StackName}
+      InstanceProfileName: !Ref BuildInstanceProfile
+      InstanceTypes:
+        - !Ref InstanceType  # single candidate type, taken from the stack parameter
+      SubnetId: !Ref SubnetId
+      SecurityGroupIds:
+        - !Ref BuildSecurityGroup
+      TerminateInstanceOnFailure: true  # a failed build does not leave its instance behind
+      InstanceMetadataOptions:
+        HttpTokens: required  # IMDSv2 only on the build instance
+
+  PatchDistributionConfiguration:
+    Type: AWS::ImageBuilder::DistributionConfiguration
+    Properties:
+      Name: !Sub pcluster-ami-patching-distribution-${AWS::StackName}
+      Distributions:
+        - Region: !Ref AWS::Region  # the AMI is distributed to the stack's own region only
+          AmiDistributionConfiguration:
+            Name: !Sub
+              - "${SourceName}-patched-{{ imagebuilder:buildDate }}"
+              - SourceName: !GetAtt AmiHelper.SourceName  # source AMI name, cut to 88 chars by AmiHelper
+            AmiTags:
+              parallelcluster:ami-patching-stack: !Ref AWS::StackName  # AmiHelper finds the AMI by this tag on delete
+              parallelcluster:source-ami: !Ref ParentImage
+
+  BuildInstanceProfile:  # referenced by PatchInfrastructureConfiguration.InstanceProfileName
+    Type: AWS::IAM::InstanceProfile
+    Properties:
+      Roles:
+        - !Ref BuildInstanceRole
+
+  BuildInstanceRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: 2012-10-17
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: !Sub ec2.${AWS::URLSuffix}
+            Action: sts:AssumeRole
+      ManagedPolicyArns:
+        - !Sub arn:${AWS::Partition}:iam::aws:policy/EC2InstanceProfileForImageBuilder
+        - !Sub arn:${AWS::Partition}:iam::aws:policy/AmazonSSMManagedInstanceCore
+      Policies:
+        - PolicyName: read-patch-script
+          PolicyDocument:
+            Version: 2012-10-17
+            Statement:
+              - Effect: Allow
+                Action: s3:GetObject  # read access to the patch script object and nothing else
+                Resource: !Sub
+                  - arn:${AWS::Partition}:s3:::${BucketAndKey}
+                  - BucketAndKey: !Select [1, !Split ["s3://", !Ref PatchScriptS3Uri]]  # "bucket/key" part of the URI
+
+  BuildSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Security group for the patched-AMI Image Builder build instance
+      VpcId: !Ref VpcId
+      SecurityGroupEgress:  # only egress is declared; this resource defines no ingress rules
+        - CidrIp: 0.0.0.0/0
+          Description: Allow all outbound traffic
+          IpProtocol: "-1"  # every protocol
+
+Outputs:
+  AmiId:  # read by the patched_ami_factory test fixture as cfn_outputs["AmiId"]
+    Description: The id of the patched AMI produced by Image Builder.
+    Value: !GetAtt PatchedImage.ImageId
diff --git a/tests/integration-tests/configs/develop.yaml b/tests/integration-tests/configs/develop.yaml
index 8e1e2a4698..afa3190df5 100644
--- a/tests/integration-tests/configs/develop.yaml
+++ b/tests/integration-tests/configs/develop.yaml
@@ -173,6 +173,17 @@ test-suites:
         - regions: ["us-east-1"]
           instances: ["g4dn.2xlarge"]
           oss: [{{ OS_X86_1 }}]
+  patching:
+    test_patching.py::test_patching_cluster:
+      dimensions:
+        - regions: [{{ g4dn_8xlarge_CAPACITY_RESERVATION_3_INSTANCES_2_HOURS_NOPG_rhel9 }}]
+          instances: ["g4dn.8xlarge"]
+          oss: {{ RHEL_OS_X86 }}
+          schedulers: ["slurm"]
+        - regions: [{{ g4dn_8xlarge_CAPACITY_RESERVATION_3_INSTANCES_2_HOURS_NOPG_ubuntu2404 }}]
+          instances: ["g4dn.8xlarge"]
+          oss: {{ NO_RHEL_OS_X86 }}
+          schedulers: ["slurm"]
   custom_resource:
     test_cluster_custom_resource.py::test_cluster_create:
       dimensions:
diff --git a/tests/integration-tests/configs/released.yaml b/tests/integration-tests/configs/released.yaml
index ec423469c9..961ef8edcd 100644
--- a/tests/integration-tests/configs/released.yaml
+++ b/tests/integration-tests/configs/released.yaml
@@ -113,6 +113,19 @@ test-suites:
         - regions: ["ca-central-1"]
           instances: {{ common.INSTANCES_DEFAULT_X86 }}
           oss: ["alinux2023"]
+  # The patching test is currently expected to fail; it will be reintroduced once the
+  # failure is fixed.
+  # patching:
+  #   test_patching.py::test_patching_cluster:
+  #     dimensions:
+  #       - regions: [{{ g4dn_8xlarge_CAPACITY_RESERVATION_3_INSTANCES_2_HOURS_NOPG_rhel9 }}]
+  #         instances: ["g4dn.8xlarge"]
+  #         oss: {{ RHEL_OS_X86 }}
+  #         schedulers: ["slurm"]
+  #       - regions: [{{ g4dn_8xlarge_CAPACITY_RESERVATION_3_INSTANCES_2_HOURS_NOPG_ubuntu2404 }}]
+  #         instances: ["g4dn.8xlarge"]
+  #         oss: {{ NO_RHEL_OS_X86 }}
+  #         schedulers: ["slurm"]
   custom_resource:
     test_cluster_custom_resource.py::test_cluster_1_click:
       dimensions:
diff --git a/tests/integration-tests/conftest.py b/tests/integration-tests/conftest.py
index 7e650fb950..e62d6550fe 100644
--- a/tests/integration-tests/conftest.py
+++ b/tests/integration-tests/conftest.py
@@ -244,6 +244,11 @@ def pytest_addoption(parser):
         "--proxy-stack",
         help="Name of CFN stack providing a Proxy environment.",
     )
+    parser.addoption(
+        "--patch-ami-stack",
+        help="Name of an existing CFN stack that builds the patched AMI (cloudformation/patching/ami-patching.yaml). "
+        "When provided, the patching tests reuse this stack instead of creating and deleting a new one.",
+    )
     parser.addoption(
         "--build-image-roles-stack",
         help="Name of CFN stack providing the build image permissions.",
@@ -1721,6 +1726,78 @@ def _copy_image(image_id, test_name):
             logging.error("Delete copied AMI snapshot failed due to %s", e)
 
 
+@pytest.fixture()
+def patched_ami_factory(region, vpc_stack, test_datadir, request, cfn_stacks_factory, s3_bucket_factory):
+    """
+    Provide a callable that turns a base AMI into a security-patched AMI.
+
+    The callable takes the base AMI id and the builder instance type. It uploads
+    patch_node.sh to a fresh S3 bucket and deploys
+    cloudformation/patching/ami-patching.yaml, whose EC2 Image Builder build runs
+    the script on a build instance, reboots it, runs the AMI cleanup and captures the
+    AMI. The AMI is named "<source AMI name>-patched-<buildDate>", carries the
+    parallelcluster:source-ami and parallelcluster:ami-patching-stack tags, and its
+    id (the stack's AmiId output) is what the callable returns.
+
+    With --patch-ami-stack the AmiId output of that existing stack is returned and
+    nothing is created. On teardown every stack created here is deleted unless
+    --no-delete is set; the template's AmiHelper then removes the AMI and snapshots.
+    """
+    # The template is opened relative to the integration-tests working directory, the
+    # same convention other tests follow for files under cloudformation/.
+    with open("../../cloudformation/patching/ami-patching.yaml", encoding="utf-8") as template_handle:
+        patching_template = template_handle.read()
+    existing_stack = request.config.getoption("patch_ami_stack")
+    created = []  # (ami_id, stack_name) pairs owned by this fixture, i.e. deleted at teardown
+
+    def _build(base_ami, builder_instance):
+        # When an already-deployed stack was requested, only its AmiId output is
+        # read: nothing is created here and nothing is registered for deletion.
+        if existing_stack:
+            logging.info("Reusing existing patch-infra stack %s", existing_stack)
+            return CfnStack(name=existing_stack, region=region, template=patching_template).cfn_outputs["AmiId"]
+
+        logging.info("Starting patching of AMI %s using a %s builder instance", base_ami, builder_instance)
+        script_key = "scripts/patch_node.sh"
+        bucket = s3_bucket_factory()
+        s3_bucket = boto3.resource("s3", region_name=region).Bucket(bucket)
+        s3_bucket.upload_file(str(test_datadir / "patch_node.sh"), script_key)
+        name = generate_stack_name("integ-tests-patching-builder", request.config.getoption("stackname_suffix"))
+        parameter_values = {
+            "ParentImage": base_ami,
+            "InstanceType": builder_instance,
+            "SubnetId": vpc_stack.get_public_subnet(),
+            "VpcId": vpc_stack.cfn_outputs["VpcId"],
+            "PatchScriptS3Uri": f"s3://{bucket}/{script_key}",
+        }
+        stack = CfnStack(
+            name=name,
+            region=region,
+            template=patching_template,
+            parameters=[{"ParameterKey": key, "ParameterValue": value} for key, value in parameter_values.items()],
+            capabilities=["CAPABILITY_IAM"],
+        )
+        # create_stack returns only at CREATE_COMPLETE, hence after Image Builder has
+        # produced the patched AMI.
+        cfn_stacks_factory.create_stack(stack)
+        patched_ami_id = stack.cfn_outputs["AmiId"]
+        created.append((patched_ami_id, name))
+        logging.info("Patched AMI %s is available", patched_ami_id)
+        return patched_ami_id
+
+    yield _build
+
+    # --no-delete keeps everything. A reused stack is never in `created`, so it is
+    # never torn down. Deleting a created stack is enough: its AmiHelper custom
+    # resource deregisters the patched AMI and deletes the snapshots.
+    if request.config.getoption("no_delete"):
+        logging.info("--no-delete specified: retaining patched AMI(s) and stack(s): %s", created)
+    else:
+        for patched_ami_id, name in created:
+            logging.info("Deleting patch-infra stack %s (removes patched AMI %s)", name, patched_ami_id)
+            cfn_stacks_factory.delete_stack(name, region)
+
+
 @pytest.fixture()
 def mpi_variants(architecture):
     variants = ["openmpi"]
diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py
index 09507df6e7..8510fbf783 100644
--- a/tests/integration-tests/test_runner.py
+++ b/tests/integration-tests/test_runner.py
@@ -111,6 +111,7 @@
     "retain_ad_stack": False,
     "global_build_number": 0,
     "proxy_stack": None,
+    "patch_ami_stack": None,
     "build_image_roles_stack": None,
     "capacity_reservation_id": None,
     "skip_ddb_metadata": False,
@@ -513,6 +514,12 @@ def _init_argparser():
         help="Name of CFN stack providing a Proxy environment.",
         default=TEST_DEFAULTS.get("proxy_stack"),
     )
+    debug_group.add_argument(
+        "--patch-ami-stack",
+        help="Name of an existing CFN stack that builds the patched AMI. "
+        "When provided, the patching tests reuse this stack instead of creating and deleting a new one.",
+        default=TEST_DEFAULTS.get("patch_ami_stack"),
+    )
     debug_group.add_argument(
         "--build-image-roles-stack",
         help="Name of CFN stack providing build image permissions.",
@@ -772,6 +779,9 @@ def _set_custom_stack_args(args, pytest_args):  # noqa: C901
     if args.proxy_stack:
         pytest_args.extend(["--proxy-stack", args.proxy_stack])
 
+    if args.patch_ami_stack:
+        pytest_args.extend(["--patch-ami-stack", args.patch_ami_stack])
+
     if args.build_image_roles_stack:
         pytest_args.extend(["--build-image-roles-stack", args.build_image_roles_stack])
 
diff --git a/tests/integration-tests/tests/basic/test_essential_features.py b/tests/integration-tests/tests/basic/test_essential_features.py
index 39a3029944..cbe128f99a 100644
--- a/tests/integration-tests/tests/basic/test_essential_features.py
+++ b/tests/integration-tests/tests/basic/test_essential_features.py
@@ -27,7 +27,7 @@
     wait_instance_replaced_or_terminating,
 )
 from tests.common.mpi_common import _test_mpi
-from tests.common.utils import fetch_instance_slots, run_system_analyzer
+from tests.common.utils import GPU_JOB_SCRIPT, fetch_instance_slots, run_system_analyzer
 
 
 def test_essential_features(
@@ -344,7 +344,7 @@ def _test_gpu_workload(cluster, scheduler_commands_factory, test_datadir):
     for sample in samples:
         logging.info("Submitting CUDA sample job for %s", sample)
         result = scheduler_commands.submit_script(
-            str(test_datadir / "gpu_job.sh"),
+            str(GPU_JOB_SCRIPT),
             script_args=[sample],
             partition="gpu",
             nodes=1,
diff --git a/tests/integration-tests/tests/basic/test_essential_features/test_essential_features/gpu_job.sh b/tests/integration-tests/tests/common/data/gpu_job.sh
similarity index 100%
rename from tests/integration-tests/tests/basic/test_essential_features/test_essential_features/gpu_job.sh
rename to tests/integration-tests/tests/common/data/gpu_job.sh
diff --git a/tests/integration-tests/tests/common/utils.py b/tests/integration-tests/tests/common/utils.py
index 9978ba9e3a..7bda9f3dbd 100644
--- a/tests/integration-tests/tests/common/utils.py
+++ b/tests/integration-tests/tests/common/utils.py
@@ -19,6 +19,7 @@
 from importlib.metadata import version as get_package_version
 
 import boto3
+import yaml
 from assertpy import assert_that
 from botocore.exceptions import ClientError
 from framework.framework_constants import METADATA_DEFAULT_REGION, PERFORMANCE_METADATA_TABLE
@@ -26,8 +27,8 @@
 from packaging import version as packaging_version
 from remote_command_executor import RemoteCommandExecutionError, RemoteCommandExecutor
 from retrying import retry
-from time_utils import seconds
-from utils import get_instance_info, run_command
+from time_utils import minutes, seconds
+from utils import get_instance_info, get_username_for_os, run_command
 
 from tests.common.osu_common import PRIVATE_OSES
 
@@ -35,6 +36,16 @@
 
 SYSTEM_ANALYZER_SCRIPT = pathlib.Path(__file__).parent / "data/system-analyzer.sh"
 
+# Cluster node types exercised by the integration tests.
+HEAD_NODE = "HeadNode"
+COMPUTE_NODE = "ComputeNode"
+LOGIN_NODE = "LoginNode"
+NODE_TYPES = (HEAD_NODE, COMPUTE_NODE, LOGIN_NODE)
+
+# Shared Slurm job script that builds and runs a single CUDA sample on a GPU
+# compute node. Used by multiple tests to validate GPU workloads.
+GPU_JOB_SCRIPT = pathlib.Path(__file__).parent / "data/gpu_job.sh"
+
 RHEL_OWNERS = ["309956199498", "841258680906", "219670896067"]
 
 OS_TO_OFFICIAL_AMI_NAME_OWNER_MAP = {
@@ -383,6 +394,38 @@ def wait_login_node_status_ok(cluster):
     )
 
 
+@retry(stop_max_delay=minutes(3), wait_fixed=seconds(15))
+def wait_node_reachable(cluster, node_ip):
+    """Block until an SSH session to ``node_ip`` succeeds.
+
+    Each attempt runs ``uname -r`` on the node and logs the kernel it reports. The
+    retry decorator repeats the attempt every 15 seconds for up to 3 minutes, which
+    is meant to cover the window in which a rebooting node is unreachable.
+    """
+    login = f"{get_username_for_os(cluster.os)}@{node_ip}"
+    options = "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
+    ssh_command = f"ssh {options} -i {cluster.ssh_key} {login} uname -r"
+    running_kernel = run_command(ssh_command, timeout=30, shell=True).stdout.strip()
+    logging.info("Node %s reachable over SSH; running kernel: %s", node_ip, running_kernel)
+
+
+@retry(stop_max_delay=minutes(5), wait_fixed=seconds(10), retry_on_result=lambda ami: ami is None)
+def retrieve_cluster_head_node_ami(cluster, region):
+    """Look up the head node AMI id in the cluster's CloudFormation template.
+
+    The value is taken from the ImageId of the HeadNodeLaunchTemplate resource, so no
+    running instance is inspected. A missing or empty template yields None, which the
+    retry decorator treats as "try again" (every 10 seconds, for up to 5 minutes).
+    """
+    cfn_client = boto3.client("cloudformation", region_name=region)
+    body = cfn_client.get_template(StackName=cluster.cfn_name).get("TemplateBody")
+    # The template body may come back as raw text; parse it before looking inside.
+    parsed = yaml.safe_load(body) if isinstance(body, str) else body
+    if not parsed:
+        return None
+    launch_template = parsed["Resources"]["HeadNodeLaunchTemplate"]
+    return launch_template["Properties"]["LaunchTemplateData"]["ImageId"]
+
+
 def get_default_vpc_security_group(vpc_id, region):
     return (
         boto3.client("ec2", region_name=region)
diff --git a/tests/integration-tests/tests/patching/__init__.py b/tests/integration-tests/tests/patching/__init__.py
new file mode 100644
index 0000000000..5006309027
--- /dev/null
+++ b/tests/integration-tests/tests/patching/__init__.py
@@ -0,0 +1,11 @@
+# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file.
+# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
diff --git a/tests/integration-tests/tests/patching/test_patching.py b/tests/integration-tests/tests/patching/test_patching.py
new file mode 100644
index 0000000000..77ad496085
--- /dev/null
+++ b/tests/integration-tests/tests/patching/test_patching.py
@@ -0,0 +1,219 @@
+# Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file.
+# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+import logging
+
+import boto3
+from assertpy import assert_that, soft_assertions
+from remote_command_executor import RemoteCommandExecutor
+from retrying import retry
+from time_utils import minutes, seconds
+
+from tests.common.login_nodes_utils import wait_for_login_fleet_stop
+from tests.common.utils import (
+    COMPUTE_NODE,
+    GPU_JOB_SCRIPT,
+    LOGIN_NODE,
+    NODE_TYPES,
+    reboot_head_node,
+    retrieve_cluster_head_node_ami,
+    wait_node_reachable,
+)
+
+# Time budget (seconds) for the OS security patching to complete on the head node.
+PATCHING_TIMEOUT = 1800
+
+
+def test_patching_cluster(
+    region,
+    os,
+    instance,
+    scheduler,
+    vpc_stack,
+    pcluster_config_reader,
+    clusters_factory,
+    test_datadir,
+    scheduler_commands_factory,
+    patched_ami_factory,
+    request,
+):
+    """
+    Validate that users can self-patch their clusters.
+
+    Flow:
+      1.  Create a cluster.
+      2.  Read the AMI it uses from its CloudFormation stack template.
+      3.  Bake a patched AMI from that AMI.
+      4.  Wait for the cluster creation to complete.
+      5.  Snapshot the loaded kernel modules.
+      6.  Run a baseline GPU workload from head node and login node.
+      7.  Stop the login nodes.
+      8.  Update the cluster to the patched AMI and wait for nodes to be replaced.
+      9.  Patch and reboot the head node, then wait for it to be reachable over SSH.
+      10. Re-run the GPU workload from head node and login node.
+      11. Assert that every kernel module loaded before patching is still loaded, on each node type.
+    """
+    ec2 = boto3.client("ec2", region_name=region)
+
+    # Start the cluster creation but do not block on it: the AMI patching below
+    # runs concurrently while the cluster comes up.
+    create_config = pcluster_config_reader(output_file="pcluster.config.create.yaml", login_nodes_count=1)
+    cluster = clusters_factory(create_config, wait=False)
+
+    # Use the exact AMI the cluster uses as the source for patching, read from the
+    # cluster stack template instead of resolving it with a separate AMI lookup.
+    base_ami = retrieve_cluster_head_node_ami(cluster, region)
+    logging.info("Cluster is running on AMI %s", base_ami)
+
+    # Bake the patched AMI while the cluster is still being created. The builder
+    # instance uses the same GPU instance type as the cluster nodes.
+    patched_ami = patched_ami_factory(base_ami, instance)
+
+    # Wait for the cluster creation to complete before using it.
+    logging.info("Waiting for cluster %s to reach CREATE_COMPLETE", cluster.name)
+    cluster.wait_cluster_status("CREATE_COMPLETE")
+
+    # Snapshot the loaded kernel modules on the head, compute and login nodes before
+    # patching so we can later assert the same modules remain loaded.
+    kernel_modules_before = _collect_loaded_kernel_modules(cluster, scheduler_commands_factory)
+    logging.info("Kernel modules loaded before patching: %s", kernel_modules_before)
+
+    # GPU workload BEFORE patching, from the head node and login node (baseline).
+    _run_gpu_workload(cluster, scheduler_commands_factory, use_login_node=False)
+    _run_gpu_workload(cluster, scheduler_commands_factory, use_login_node=True)
+
+    # Stop the login nodes (required before changing the login pool image).
+    stop_login_config = pcluster_config_reader(output_file="pcluster.config.stop-login.yaml", login_nodes_count=0)
+    cluster.update(str(stop_login_config))
+    wait_for_login_fleet_stop(cluster)
+    logging.info("Login nodes stopped")
+
+    # Update the cluster so login and compute nodes use the patched AMI.
+    update_config = pcluster_config_reader(
+        output_file="pcluster.config.update-ami.yaml", login_nodes_count=1, patched_ami=patched_ami
+    )
+    cluster.update(str(update_config))
+
+    # With QueueUpdateStrategy DRAIN the static compute node is drained and replaced
+    # asynchronously after the update completes, and the login pool is recreated, so
+    # wait for both to come back running the patched AMI.
+    logging.info("Waiting for compute and login nodes to be replaced with the patched AMI")
+    _wait_instances_using_ami(ec2, cluster, "Compute", patched_ami)
+    _wait_instances_using_ami(ec2, cluster, "LoginNode", patched_ami)
+
+    # Patch the head node in place and reboot it.
+    remote_command_executor = RemoteCommandExecutor(cluster)
+    logging.info("Patching the head node")
+    patch_result = remote_command_executor.run_remote_script(
+        str(test_datadir / "patch_node.sh"), run_as_root=True, timeout=PATCHING_TIMEOUT
+    )
+    logging.info("Head node patching script output:\n%s", patch_result.stdout)
+    reboot_head_node(cluster)
+
+    # Verify the head node is reachable over SSH again after the reboot (and that
+    # the patch left it healthy) before exercising the cluster further.
+    wait_node_reachable(cluster, cluster.head_node_ip)
+
+    # GPU workload AFTER patching, from the head node and login node.
+    _run_gpu_workload(cluster, scheduler_commands_factory, use_login_node=False)
+    _run_gpu_workload(cluster, scheduler_commands_factory, use_login_node=True)
+
+    # Snapshot and log the kernel modules loaded after patching, then assert (softly,
+    # so every node type is reported even if one fails) that every module loaded
+    # before patching is still loaded on the head, compute and login nodes.
+    kernel_modules_after = _collect_loaded_kernel_modules(cluster, scheduler_commands_factory)
+    logging.info("Kernel modules loaded after patching: %s", kernel_modules_after)
+    with soft_assertions():
+        for node_type in NODE_TYPES:
+            missing = kernel_modules_before[node_type] - kernel_modules_after[node_type]
+            assert_that(missing).described_as(f"kernel modules no longer loaded on the {node_type}").is_empty()
+
+
+@retry(stop_max_delay=minutes(15), wait_fixed=seconds(30), retry_on_result=lambda replaced: not replaced)
+def _wait_instances_using_ami(ec2, cluster, node_type, expected_ami):
+    """Return True once every instance of node_type runs expected_ami; polled via @retry.
+
+    Used after a DRAIN-strategy update, where the static compute node is replaced
+    asynchronously and the login pool is recreated, so the new instances may not be
+    up (or may briefly coexist with the old ones) right after the update completes.
+    A falsy result (no instances yet, or any instance on another AMI) triggers a retry.
+    """
+    instance_ids = cluster.get_cluster_instance_ids(node_type=node_type)
+    if not instance_ids:
+        return False
+    # Describe all instances with a single EC2 call rather than one call per
+    # instance on every poll; each reservation may hold several instances.
+    reservations = ec2.describe_instances(InstanceIds=list(instance_ids))["Reservations"]
+    amis = {instance["ImageId"] for reservation in reservations for instance in reservation["Instances"]}
+    logging.info("%s instances %s on AMIs %s (expected %s)", node_type, instance_ids, amis, expected_ami)
+    using_patched_ami = amis == {expected_ami}
+    if using_patched_ami:
+        logging.info(
+            "Detected new %s node(s) %s now running the patched AMI %s",
+            node_type,
+            instance_ids,
+            expected_ami,
+        )
+    return using_patched_ami
+
+
+def _run_gpu_workload(cluster, scheduler_commands_factory, use_login_node):
+    """Run the deviceQuery CUDA sample as a job on partition q1 and assert it succeeds.
+
+    use_login_node selects where the job is submitted from: the login node when
+    True, the head node otherwise.
+    """
+    if use_login_node:
+        origin = "login node"
+    else:
+        origin = "head node"
+    logging.info("Submitting GPU validation job from the %s", origin)
+    executor = RemoteCommandExecutor(cluster, use_login_node=use_login_node)
+    scheduler = scheduler_commands_factory(executor)
+    # One node and one slot on the q1 partition, running the deviceQuery sample.
+    job_options = {"script_args": ["1_Utilities/deviceQuery"], "partition": "q1", "nodes": 1, "slots": 1}
+    submission = scheduler.submit_script(str(GPU_JOB_SCRIPT), **job_options)
+    # The submission must be accepted and the job must succeed for this check to pass.
+    submitted_job_id = scheduler.assert_job_submitted(submission.stdout)
+    scheduler.wait_job_completed(submitted_job_id, timeout=20)
+    scheduler.assert_job_succeeded(submitted_job_id)
+    logging.info("GPU validation job %s submitted from the %s succeeded", submitted_job_id, origin)
+
+
+def _collect_loaded_kernel_modules(cluster, scheduler_commands_factory):
+    """Build a {node type: set of loaded kernel module names} map over NODE_TYPES.
+
+    Each node type is queried through its own executor, in NODE_TYPES order.
+    """
+    modules_by_node_type = {}
+    for node_type in NODE_TYPES:
+        executor = _node_executor(cluster, scheduler_commands_factory, node_type)
+        modules_by_node_type[node_type] = _loaded_kernel_modules(executor)
+    return modules_by_node_type
+
+
+def _node_executor(cluster, scheduler_commands_factory, node_type):
+    """Build the RemoteCommandExecutor used to run commands on a node of the given type."""
+    target_kwargs = {}
+    if node_type == COMPUTE_NODE:
+        # Compute nodes are reached through the head node (bastion): ask the scheduler,
+        # via a head-node executor, for the address of the first compute node it lists.
+        head_scheduler = scheduler_commands_factory(RemoteCommandExecutor(cluster))
+        first_compute_node = head_scheduler.get_compute_nodes()[0]
+        target_kwargs["compute_node_ip"] = head_scheduler.get_node_addr(first_compute_node)
+    elif node_type == LOGIN_NODE:
+        target_kwargs["use_login_node"] = True
+    return RemoteCommandExecutor(cluster, **target_kwargs)
+
+
+def _loaded_kernel_modules(remote_command_executor):
+    """List loaded modules remotely (lsmod minus its header, first column) and return the names as a set."""
+    lsmod_result = remote_command_executor.run_remote_command("lsmod | tail -n +2 | awk '{print $1}'")
+    return set(lsmod_result.stdout.split())
diff --git a/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/patch_node.sh b/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/patch_node.sh
new file mode 100644
index 0000000000..3b050135bb
--- /dev/null
+++ b/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/patch_node.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Patching script.
+#
+# Applies all available *security* patches to the system using the native
+# package manager. Kernel packages are intentionally NOT excluded: if a
+# security fix requires a newer kernel, the bump is accepted. A reboot after
+# this script runs is required to activate a new kernel.
+#
+# Supports dnf (AL2023/RHEL/Rocky, incl. RHEL8 which also ships dnf), yum (AL2) and apt (Ubuntu).
+set -euo pipefail
+
+echo "===== Starting system security patching on $(hostname) ====="
+# Report the running kernel before patching. The kernel after the reboot is
+# reported separately once the node has rebooted (the reboot is mandatory to
+# activate any new kernel).
+echo "Kernel before patching: $(uname -r)"
+
+if command -v dnf >/dev/null 2>&1; then
+    echo "Detected dnf package manager"
+    sudo dnf clean all
+    sudo dnf makecache --refresh || true
+    # Apply only security errata. Kernel packages are allowed to be upgraded.
+    sudo dnf upgrade --security -y
+elif command -v yum >/dev/null 2>&1; then
+    echo "Detected yum package manager"
+    sudo yum clean all
+    sudo yum makecache || true
+    # update-minimal --security applies the smallest set of security errata.
+    # Kernel bumps are allowed (no --exclude=kernel*).
+    sudo yum update-minimal --security -y
+elif command -v apt-get >/dev/null 2>&1; then
+    echo "Detected apt package manager"
+    # sudo resets the environment by default (env_reset), so DEBIAN_FRONTEND is passed via env below.
+    sudo apt-get update -y
+    # unattended-upgrades applies only the security pocket by default and will
+    # upgrade linux-image-* (kernel) packages when needed.
+    sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y unattended-upgrades
+    sudo env DEBIAN_FRONTEND=noninteractive unattended-upgrade -v
+else
+    echo "ERROR: no supported package manager found (dnf/yum/apt-get)" >&2
+    exit 1
+fi
+
+echo "===== System security patching completed on $(hostname) ====="
diff --git a/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/pcluster.config.yaml b/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/pcluster.config.yaml
new file mode 100644
index 0000000000..0625385839
--- /dev/null
+++ b/tests/integration-tests/tests/patching/test_patching/test_patching_cluster/pcluster.config.yaml
@@ -0,0 +1,66 @@
+Image:  # Jinja-templated cluster config; the test renders it through pcluster_config_reader
+  Os: {{ os }}
+HeadNode:
+  InstanceType: {{ instance }}
+  Networking:
+    SubnetId: {{ public_subnet_id }}
+    ElasticIp: true
+  Ssh:
+    KeyName: {{ key_name }}
+  Imds:
+    Secured: {{ imds_secured }}
+LoginNodes:
+  Pools:
+    - Name: login1
+      InstanceType: {{ instance }}
+      Count: {{ login_nodes_count }}  # the test renders 0 to stop the login nodes, then 1 for the AMI update
+      GracetimePeriod: 3
+      {% if patched_ami %}
+      Image:  # emitted only when patched_ami is passed at render time
+        CustomAmi: {{ patched_ami }}
+      {% endif %}
+      Networking:
+        SubnetIds:
+          - {{ public_subnet_id }}
+Scheduling:
+  Scheduler: {{ scheduler }}
+  SlurmSettings:
+    QueueUpdateStrategy: DRAIN  # the test expects the static compute node to be drained and replaced after the update
+  SlurmQueues:
+    - Name: q1  # partition the GPU validation job is submitted to
+      {% if patched_ami %}
+      Image:  # emitted only when patched_ami is passed at render time
+        CustomAmi: {{ patched_ami }}
+      {% endif %}
+      HealthChecks:
+        Gpu:
+          Enabled: true
+      Networking:
+        PlacementGroup:  # enabled only when a capacity-reservation placement group name is supplied
+          {% if capacity_reservation_framework_placement_group %}
+          Enabled: true
+          Name: {{ capacity_reservation_framework_placement_group }}
+          {% else %}
+          Enabled: false
+          {% endif %}
+        SubnetIds:
+          - {{ private_subnet_id }}
+      ComputeResources:
+        - Name: cr1
+          InstanceType: {{ instance }}
+          MinCount: 1  # MinCount equals MaxCount: exactly one static compute node
+          MaxCount: 1
+          Efa:
+            Enabled: true
+SharedStorage:
+  - MountDir: /shared-ebs
+    Name: shared-ebs
+    StorageType: Ebs
+  - MountDir: /shared-efs
+    Name: shared-efs
+    StorageType: Efs
+  - MountDir: /shared-fsxlustre
+    Name: shared-fsx
+    StorageType: FsxLustre
+    FsxLustreSettings:
+      StorageCapacity: 1200