From 7ad3494cc9770dd64b9a5cf9529fd5adfa48623b Mon Sep 17 00:00:00 2001 From: Sam Crauwels Date: Tue, 10 Mar 2026 09:42:57 +0100 Subject: [PATCH 1/2] Fix password timing on fresh install and cluster settings application The elasticstack shared role sets elasticstack_password to the user-defined value before the elasticsearch role's security setup has actually changed the password in ES. On fresh install the passwords file doesn't exist yet, so the fetch-from-file task is skipped and the premature value stays, causing a 401 on the API availability check. We now clear it to {stdout: ''} so the check is safely skipped, and added fallback logic that detects a 401 from a prior password change and switches to the user-defined password for re-runs. The cluster settings comparison template produced whitespace-padded output (" True") which Ansible 2.19+ bool filter now coerces to False instead of True, silently skipping the PUT. Added | trim to fix that. Also fixed verify_fetch_password.yml where Ansible's register directive on a skipped shell task overwrote the set_fact result with a dict lacking .stdout. Swapped the task order so set_fact always runs last and wins. --- molecule/shared/set_ci_watermarks.yml | 5 +- molecule/shared/verify_fetch_password.yml | 17 +++--- .../tasks/elasticsearch-security.yml | 61 +++++++++++++++---- roles/elasticsearch/tasks/main.yml | 2 +- 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/molecule/shared/set_ci_watermarks.yml b/molecule/shared/set_ci_watermarks.yml index 6ecbd2cd..1ee38cba 100644 --- a/molecule/shared/set_ci_watermarks.yml +++ b/molecule/shared/set_ci_watermarks.yml @@ -4,6 +4,8 @@ # scenarios where disk space is constrained. Runs once per cluster. # # Requires: Elasticsearch running with security enabled. +# Uses elasticstack_password (set by the elasticsearch role) when +# available, falls back to reading from the initial_passwords file. 
- name: Read elastic password for watermark setup ansible.builtin.shell: | set -o pipefail @@ -15,6 +17,7 @@ changed_when: false no_log: true run_once: true + when: elasticstack_password is not defined or elasticstack_password.stdout | default('') | length == 0 - name: Set lenient disk watermarks (CI) ansible.builtin.uri: @@ -28,7 +31,7 @@ cluster.routing.allocation.disk.watermark.flood_stage.frozen: "99%" body_format: json user: elastic - password: "{{ _ci_elastic_pass.stdout }}" + password: "{{ (elasticstack_password.stdout | default('')) or (_ci_elastic_pass.stdout | default('')) }}" validate_certs: false force_basic_auth: true register: _ci_watermark_result diff --git a/molecule/shared/verify_fetch_password.yml b/molecule/shared/verify_fetch_password.yml index bbf6e0de..2ad955bf 100644 --- a/molecule/shared/verify_fetch_password.yml +++ b/molecule/shared/verify_fetch_password.yml @@ -7,13 +7,6 @@ # _verify_delegate_to: host to delegate to (default: omitted, runs locally) # _verify_run_once: whether to run once (default: true) -- name: Use user-defined elastic password - ansible.builtin.set_fact: - elastic_pass: - stdout: "{{ _verify_elastic_password }}" - when: _verify_elastic_password | default('') | length > 0 - run_once: "{{ _verify_run_once | default(true) }}" # noqa: run-once[task] - - name: Fetch Elastic password from file ansible.builtin.shell: | set -o pipefail @@ -26,3 +19,13 @@ run_once: "{{ _verify_run_once | default(true) }}" # noqa: run-once[task] delegate_to: "{{ _verify_delegate_to | default(omit) }}" when: _verify_elastic_password | default('') | length == 0 + +# set_fact MUST come after the shell task: Ansible's `register` runs +# even on skipped tasks, overwriting the variable with a dict that +# lacks `.stdout`. Putting set_fact second ensures it wins. 
+- name: Use user-defined elastic password + ansible.builtin.set_fact: + elastic_pass: + stdout: "{{ _verify_elastic_password }}" + when: _verify_elastic_password | default('') | length > 0 + run_once: "{{ _verify_run_once | default(true) }}" # noqa: run-once[task] diff --git a/roles/elasticsearch/tasks/elasticsearch-security.yml b/roles/elasticsearch/tasks/elasticsearch-security.yml index ddb8c0f2..f352616e 100644 --- a/roles/elasticsearch/tasks/elasticsearch-security.yml +++ b/roles/elasticsearch/tasks/elasticsearch-security.yml @@ -747,13 +747,6 @@ retries: 30 delay: 10 - - name: Use user-defined elastic password - ansible.builtin.set_fact: - elasticstack_password: - stdout: "{{ elasticsearch_elastic_password }}" - no_log: "{{ elasticstack_no_log }}" - when: elasticsearch_elastic_password | default('') | length > 0 - - name: Fetch Elastic password from file ansible.builtin.include_tasks: file: "{{ role_path }}/../elasticstack/tasks/fetch_password.yml" @@ -761,9 +754,22 @@ _password_user: elastic _password_fact: elasticstack_password when: - - elasticsearch_elastic_password | default('') | length == 0 - elasticsearch_passwords_file.stat.exists | bool + # On fresh install the passwords file doesn't exist yet, so the fetch + # above is skipped. But the elasticstack shared role may have already + # set elasticstack_password to the user-defined value — which hasn't + # actually been applied to ES yet. Clear it so the API check below + # is safely skipped (the bootstrap-password check already confirmed + # the cluster is reachable). 
+ - name: Clear premature user-defined password on fresh install + ansible.builtin.set_fact: + elasticstack_password: + stdout: "" + when: + - not elasticsearch_passwords_file.stat.exists | bool + - elasticsearch_elastic_password | default('') | length > 0 + - name: Check for API availability with elastic password ansible.builtin.uri: url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}" @@ -773,14 +779,47 @@ force_basic_auth: true register: elasticsearch_api_status changed_when: false + failed_when: false no_log: "{{ elasticstack_no_log }}" when: - elasticstack_password is defined - elasticstack_password.stdout | default('') | length > 0 - until: (elasticsearch_api_status.json | default({})).cluster_name is defined + until: >- + (elasticsearch_api_status.json | default({})).cluster_name is defined + or (elasticsearch_api_status.status | default(0)) == 401 retries: 20 delay: 10 + # If the auto-generated password returned 401, the user-defined + # password was already applied in a previous run. Switch to it. 
+ - name: Switch to user-defined password after prior change + ansible.builtin.set_fact: + elasticstack_password: + stdout: "{{ elasticsearch_elastic_password }}" + no_log: "{{ elasticstack_no_log }}" + when: + - elasticsearch_elastic_password | default('') | length > 0 + - elasticsearch_api_status is defined + - (elasticsearch_api_status.status | default(0)) == 401 + + - name: Verify API availability with user-defined password + ansible.builtin.uri: + url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}" + user: elastic + password: "{{ elasticstack_password.stdout }}" + validate_certs: "{{ elasticsearch_validate_api_certs }}" + force_basic_auth: true + register: elasticsearch_api_status + changed_when: false + no_log: "{{ elasticstack_no_log }}" + when: + - elasticsearch_elastic_password | default('') | length > 0 + - elasticsearch_api_status is defined + - (elasticsearch_api_status.status | default(0)) == 401 + until: (elasticsearch_api_status.json | default({})).cluster_name is defined + retries: 10 + delay: 5 + - name: Work around low resources on CI/CD nodes when: ansible_facts.virtualization_type in ["container", "docker", "lxc"] block: @@ -802,7 +841,7 @@ cluster.routing.allocation.disk.watermark.flood_stage.frozen: "99%" body_format: json user: elastic - password: "{{ elasticstack_password.stdout | default(elasticsearch_bootstrap_pw) }}" + password: "{{ (elasticstack_password.stdout | default('')) or elasticsearch_bootstrap_pw }}" validate_certs: "{{ elasticsearch_validate_api_certs }}" force_basic_auth: true register: elasticsearch_watermark_response @@ -915,7 +954,7 @@ when: - inventory_hostname == elasticstack_ca_host - elasticsearch_elastic_password | default('') | length > 0 - - elasticsearch_freshstart_security.changed | bool + - elasticstack_password.stdout | default('') != elasticsearch_elastic_password block: - name: Fetch auto-generated elastic password ansible.builtin.include_tasks: 
diff --git a/roles/elasticsearch/tasks/main.yml b/roles/elasticsearch/tasks/main.yml index f0102647..51467be4 100644 --- a/roles/elasticsearch/tasks/main.yml +++ b/roles/elasticsearch/tasks/main.yml @@ -580,7 +580,7 @@ - name: Check if settings already match ansible.builtin.set_fact: - _es_cluster_settings_changed: "{{ _needs_update }}" + _es_cluster_settings_changed: "{{ _needs_update | trim }}" vars: _current: "{{ _es_current_cluster_settings.json.persistent }}" _needs_update: >- From 1be738829832078e14a41f8beaaa89ccbdd45e73 Mon Sep 17 00:00:00 2001 From: Sam Crauwels Date: Tue, 10 Mar 2026 11:37:31 +0100 Subject: [PATCH 2/2] Fix Ansible 2.19+ compatibility and integration test failures The ternary() filter eagerly evaluates all arguments, crashing when _es_cgroup_memory.content is undefined on real VMs (not cgroup-limited). Replaced with an if/else block. The comment('#') filter style was removed in Ansible 2.19+ so we use the default (no-arg) form instead. On fresh installs the elastic password wasn't re-fetched after security setup created it, leaving downstream tasks without credentials. The Kibana external certs flow tried to write files before creating /etc/kibana/certs. 
--- roles/elasticsearch/tasks/elasticsearch-security.yml | 12 ++++++++++++ roles/elasticsearch/tasks/main.yml | 8 +++++--- roles/elasticsearch/templates/log4j2.properties.j2 | 2 +- roles/kibana/tasks/kibana-security.yml | 9 +++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/roles/elasticsearch/tasks/elasticsearch-security.yml b/roles/elasticsearch/tasks/elasticsearch-security.yml index f352616e..523b8ccf 100644 --- a/roles/elasticsearch/tasks/elasticsearch-security.yml +++ b/roles/elasticsearch/tasks/elasticsearch-security.yml @@ -983,6 +983,18 @@ stdout: "{{ elasticsearch_elastic_password }}" no_log: "{{ elasticstack_no_log }}" + # On fresh install the passwords file was just created but elasticstack_password + # may still be unset (the shared role couldn't fetch it because the file didn't + # exist yet). Fetch now so downstream tasks (cluster settings, watermarks) work. + - name: Fetch elastic password after initial setup + ansible.builtin.include_tasks: + file: "{{ role_path }}/../elasticstack/tasks/fetch_password.yml" + vars: + _password_user: elastic + _password_fact: elasticstack_password + when: + - elasticstack_password is not defined or (elasticstack_password.stdout | default('') | length == 0) + # Maybe make sure that Elasticsearch is using the right protocol http(s) to connect, even in newly setup clusters # -- Certificate expiry warnings -- diff --git a/roles/elasticsearch/tasks/main.yml b/roles/elasticsearch/tasks/main.yml index 51467be4..d4b1f539 100644 --- a/roles/elasticsearch/tasks/main.yml +++ b/roles/elasticsearch/tasks/main.yml @@ -531,9 +531,11 @@ ansible.builtin.debug: msg: >- Using {{ elasticsearch_heap | int * 1024 }} of - {{ (_es_cgroup_memory.content is defined and (_es_cgroup_memory.content | b64decode | trim) != 'max') - | ternary((_es_cgroup_memory.content | b64decode | trim) | int // 1048576 ~ ' MB (cgroup limit)', - ansible_facts.memtotal_mb ~ ' MB') }} + {% if _es_cgroup_memory.content is defined and 
(_es_cgroup_memory.content | b64decode | trim) != 'max' %} + {{ (_es_cgroup_memory.content | b64decode | trim) | int // 1048576 }} MB (cgroup limit) + {% else %} + {{ ansible_facts.memtotal_mb }} MB + {% endif %} as heap for Elasticsearch when: elasticsearch_heap is defined and (elasticsearch_heap | string | length > 0) diff --git a/roles/elasticsearch/templates/log4j2.properties.j2 b/roles/elasticsearch/templates/log4j2.properties.j2 index b56ca7a6..ffe666ca 100644 --- a/roles/elasticsearch/templates/log4j2.properties.j2 +++ b/roles/elasticsearch/templates/log4j2.properties.j2 @@ -1,4 +1,4 @@ -{{ ansible_managed | comment('#') }} +{{ ansible_managed | comment }} status = error diff --git a/roles/kibana/tasks/kibana-security.yml b/roles/kibana/tasks/kibana-security.yml index 10b42259..45dbb0b6 100644 --- a/roles/kibana/tasks/kibana-security.yml +++ b/roles/kibana/tasks/kibana-security.yml @@ -67,6 +67,15 @@ _validate_ca_extracted_fact: _kibana_ca_extracted when: not (_kibana_content_mode | bool) + # -- Ensure cert directory exists before deploying -- + - name: Create certificate directory + ansible.builtin.file: + path: /etc/kibana/certs + state: directory + owner: root + group: kibana + mode: "0750" + # -- Deploy certificates -- - name: Write Kibana certificate (from content)