Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
14d3067
Add master cluster autoscale support
dwoz May 27, 2025
47b2fff
Fix critical bugs in cluster join-reply handler
dwoz Jan 1, 2026
40354d3
Add signature verification and security hardening to cluster join pro…
dwoz Jan 1, 2026
ee31742
Use Salt's clean_join() for path traversal protection
dwoz Jan 1, 2026
a0e6f4e
Add comprehensive integration tests for cluster autoscale
dwoz Jan 1, 2026
9c607a4
Add comprehensive unit tests for cluster autoscale join handlers
dwoz Jan 1, 2026
9e36d38
Fix unit test failures in cluster autoscale tests
dwoz Jan 1, 2026
9e9d246
Add signature verification to JOIN-NOTIFY handler
dwoz Jan 1, 2026
64a3a1d
Enable token validation in discover-reply handler
dwoz Jan 2, 2026
dd4b2f7
Fix config key typo: clsuter_pub_signature → cluster_pub_signature
dwoz Jan 2, 2026
df0add0
Replace SHA-1 with SHA-256 for cluster_pub digest
dwoz Jan 2, 2026
cfb37b7
Add cluster_pub_signature_required config option (secure by default)
dwoz Jan 2, 2026
f2a9b34
Add tests for cluster_pub_signature_required (secure by default)
dwoz Jan 2, 2026
5ae36f2
Fix master startup crash when master_pub not yet in cache
dwoz Jan 2, 2026
d34e100
Fix AttributeError: remove broken __get_keys() call in MasterKeys.__i…
dwoz Jan 2, 2026
025919e
Fix test: check peer key instead of cluster.pem for join rejection
dwoz Jan 2, 2026
737926d
Fix test_autoscale_rejects_join_with_wrong_cluster_pub_signature
dwoz Jan 2, 2026
39a335f
Fix linting errors in autoscale code
dwoz Jan 2, 2026
1572b30
Replace broad exception handlers with specific exception types
dwoz Jan 2, 2026
b2a3615
Add changelog entry for cluster autoscale feature
dwoz Jan 3, 2026
b899f1a
Fix isort and black formatting in test files
dwoz Jan 3, 2026
1c5db9e
Fix black formatting in source files
dwoz Jan 3, 2026
feb8fcc
Fix black formatting in test_autoscale_functional.py
dwoz Jan 3, 2026
a8c7bc9
Fix black formatting in earlier autoscale commits
dwoz Jan 3, 2026
9744ddf
Fix PublicKey class to accept key_bytes instead of path
dwoz Jan 3, 2026
9873d42
Fix integration tests to read log files instead of calling non-existe…
dwoz Jan 4, 2026
2a86382
Run black formatter on test_autoscale_security.py
dwoz Jan 4, 2026
9a6d5e4
Fix pylint encoding warnings in _get_log_contents()
dwoz Jan 4, 2026
42857c1
Fix TypeError in cluster autoscale: use BaseKey.from_file() instead o…
dwoz Jan 4, 2026
2b035c8
Fix TypeError: use PublicKey.from_file() instead of BaseKey.from_file()
dwoz Jan 5, 2026
364fa20
Fix handle_pool_publish() signature: remove extra parameter
dwoz Jan 6, 2026
c4786f7
Add exception handling for InvalidKeyError in cluster discover/join h…
dwoz Jan 6, 2026
0387609
Run black formatter on server.py
dwoz Jan 9, 2026
1e315a0
Fix event routing and KeyError issues in cluster autoscale
dwoz Jan 11, 2026
0c83f48
Add automatic peer discovery mechanism (partial implementation)
dwoz Jan 14, 2026
b0d4857
Fix pre-commit
dwoz Apr 2, 2026
13da66e
Fix Master Cluster autoscale protocol and transport initialization
dwoz Apr 3, 2026
701a606
Fix Master Cluster autoscale transport initialization and key retrieval
dwoz Apr 4, 2026
1ef22c0
Fix systemic master initialization and cluster protocol issues
dwoz Apr 4, 2026
9cf741f
Fix systemic master cluster transport and async initialization
dwoz Apr 5, 2026
84e673f
Fix remaining line too long linting violations in transport and channel
dwoz Apr 5, 2026
1b35b34
Fix import order and line length in test_netapi.py
dwoz Apr 5, 2026
3289c7b
Fix linting and formatting issues in tests and core
dwoz Apr 5, 2026
3223bb5
Fix linting issues: implicit string concat and empty docstrings
dwoz Apr 5, 2026
81b821c
Move asyncio imports to top in master and netapi
dwoz Apr 5, 2026
3afb72b
Fix systemic CI failures: TypeError, salt-ssh regression, and linting
dwoz Apr 5, 2026
8fb0c6b
Final pre-commit and formatting fixes
dwoz Apr 5, 2026
4f834cd
Fix sequential master join test and apply pre-commit optimizations
dwoz Apr 6, 2026
e3b9157
Force CI rerun on definitive fixes
dwoz Apr 6, 2026
1870974
Definitively fix local publisher connection robustness
dwoz Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/68576.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add cluster autoscale support with comprehensive security hardening including signature verification, token validation, path traversal protection, and secure-by-default configuration.
1,074 changes: 1,042 additions & 32 deletions salt/channel/server.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion salt/cli/daemons.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def verify_environment(self):
if (
self.config["cluster_id"]
and self.config["cluster_pki_dir"]
and self.config["cluster_pki_dir"] != self.config["pki_dir"]
# and self.config["cluster_pki_dir"] != self.config["pki_dir"]
):
v_dirs.extend(
[
Expand Down
3 changes: 2 additions & 1 deletion salt/client/netapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
The main entry point for salt-api
"""

import asyncio
import logging
import signal

Expand Down Expand Up @@ -63,7 +64,7 @@ def run(self):
# No custom signal handling was added, install our own
signal.signal(signal.SIGTERM, self._handle_signals)

self.process_manager.run()
asyncio.run(self.process_manager.run())

def _handle_signals(self, signum, sigframe):
# escalate the signals to the process manager
Expand Down
2 changes: 1 addition & 1 deletion salt/client/ssh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1920,7 +1920,7 @@ def _cmd_str(self):
code_checksum=thin_code_digest,
arguments=self.argv,
)
py_code = SSH_PY_SHIM.replace("#%%OPTS", arg_str)
py_code = SSH_PY_SHIM.replace("# %%OPTS", arg_str)
py_code_enc = base64.encodebytes(py_code.encode("utf-8")).decode("utf-8")
if not self.winrm:
cmd = SSH_SH_SHIM.format(
Expand Down
2 changes: 1 addition & 1 deletion salt/client/ssh/ssh_py_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class OptionsContainer:
# The below line is where OPTIONS can be redefined with internal options
# (rather than cli arguments) when the shim is bundled by
# client.ssh.Single._cmd_str()
#%%OPTS
# %%OPTS


def get_system_encoding():
Expand Down
8 changes: 7 additions & 1 deletion salt/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ def _gather_buffer_space():
"cluster_pki_dir": str,
# The port required to be open for a master cluster to properly function
"cluster_pool_port": int,
# SHA-256 hash of the cluster public key for verification during autoscale join
"cluster_pub_signature": str,
# Require cluster_pub_signature to be configured for autoscale joins (recommended)
"cluster_pub_signature_required": bool,
# Use a module function to determine the unique identifier. If this is
# set and 'id' is not set, it will allow invocation of a module function
# to determine the value of 'id'. For simple invocations without function
Expand Down Expand Up @@ -1728,6 +1732,8 @@ def _gather_buffer_space():
"cluster_peers": [],
"cluster_pki_dir": None,
"cluster_pool_port": 4520,
"cluster_pub_signature": None,
"cluster_pub_signature_required": True,
"features": {},
"publish_signing_algorithm": "PKCS1v15-SHA1",
"keys.cache_driver": "localfs_key",
Expand Down Expand Up @@ -4225,7 +4231,7 @@ def apply_master_config(overrides=None, defaults=None):
if "cluster_id" not in opts:
opts["cluster_id"] = None
if opts["cluster_id"] is not None:
if not opts.get("cluster_peers", None):
if not opts.get("cluster_peers", None) and not opts.get("cluster_secret", None):
log.warning("Cluster id defined without defining cluster peers")
opts["cluster_peers"] = []
if not opts.get("cluster_pki_dir", None):
Expand Down
149 changes: 148 additions & 1 deletion salt/crypt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import base64
import binascii
import copy
import getpass
import hashlib
import hmac
import logging
Expand Down Expand Up @@ -145,6 +146,57 @@ def dropfile(cachedir, user=None, master_id=""):
os.rename(dfn_next, dfn)


def _write_private(keydir, keyname, key, passphrase=None):
base = os.path.join(keydir, keyname)
priv = f"{base}.pem"
# Do not try writing anything, if directory has no permissions.
if not os.access(keydir, os.W_OK):
raise OSError(
'Write access denied to "{}" for user "{}".'.format(
os.path.abspath(keydir), getpass.getuser()
)
)
if pathlib.Path(priv).exists():
# XXX
# raise RuntimeError()
log.error("Key should not exist")
with salt.utils.files.set_umask(0o277):
with salt.utils.files.fopen(priv, "wb+") as f:
if passphrase:
enc = serialization.BestAvailableEncryption(passphrase.encode())
_format = serialization.PrivateFormat.TraditionalOpenSSL
if fips_enabled():
_format = serialization.PrivateFormat.PKCS8
else:
enc = serialization.NoEncryption()
_format = serialization.PrivateFormat.TraditionalOpenSSL
pem = key.private_bytes(
encoding=serialization.Encoding.PEM,
format=_format,
encryption_algorithm=enc,
)
f.write(pem)


def _write_public(keydir, keyname, key):
base = os.path.join(keydir, keyname)
pub = f"{base}.pub"
# Do not try writing anything, if directory has no permissions.
if not os.access(keydir, os.W_OK):
raise OSError(
'Write access denied to "{}" for user "{}".'.format(
os.path.abspath(keydir), getpass.getuser()
)
)
pubkey = key.public_key()
with salt.utils.files.fopen(pub, "wb+") as f:
pem = pubkey.public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo,
)
f.write(pem)


def gen_keys(keysize, passphrase=None, e=65537):
"""
Generate a RSA public keypair for use with salt
Expand Down Expand Up @@ -183,6 +235,47 @@ def gen_keys(keysize, passphrase=None, e=65537):
)


def write_keys(keydir, keyname, keysize, user=None, passphrase=None, e=65537):
"""
Generate and write a RSA public keypair for use with salt

:param str keydir: The directory to write the keypair to
:param str keyname: The type of salt server for whom this key should be written. (i.e. 'master' or 'minion')
:param int keysize: The number of bits in the key
:param str user: The user on the system who should own this keypair
:param str passphrase: The passphrase which should be used to encrypt the private key

:rtype: str
:return: Path on the filesystem to the RSA private key
"""
base = os.path.join(keydir, keyname)
priv = f"{base}.pem"
pub = f"{base}.pub"

gen = rsa.generate_private_key(e, keysize)

if os.path.isfile(priv):
# Between first checking and the generation another process has made
# a key! Use the winner's key
return priv

_write_private(keydir, keyname, gen, passphrase)
_write_public(keydir, keyname, gen)
os.chmod(priv, 0o400)
if user:
try:
import pwd

uid = pwd.getpwnam(user).pw_uid
os.chown(priv, uid, -1)
os.chown(pub, uid, -1)
except (KeyError, ImportError, OSError):
# The specified user was not found, allow the backup systems to
# report the error
pass
return priv


class BaseKey:

@classmethod
Expand Down Expand Up @@ -278,14 +371,29 @@ def decrypt(self, data, algorithm=OAEP_SHA1):
except cryptography.exceptions.UnsupportedAlgorithm:
raise UnsupportedAlgorithm(f"Unsupported algorithm: {algorithm}")

def write_private(self, keydir, name, passphrase=None):
_write_private(keydir, name, self.key, passphrase)

def write_public(self, keydir, name):
_write_public(keydir, name, self.key)

def public_key(self):
"""
proxy to PrivateKey.public_key()
"""
return self.key.public_key()


class PrivateKeyString(PrivateKey):
def __init__(self, data, password=None): # pylint: disable=super-init-not-called
self.key = serialization.load_pem_private_key(
data.encode(),
password=password,
)


class PublicKey(BaseKey):

def __init__(self, key_bytes):
log.debug("Loading public key")
try:
Expand All @@ -298,7 +406,10 @@ def __init__(self, key_bytes):
def encrypt(self, data, algorithm=OAEP_SHA1):
_padding = self.parse_padding_for_encryption(algorithm)
_hash = self.parse_hash(algorithm)
bdata = salt.utils.stringutils.to_bytes(data)
if type(data) == "bytes":
bdata = data
else:
bdata = salt.utils.stringutils.to_bytes(data)
try:
return self.key.encrypt(
bdata,
Expand Down Expand Up @@ -334,6 +445,14 @@ def decrypt(self, data):
return verifier.verify(data)


class PublicKeyString(PublicKey):
def __init__(self, data): # pylint: disable=super-init-not-called
try:
self.key = serialization.load_pem_public_key(data.encode())
except ValueError as exc:
raise InvalidKeyError("Invalid key")


@salt.utils.decorators.memoize
def get_rsa_key(path, passphrase):
"""
Expand Down Expand Up @@ -399,6 +518,27 @@ def __init__(self, opts, autocreate=True):
# master.pem/pub can be removed
self.master_id = self.opts["id"].removesuffix("_master")

self.cluster_pub_path = None
self.cluster_rsa_path = None
self.cluster_key = None
# XXX
if self.opts["cluster_id"]:
self.cluster_pub_path = os.path.join(
self.opts["cluster_pki_dir"], "cluster.pub"
)
self.cluster_rsa_path = os.path.join(
self.opts["cluster_pki_dir"], "cluster.pem"
)
if self.opts["cluster_pki_dir"] != self.opts["pki_dir"]:
self.cluster_shared_path = os.path.join(
self.opts["cluster_pki_dir"],
"peers",
f"{self.opts['id']}.pub",
)
# Note: cluster_key setup moved to _setup_keys() (line 614-620)
# to ensure it happens after master keys are initialized
self.pub_signature = None

# set names for the signing key-pairs
self.pubkey_signature = None
self.master_pubkey_signature = (
Expand Down Expand Up @@ -597,6 +737,13 @@ def check_master_shared_pub(self):
if not master_pub:
master_pub = self.cache.fetch("master_keys", "master.pub")

# If master_pub is still None, the keys haven't been set up yet
# This can happen if check_master_shared_pub() is called before _setup_keys()
# Just return early and let the later call (after key setup) handle it
if not master_pub:
log.debug("Master public key not yet available, skipping shared key check")
return

if shared_pub:
if shared_pub != master_pub:
message = (
Expand Down
21 changes: 14 additions & 7 deletions salt/ext/saslprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ def saslprep(data):
if isinstance(data, str):
raise TypeError(
"The stringprep module is not available. Usernames and "
"passwords must be ASCII strings.")
"passwords must be ASCII strings."
)
return data

else:
HAVE_STRINGPREP = True
import unicodedata

# RFC4013 section 2.3 prohibited output.
_PROHIBITED = (
# A strict reading of RFC 4013 requires table c12 here, but
Expand All @@ -44,7 +47,8 @@ def saslprep(data):
stringprep.in_table_c6,
stringprep.in_table_c7,
stringprep.in_table_c8,
stringprep.in_table_c9)
stringprep.in_table_c9,
)

def saslprep(data, prohibit_unassigned_code_points=True):
"""An implementation of RFC4013 SASLprep.
Expand Down Expand Up @@ -77,12 +81,16 @@ def saslprep(data, prohibit_unassigned_code_points=True):
in_table_c12 = stringprep.in_table_c12
in_table_b1 = stringprep.in_table_b1
data = "".join(
["\u0020" if in_table_c12(elt) else elt
for elt in data if not in_table_b1(elt)])
[
"\u0020" if in_table_c12(elt) else elt
for elt in data
if not in_table_b1(elt)
]
)

# RFC3454 section 2, step 2 - Normalize
# RFC4013 section 2.2 normalization
data = unicodedata.ucd_3_2_0.normalize('NFKC', data)
data = unicodedata.ucd_3_2_0.normalize("NFKC", data)

in_table_d1 = stringprep.in_table_d1
if in_table_d1(data[0]):
Expand All @@ -103,7 +111,6 @@ def saslprep(data, prohibit_unassigned_code_points=True):
# RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
for char in data:
if any(in_table(char) for in_table in prohibited):
raise ValueError(
"SASLprep: failed prohibited character check")
raise ValueError("SASLprep: failed prohibited character check")

return data
Loading
Loading