From 02bb21c9c25b750b2c8656eb45093eb16e337068 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Mon, 2 Feb 2026 12:15:55 +0100 Subject: [PATCH 1/2] Improve check_swarm --swarm by using local node state --- check_docker/check_swarm.py | 26 ++++++++++++++++++-------- tests/test_check_swarm.py | 24 ++++++++++++++++++++---- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/check_docker/check_swarm.py b/check_docker/check_swarm.py index 340adfd..15c438a 100755 --- a/check_docker/check_swarm.py +++ b/check_docker/check_swarm.py @@ -112,19 +112,15 @@ def process_urllib_response(response): def get_swarm_status(): - content, status = get_url(daemon + '/swarm') - return status - + return get_url(daemon + '/info') def get_service_info(name): return get_url(daemon + '/services/{service}'.format(service=name)) - def get_service_tasks(name): tasks, status = get_url(daemon + '/tasks?filters={{"name":{{"{service}":true}}}}'.format(service=name)) return tasks - def get_nodes(): return get_url(daemon + '/nodes') @@ -186,9 +182,23 @@ def unknown(message): # Checks ############################################################################################# def check_swarm(): - status = get_swarm_status() - process_url_status(status, ok_msg='Node is in a swarm', - critical_msg='Node is not in a swarm', unknown_msg='Error accessing swarm info') + content, status = get_swarm_status() + if status not in HTTP_GOOD_CODES: + unknown('Could not retrieve swarm info') + return + + if not 'Swarm' in content: + unknown('No swarm status available') + return + + state = content['Swarm'].get('LocalNodeState') + + if state == 'active': + ok(f'Node is in a swarm. Local node status: {state}') + elif state == 'pending': + warning(f'Node is not active in swarm. Local node status: {state}') + else: + critical(f'Node is not in a swarm. Local node status: {state}') def process_global_service(name, ignore_paused=False): diff --git a/tests/test_check_swarm.py b/tests/test_check_swarm.py index da630ef..b106b47 100644 --- a/tests/test_check_swarm.py +++ b/tests/test_check_swarm.py @@ -72,8 +72,8 @@ def mock_open(*args, **kwargs): def test_get_swarm_status(check_swarm): with patch('check_docker.check_swarm.get_url', return_value=('', 999)): - response = check_swarm.get_swarm_status() - assert response == 999 + response, status = check_swarm.get_swarm_status() + assert status == 999 def test_get_service_info(check_swarm): @@ -243,7 +243,7 @@ def test_check_swarm_called(check_swarm, fs): def test_check_swarm_results_OK(check_swarm, fs): fs.create_file(check_swarm.DEFAULT_SOCKET, contents='', st_mode=(stat.S_IFSOCK | 0o666)) args = ['--swarm'] - with patch('check_docker.check_swarm.get_swarm_status', return_value=200): + with patch('check_docker.check_swarm.get_swarm_status', return_value=({'Swarm': {'LocalNodeState': 'active'}}, 200)): check_swarm.perform_checks(args) assert check_swarm.rc == cs.OK_RC @@ -251,11 +251,27 @@ def test_check_swarm_results_OK(check_swarm, fs): def test_check_swarm_results_CRITICAL(check_swarm, fs): fs.create_file(check_swarm.DEFAULT_SOCKET, contents='', st_mode=(stat.S_IFSOCK | 0o666)) args = ['--swarm'] - with patch('check_docker.check_swarm.get_swarm_status', return_value=406): + with patch('check_docker.check_swarm.get_swarm_status', return_value=({'Swarm': {'LocalNodeState': 'inactive'}}, 200)): check_swarm.perform_checks(args) assert check_swarm.rc == cs.CRITICAL_RC +def test_check_swarm_results_WARNING(check_swarm, fs): + fs.create_file(check_swarm.DEFAULT_SOCKET, contents='', st_mode=(stat.S_IFSOCK | 0o666)) + args = ['--swarm'] + with patch('check_docker.check_swarm.get_swarm_status', return_value=({'Swarm': {'LocalNodeState': 'pending'}}, 200)): + check_swarm.perform_checks(args) + assert check_swarm.rc == cs.WARNING_RC + + +def test_check_swarm_results_UNKNOWN(check_swarm, fs): + fs.create_file(check_swarm.DEFAULT_SOCKET, contents='', st_mode=(stat.S_IFSOCK | 0o666)) + args = ['--swarm'] + with patch('check_docker.check_swarm.get_swarm_status', return_value=({}, 200)): + check_swarm.perform_checks(args) + assert check_swarm.rc == cs.UNKNOWN_RC + + def test_check_service_called(check_swarm, fs): service_info = {'Spec': {'Mode': {'Replicated': {'Replicas': 1}}}} From 3eac7e5802ed2856753720d944c0d07b239de434 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Mon, 2 Feb 2026 12:21:13 +0100 Subject: [PATCH 2/2] Fix some linting issues --- check_docker/check_swarm.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/check_docker/check_swarm.py b/check_docker/check_swarm.py index 15c438a..347c774 100755 --- a/check_docker/check_swarm.py +++ b/check_docker/check_swarm.py @@ -104,7 +104,7 @@ def process_urllib_response(response): try: resp = json.loads(body) except json.JSONDecodeError as e: - unknown(f'Unable to parse response.') + unknown(f'Unable to parse response. {e}') print_results() sys.exit(rc) @@ -118,7 +118,7 @@ def get_service_info(name): return get_url(daemon + '/services/{service}'.format(service=name)) def get_service_tasks(name): - tasks, status = get_url(daemon + '/tasks?filters={{"name":{{"{service}":true}}}}'.format(service=name)) + tasks, _ = get_url(daemon + '/tasks?filters={{"name":{{"{service}":true}}}}'.format(service=name)) return tasks def get_nodes(): @@ -130,7 +130,7 @@ def get_services(names): if status == 406: critical("Error checking service status, node is not in swarm mode") return [] - elif status not in HTTP_GOOD_CODES: + if status not in HTTP_GOOD_CODES: unknown("Could not retrieve service info") return [] @@ -187,7 +187,7 @@ def check_swarm(): unknown('Could not retrieve swarm info') return - if not 'Swarm' in content: + if 'Swarm' not in content: unknown('No swarm status available') return @@ -207,7 +207,7 @@ def process_global_service(name, ignore_paused=False): bad_node_states.add('paused') # Get all the nodes we care about based on their state - node_list, status = get_nodes() + node_list, _ = get_nodes() node_index = set() for node in node_list: if node['Spec']['Availability'] in bad_node_states: @@ -244,7 +244,7 @@ def process_replicated_service(name, replicas_desired): def check_service(name, ignore_paused=False): # get service mode - service_info, status = get_service_info(name) + service_info, _ = get_service_info(name) mode_info = service_info['Spec']['Mode'] # if global ensure one per node @@ -356,8 +356,8 @@ def socketfile_permissions_failure(parsed_args): and stat.S_ISSOCK(os.stat(parsed_args.connection).st_mode) and os.access(parsed_args.connection, os.R_OK) and os.access(parsed_args.connection, os.W_OK)) - else: - return False + + return False def print_results(): @@ -389,7 +389,7 @@ def perform_checks(raw_args): def main(): perform_checks(argv[1:]) - exit(rc) + sys.exit(rc) if __name__ == '__main__':