Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ check_swarm Usage
[--connection [/<path to>/docker.socket|<ip/host address>:<port>]
| --secure-connection [<ip/host address>:<port>]]
[--timeout TIMEOUT]
(--swarm | --service SERVICE [SERVICE ...] | --ignore_paused)
(--swarm | --service SERVICE [SERVICE ...] | --ignore-paused)
[-V]

Check docker swarm.
Expand All @@ -151,7 +151,7 @@ check_swarm Usage
--service SERVICE [SERVICE ...]
One or more RegEx that match the names of the
services(s) to check.
--ignore_paused Don't require global services to be running on paused nodes
--ignore-paused Don't require global services to be running on paused nodes
-V show program's version number and exit

Gotchas
Expand Down
24 changes: 12 additions & 12 deletions check_docker/check_swarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,16 @@ def check_swarm():


def process_global_service(name, ignore_paused=False):
bad_node_states = {'drain'}
ignore_node_states = {'drain'}
if ignore_paused:
bad_node_states.add('paused')
ignore_node_states.add('pause')

# Get all the nodes we care about based on their state
node_list, _ = get_nodes()
node_index = set()
for node in node_list:
if node['Spec']['Availability'] in bad_node_states:
# we can ignore these nodes
if node['Spec']['Availability'] in ignore_node_states:
continue
node_index.add(node['ID'])

Expand All @@ -219,13 +220,12 @@ def process_global_service(name, ignore_paused=False):
# Also note, this ignores conditions where services state they are running on a node not in the index.
service_tasks = get_service_tasks(name)
for task in service_tasks:
if task['Status']['State'] != 'running':
critical('Global service {service} has one or more tasks not running'.format(service=name))
return
node_index.discard(task['NodeID'])
if task['Status']['State'] == 'running' and task['NodeID'] in node_index:
node_index.discard(task['NodeID'])

if len(node_index) > 0:
critical('Global service {service} has {count} tasks not running'.format(service=name, count=len(node_list)))
critical('Global service {service} has {count} tasks not running'.format(service=name, count=len(node_index)))
return

ok('Global service {service} OK'.format(service=name))

Expand Down Expand Up @@ -310,10 +310,10 @@ def process_args(args):
default=[],
help='One or more RegEx that match the names of the services(s) to check.')

swarm_group.add_argument('--ignore_paused',
dest='ignore_paused',
action='store_true',
help="Don't require global services to be running on paused nodes")
parser.add_argument('--ignore-paused',
dest='ignore_paused',
action='store_true',
help="Don't require global services to be running on paused nodes")

# Debug logging
parser.add_argument('--debug',
Expand Down
4 changes: 2 additions & 2 deletions tests/test_check_swarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def active_node():

@pytest.fixture
def paused_node():
return {"ID": 43, 'Spec': {'Availability': 'paused'}}
return {"ID": 43, 'Spec': {'Availability': 'pause'}}


@pytest.fixture
Expand Down Expand Up @@ -313,7 +313,7 @@ def test_check_services_global_ignore_paused(check_swarm, fs):
@pytest.mark.parametrize("service_list, ignore_paused, expected_rc", (
([active_node_task, paused_node_task, drain_node_task], False, cs.OK_RC),
([active_node_task, drain_node_task], False, cs.CRITICAL_RC),
([active_node_task, paused_node_task], False, cs.OK_RC),
([active_node_task, paused_node_task], True, cs.OK_RC),
([active_node_task], False, cs.CRITICAL_RC),
([paused_node_task], False, cs.CRITICAL_RC),
([], False, cs.CRITICAL_RC),
Expand Down