Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions .codeclimate.yml

This file was deleted.

6 changes: 6 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Dependabot configuration: keep the GitHub Actions used by the
# workflows in this repo up to date, checking once a month.
# NOTE(review): the nesting below was flattened in the pasted copy;
# `directory` and `schedule` must belong to the update entry for the
# file to be valid YAML.
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: monthly
35 changes: 9 additions & 26 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Run tests
name: CI

on:
push:
Expand All @@ -7,35 +7,18 @@ on:
branches: [ master ]

jobs:
build:

gitHubActionForPytest:
runs-on: ubuntu-latest
strategy:
matrix:
python_version: [3.6, 3.7, 3.8]

python-version: ["3.9", "3.10"]
name: GitHub Action
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python_version }}
- name: Checkout
uses: actions/checkout@v6
- name: Install dependencies
run: |
python -V
printenv
python -m pip install --upgrade pip
pip install flake8 pytest coverage pyfakefs pytest-cov
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
python -m pip install -r requirements-dev.txt
- name: Test and coverage
run: |
pytest --cov=check_docker --cov-fail-under 90 --cov-report term --cov-report html
- uses: actions/upload-artifact@v2
with:
name: coverage_report
path: htmlcov
make coverage
20 changes: 6 additions & 14 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,14 @@ celerybeat-schedule
venv/
ENV/

# Spyder project settings
# Editors
.spyderproject

# Rope project settings
.ropeproject

.idea


## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
.idea
/out/

\#*
.\#*

# JIRA plugin
atlassian-ide-plugin.xml
Expand All @@ -118,6 +109,7 @@ cr-sess1.json


testing_tools/vagrant/.vagrant
.vagrant
!check_docker/

.DS_Store
.DS_Store
14 changes: 0 additions & 14 deletions .travis.yml

This file was deleted.

8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Developer convenience targets; `coverage` mirrors the CI invocation.
# Fix: `coverage` was missing from .PHONY, so a file named "coverage"
# (e.g. the coverage data file) would silently shadow the target.
.PHONY: lint test coverage

lint:
	python -m pylint check_docker/

test:
	py.test -v

coverage:
	py.test --cov=check_docker
17 changes: 0 additions & 17 deletions Pipfile

This file was deleted.

104 changes: 76 additions & 28 deletions check_docker/check_docker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# logging.basicConfig(level=logging.DEBUG)

import argparse
import json
import logging
Expand All @@ -8,6 +8,7 @@
import re
import socket
import stat
import sys
import traceback
from collections import deque, namedtuple, UserDict, defaultdict
from concurrent import futures
Expand Down Expand Up @@ -138,13 +139,13 @@ def http_response(self, request, response):
https_response = http_response

@staticmethod
def _get_outh2_token(www_authenticate_header):
def _get_oauth2_token(www_authenticate_header):
auth_fields = dict(re.findall(r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""", www_authenticate_header))

auth_url = "{realm}?scope={scope}&service={service}".format(
realm=auth_fields['realm'],
scope=auth_fields['scope'],
service=auth_fields['service'],
realm=auth_fields.get('realm'),
scope=auth_fields.get('scope'),
service=auth_fields.get('service'),
)
token_request = Request(auth_url)
token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
Expand All @@ -160,7 +161,7 @@ def process_oauth2(self, request, response, www_authenticate_header):
raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
response.headers, response)

auth_token = self._get_outh2_token(www_authenticate_header)
auth_token = self._get_oauth2_token(www_authenticate_header)

request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
return self.parent.open(request, timeout=request.timeout)
Expand Down Expand Up @@ -276,16 +277,31 @@ def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
@lru_cache(maxsize=None)
def get_url(url):
    """GET *url* and return a ``(parsed_body, http_status)`` tuple.

    Responses are memoized via ``lru_cache`` so the same endpoint is only
    hit once per run. On a connection failure the check is recorded as
    UNKNOWN and the process exits immediately, since there is no result
    left to evaluate.

    Note: the superseded pre-change body (an unconditional ``return``
    before the ``try`` block, which made the error handling unreachable)
    has been removed.
    """
    logger.debug("get_url: {}".format(url))
    try:
        response = better_urllib_get.open(url, timeout=timeout)
        logger.debug("get_url: {} {}".format(url, response.status))
        return process_urllib_response(response), response.status
    except URLError as e:
        unknown(f'Failed to connect to daemon: {e.reason}.')
        # We have no result, so we can just exit
        print_results()
        sys.exit(rc)


def process_urllib_response(response):
    """Decode *response* as UTF-8 JSON and return the parsed object.

    If the body is not valid JSON the check is recorded as UNKNOWN and
    the process exits immediately, since there is nothing to evaluate.

    Changes: removed the leftover pre-change lines (the old
    ``return json.loads(body)`` made everything after it dead code),
    dropped the pointless ``f``-prefix on a placeholder-free string and
    the unused exception binding, and returned directly instead of
    going through a throwaway ``resp = {}`` default.
    """
    response_bytes = response.read()
    body = response_bytes.decode('utf-8')
    logger.debug(body)

    try:
        return json.loads(body)
    except json.JSONDecodeError:
        # No usable payload; report UNKNOWN and stop here.
        unknown('Unable to parse response.')
        print_results()
        sys.exit(rc)


def get_container_info(name):
Expand Down Expand Up @@ -358,26 +374,39 @@ def normalize_image_name_to_manifest_url(image_name, insecure_registries):

# Registry query url
scheme = 'http' if parsed_url.registry.lower() in lower_insecure else 'https'
url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(scheme=scheme,
url = '{scheme}://{registry}/v2/{image_name}/manifests'.format(scheme=scheme,
registry=parsed_url.registry,
image_name=parsed_url.name,
image_tag=parsed_url.tag)
return url, parsed_url.registry
image_name=parsed_url.name)
image_tag = parsed_url.tag

return url, image_tag, parsed_url.registry


# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
# As result it is best to single thread this check
# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
def get_digest_from_registry(url, image_tag, image_arch):
    """Return the image config digest for *image_tag* from a registry.

    *url* is the ``.../manifests`` base URL; the tag (and, for
    multi-arch images, the per-architecture manifest digest) is appended
    to it. When the registry answers with a manifest list, the entry
    matching *image_arch* is resolved and fetched first.

    Raises:
        RegistryError: if the registry does not answer with HTTP 200.

    Note: the superseded pre-change signature line and the old
    single-argument ``get_url(url=url)`` call (leftover diff residue
    that re-queried the bare manifests URL) have been removed.
    """
    logger.debug("get_digest_from_registry")
    # TODO: Handle logging in if needed
    image_url = '{}/{}'.format(url, image_tag)
    registry_info, status_code = get_url(url=image_url)

    if 'manifests' in registry_info:
        # Multi-arch image: resolve the per-architecture manifest digest,
        # then fetch that manifest to get the image config.
        digest = find_digest_for_architecture(registry_info['manifests'], image_arch)
        image_url = '{}/{}'.format(url, digest)
        registry_info, status_code = get_url(url=image_url)

    if status_code != 200:
        raise RegistryError(response=registry_info)

    return registry_info['config'].get('digest', None)

def find_digest_for_architecture(manifests, image_arch):
    """Return the digest of the first manifest matching *image_arch*.

    Args:
        manifests: list of manifest dicts from a registry manifest list;
            each entry is expected to carry a ``platform`` mapping with
            an ``architecture`` key and a ``digest``.
        image_arch: architecture string to match, e.g. ``'amd64'``.

    Returns:
        The matching entry's ``'digest'`` value, or ``None`` when no
        entry matches (or the matching entry has no digest).
    """
    for manifest in manifests:
        platform = manifest.get('platform')
        # .get() guards against entries whose platform mapping lacks an
        # 'architecture' key, which would previously raise KeyError.
        if platform and platform.get('architecture') == image_arch:
            return manifest.get('digest')
    return None

def set_rc(new_rc):
global rc
Expand Down Expand Up @@ -520,7 +549,14 @@ def check_memory(container, thresholds):
inspection = get_stats(container)

# Subtracting cache to match what `docker stats` does.
adjusted_usage = inspection['memory_stats']['usage'] - inspection['memory_stats']['stats']['total_cache']
adjusted_usage = inspection['memory_stats']['usage']
if 'total_cache' in inspection['memory_stats']['stats']:
# CGroups v1 - https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
adjusted_usage -= inspection['memory_stats']['stats']['total_cache']
elif 'inactive_file' in inspection['memory_stats']['stats']:
# CGroups v2 - https://www.kernel.org/doc/Documentation/cgroup-v2.txt
adjusted_usage -= inspection['memory_stats']['stats']['inactive_file']

if thresholds.units == '%':
max = 100
usage = int(100 * adjusted_usage / inspection['memory_stats']['limit'])
Expand Down Expand Up @@ -618,10 +654,14 @@ def check_version(container, insecure_registries):
unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container))
return

url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
logger.debug("Looking up image digest here {}".format(url))

container_image = get_container_info(container)['Image']
image_arch = get_image_info(container_image)['Architecture']

url, image_tag, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
logger.debug("Looking up image digest here {}/{}".format(url, image_tag))
try:
registry_hash = get_digest_from_registry(url)
registry_hash = get_digest_from_registry(url, image_tag, image_arch)
except URLError as e:
if hasattr(e.reason, 'reason') and e.reason.reason == 'UNKNOWN_PROTOCOL':
unknown(
Expand All @@ -630,12 +670,12 @@ def check_version(container, insecure_registries):
return
elif hasattr(e.reason, 'strerror') and e.reason.strerror == 'nodename nor servname provided, or not known':
unknown(
"Cannot reach registry for {} at {}".format(container, url))
"Cannot reach registry for {} at {}/{}".format(container, url, image_tag))
return
else:
raise e
except RegistryError as e:
unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url))
unknown("Cannot check version, couldn't retrieve digest for {} while checking {}/{}.".format(container, url, image_tag))
return
logger.debug("Image digests, local={} remote={}".format(image_id, registry_hash))
if registry_hash == image_id:
Expand Down Expand Up @@ -766,7 +806,7 @@ def process_args(args):
action='store',
type=str,
metavar='WARN:CRIT',
help='Check cpu usage percentage taking into account any limits.')
help='Check cpu usage percentage taking into account any limits. Valid values are 0 - 100.')

# Memory
parser.add_argument('--memory',
Expand Down Expand Up @@ -842,13 +882,22 @@ def process_args(args):
action='store_true',
help='Suppress performance data. Reduces output when performance data is not being used.')

# Debug logging
parser.add_argument('--debug',
dest='debug',
action='store_true',
help='Enable debug logging.')

parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))

if len(args) == 0:
parser.print_help()

parsed_args = parser.parse_args(args=args)

if parsed_args.debug:
logging.basicConfig(level=logging.DEBUG)

global timeout
timeout = parsed_args.timeout

Expand Down Expand Up @@ -892,10 +941,10 @@ def print_results():
if len(filtered_messages) == 0:
messages_concat = 'OK'
else:
messages_concat = '; '.join(filtered_messages)
messages_concat = '\n'.join(filtered_messages)

else:
messages_concat = '; '.join(messages)
messages_concat = '\n'.join(messages)

if no_performance or len(performance_data) == 0:
print(messages_concat)
Expand All @@ -919,7 +968,7 @@ def perform_checks(raw_args):
no_ok = args.no_ok

global no_performance
no_performance = args.no_ok
no_performance = args.no_performance
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doh, that is a silly bug. Thanks for fixing.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh no worries. I've made my fair share of copy-paste mistakes


if socketfile_permissions_failure(args):
unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
Expand All @@ -934,7 +983,6 @@ def perform_checks(raw_args):
return

# Here is where all the work happens
#############################################################################################
containers = get_containers(args.containers, args.present)

if len(containers) == 0 and not args.present:
Expand Down
Loading