From 9402c063b3ea5d02c91e27c5faab7fa00798ea64 Mon Sep 17 00:00:00 2001 From: Vyron Tsingaras Date: Fri, 31 Aug 2018 14:21:35 +0300 Subject: [PATCH 1/3] Add WWN info to PD and add property to find linux /dev/* disk node --- pymegacli/components.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pymegacli/components.py b/pymegacli/components.py index f46d7a1..f4e593c 100644 --- a/pymegacli/components.py +++ b/pymegacli/components.py @@ -1,6 +1,7 @@ import pipes import subprocess import re +import os from .parser import BlockParser from .parser import bail_on @@ -131,20 +132,22 @@ class Disk(Component): 'Predictive Failure Count', ) ERROR_BOOL_KEYS = ('Drive has flagged a S.M.A.R.T alert', ) - REQUIRED_FIELDS = ('Enclosure Device ID', 'Slot Number') + REQUIRED_FIELDS = ('Enclosure Device ID', 'Slot Number', 'WWN') PARSER = BlockParser(rules=[ once_per_block(colon_field('Enclosure Device ID', int_or_na)), rule(colon_field('Slot Number', int)), + rule(colon_field('WWN', str)), rule(colon_field('Other Error Count', int)), rule(colon_field('Predictive Failure Count', int)), rule(colon_field('Media Error Count', int)), rule(colon_field('Drive has flagged a S.M.A.R.T alert', yesnobool)), ], default_constructor=colon_field(None, str)) - def __init__(self, enclosure_id, slot_number, parent, props=None): + def __init__(self, enclosure_id, slot_number, wwn, parent, props=None): self.enclosure_id = enclosure_id self.slot_number = slot_number + self.wwn = wwn self.thresholds = dict( (k, 0) for k @@ -159,6 +162,11 @@ def set_threshold(self, key, value): def identifier(self): return 'PhysDrv [%d:%d]' % (self.enclosure_id, self.slot_number) + @property + def devnode(self): + disk_by_id_path = "/dev/disk/by-id/wwn-0x%s" % self.wwn + return os.path.realpath(disk_by_id_path) + @property def health_status(self): status = {} From 1e710be2e97f23571c911693f75d0219123ba14d Mon Sep 17 00:00:00 2001 From: Vyron Tsingaras Date: Fri, 31 Aug 2018 14:23:06 +0300 Subject: [PATCH 2/3] Add /dev/* node info to the check messages and ability to toggle the fault led via libstoragemgmt --- bin/check_megacli | 60 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/bin/check_megacli b/bin/check_megacli index 7b5cbd5..c6a9eaa 100644 --- a/bin/check_megacli +++ b/bin/check_megacli @@ -2,6 +2,7 @@ # Example script using pymegacli which is suitable for invocation by nagios +from __future__ import print_function import argparse import os import sys @@ -31,11 +32,39 @@ def main(): action='append', help='Subsystems to check. If not passed, defaults to all' ) + parser.add_argument( + '--led', + action='store_true', + help='Use libstoragemgmt (megaraid plugin) to toggle the fault led on unhealthy disks' + ) + parser.set_defaults(led=False) args = parser.parse_args() if os.geteuid() != 0: parser.error('Must run as root!') + if args.led: + try: + import lsm + try: + import lsm.plugin.megaraid + HAVE_LSM_MEGARAID = True + except: + HAVE_LSM_MEGARAID = False + print( + "--led specified but libstoragemgmt megaraid plugin was not found, install like so on Fedora/CentOS... :\n" + "\tyum install libstoragemgmt-megaraid-plugin", + file=sys.stderr + ) + except ImportError: + HAVE_LSM_MEGARAID = False + print( + "--led specified but libstoragemgmt module was not found, install like so on Fedora/CentOS... :\n" + "\tyum install libstoragemgmt libstoragemgmt-megaraid-plugin", + file=sys.stderr + ) + + checks = {} for check in CHECKS: checks[check] = (not args.check or check in args.check) @@ -50,14 +79,20 @@ def main(): } def check_component(component): + component_identifier = component.identifier + if isinstance(component, pymegacli.components.Disk): + component_identifier = component_identifier + ' - ' + component.devnode + if component.healthy: - messages[OK].append('%s is healthy' % component.identifier) + messages[OK].append('%s is healthy' % component_identifier) + return OK else: for message in component.health_messages: messages[CRITICAL].append('%s %s' % ( - component.identifier, + component_identifier, message )) + return CRITICAL controllers = list(connection.controllers) if not controllers: @@ -66,10 +101,19 @@ def main(): for controller in controllers: if checks['PD']: for disk in controller.PDs: - check_component(disk) + disk_health = check_component(disk) + if args.led and HAVE_LSM_MEGARAID: + try: + if CRITICAL == disk_health: + lsm.LocalDisk.fault_led_on(disk.devnode) + else: + lsm.LocalDisk.fault_led_off(disk.devnode) + except: + print('Couldn\'t manipulate enclosure LED', file=sys.stderr) + if checks['LD']: for logical_device in controller.LDs: - check_component(disk) + check_component(logical_device) if 'WriteBack' not in logical_device['Current Cache Policy']: messages[WARNING].append('%s has cache policy %s, which does not include WriteBack' % ( logical_device.identifier, @@ -82,16 +126,16 @@ def main(): messages[WARNING].append('%s is in learn cycle' % bbu.identifier) if messages[CRITICAL]: - print 'CRITICAL: %s' % '; '.join(messages[CRITICAL]) + print('CRITICAL: %s' % '; '.join(messages[CRITICAL])) return CRITICAL elif messages[WARNING]: - print 'WARNING: %s' % '; '.join(messages[WARNING]) + print('WARNING: %s' % '; '.join(messages[WARNING])) return WARNING elif messages[UNKNOWN]: - print 'UNKNOWN: %s' % '; '.join(messages[UNKNOWN]) + print('UNKNOWN: %s' % '; '.join(messages[UNKNOWN])) return UNKNOWN else: - print 'OK: %s' % '; '.join(messages[OK]) + print('OK: %s' % '; '.join(messages[OK])) return OK From 4368484ecd819b4c55b9be8527ac6160cf86d153 Mon Sep 17 00:00:00 2001 From: Vyron Tsingaras Date: Mon, 3 Sep 2018 14:23:44 +0300 Subject: [PATCH 3/3] Fix WWN-to-disk_by_id path mapping as: https://github.com/chicks-net/megamap/issues/1 --- bin/check_megacli | 8 ++------ pymegacli/components.py | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/bin/check_megacli b/bin/check_megacli index c6a9eaa..edc1653 100644 --- a/bin/check_megacli +++ b/bin/check_megacli @@ -79,17 +79,13 @@ def main(): } def check_component(component): - component_identifier = component.identifier - if isinstance(component, pymegacli.components.Disk): - component_identifier = component_identifier + ' - ' + component.devnode - if component.healthy: - messages[OK].append('%s is healthy' % component_identifier) + messages[OK].append('%s is healthy' % component.identifier) return OK else: for message in component.health_messages: messages[CRITICAL].append('%s %s' % ( - component_identifier, + component.identifier, message )) return CRITICAL diff --git a/pymegacli/components.py b/pymegacli/components.py index f4e593c..d0d7ede 100644 --- a/pymegacli/components.py +++ b/pymegacli/components.py @@ -2,6 +2,7 @@ import subprocess import re import os +import glob from .parser import BlockParser from .parser import bail_on @@ -147,7 +148,14 @@ class Disk(Component): def __init__(self, enclosure_id, slot_number, wwn, parent, props=None): self.enclosure_id = enclosure_id self.slot_number = slot_number - self.wwn = wwn + self.wwn = wwn.lower() + disk_by_id_path_glob = "/dev/disk/by-id/wwn-0x%s?" % self.wwn[:-1] + disk_by_id_path_glob_results = glob.glob(disk_by_id_path_glob) + if len(disk_by_id_path_glob_results) == 1: + self.linux_disk_by_id = disk_by_id_path_glob_results[0] + else: + self.linux_disk_by_id = None + self.thresholds = dict( (k, 0) for k @@ -160,12 +168,14 @@ def set_threshold(self, key, value): @property def identifier(self): - return 'PhysDrv [%d:%d]' % (self.enclosure_id, self.slot_number) + return 'PhysDrv [%d:%d] WWN: %s Dev: %s' % (self.enclosure_id, self.slot_number, self.wwn, self.devnode) @property def devnode(self): - disk_by_id_path = "/dev/disk/by-id/wwn-0x%s" % self.wwn - return os.path.realpath(disk_by_id_path) + if self.linux_disk_by_id: + return os.path.realpath(self.linux_disk_by_id) + else: + return None @property def health_status(self):