diff --git a/docs/detectors.md b/docs/detectors.md index 204edec..e0c98fc 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -87,6 +87,7 @@ List of detectors: * [Random detector](detectors/random_detector.md): Generates random alerts. * [New Value](detectors/new_value.md): Detect new values in the variables in the logs. * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. +* [Value Range](detectors/value_range.md): Detect numeric values outside the learned range of variables in the logs. ## Configuration diff --git a/docs/detectors/value_range.md b/docs/detectors/value_range.md new file mode 100644 index 0000000..e69de29 diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 02afb3c..be561f4 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -93,25 +93,25 @@ def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None: return None if (fit_state := self.fitlogic.run()) == FitLogicState.DO_CONFIG: - logger.info(f"<<{self.name}>> use data for configuration") + logger.debug(f"<<{self.name}>> use data for configuration") self.configure(input_=data_buffered) return None elif self.fitlogic.finish_config(): - logger.info(f"<<{self.name}>> finalizing configuration") + logger.debug(f"<<{self.name}>> finalizing configuration") self.set_configuration() if fit_state == FitLogicState.DO_TRAIN: - logger.info(f"<<{self.name}>> use data for training") + logger.debug(f"<<{self.name}>> use data for training") self.train(input_=data_buffered) elif self.fitlogic.finish_training(): - logger.info(f"<<{self.name}>> finalizing training") + logger.debug(f"<<{self.name}>> finalizing training") self.post_train() output_ = self.output_schema() - logger.info(f"<<{self.name}>> processing data") + logger.debug(f"<<{self.name}>> processing data") return_schema = self.run(input_=data_buffered, output_=output_) if not return_schema: - logger.info(f"<<{self.name}>> returns None") + 
logger.debug(f"<<{self.name}>> returns None") return None logger.debug(f"<<{self.name}>> processed:\n{output_}") diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 7ca736e..a8b02f2 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -1,10 +1,13 @@ from .random_detector import RandomDetector, RandomDetectorConfig from .new_value_detector import NewValueDetector, NewValueDetectorConfig +from .value_range_detector import ValueRangeDetector, ValueRangeDetectorConfig __all__ = [ "random_detector", "RandomDetectorConfig", "NewValueDetector", "NewValueDetectorConfig", - "RandomDetector" + "RandomDetector", + "ValueRangeDetector", + "ValueRangeDetectorConfig" ] diff --git a/src/detectmatelibrary/detectors/value_range_detector.py b/src/detectmatelibrary/detectors/value_range_detector.py new file mode 100644 index 0000000..86478c0 --- /dev/null +++ b/src/detectmatelibrary/detectors/value_range_detector.py @@ -0,0 +1,181 @@ +from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig + +from detectmatelibrary.common.detector import ( + CoreDetectorConfig, + CoreDetector, + get_configured_variables, + get_global_variables, + validate_config_coverage, +) +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + EventStabilityTracker +) +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.data_buffer import BufferMode + +from detectmatelibrary.schemas import ParserSchema, DetectorSchema +from detectmatelibrary.constants import GLOBAL_EVENT_ID + +from typing_extensions import override +from tools.logging import logger + + +class ValueRangeDetectorConfig(CoreDetectorConfig): + method_type: str = "value_range_detector" + + use_stable_vars: bool = True + use_static_vars: 
class ValueRangeDetector(CoreDetector):
    """Flag numeric variable values that fall outside the range seen in training.

    Training casts every configured variable to a number and stores it per
    event ID. Detection reports a value when it is smaller than the minimum
    or larger than the maximum of the values stored for that variable.
    """

    def __init__(
        self,
        name: str = "ValueRangeDetector",
        config: ValueRangeDetectorConfig = ValueRangeDetectorConfig()
    ) -> None:
        """Create the detector.

        Args:
            name: Detector name; also used to look up the detector section
                when ``config`` is given as a plain dictionary.
            config: Either a ``ValueRangeDetectorConfig`` or a dictionary
                that ``ValueRangeDetectorConfig.from_dict`` can parse.
        """
        # NOTE(review): the default config instance is created once at
        # definition time and shared by all detectors built without an
        # explicit config -- confirm nothing mutates it in place.
        if isinstance(config, dict):
            config = ValueRangeDetectorConfig.from_dict(config, name)

        super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config)
        self.config: ValueRangeDetectorConfig  # type narrowing for IDE
        # Values learned during training; detection reads its ranges from here.
        self.persistency = EventPersistency(
            event_data_class=EventStabilityTracker,
        )
        # Filled only by configure(); set_configuration() reads it to pick
        # the STABLE / STATIC variables for the generated configuration.
        self.auto_conf_persistency = EventPersistency(
            event_data_class=EventStabilityTracker
        )

    def cast_val_to_numeric(self, configured_variables, k, remove):
        """Convert ``configured_variables[k]`` to ``int`` or ``float`` in place.

        Values that already are ``int`` or ``float`` are left untouched.
        Anything else is parsed as ``int`` first and as ``float`` second.

        Args:
            configured_variables: Mutable mapping of variable name to value.
            k: Key of the value to convert.
            remove: List meant to collect keys that should be dropped by the
                caller. This implementation never appends to it because a
                non-numeric value terminates the program instead; the
                parameter is kept so existing callers keep working.

        Raises:
            SystemExit: With exit code 1 when the value is not numeric.
        """
        v = configured_variables[k]
        if isinstance(v, (int, float)):
            return
        for caster in (int, float):
            try:
                configured_variables[k] = caster(v)
            except ValueError:
                continue
            return
        logger.error(f"Non-numeric value '{v}' appeared in training of {self.__class__.__name__}"
                     f" with the name {self.name}.")
        # TODO: what to do in this case; exit the program or skipping the data?
        # ``raise SystemExit`` instead of ``exit()``: the latter is injected
        # by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    def _cast_all_to_numeric(self, variables) -> None:
        """Cast every non-``None`` value of ``variables`` to a number in place."""
        remove: list = []
        # Iterate over a snapshot of the keys because values are reassigned.
        for key in list(variables):
            if variables[key] is None:
                # Nothing to parse; leave the entry as it is.
                continue
            self.cast_val_to_numeric(variables, key, remove)
        # Honour keys an overriding cast_val_to_numeric may have collected.
        for key in remove:
            variables.pop(key, None)

    def _out_of_range_alerts(self, event_tracker, variables, label) -> dict[str, str]:
        """Return one alert per tracked variable whose value leaves the learned range.

        Args:
            event_tracker: Tracker of one event; ``get_data()`` must map
                variable names to objects exposing ``unique_set``.
            variables: Mapping of variable name to the value found in the
                current input; values are cast to numbers in place.
            label: Prefix of the alert key, e.g. ``"Global"``.
        """
        found: dict[str, str] = {}
        if not variables:
            return found
        for var_name, multi_tracker in event_tracker.get_data().items():
            # Check for absence *before* casting so that a tracked variable
            # missing from this input is skipped instead of raising KeyError.
            if variables.get(var_name) is None:
                continue
            self.cast_val_to_numeric(variables, var_name, [])
            value = variables[var_name]
            # Only numeric entries can form a range; an empty result would
            # make min()/max() raise.
            learned = [x for x in multi_tracker.unique_set if isinstance(x, (int, float))]
            if not learned:
                continue
            min_ = min(learned)
            max_ = max(learned)
            if value < min_ or value > max_:
                found[f"{label} - {var_name}"] = (
                    f"Out of range value: '{value}' ({min_} - {max_})"
                )
        return found

    def train(self, input_: ParserSchema) -> None:  # type: ignore
        """Learn the values of the configured (and global) variables of ``input_``.

        Raises:
            SystemExit: With exit code 1 when a variable is not numeric.
        """
        configured_variables = get_configured_variables(input_, self.config.events)
        self._cast_all_to_numeric(configured_variables)
        self.persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            named_variables=configured_variables
        )
        if self.config.global_instances:
            global_vars = get_global_variables(input_, self.config.global_instances)
            if global_vars:
                # detect() compares numbers, so the learned global values
                # have to be stored as numbers as well.
                self._cast_all_to_numeric(global_vars)
                self.persistency.ingest_event(
                    event_id=GLOBAL_EVENT_ID,
                    event_template=input_["template"],
                    named_variables=global_vars
                )

    def detect(
        self, input_: ParserSchema, output_: DetectorSchema  # type: ignore
    ) -> bool:
        """Check the variables of ``input_`` against the learned ranges.

        On an anomaly, ``output_`` receives the score (one point per
        out-of-range variable), a description and the individual alerts.

        Returns:
            ``True`` when at least one value is out of range, else ``False``.

        Raises:
            SystemExit: With exit code 1 when a checked value is not numeric.
        """
        alerts: dict[str, str] = {}
        configured_variables = get_configured_variables(input_, self.config.events)

        current_event_id = input_["EventID"]
        known_events = self.persistency.get_events_data()

        if current_event_id in known_events:
            alerts.update(self._out_of_range_alerts(
                known_events[current_event_id],
                configured_variables,
                f"EventID {current_event_id}",
            ))

        if self.config.global_instances and GLOBAL_EVENT_ID in known_events:
            global_vars = get_global_variables(input_, self.config.global_instances)
            alerts.update(self._out_of_range_alerts(
                known_events[GLOBAL_EVENT_ID], global_vars, "Global"
            ))

        if not alerts:
            return False

        # Every alert contributes exactly one point to the score.
        output_["score"] = float(len(alerts))
        output_["description"] = f"{self.name} detects values not encountered in training as anomalies."
        output_["alertsObtain"].update(alerts)
        return True

    def configure(self, input_: ParserSchema) -> None:  # type: ignore
        """Record ``input_`` in the auto-configuration store."""
        self.auto_conf_persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            variables=input_["variables"],
            named_variables=input_["logFormatVariables"],
        )

    @override
    def post_train(self) -> None:
        """Validate config coverage after training unless auto_config is on."""
        if not self.config.auto_config:
            validate_config_coverage(self.name, self.config.events, self.persistency)

    def set_configuration(self) -> None:
        """Replace ``self.config`` with one generated from the configure-phase data.

        For every event in the auto-configuration store, the variables
        classified as ``STABLE`` and/or ``STATIC`` (depending on
        ``use_stable_vars`` / ``use_static_vars``) are selected. A warning
        is logged when the generated configuration contains no events.
        """
        variables = {}
        for event_id, tracker in self.auto_conf_persistency.get_events_data().items():
            # UNSTABLE VARS ARE POSSIBLE HERE!
            stable = []
            if self.config.use_stable_vars:
                stable = tracker.get_features_by_classification("STABLE")  # type: ignore
            static = []
            if self.config.use_static_vars:
                static = tracker.get_features_by_classification("STATIC")  # type: ignore
            vars_ = stable + static
            if vars_:
                variables[event_id] = vars_
        config_dict = generate_detector_config(
            variable_selection=variables,
            detector_name=self.name,
            method_type=self.config.method_type,
        )
        # Build a fresh config object from the generated dictionary.
        self.config = ValueRangeDetectorConfig.from_dict(config_dict, self.name)
        events = self.config.events
        if isinstance(events, EventsConfig) and not events.events:
            logger.warning(
                f"[{self.name}] auto_config=True generated an empty configuration. "
                "No stable variables were found in configure-phase data. "
                "The detector will produce no alerts."
            )
b/tests/test_detectors/test_value_range_detector.py @@ -0,0 +1,392 @@ +"""Tests for ValueRangeDetector class. + +This module tests the ValueRangeDetector implementation including: +- Initialization and configuration +- Training functionality to learn known values +- Detection logic for new/unknown values +- Event-specific configuration handling +- Input/output schema validation +""" + +import random +import pytest +from detectmatelibrary.common._core_op._fit_logic import TrainState +from detectmatelibrary.detectors.value_range_detector import (ValueRangeDetector, ValueRangeDetectorConfig, \ + BufferMode) +from detectmatelibrary.common._core_op._fit_logic import ConfigState +from detectmatelibrary.constants import GLOBAL_EVENT_ID +from detectmatelibrary.parsers.template_matcher import MatcherParser +from detectmatelibrary.helper.from_to import From +import detectmatelibrary.schemas as schemas +from detectmatelibrary.utils.aux import time_test_mode +# Set time test mode for consistent timestamps +time_test_mode() + + +config = { + "detectors": { + "CustomInit": { + "method_type": "value_range_detector", + "auto_config": False, + "params": {}, + "events": { + 1: { + "instance1": { + "params": {}, + "variables": [{ + "pos": 1, "name": "sad", "params": {} + }] + } + } + } + }, + "MultipleDetector": { + "method_type": "value_range_detector", + "auto_config": False, + "params": {}, + "events": { + 1: { + "test": { + "params": {}, + "variables": [{ + "pos": 1, "name": "test", "params": {} + }] + } + } + } + } + } +} + + +class TestValueRangeDetectorInitialization: + """Test ValueRangeDetector initialization and configuration.""" + + def test_default_initialization(self): + """Test detector initialization with default parameters.""" + detector = ValueRangeDetector() + + assert detector.name == "ValueRangeDetector" + assert hasattr(detector, 'config') + assert detector.data_buffer.mode == BufferMode.NO_BUF + assert detector.input_schema == schemas.ParserSchema + assert 
detector.output_schema == schemas.DetectorSchema + assert hasattr(detector, 'persistency') + + def test_custom_config_initialization(self): + """Test detector initialization with custom configuration.""" + detector = ValueRangeDetector(name="CustomInit", config=config) + + assert detector.name == "CustomInit" + assert hasattr(detector, 'persistency') + assert isinstance(detector.persistency.events_data, dict) + + +class TestValueRangeDetectorTraining: + """Test ValueRangeDetector training functionality.""" + + def test_train_multiple_values(self): + """Test training with multiple different values.""" + detector = ValueRangeDetector(config=config, name="MultipleDetector") + # Train with multiple values (the minimum and maximum value should be captured) + min_val = 100000 + max_val = 0 + for event in range(3): + for _ in range(300): + value = random.randint(0, 300) + if event == 1: + min_val = min(min_val, value) + max_val = max(max_val, value) + parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": event, + "template": "test template", + "variables": ["val0", str(value), "val2", "val3", "val4"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {} + }) + detector.train(parser_data) + + for _ in range(300): + value = random.uniform(0, 300) + if event == 1: + min_val = min(min_val, value) + max_val = max(max_val, value) + parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": event, + "template": "test template", + "variables": ["val0", str(value), "val2", "val3", "val4"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {} + }) + detector.train(parser_data) + + # Only event 1 should be tracked (based on events config) + assert len(detector.persistency.events_data) == 1 + event_data = detector.persistency.get_event_data(1) + assert event_data is not None + # Check the variable at position 1 
(named "test") + assert min(event_data["test"].unique_set) == min_val + assert max(event_data["test"].unique_set) == max_val + + def test_train_detect_non_numeric(self): + """Test training with multiple different values.""" + + detector = ValueRangeDetector(config=config, name="MultipleDetector") + # Train with multiple values (the minimum and maximum value should be captured) + parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["val0", f"val{random.randint(0, 300)}", "val2", "val3", "val4"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {} + }) + with pytest.raises(SystemExit) as excinfo: + detector.train(parser_data) + assert excinfo.value.code == 1 + normal_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["val0", f"{random.randint(0, 300)}", "val2", "val3", "val4"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {} + }) + detector.train(normal_data) + with pytest.raises(SystemExit) as excinfo: + output = schemas.DetectorSchema() + detector.detect(parser_data, output) + assert excinfo.value.code == 1 + + +class TestValueRangeDetectorDetection: + """Test ValueRangeDetector detection functionality.""" + + def test_detect_learned_value_range_no_alert(self): + detector = ValueRangeDetector(config=config, name="MultipleDetector") + + # Train with values + for val in ["1", "5000", "2130"]: + train_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", val], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(train_data) + + # Detect with the same value + test_data = schemas.ParserSchema({ + "parserType": 
"test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "4321"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert not result + assert output.score == 0.0 + + def test_detect_known_value_ranges_alert(self): + detector = ValueRangeDetector(config=config, name="MultipleDetector") + + # Train with values + for val in ["1", "5000", "2130"]: + train_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", val], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(train_data) + + # Detect with the different value + test_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "5001"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert result + assert output.score == 1.0 + + # Detect with the different value + test_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "0"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert result + assert output.score == 1.0 + + +_PARSER_CONFIG = { + "parsers": { + "MatcherParser": { + "method_type": "matcher_parser", + "auto_config": False, + "log_format": "type= msg=audit(