From f71a8b0039738fbf0ef3b73d5b6a034bcd9d7d1f Mon Sep 17 00:00:00 2001 From: Ernst Leierzopf Date: Fri, 10 Apr 2026 16:37:27 +0200 Subject: [PATCH 1/5] add files for ValueRangeDetector. --- docs/detectors.md | 1 + src/detectmatelibrary/common/core.py | 12 ++++++------ .../detectors/value_range_detector.py | 0 tests/test_detectors/test_value_range_detector.py | 0 4 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 src/detectmatelibrary/detectors/value_range_detector.py create mode 100644 tests/test_detectors/test_value_range_detector.py diff --git a/docs/detectors.md b/docs/detectors.md index 204edec..e0c98fc 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -87,6 +87,7 @@ List of detectors: * [Random detector](detectors/random_detector.md): Generates random alerts. * [New Value](detectors/new_value.md): Detect new values in the variables in the logs. * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. +* [Value Range](detectors/value_range.md): Detect numeric value ranges in variables in the logs. 
## Configuration diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 02afb3c..be561f4 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -93,25 +93,25 @@ def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None: return None if (fit_state := self.fitlogic.run()) == FitLogicState.DO_CONFIG: - logger.info(f"<<{self.name}>> use data for configuration") + logger.debug(f"<<{self.name}>> use data for configuration") self.configure(input_=data_buffered) return None elif self.fitlogic.finish_config(): - logger.info(f"<<{self.name}>> finalizing configuration") + logger.debug(f"<<{self.name}>> finalizing configuration") self.set_configuration() if fit_state == FitLogicState.DO_TRAIN: - logger.info(f"<<{self.name}>> use data for training") + logger.debug(f"<<{self.name}>> use data for training") self.train(input_=data_buffered) elif self.fitlogic.finish_training(): - logger.info(f"<<{self.name}>> finalizing training") + logger.debug(f"<<{self.name}>> finalizing training") self.post_train() output_ = self.output_schema() - logger.info(f"<<{self.name}>> processing data") + logger.debug(f"<<{self.name}>> processing data") return_schema = self.run(input_=data_buffered, output_=output_) if not return_schema: - logger.info(f"<<{self.name}>> returns None") + logger.debug(f"<<{self.name}>> returns None") return None logger.debug(f"<<{self.name}>> processed:\n{output_}") diff --git a/src/detectmatelibrary/detectors/value_range_detector.py b/src/detectmatelibrary/detectors/value_range_detector.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_detectors/test_value_range_detector.py b/tests/test_detectors/test_value_range_detector.py new file mode 100644 index 0000000..e69de29 From 42bcc5df3da411a26a554b72aa7fbdfab0331aa3 Mon Sep 17 00:00:00 2001 From: Ernst Leierzopf Date: Sat, 11 Apr 2026 00:26:39 +0200 Subject: [PATCH 2/5] add structure for ValueRangeDetector. 
--- docs/detectors/value_range.md | 0 src/detectmatelibrary/detectors/__init__.py | 5 +- .../detectors/value_range_detector.py | 150 +++++++++ .../test_value_range_detector.py | 313 ++++++++++++++++++ 4 files changed, 467 insertions(+), 1 deletion(-) create mode 100644 docs/detectors/value_range.md diff --git a/docs/detectors/value_range.md b/docs/detectors/value_range.md new file mode 100644 index 0000000..e69de29 diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 7ca736e..a8b02f2 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -1,10 +1,13 @@ from .random_detector import RandomDetector, RandomDetectorConfig from .new_value_detector import NewValueDetector, NewValueDetectorConfig +from .value_range_detector import ValueRangeDetector, ValueRangeDetectorConfig __all__ = [ "random_detector", "RandomDetectorConfig", "NewValueDetector", "NewValueDetectorConfig", - "RandomDetector" + "RandomDetector", + "ValueRangeDetector", + "ValueRangeDetectorConfig" ] diff --git a/src/detectmatelibrary/detectors/value_range_detector.py b/src/detectmatelibrary/detectors/value_range_detector.py index e69de29..d538675 100644 --- a/src/detectmatelibrary/detectors/value_range_detector.py +++ b/src/detectmatelibrary/detectors/value_range_detector.py @@ -0,0 +1,150 @@ +from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common._config._formats import EventsConfig + +from detectmatelibrary.common.detector import ( + CoreDetectorConfig, + CoreDetector, + get_configured_variables, + get_global_variables, + validate_config_coverage, +) +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + EventStabilityTracker +) +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.data_buffer import BufferMode + +from 
from detectmatelibrary.schemas import ParserSchema, DetectorSchema
from detectmatelibrary.constants import GLOBAL_EVENT_ID

from typing import Any

from typing_extensions import override
from tools.logging import logger


class ValueRangeDetectorConfig(CoreDetectorConfig):
    """Configuration for :class:`ValueRangeDetector`."""

    # Method identifier handed to generate_detector_config() when the
    # configuration is generated automatically.
    method_type: str = "value_range_detector"

    # Auto-configuration: include variables classified as "STABLE".
    use_stable_vars: bool = True
    # Auto-configuration: include variables classified as "STATIC".
    use_static_vars: bool = True


class ValueRangeDetector(CoreDetector):
    """Flag variable values that were not observed during training.

    Values of the configured variables are learned per event ID and, when
    ``global_instances`` is configured, additionally under
    ``GLOBAL_EVENT_ID``. Detection emits one alert for every tracked
    variable whose current value is missing from the learned set.

    NOTE(review): detection currently relies on exact membership in each
    tracker's ``unique_set``; no numeric range comparison is performed in
    this class yet.
    """

    def __init__(
        self,
        name: str = "ValueRangeDetector",
        config: ValueRangeDetectorConfig | dict[str, Any] | None = None,
    ) -> None:
        """Create the detector.

        Args:
            name: Component name; also passed to ``from_dict`` when
                ``config`` is given as a dictionary.
            config: A config object, a configuration dictionary, or ``None``
                to use a fresh default ``ValueRangeDetectorConfig``.
        """
        # Build the default per instance: a default created in the signature
        # is evaluated once at definition time and shared by every detector.
        if config is None:
            config = ValueRangeDetectorConfig()
        elif isinstance(config, dict):
            config = ValueRangeDetectorConfig.from_dict(config, name)

        super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config)
        self.config: ValueRangeDetectorConfig  # type narrowing for IDE
        self.persistency = EventPersistency(
            event_data_class=EventStabilityTracker,
        )
        # Separate store filled by configure(); set_configuration() reads its
        # STABLE/STATIC classifications to choose the variables to monitor.
        self.auto_conf_persistency = EventPersistency(
            event_data_class=EventStabilityTracker
        )

    def train(self, input_: ParserSchema) -> None:  # type: ignore
        """Learn the configured variable values of one parsed log event."""
        configured_variables = get_configured_variables(input_, self.config.events)
        self.persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            named_variables=configured_variables
        )
        if not self.config.global_instances:
            return

        global_vars = get_global_variables(input_, self.config.global_instances)
        if global_vars:
            # Global variables are stored under a dedicated pseudo event ID.
            self.persistency.ingest_event(
                event_id=GLOBAL_EVENT_ID,
                event_template=input_["template"],
                named_variables=global_vars
            )

    @staticmethod
    def _unknown_value_alerts(
        event_tracker: Any, observed: Any, label: str
    ) -> dict[str, str]:
        """Return one alert per tracked variable whose observed value is new.

        Args:
            event_tracker: Learned data of one event; ``get_data()`` must map
                variable names to trackers exposing ``unique_set``.
            observed: Mapping of variable name to the value in the current log.
            label: Prefix of the alert key (``"EventID <id>"`` or ``"Global"``).
        """
        alerts: dict[str, str] = {}
        for var_name, multi_tracker in event_tracker.get_data().items():
            value = observed.get(var_name)
            if value is None:
                # Variable is absent from this log line: nothing to compare.
                continue
            if value not in multi_tracker.unique_set:
                alerts[f"{label} - {var_name}"] = f"Unknown value: '{value}'"
        return alerts

    def detect(
        self, input_: ParserSchema, output_: DetectorSchema  # type: ignore
    ) -> bool:
        """Check one parsed log event against the learned values.

        On an anomaly, ``output_`` receives the score (number of alerts), a
        description and the alerts themselves.

        Returns:
            ``True`` if at least one alert was raised, otherwise ``False``.
        """
        alerts: dict[str, str] = {}
        configured_variables = get_configured_variables(input_, self.config.events)

        current_event_id = input_["EventID"]
        known_events = self.persistency.get_events_data()

        # Events never seen in training have no learned data and raise no alert.
        if current_event_id in known_events:
            alerts.update(self._unknown_value_alerts(
                known_events[current_event_id],
                configured_variables,
                f"EventID {current_event_id}",
            ))

        if self.config.global_instances and GLOBAL_EVENT_ID in known_events:
            global_vars = get_global_variables(input_, self.config.global_instances)
            alerts.update(self._unknown_value_alerts(
                known_events[GLOBAL_EVENT_ID], global_vars, "Global",
            ))

        if not alerts:
            return False

        # Alert keys are unique per variable, so each alert contributes 1.0.
        output_["score"] = float(len(alerts))
        output_["description"] = f"{self.name} detects values not encountered in training as anomalies."
        output_["alertsObtain"].update(alerts)
        return True

    def configure(self, input_: ParserSchema) -> None:  # type: ignore
        """Feed one parsed log event into the auto-configuration store."""
        self.auto_conf_persistency.ingest_event(
            event_id=input_["EventID"],
            event_template=input_["template"],
            variables=input_["variables"],
            named_variables=input_["logFormatVariables"],
        )

    @override
    def post_train(self) -> None:
        """Validate config coverage after training unless auto-configured."""
        if not self.config.auto_config:
            validate_config_coverage(self.name, self.config.events, self.persistency)

    def set_configuration(self) -> None:
        """Generate the detector configuration from configure-phase data.

        Variables classified as "STABLE" and/or "STATIC" (depending on
        ``use_stable_vars`` / ``use_static_vars``) are selected per event and
        ``self.config`` is rebuilt from the generated configuration.
        """
        variables: dict[Any, Any] = {}
        for event_id, tracker in self.auto_conf_persistency.get_events_data().items():
            stable = []
            if self.config.use_stable_vars:
                stable = tracker.get_features_by_classification("STABLE")  # type: ignore
            static = []
            if self.config.use_static_vars:
                static = tracker.get_features_by_classification("STATIC")  # type: ignore
            vars_ = stable + static
            if vars_:
                variables[event_id] = vars_
        config_dict = generate_detector_config(
            variable_selection=variables,
            detector_name=self.name,
            method_type=self.config.method_type,
        )
        # Rebuild the config object from the generated dictionary.
        self.config = ValueRangeDetectorConfig.from_dict(config_dict, self.name)
        events = self.config.events
        if isinstance(events, EventsConfig) and not events.events:
            logger.warning(
                f"[{self.name}] auto_config=True generated an empty configuration. "
                "No stable variables were found in configure-phase data. "
                "The detector will produce no alerts."
            )
+ +This module tests the ValueRangeDetector implementation including: +- Initialization and configuration +- Training functionality to learn known values +- Detection logic for new/unknown values +- Event-specific configuration handling +- Input/output schema validation +""" + +from detectmatelibrary.common._core_op._fit_logic import TrainState +from detectmatelibrary.detectors.value_range_detector import ( + ValueRangeDetector, ValueRangeDetectorConfig, BufferMode +) +from detectmatelibrary.common._core_op._fit_logic import ConfigState +from detectmatelibrary.constants import GLOBAL_EVENT_ID +from detectmatelibrary.parsers.template_matcher import MatcherParser +from detectmatelibrary.helper.from_to import From +import detectmatelibrary.schemas as schemas + +from detectmatelibrary.utils.aux import time_test_mode + +# Set time test mode for consistent timestamps +time_test_mode() + + +config = { + "detectors": { + "CustomInit": { + "method_type": "value_range_detector", + "auto_config": False, + "params": {}, + "events": { + 1: { + "instance1": { + "params": {}, + "variables": [{ + "pos": 0, "name": "sad", "params": {} + }] + } + } + } + }, + "MultipleDetector": { + "method_type": "value_range_detector", + "auto_config": False, + "params": {}, + "events": { + 1: { + "test": { + "params": {}, + "variables": [{ + "pos": 1, "name": "test", "params": {} + }], + "header_variables": [{ + "pos": "level", "params": {} + }] + } + } + } + } + } +} + + +class TestValueRangeDetectorInitialization: + """Test ValueRangeDetector initialization and configuration.""" + + def test_default_initialization(self): + """Test detector initialization with default parameters.""" + detector = ValueRangeDetector() + + assert detector.name == "ValueRangeDetector" + assert hasattr(detector, 'config') + assert detector.data_buffer.mode == BufferMode.NO_BUF + assert detector.input_schema == schemas.ParserSchema + assert detector.output_schema == schemas.DetectorSchema + assert hasattr(detector, 
'persistency') + + def test_custom_config_initialization(self): + """Test detector initialization with custom configuration.""" + detector = ValueRangeDetector(name="CustomInit", config=config) + + assert detector.name == "CustomInit" + assert hasattr(detector, 'persistency') + assert isinstance(detector.persistency.events_data, dict) + + +class TestValueRangeDetectorTraining: + """Test ValueRangeDetector training functionality.""" + + def test_train_multiple_values(self): + """Test training with multiple different values.""" + detector = ValueRangeDetector(config=config, name="MultipleDetector") + # Train with multiple values (only event 1 should be tracked per config) + for event in range(3): + for level in ["INFO", "WARNING", "ERROR"]: + parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": event, + "template": "test template", + "variables": ["0", "assa"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": level} + }) + detector.train(parser_data) + + # Only event 1 should be tracked (based on events config) + assert len(detector.persistency.events_data) == 1 + event_data = detector.persistency.get_event_data(1) + assert event_data is not None + # Check the level values + assert "INFO" in event_data["level"].unique_set + assert "WARNING" in event_data["level"].unique_set + assert "ERROR" in event_data["level"].unique_set + # Check the variable at position 1 (named "test") + assert "assa" in event_data["test"].unique_set + + +class TestValueRangeDetectorDetection: + """Test ValueRangeDetector detection functionality.""" + + def test_detect_known_value_no_alert(self): + detector = ValueRangeDetector(config=config, name="MultipleDetector") + + # Train with a value + train_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "asdasd"], + "logID": "1", + "parsedLogID": "1", + "parserID": 
"test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(train_data) + + # Detect with the same value + test_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 12, + "template": "test template", + "variables": ["adsasd"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert not result + assert output.score == 0.0 + + def test_detect_known_value_alert(self): + detector = ValueRangeDetector(config=config, name="MultipleDetector") + + # Train with a value + train_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "asdasd"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(train_data) + + # Detect with the same value + test_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "asdasd"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert result + assert output.score == 1.0 + + +_PARSER_CONFIG = { + "parsers": { + "MatcherParser": { + "method_type": "matcher_parser", + "auto_config": False, + "log_format": "type= msg=audit(