diff --git a/config/pipeline_config_default.yaml b/config/pipeline_config_default.yaml index 4271752..84a53df 100644 --- a/config/pipeline_config_default.yaml +++ b/config/pipeline_config_default.yaml @@ -77,3 +77,8 @@ detectors: name: var1 header_variables: - pos: level + + NewEventDetector: + method_type: new_event_detector + auto_config: False + params: {} diff --git a/docs/detectors.md b/docs/detectors.md index 204edec..7625b9d 100644 --- a/docs/detectors.md +++ b/docs/detectors.md @@ -87,6 +87,7 @@ List of detectors: * [Random detector](detectors/random_detector.md): Generates random alerts. * [New Value](detectors/new_value.md): Detect new values in the variables in the logs. * [Combo Detector](detectors/combo.md): Detect new combination of variables in the logs. +* [New Event](detectors/new_event.md): Detect previously unseen event IDs in the logs. ## Configuration diff --git a/docs/detectors/new_event.md b/docs/detectors/new_event.md new file mode 100644 index 0000000..5f7a028 --- /dev/null +++ b/docs/detectors/new_event.md @@ -0,0 +1,50 @@ +# New Event Detector + +The New Event Detector raises alerts when previously unseen log templates, distinguished by event IDs, appear in log data. It is useful to detect unexpected types of events in the environment. + +| | Schema | Description | +|------------|----------------------------|--------------------| +| **Input** | [ParserSchema](../schemas.md) | Structured log | +| **Output** | [DetectorSchema](../schemas.md) | Alert / finding | + +## Description + +This detector maintains a lightweight set of observed event IDs and emits an alert when an event ID not present in the set is seen for the first time (subject to configuration). 
+ + +## Configuration example + +```yaml +detectors: + NewEventDetector: + method_type: new_event_detector + auto_config: False + params: {} +``` + + +## Example usage + +```python +from detectmatelibrary.detectors.new_event_detector import NewEventDetector, BufferMode +import detectmatelibrary.schemas as schemas + +detector = NewEventDetector(name="NewEventTest", config=cfg) + +parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["var1"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"timestamp": "123456"} +}) + + +alert = detector.process(parser_data) +``` + +Go back [Index](../index.md) diff --git a/docs/detectors/new_value.md b/docs/detectors/new_value.md index 5d044c4..5fa15e5 100644 --- a/docs/detectors/new_value.md +++ b/docs/detectors/new_value.md @@ -9,7 +9,7 @@ The New Value Detector raises alerts when previously unseen values appear in con ## Description -This detector maintains a lightweight set of observed values per monitored field and emits an alert when a value not present in the set is seen for the first time (subject to configuration). . +This detector maintains a lightweight set of observed values per monitored field and emits an alert when a value not present in the set is seen for the first time (subject to configuration). 
## Configuration example diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 02afb3c..be561f4 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -93,25 +93,25 @@ def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None: return None if (fit_state := self.fitlogic.run()) == FitLogicState.DO_CONFIG: - logger.info(f"<<{self.name}>> use data for configuration") + logger.debug(f"<<{self.name}>> use data for configuration") self.configure(input_=data_buffered) return None elif self.fitlogic.finish_config(): - logger.info(f"<<{self.name}>> finalizing configuration") + logger.debug(f"<<{self.name}>> finalizing configuration") self.set_configuration() if fit_state == FitLogicState.DO_TRAIN: - logger.info(f"<<{self.name}>> use data for training") + logger.debug(f"<<{self.name}>> use data for training") self.train(input_=data_buffered) elif self.fitlogic.finish_training(): - logger.info(f"<<{self.name}>> finalizing training") + logger.debug(f"<<{self.name}>> finalizing training") self.post_train() output_ = self.output_schema() - logger.info(f"<<{self.name}>> processing data") + logger.debug(f"<<{self.name}>> processing data") return_schema = self.run(input_=data_buffered, output_=output_) if not return_schema: - logger.info(f"<<{self.name}>> returns None") + logger.debug(f"<<{self.name}>> returns None") return None logger.debug(f"<<{self.name}>> processed:\n{output_}") diff --git a/src/detectmatelibrary/detectors/__init__.py b/src/detectmatelibrary/detectors/__init__.py index 7ca736e..c10328e 100644 --- a/src/detectmatelibrary/detectors/__init__.py +++ b/src/detectmatelibrary/detectors/__init__.py @@ -1,10 +1,13 @@ from .random_detector import RandomDetector, RandomDetectorConfig from .new_value_detector import NewValueDetector, NewValueDetectorConfig +from .new_event_detector import NewEventDetector, NewEventDetectorConfig __all__ = [ "random_detector", 
"RandomDetectorConfig", "NewValueDetector", "NewValueDetectorConfig", - "RandomDetector" + "RandomDetector", + "NewEventDetector", + "NewEventDetectorConfig" ] diff --git a/src/detectmatelibrary/detectors/new_event_detector.py b/src/detectmatelibrary/detectors/new_event_detector.py new file mode 100644 index 0000000..39097f7 --- /dev/null +++ b/src/detectmatelibrary/detectors/new_event_detector.py @@ -0,0 +1,96 @@ +from detectmatelibrary.common._config._compile import generate_detector_config +from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables, \ + get_global_variables +from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( + EventStabilityTracker +) +from detectmatelibrary.constants import GLOBAL_EVENT_ID +from detectmatelibrary.utils.persistency.event_persistency import EventPersistency +from detectmatelibrary.utils.data_buffer import BufferMode +from detectmatelibrary.schemas import ParserSchema, DetectorSchema + + +class NewEventDetectorConfig(CoreDetectorConfig): + method_type: str = "new_event_detector" + + +class NewEventDetector(CoreDetector): + """Detect event IDs not seen during training as anomalies.""" + + def __init__( + self, + name: str = "NewEventDetector", + config: NewEventDetectorConfig = NewEventDetectorConfig() + ) -> None: + + if isinstance(config, dict): + config = NewEventDetectorConfig.from_dict(config, name) + + super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) + self.config: NewEventDetectorConfig + self.persistency = EventPersistency( + event_data_class=EventStabilityTracker, + ) + # separate persistency that configure() feeds during auto-configuration + self.auto_conf_persistency = EventPersistency( + event_data_class=EventStabilityTracker + ) + + def train(self, input_: ParserSchema) -> None: # type: ignore + """Train the detector by learning the event IDs seen in the input data.""" 
+ self.persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"] + ) + if self.config.global_instances: + global_vars = get_global_variables(input_, self.config.global_instances) + if global_vars: + self.persistency.ingest_event( + event_id=GLOBAL_EVENT_ID, + event_template=input_["template"] + ) + + def detect( + self, input_: ParserSchema, output_: DetectorSchema # type: ignore + ) -> bool: + """Detect event IDs not seen during training.""" + alerts: dict[str, str] = {} + overall_score = 0.0 + + current_event_id = input_["EventID"] + known_events = self.persistency.get_events_seen() + + if self.config.global_instances and GLOBAL_EVENT_ID in known_events: + global_vars = get_global_variables(input_, self.config.global_instances) + alerts[f"Global - {global_vars}"] = f"Unknown event ID: '{current_event_id}'" + overall_score += 1.0 + elif current_event_id not in known_events: + configured_variables = get_configured_variables(input_, self.config.events) + alerts[f"EventID {current_event_id} - {configured_variables}"] = ( + f"Unknown event ID: '{current_event_id}'" + ) + overall_score += 1.0 + + if overall_score > 0: + output_["score"] = overall_score + output_["description"] = \ + f"{self.name} detects event IDs not encountered in training as anomalies." 
+ output_["alertsObtain"].update(alerts) + return True + + return False + + def configure(self, input_: ParserSchema) -> None: # type: ignore + self.auto_conf_persistency.ingest_event( + event_id=input_["EventID"], + event_template=input_["template"] + ) + + def set_configuration(self) -> None: + config_dict = generate_detector_config( + variable_selection={}, + detector_name=self.name, + method_type=self.config.method_type + ) + # Rebuild the config object from the generated dictionary + self.config = NewEventDetectorConfig.from_dict(config_dict, self.name) diff --git a/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py b/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py index 399edfd..0c41a05 100644 --- a/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py +++ b/src/detectmatelibrary/parsers/template_matcher/_matcher_op.py @@ -205,7 +205,6 @@ def extract_parameters(log: str, template: str) -> tuple[str, ...] | None: pattern_parts_escaped = [re.escape(part) for part in pattern_parts] regex_pattern = "(.*?)".join(pattern_parts_escaped) regex = "^" + regex_pattern + "$" - # matches = re.search(regex, log) matches = safe_search(regex, log, 1) if matches: groups: tuple[str, ...] = matches.groups() diff --git a/tests/test_detectors/test_new_event_detector.py b/tests/test_detectors/test_new_event_detector.py new file mode 100644 index 0000000..015c24f --- /dev/null +++ b/tests/test_detectors/test_new_event_detector.py @@ -0,0 +1,254 @@ +"""Tests for NewEventDetector class. 
+ +This module tests the NewEventDetector implementation including: +- Initialization and configuration +- Training functionality to learn known values +- Detection logic for new/unknown values +- Event-specific configuration handling +- Input/output schema validation +""" + +import json + +from detectmatelibrary.detectors.new_event_detector import NewEventDetector, NewEventDetectorConfig, \ + BufferMode +from detectmatelibrary.parsers.template_matcher import MatcherParser +from detectmatelibrary.helper.from_to import From +import detectmatelibrary.schemas as schemas + +from detectmatelibrary.utils.aux import time_test_mode +from detectmatelibrary.common._core_op._fit_logic import ConfigState, TrainState +from detectmatelibrary.constants import GLOBAL_EVENT_ID + + +# Set time test mode for consistent timestamps +time_test_mode() + + +config = { + "detectors": { + "CustomInit": { + "method_type": "new_event_detector", + "auto_config": False, + "params": {} + }, + "MultipleDetector": { + "method_type": "new_event_detector", + "auto_config": False, + "params": {} + }, + "NewEventDetector": { + "method_type": "new_event_detector", + "auto_config": False, + "params": {} + } + } +} + + +class TestNewEventDetectorInitialization: + """Test NewEventDetector initialization and configuration.""" + + def test_default_initialization(self): + """Test detector initialization with default parameters.""" + detector = NewEventDetector() + + assert detector.name == "NewEventDetector" + assert hasattr(detector, 'config') + assert detector.data_buffer.mode == BufferMode.NO_BUF + assert detector.input_schema == schemas.ParserSchema + assert detector.output_schema == schemas.DetectorSchema + assert hasattr(detector, 'persistency') + + def test_custom_config_initialization(self): + """Test detector initialization with custom configuration.""" + detector = NewEventDetector(name="CustomInit", config=config) + + assert detector.name == "CustomInit" + assert hasattr(detector, 'persistency') + 
assert isinstance(detector.persistency.events_data, dict) + + +class TestNewEventDetectorTraining: + """Test NewEventDetector training functionality.""" + + def test_train_multiple_event_ids(self): + """Test training with multiple different event ids.""" + detector = NewEventDetector(config=config, name="MultipleDetector") + event_ids = {0, 3, 8, 9} + for event in event_ids: + parser_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": event, + "template": "test template", + "variables": ["0", "assa"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(parser_data) + + assert len(detector.persistency.events_seen) == len(event_ids) + event_seen = detector.persistency.get_events_seen() + assert event_seen == event_ids + + +class TestNewEventDetectorDetection: + """Test NewEventDetector detection functionality.""" + + def test_detect_known_event_id_no_alert(self): + detector = NewEventDetector(config=config, name="MultipleDetector") + + # Train with an event_id + train_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd", "asdasd"], + "logID": "1", + "parsedLogID": "1", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "INFO"} + }) + detector.train(train_data) + + # Detect with the same event_id + test_data = schemas.ParserSchema({ + "parserType": "test", + "EventID": 1, + "template": "test template", + "variables": ["adsasd"], + "logID": "2", + "parsedLogID": "2", + "parserID": "test_parser", + "log": "test log message", + "logFormatVariables": {"level": "CRITICAL"} + }) + output = schemas.DetectorSchema() + + result = detector.detect(test_data, output) + + assert not result + assert output.score == 0.0 + + +_PARSER_CONFIG = { + "parsers": { + "MatcherParser": { + "method_type": "matcher_parser", + "auto_config": False, + 
"log_format": "type= msg=audit(