From e5fe2e9b94b753f4f3a525675622cc4355ec3309 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 15:31:50 +0100 Subject: [PATCH 01/12] add logging function from service --- src/tools/logging.py | 28 +++++++++++ tests/test_others/test_logging.py | 84 +++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 src/tools/logging.py create mode 100644 tests/test_others/test_logging.py diff --git a/src/tools/logging.py b/src/tools/logging.py new file mode 100644 index 0000000..26ee663 --- /dev/null +++ b/src/tools/logging.py @@ -0,0 +1,28 @@ +import logging + +import sys + +logger = logging.getLogger(__name__) + + +def setup_logging(level: int = logging.INFO) -> None: + """Set up logging with errors to stderr and others to stdout.""" + # create separate handlers for stdout and stderr + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setLevel(level) + # set filter to allow only non-error messages + stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR) + + stderr_handler = logging.StreamHandler(sys.stderr) + stderr_handler.setLevel(logging.ERROR) + + # common formatter + formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(name)s: %(message)s') + stdout_handler.setFormatter(formatter) + stderr_handler.setFormatter(formatter) + + # configure root logger + root_logger = logging.getLogger() + root_logger.setLevel(level) + root_logger.addHandler(stdout_handler) + root_logger.addHandler(stderr_handler) \ No newline at end of file diff --git a/tests/test_others/test_logging.py b/tests/test_others/test_logging.py new file mode 100644 index 0000000..5725481 --- /dev/null +++ b/tests/test_others/test_logging.py @@ -0,0 +1,84 @@ +import logging +import io +from contextlib import redirect_stdout, redirect_stderr +import pytest + +from tools.logging import setup_logging, logger + + +@pytest.fixture(autouse=True) +def reset_logging(): + """Reset logging configuration after each test.""" + # Save original state + original_handlers = logging.root.handlers[:] + original_level = logging.root.level + yield + # Restore original state + logging.root.handlers = original_handlers + logging.root.setLevel(original_level) + + +def test_logging_routing(): + """Test that errors go to stderr and other logs to stdout.""" + # create string buffers to capture output + stdout_capture = io.StringIO() + stderr_capture = io.StringIO() + + # redirect stdout and stderr to our buffers + with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture): + setup_logging(level=logging.DEBUG) + + # test different log levels + logger.debug("This is a debug message") + logger.info("This is an info message") + logger.warning("This is a warning message") + logger.error("This is an error message") + logger.critical("This is a critical message") + + # get the captured output + stdout_output = stdout_capture.getvalue() + stderr_output = stderr_capture.getvalue() + + # verify that errors went to stderr + assert "error" in stderr_output.lower() + assert "critical" in stderr_output.lower() + + # verify that non-errors went to stdout + assert "debug" in stdout_output.lower() + assert "info" in stdout_output.lower() + assert "warning" in stdout_output.lower() + + # verify that errors didn't go to stdout + assert "error" not in stdout_output.lower() + assert "critical" not in stdout_output.lower() + + +def test_logging_level_filtering(): + """Test that logging level filtering works correctly.""" + # create string buffers to capture output + stdout_capture = io.StringIO() + stderr_capture = io.StringIO() + + # redirect stdout and stderr to our buffers + with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture): + setup_logging(level=logging.INFO) # set to INFO level + + # test different log levels + logger.debug("This debug should not appear") + logger.info("This info should appear") + logger.warning("This warning should appear") + logger.error("This error should appear") + + # get the captured output + stdout_output = stdout_capture.getvalue() + stderr_output = stderr_capture.getvalue() + + # verify debug messages are filtered out + assert "debug" not in stdout_output.lower() + + # verify info and warning messages appear in stdout + assert "info" in stdout_output.lower() + assert "warning" in stdout_output.lower() + + # verify error messages appear in stderr + assert "error" in stderr_output.lower() \ No newline at end of file From b61b97fc13d23dc41edcec6639cb8ab3d9ba1fff Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 16:33:13 +0100 Subject: [PATCH 02/12] add basic logging functionality --- src/detectmatelibrary/common/core.py | 15 +++++++- src/detectmatelibrary/common/reader.py | 4 ++ src/tools/logging.py | 51 +++++++++++++++++++------- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/detectmatelibrary/common/core.py b/src/detectmatelibrary/common/core.py index 937af28..b95e362 100644 --- a/src/detectmatelibrary/common/core.py +++ b/src/detectmatelibrary/common/core.py @@ -5,9 +5,14 @@ from detectmatelibrary.schemas import BaseSchema +from tools.logging import logger, setup_logging + from typing import Any, Dict, Tuple, List +setup_logging() + + class SchemaPipeline: @staticmethod def preprocess( @@ -76,18 +81,24 @@ def train( def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None: is_byte, data = SchemaPipeline.preprocess(self.input_schema(), data) + logger.info(f"<<{self.name}>> received:\n{data}") + if (data_buffered := self.data_buffer.add(data)) is None: # type: ignore return None if do_training(config=self.config, index=self.data_used_train): self.data_used_train += 1 + logger.info(f"<<{self.name}>> use data for training") self.train(input_=data_buffered) output_ = self.output_schema() - anomaly_detected = self.run(input_=data_buffered, output_=output_) - if not anomaly_detected: + logger.info(f"<<{self.name}>> processing data") + return_schema = self.run(input_=data_buffered, output_=output_) + if not return_schema: + logger.info(f"<<{self.name}>> returns None") return None + logger.info(f"<<{self.name}>> processed:\n{output_}") return SchemaPipeline.postprocess(output_, is_byte=is_byte) def get_config(self) -> Dict[str, Any]: diff --git a/src/detectmatelibrary/common/reader.py b/src/detectmatelibrary/common/reader.py index 504255e..5b8032d 100644 --- a/src/detectmatelibrary/common/reader.py +++ b/src/detectmatelibrary/common/reader.py @@ -2,6 +2,8 @@ from detectmatelibrary import schemas +from tools.logging import logger + from typing import Optional, Any @@ -44,8 +46,10 @@ def __init_logs(self) -> schemas.LogSchema: def process(self, as_bytes: bool = True) -> schemas.LogSchema | bytes | None: # type: ignore is_new_log = self.read(log := self.__init_logs()) if not is_new_log: + logger.info(f"<<{self.name}>> returns None") return None + logger.info(f"<<{self.name}>> read:\n{log}") return SchemaPipeline.postprocess(log, is_byte=as_bytes) if is_new_log else None # type: ignore def read(self, output_: schemas.LogSchema) -> bool: diff --git a/src/tools/logging.py b/src/tools/logging.py index 26ee663..f0ed846 100644 --- a/src/tools/logging.py +++ b/src/tools/logging.py @@ -1,28 +1,51 @@ import logging - import sys +from typing import Optional logger = logging.getLogger(__name__) -def setup_logging(level: int = logging.INFO) -> None: - """Set up logging with errors to stderr and others to stdout.""" - # create separate handlers for stdout and stderr +def setup_logging( + level: int = logging.INFO, + logfile: Optional[str] = None, + *, + force_color: Optional[bool] = None +) -> None: + # determine whether to use colors + if force_color is None: + use_color = sys.stdout.isatty() + else: + use_color = bool(force_color) + + # common plain formatter for file and non-colored console + plain_fmt = logging.Formatter('[%(asctime)s] %(levelname)s %(name)s: %(message)s') + colored_fmt = logging.Formatter( + '\033[95m[%(asctime)s] %(levelname)s %(name)s\033[0m: %(message)s' + ) + + root_logger = logging.getLogger() + root_logger.setLevel(level) + + # remove existing handlers to avoid duplicates on repeated setup calls + for h in list(root_logger.handlers): + root_logger.removeHandler(h) + + # console handler: stdout for non-error, stderr for error stdout_handler = logging.StreamHandler(sys.stdout) stdout_handler.setLevel(level) - # set filter to allow only non-error messages stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR) + stdout_handler.setFormatter(colored_fmt if use_color else plain_fmt) stderr_handler = logging.StreamHandler(sys.stderr) stderr_handler.setLevel(logging.ERROR) + stderr_handler.setFormatter(colored_fmt if use_color else plain_fmt) - # common formatter - formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(name)s: %(message)s') - stdout_handler.setFormatter(formatter) - stderr_handler.setFormatter(formatter) - - # configure root logger - root_logger = logging.getLogger() - root_logger.setLevel(level) root_logger.addHandler(stdout_handler) - root_logger.addHandler(stderr_handler) \ No newline at end of file + root_logger.addHandler(stderr_handler) + + # optional file handler (always plain) + if logfile: + file_handler = logging.FileHandler(logfile) + file_handler.setLevel(level) + file_handler.setFormatter(plain_fmt) + root_logger.addHandler(file_handler) \ No newline at end of file From d070ddf518caf24e690c3ec1bfbb3a67dcd62899 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 16:56:57 +0100 Subject: [PATCH 03/12] first output version --- src/detectmatelibrary/common/output.py | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 src/detectmatelibrary/common/output.py diff --git a/src/detectmatelibrary/common/output.py b/src/detectmatelibrary/common/output.py new file mode 100644 index 0000000..09d7f60 --- /dev/null +++ b/src/detectmatelibrary/common/output.py @@ -0,0 +1,58 @@ +from detectmatelibrary.common.core import CoreComponent, CoreConfig + +from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode + + +from detectmatelibrary.schemas import DetectorSchema, BaseSchema + +from typing_extensions import override +from typing import List, Optional, Any + + +class CoreOutputConfig(CoreConfig): + comp_type: str = "outputs" + method_type: str = "core_output" + parser: str = "" + + auto_config: bool = False + + +class CoreOutput(CoreComponent): + def __init__( + self, + name: str = "CoreOutput", + buffer_mode: BufferMode = BufferMode.NO_BUF, + buffer_size: Optional[int] = None, + config: Optional[CoreOutputConfig | dict[str, Any]] = CoreOutputConfig(), + ) -> None: + if isinstance(config, dict): + config = CoreOutputConfig.from_dict(config, name) + + super().__init__( + name=name, + type_=config.comp_type, # type: ignore + config=config, # type: ignore + args_buffer=ArgsBuffer(mode=buffer_mode, size=buffer_size), + input_schema=DetectorSchema, + output_schema=BaseSchema, + ) + + @override + def run( + self, input_: List[DetectorSchema] | DetectorSchema, output_: BaseSchema # type: ignore + ) -> bool: + do_output = self.do_output(input_, output_) + return do_output if do_output is not None else True + + def do_output( + self, + input_: List[DetectorSchema] | DetectorSchema, + output_: BaseSchema, + ) -> bool | None: + return True + + @override + def train( + self, input_: DetectorSchema | list[DetectorSchema] # type: ignore + ) -> None: + pass From 91f1a604e21a37eba650a13529f431040c7cbb7e Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 17:03:26 +0100 Subject: [PATCH 04/12] update schemas --- README.md | 2 +- src/detectmatelibrary/schemas/schemas.proto | 12 ++++++++++++ src/detectmatelibrary/schemas/schemas_pb2.py | 8 +++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ac8ae4f..c24d12a 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ protoc --version This dependency is only needed if a proto file is modified. To compile the proto file do: ```bash -protoc --proto_path=src/schemas/ --python_out=src/schemas/ src/schemas/schemas.proto +protoc --proto_path=src/detectmatelibrary/schemas/ --python_out=src/detectmatelibrary/schemas/ src/schemas/schemas.proto ``` ### Step 3: Run unit tests diff --git a/src/detectmatelibrary/schemas/schemas.proto b/src/detectmatelibrary/schemas/schemas.proto index adfbcf8..1c0b8ac 100644 --- a/src/detectmatelibrary/schemas/schemas.proto +++ b/src/detectmatelibrary/schemas/schemas.proto @@ -40,3 +40,15 @@ message DetectorSchema { optional int32 receivedTimestamp = 11; map alertsObtain = 12; } + +message OutputSchema { + optional string __version__ = 1; + repeated string detectorIDs = 2; + repeated string detectorTypes = 3; + repeated int32 alertID = 4; + optional int32 outputTimestamp = 5; + repeated int32 logIDs = 6; + repeated int32 extractedTimestamps = 9; + optional string description = 10; + map alertsObtain = 12; +} diff --git a/src/detectmatelibrary/schemas/schemas_pb2.py b/src/detectmatelibrary/schemas/schemas_pb2.py index 1276c2c..2f26e1d 100644 --- a/src/detectmatelibrary/schemas/schemas_pb2.py +++ b/src/detectmatelibrary/schemas/schemas_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rschemas.proto\"\x1d\n\x06Schema\x12\x13\n\x0b__version__\x18\x01 \x01(\t\"\xb7\x01\n\tLogSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05logID\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x10\n\x03log\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tlogSource\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08hostname\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x0e\n\x0cX__version__B\x08\n\x06_logIDB\x06\n\x04_logB\x0c\n\n_logSourceB\x0b\n\t_hostname\"\xa5\x04\n\x0cParserSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nparserType\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08parserID\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x45ventID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x15\n\x08template\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x11\n\tvariables\x18\x06 \x03(\t\x12\x18\n\x0bparsedLogID\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05logID\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x10\n\x03log\x18\t \x01(\tH\x07\x88\x01\x01\x12\x41\n\x12logFormatVariables\x18\n \x03(\x0b\x32%.ParserSchema.LogFormatVariablesEntry\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x08\x88\x01\x01\x12\x1c\n\x0fparsedTimestamp\x18\x0c \x01(\x05H\t\x88\x01\x01\x1a\x39\n\x17LogFormatVariablesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_parserTypeB\x0b\n\t_parserIDB\n\n\x08_EventIDB\x0b\n\t_templateB\x0e\n\x0c_parsedLogIDB\x08\n\x06_logIDB\x06\n\x04_logB\x14\n\x12_receivedTimestampB\x12\n\x10_parsedTimestamp\"\x81\x04\n\x0e\x44\x65tectorSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ndetectorID\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0c\x64\x65tectorType\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x61lertID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x1f\n\x12\x64\x65tectionTimestamp\x18\x05 \x01(\x05H\x04\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x12\n\x05score\x18\x08 \x01(\x02H\x05\x88\x01\x01\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x07\x88\x01\x01\x12\x37\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32!.DetectorSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_detectorIDB\x0f\n\r_detectorTypeB\n\n\x08_alertIDB\x15\n\x13_detectionTimestampB\x08\n\x06_scoreB\x0e\n\x0c_descriptionB\x14\n\x12_receivedTimestampb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rschemas.proto\"\x1d\n\x06Schema\x12\x13\n\x0b__version__\x18\x01 \x01(\t\"\xb7\x01\n\tLogSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05logID\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x10\n\x03log\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tlogSource\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08hostname\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x0e\n\x0cX__version__B\x08\n\x06_logIDB\x06\n\x04_logB\x0c\n\n_logSourceB\x0b\n\t_hostname\"\xa5\x04\n\x0cParserSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nparserType\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08parserID\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x45ventID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x15\n\x08template\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x11\n\tvariables\x18\x06 \x03(\t\x12\x18\n\x0bparsedLogID\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05logID\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x10\n\x03log\x18\t \x01(\tH\x07\x88\x01\x01\x12\x41\n\x12logFormatVariables\x18\n \x03(\x0b\x32%.ParserSchema.LogFormatVariablesEntry\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x08\x88\x01\x01\x12\x1c\n\x0fparsedTimestamp\x18\x0c \x01(\x05H\t\x88\x01\x01\x1a\x39\n\x17LogFormatVariablesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_parserTypeB\x0b\n\t_parserIDB\n\n\x08_EventIDB\x0b\n\t_templateB\x0e\n\x0c_parsedLogIDB\x08\n\x06_logIDB\x06\n\x04_logB\x14\n\x12_receivedTimestampB\x12\n\x10_parsedTimestamp\"\x81\x04\n\x0e\x44\x65tectorSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ndetectorID\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0c\x64\x65tectorType\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x61lertID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x1f\n\x12\x64\x65tectionTimestamp\x18\x05 \x01(\x05H\x04\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x12\n\x05score\x18\x08 \x01(\x02H\x05\x88\x01\x01\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x07\x88\x01\x01\x12\x37\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32!.DetectorSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_detectorIDB\x0f\n\r_detectorTypeB\n\n\x08_alertIDB\x15\n\x13_detectionTimestampB\x08\n\x06_scoreB\x0e\n\x0c_descriptionB\x14\n\x12_receivedTimestamp\"\xea\x02\n\x0cOutputSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0b\x64\x65tectorIDs\x18\x02 \x03(\t\x12\x15\n\rdetectorTypes\x18\x03 \x03(\t\x12\x0f\n\x07\x61lertID\x18\x04 \x03(\x05\x12\x1c\n\x0foutputTimestamp\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x02\x88\x01\x01\x12\x35\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32\x1f.OutputSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\x12\n\x10_outputTimestampB\x0e\n\x0c_descriptionb\x06proto3') _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'schemas_pb2', globals()) @@ -25,6 +25,8 @@ _PARSERSCHEMA_LOGFORMATVARIABLESENTRY._serialized_options = b'8\001' _DETECTORSCHEMA_ALERTSOBTAINENTRY._options = None _DETECTORSCHEMA_ALERTSOBTAINENTRY._serialized_options = b'8\001' + _OUTPUTSCHEMA_ALERTSOBTAINENTRY._options = None + _OUTPUTSCHEMA_ALERTSOBTAINENTRY._serialized_options = b'8\001' _SCHEMA._serialized_start=17 _SCHEMA._serialized_end=46 _LOGSCHEMA._serialized_start=49 @@ -37,4 +39,8 @@ _DETECTORSCHEMA._serialized_end=1300 _DETECTORSCHEMA_ALERTSOBTAINENTRY._serialized_start=1118 _DETECTORSCHEMA_ALERTSOBTAINENTRY._serialized_end=1169 + _OUTPUTSCHEMA._serialized_start=1303 + _OUTPUTSCHEMA._serialized_end=1665 + _OUTPUTSCHEMA_ALERTSOBTAINENTRY._serialized_start=1118 + _OUTPUTSCHEMA_ALERTSOBTAINENTRY._serialized_end=1169 # @@protoc_insertion_point(module_scope) From 28247d66b12d80cfbd86023c3deb60e0aef0b01a Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 17:03:47 +0100 Subject: [PATCH 05/12] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c24d12a..a282269 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ protoc --version This dependency is only needed if a proto file is modified. To compile the proto file do: ```bash -protoc --proto_path=src/detectmatelibrary/schemas/ --python_out=src/detectmatelibrary/schemas/ src/schemas/schemas.proto +protoc --proto_path=src/detectmatelibrary/schemas/ --python_out=src/detectmatelibrary/schemas/ src/detectmatelibrary/schemas/schemas.proto ``` ### Step 3: Run unit tests From a73c4c912af478bff5a8118d08b223a891ac7474 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 17:13:36 +0100 Subject: [PATCH 06/12] add output schema --- src/detectmatelibrary/common/output.py | 8 +++---- src/detectmatelibrary/schemas/__init__.py | 5 ++-- src/detectmatelibrary/schemas/_classes.py | 12 ++++++++++ src/detectmatelibrary/schemas/_op.py | 4 +++- src/detectmatelibrary/schemas/schemas.proto | 2 +- src/detectmatelibrary/schemas/schemas_pb2.py | 4 ++-- tests/test_schemas/test_ops.py | 24 ++++++++++++++++++++ 7 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/detectmatelibrary/common/output.py b/src/detectmatelibrary/common/output.py index 09d7f60..5ff2acd 100644 --- a/src/detectmatelibrary/common/output.py +++ b/src/detectmatelibrary/common/output.py @@ -3,7 +3,7 @@ from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode -from detectmatelibrary.schemas import DetectorSchema, BaseSchema +from detectmatelibrary.schemas import DetectorSchema, OutputSchema from typing_extensions import override from typing import List, Optional, Any @@ -34,12 +34,12 @@ def __init__( config=config, # type: ignore args_buffer=ArgsBuffer(mode=buffer_mode, size=buffer_size), input_schema=DetectorSchema, - output_schema=BaseSchema, + output_schema=OutputSchema, ) @override def run( - self, input_: List[DetectorSchema] | DetectorSchema, output_: BaseSchema # type: ignore + self, input_: List[DetectorSchema] | DetectorSchema, output_: OutputSchema # type: ignore ) -> bool: do_output = self.do_output(input_, output_) return do_output if do_output is not None else True @@ -47,7 +47,7 @@ def run( def do_output( self, input_: List[DetectorSchema] | DetectorSchema, - output_: BaseSchema, + output_: OutputSchema, ) -> bool | None: return True diff --git a/src/detectmatelibrary/schemas/__init__.py b/src/detectmatelibrary/schemas/__init__.py index d0a4867..5c1fa3e 100644 --- a/src/detectmatelibrary/schemas/__init__.py +++ b/src/detectmatelibrary/schemas/__init__.py @@ -7,8 +7,9 @@ BaseSchema, LogSchema, ParserSchema, - DetectorSchema + DetectorSchema, + OutputSchema ) -__all__ = ["BaseSchema", "LogSchema", "ParserSchema", "DetectorSchema"] +__all__ = ["BaseSchema", "LogSchema", "ParserSchema", "DetectorSchema", "OutputSchema"] diff --git a/src/detectmatelibrary/schemas/_classes.py b/src/detectmatelibrary/schemas/_classes.py index 876abdb..97c8ea2 100644 --- a/src/detectmatelibrary/schemas/_classes.py +++ b/src/detectmatelibrary/schemas/_classes.py @@ -149,3 +149,15 @@ def __init__( def copy(self) -> "DetectorSchema": schema: DetectorSchema = super().copy() # type: ignore return schema + + +class OutputSchema(BaseSchema): + """Output schema class.""" + def __init__( + self, kwargs: dict[str, Any] | None = None + ) -> None: + super().__init__(schema_id=op.OUTPUT_SCHEMA, kwargs=kwargs) + + def copy(self) -> "OutputSchema": + schema: OutputSchema = super().copy() # type: ignore + return schema diff --git a/src/detectmatelibrary/schemas/_op.py b/src/detectmatelibrary/schemas/_op.py index 49e4e23..bea2a10 100644 --- a/src/detectmatelibrary/schemas/_op.py +++ b/src/detectmatelibrary/schemas/_op.py @@ -9,13 +9,14 @@ # Main variables ************************************ # Use Union of actual protobuf classes for better type hints -SchemaT = Union[s.Schema, s.LogSchema, s.ParserSchema, s.DetectorSchema] # type: ignore +SchemaT = Union[s.Schema, s.LogSchema, s.ParserSchema, s.DetectorSchema, s.OutputSchema] # type: ignore SchemaID = NewType("SchemaID", bytes) BASE_SCHEMA: SchemaID = SchemaID(b"0") LOG_SCHEMA: SchemaID = SchemaID(b"1") PARSER_SCHEMA: SchemaID = SchemaID(b"2") DETECTOR_SCHEMA: SchemaID = SchemaID(b"3") +OUTPUT_SCHEMA: SchemaID = SchemaID(b"4") __current_version = "1.0.0" __id_codes: Dict[SchemaID, Type[Message]] = { @@ -23,6 +24,7 @@ LOG_SCHEMA: s.LogSchema, # type: ignore PARSER_SCHEMA: s.ParserSchema, # type: ignore DETECTOR_SCHEMA: s.DetectorSchema, # type: ignore + OUTPUT_SCHEMA: s.OutputSchema, # type: ignore } diff --git a/src/detectmatelibrary/schemas/schemas.proto b/src/detectmatelibrary/schemas/schemas.proto index 1c0b8ac..bd13668 100644 --- a/src/detectmatelibrary/schemas/schemas.proto +++ b/src/detectmatelibrary/schemas/schemas.proto @@ -45,7 +45,7 @@ message OutputSchema { optional string __version__ = 1; repeated string detectorIDs = 2; repeated string detectorTypes = 3; - repeated int32 alertID = 4; + repeated int32 alertIDs = 4; optional int32 outputTimestamp = 5; repeated int32 logIDs = 6; repeated int32 extractedTimestamps = 9; diff --git a/src/detectmatelibrary/schemas/schemas_pb2.py b/src/detectmatelibrary/schemas/schemas_pb2.py index 2f26e1d..3bb8ada 100644 --- a/src/detectmatelibrary/schemas/schemas_pb2.py +++ b/src/detectmatelibrary/schemas/schemas_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rschemas.proto\"\x1d\n\x06Schema\x12\x13\n\x0b__version__\x18\x01 \x01(\t\"\xb7\x01\n\tLogSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05logID\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x10\n\x03log\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tlogSource\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08hostname\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x0e\n\x0cX__version__B\x08\n\x06_logIDB\x06\n\x04_logB\x0c\n\n_logSourceB\x0b\n\t_hostname\"\xa5\x04\n\x0cParserSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nparserType\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08parserID\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x45ventID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x15\n\x08template\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x11\n\tvariables\x18\x06 \x03(\t\x12\x18\n\x0bparsedLogID\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05logID\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x10\n\x03log\x18\t \x01(\tH\x07\x88\x01\x01\x12\x41\n\x12logFormatVariables\x18\n \x03(\x0b\x32%.ParserSchema.LogFormatVariablesEntry\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x08\x88\x01\x01\x12\x1c\n\x0fparsedTimestamp\x18\x0c \x01(\x05H\t\x88\x01\x01\x1a\x39\n\x17LogFormatVariablesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_parserTypeB\x0b\n\t_parserIDB\n\n\x08_EventIDB\x0b\n\t_templateB\x0e\n\x0c_parsedLogIDB\x08\n\x06_logIDB\x06\n\x04_logB\x14\n\x12_receivedTimestampB\x12\n\x10_parsedTimestamp\"\x81\x04\n\x0e\x44\x65tectorSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ndetectorID\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0c\x64\x65tectorType\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x61lertID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x1f\n\x12\x64\x65tectionTimestamp\x18\x05 \x01(\x05H\x04\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x12\n\x05score\x18\x08 \x01(\x02H\x05\x88\x01\x01\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x07\x88\x01\x01\x12\x37\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32!.DetectorSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_detectorIDB\x0f\n\r_detectorTypeB\n\n\x08_alertIDB\x15\n\x13_detectionTimestampB\x08\n\x06_scoreB\x0e\n\x0c_descriptionB\x14\n\x12_receivedTimestamp\"\xea\x02\n\x0cOutputSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0b\x64\x65tectorIDs\x18\x02 \x03(\t\x12\x15\n\rdetectorTypes\x18\x03 \x03(\t\x12\x0f\n\x07\x61lertID\x18\x04 \x03(\x05\x12\x1c\n\x0foutputTimestamp\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x02\x88\x01\x01\x12\x35\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32\x1f.OutputSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\x12\n\x10_outputTimestampB\x0e\n\x0c_descriptionb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rschemas.proto\"\x1d\n\x06Schema\x12\x13\n\x0b__version__\x18\x01 \x01(\t\"\xb7\x01\n\tLogSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05logID\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x10\n\x03log\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tlogSource\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08hostname\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x0e\n\x0cX__version__B\x08\n\x06_logIDB\x06\n\x04_logB\x0c\n\n_logSourceB\x0b\n\t_hostname\"\xa5\x04\n\x0cParserSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nparserType\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x15\n\x08parserID\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x45ventID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x15\n\x08template\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x11\n\tvariables\x18\x06 \x03(\t\x12\x18\n\x0bparsedLogID\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05logID\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x10\n\x03log\x18\t \x01(\tH\x07\x88\x01\x01\x12\x41\n\x12logFormatVariables\x18\n \x03(\x0b\x32%.ParserSchema.LogFormatVariablesEntry\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x08\x88\x01\x01\x12\x1c\n\x0fparsedTimestamp\x18\x0c \x01(\x05H\t\x88\x01\x01\x1a\x39\n\x17LogFormatVariablesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_parserTypeB\x0b\n\t_parserIDB\n\n\x08_EventIDB\x0b\n\t_templateB\x0e\n\x0c_parsedLogIDB\x08\n\x06_logIDB\x06\n\x04_logB\x14\n\x12_receivedTimestampB\x12\n\x10_parsedTimestamp\"\x81\x04\n\x0e\x44\x65tectorSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ndetectorID\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x19\n\x0c\x64\x65tectorType\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x14\n\x07\x61lertID\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12\x1f\n\x12\x64\x65tectionTimestamp\x18\x05 \x01(\x05H\x04\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x12\n\x05score\x18\x08 \x01(\x02H\x05\x88\x01\x01\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x06\x88\x01\x01\x12\x1e\n\x11receivedTimestamp\x18\x0b \x01(\x05H\x07\x88\x01\x01\x12\x37\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32!.DetectorSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\r\n\x0b_detectorIDB\x0f\n\r_detectorTypeB\n\n\x08_alertIDB\x15\n\x13_detectionTimestampB\x08\n\x06_scoreB\x0e\n\x0c_descriptionB\x14\n\x12_receivedTimestamp\"\xeb\x02\n\x0cOutputSchema\x12\x18\n\x0b__version__\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x0b\x64\x65tectorIDs\x18\x02 \x03(\t\x12\x15\n\rdetectorTypes\x18\x03 \x03(\t\x12\x10\n\x08\x61lertIDs\x18\x04 \x03(\x05\x12\x1c\n\x0foutputTimestamp\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06logIDs\x18\x06 \x03(\x05\x12\x1b\n\x13\x65xtractedTimestamps\x18\t \x03(\x05\x12\x18\n\x0b\x64\x65scription\x18\n \x01(\tH\x02\x88\x01\x01\x12\x35\n\x0c\x61lertsObtain\x18\x0c \x03(\x0b\x32\x1f.OutputSchema.AlertsObtainEntry\x1a\x33\n\x11\x41lertsObtainEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0cX__version__B\x12\n\x10_outputTimestampB\x0e\n\x0c_descriptionb\x06proto3') _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'schemas_pb2', globals()) @@ -40,7 +40,7 @@ _DETECTORSCHEMA_ALERTSOBTAINENTRY._serialized_start=1118 _DETECTORSCHEMA_ALERTSOBTAINENTRY._serialized_end=1169 _OUTPUTSCHEMA._serialized_start=1303 - _OUTPUTSCHEMA._serialized_end=1665 + _OUTPUTSCHEMA._serialized_end=1666 _OUTPUTSCHEMA_ALERTSOBTAINENTRY._serialized_start=1118 _OUTPUTSCHEMA_ALERTSOBTAINENTRY._serialized_end=1169 # @@protoc_insertion_point(module_scope) diff --git a/tests/test_schemas/test_ops.py b/tests/test_schemas/test_ops.py index f21d809..917b5ab 100644 --- a/tests/test_schemas/test_ops.py +++ b/tests/test_schemas/test_ops.py @@ -72,6 +72,30 @@ def test_initialize_detector_schema(self) -> None: assert schema.score == 0.5 assert schema.extractedTimestamps == [4, 5, 6] + def test_initialize_output_schema(self) -> None: + values = { + "detectorIDs": ["test id", "another id"], + "detectorTypes": ["type test", "another type"], + "alertIDs": [1, 2], + "outputTimestamp": 2, + "logIDs": [1, 2, 3], + "extractedTimestamps": [4, 5, 6], + "description": "test description", + "alertsObtain": {"key": "value"} + } + schema = op_schemas.initialize(op_schemas.OUTPUT_SCHEMA, **values) + + assert schema.__version__ == "1.0.0" + assert schema.detectorIDs == ["test id", "another id"] + assert schema.detectorTypes == ["type test", "another type"] + assert schema.alertIDs == [1, 2] + assert schema.outputTimestamp == 2 + assert schema.logIDs == [1, 2, 3] + assert schema.extractedTimestamps == [4, 5, 6] + assert schema.description == "test description" + assert schema.alertsObtain == {"key": "value"} + assert schema.extractedTimestamps == [4, 5, 6] + def test_copy(self) -> None: values = { "logID": 1, "log": "test", "logSource": "example", "hostname": "example@org" From 6605941db5afc8af191489e769ed87aa7f7ab316 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Tue, 16 Dec 2025 17:15:23 +0100 Subject: [PATCH 07/12] CoreParser test minor change --- tests/test_common/test_core_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_common/test_core_parser.py b/tests/test_common/test_core_parser.py index e492c69..49e7505 100644 --- a/tests/test_common/test_core_parser.py +++ b/tests/test_common/test_core_parser.py @@ -1,4 +1,3 @@ -import re from detectmatelibrary.common.parser import CoreParser, CoreParserConfig, get_format_variables from detectmatelibrary.utils.aux import time_test_mode import detectmatelibrary.schemas._op as op_schemas @@ -6,6 +5,7 @@ import pydantic import pytest +import re class MockupConfig(CoreParserConfig): From 63819fabe0917127d9653c0d99d1424d913b9957 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Wed, 17 Dec 2025 10:57:23 +0100 Subject: [PATCH 08/12] create basic output functionality and improve schema operations --- src/detectmatelibrary/common/output.py | 28 ++++++++- src/detectmatelibrary/schemas/__init__.py | 5 +- src/detectmatelibrary/schemas/_classes.py | 11 ++++ src/detectmatelibrary/schemas/_op.py | 66 ++++++++++++---------- tests/test_common/test_core_output.py | 69 +++++++++++++++++++++++ tests/test_schemas/test_ops.py | 17 ++++++ tests/test_schemas/test_schema_class.py | 9 +++ 7 files changed, 172 insertions(+), 33 deletions(-) create mode 100644 tests/test_common/test_core_output.py diff --git a/src/detectmatelibrary/common/output.py b/src/detectmatelibrary/common/output.py index 5ff2acd..d87f413 100644 --- a/src/detectmatelibrary/common/output.py +++ b/src/detectmatelibrary/common/output.py @@ -1,9 +1,9 @@ from detectmatelibrary.common.core import CoreComponent, CoreConfig from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode +from detectmatelibrary.utils.aux import get_timestamp - -from detectmatelibrary.schemas import DetectorSchema, OutputSchema +from detectmatelibrary.schemas import DetectorSchema, OutputSchema, FieldNotFound from typing_extensions import override from typing import List, Optional, Any @@ -17,6 +17,22 @@ class CoreOutputConfig(CoreConfig): auto_config: bool = False +class DetectorFieldNotFound(Exception): + pass + + +def get_field(input_: List[DetectorSchema] | DetectorSchema, field: str) -> List[Any]: + try: + if isinstance(input_, list): + if input_[0].is_field_list(field): + return [item for detector in input_ for item in detector[field]] + return [detector[field] for detector in input_] + else: + return [input_[field]] + except FieldNotFound: + raise DetectorFieldNotFound() + + class CoreOutput(CoreComponent): def __init__( self, @@ -41,7 +57,15 @@ def __init__( def run( self, input_: List[DetectorSchema] | DetectorSchema, output_: OutputSchema # type: ignore ) -> bool: + output_["detectorIDs"] = get_field(input_, "detectorID") + output_["detectorTypes"] = get_field(input_, "detectorType") + output_["alertIDs"] = get_field(input_, "alertID") + output_["logIDs"] = get_field(input_, "logID") + output_["extractedTimestamps"] = get_field(input_, "extractedTimestamps") + do_output = self.do_output(input_, output_) + output_["outputTimestamp"] = get_timestamp() + return do_output if do_output is not None else True def do_output( diff --git a/src/detectmatelibrary/schemas/__init__.py b/src/detectmatelibrary/schemas/__init__.py index 5c1fa3e..9e91517 100644 --- a/src/detectmatelibrary/schemas/__init__.py +++ b/src/detectmatelibrary/schemas/__init__.py @@ -8,8 +8,9 @@ LogSchema, ParserSchema, DetectorSchema, - OutputSchema + OutputSchema, + FieldNotFound, ) -__all__ = ["BaseSchema", "LogSchema", "ParserSchema", "DetectorSchema", "OutputSchema"] +__all__ = ["BaseSchema", "LogSchema", "ParserSchema", "DetectorSchema", "OutputSchema", "FieldNotFound"] diff --git a/src/detectmatelibrary/schemas/_classes.py b/src/detectmatelibrary/schemas/_classes.py index 97c8ea2..d8df1e1 100644 --- a/src/detectmatelibrary/schemas/_classes.py +++ b/src/detectmatelibrary/schemas/_classes.py @@ -26,6 +26,7 @@ def __init__( ) -> None: self.schema_id = schema_id self.var_names: set[str] + self.__is_list: dict[str, bool] = {} self.init_schema(kwargs=kwargs) def __contains__(self, idx: str) -> bool: @@ -53,6 +54,16 @@ def init_schema(self, kwargs: dict[str, Any] | None) -> None: var_names.append(var) self.var_names = set(var_names) + def is_field_list(self, field_name: str) -> bool: + """Check if a field is a list.""" + if field_name in self.__is_list: # Avoid recomputation + return self.__is_list[field_name] + + schema = self.get_schema() + is_list = op.is_repeated(schema=schema, field_name=field_name) + self.__is_list[field_name] = is_list + return is_list + class BaseSchema(SchemaVariables): def __init__( diff --git a/src/detectmatelibrary/schemas/_op.py b/src/detectmatelibrary/schemas/_op.py index bea2a10..278d926 100644 --- a/src/detectmatelibrary/schemas/_op.py +++ b/src/detectmatelibrary/schemas/_op.py @@ -53,11 +53,47 @@ def __get_schema_class(schema_id: SchemaID) -> Type[Message]: return __id_codes[schema_id] -def __is_repeated_field(field: Any) -> bool: +def __is_repeated(field: Any) -> bool: """Check if a field in the message is a repeated element.""" return bool(field.is_repeated) +# Auxiliar methods ***************************************** +def is_repeated(schema: SchemaT, field_name: str) -> bool: + """Check if a field is a repeated element.""" + for field in schema.DESCRIPTOR.fields: + if field.name == field_name: + return __is_repeated(field) + raise NotSupportedSchema() + + +def check_is_same_schema( + id_schema_1: SchemaID, id_schema_2: SchemaID +) -> None | IncorrectSchema: + """Raise exception if two schemas do not match.""" + if id_schema_1 != id_schema_2: + raise IncorrectSchema() + return None + + +def check_if_schema_is_complete(schema: SchemaT) -> None | NotCompleteSchema: + """Check if the schema is complete.""" + missing_fields = [] + for field in schema.DESCRIPTOR.fields: + if not __is_repeated(field) and not schema.HasField(field.name): + missing_fields.append(field.name) + + if len(missing_fields) > 0: + raise NotCompleteSchema(f"Missing fields: {missing_fields}") + + return None + + +def get_variables_names(schema: SchemaT) -> list[str]: + """Get the variable names of the schema.""" + return [field.name for field in schema.DESCRIPTOR.fields] + + # Main methods ***************************************** def initialize(schema_id: SchemaID, **kwargs: Any) -> SchemaT | NotSupportedSchema: """Initialize a protobuf schema, it use its arguments and the assigned @@ -97,31 +133,3 @@ def deserialize(message: bytes) -> Tuple[SchemaID, SchemaT]: schema = schema_class() schema.ParseFromString(message[1:]) return schema_id, schema - - -# Auxiliar methods ***************************************** -def check_is_same_schema( - id_schema_1: SchemaID, id_schema_2: SchemaID -) -> None | IncorrectSchema: - """Raise exception if two schemas do not match.""" - if id_schema_1 != id_schema_2: - raise IncorrectSchema() - return None - - -def check_if_schema_is_complete(schema: SchemaT) -> None | NotCompleteSchema: - """Check if the schema is complete.""" - missing_fields = [] - for field in schema.DESCRIPTOR.fields: - if not __is_repeated_field(field) and not schema.HasField(field.name): - missing_fields.append(field.name) - - if len(missing_fields) > 0: - raise NotCompleteSchema(f"Missing fields: {missing_fields}") - - return None - - -def get_variables_names(schema: SchemaT) -> list[str]: - """Get the variable names of the schema.""" - return [field.name for field in schema.DESCRIPTOR.fields] diff --git a/tests/test_common/test_core_output.py b/tests/test_common/test_core_output.py new file mode 100644 index 0000000..0c38d99 --- /dev/null +++ b/tests/test_common/test_core_output.py @@ -0,0 +1,69 @@ + +from detectmatelibrary.common.output import CoreOutput, CoreOutputConfig, get_field, DetectorFieldNotFound +import detectmatelibrary.schemas as schemas + +import pytest + + +class MockupConfig(CoreOutputConfig): + pass + + +class MockupOutput(CoreOutput): + def __init__(self, name: str, config: CoreOutputConfig) -> None: + super().__init__(name=name, config=config) + + def do_output(self, input_, output_): + pass + + +values = { + "detectorID": "test id", + "detectorType": "type test", + "alertID": 0, + "detectionTimestamp": 0, + "logIDs": [0, 0, 0], + "score": 0.0, + "extractedTimestamps": [0, 0, 0], + "description": "", + "receivedTimestamp": 0, +} + +values2 = { + "detectorID": "test id2", + "detectorType": "type test2", + "alertID": 1, + "detectionTimestamp": 1, + "logIDs": [1, 1], + "score": 1.0, + "extractedTimestamps": [1, 1], + "description": "", + "receivedTimestamp": 1, +} + + +class TestGetField: + def test_get_field_empty_schema(self): + input_ = schemas.DetectorSchema() + result = get_field(input_, "detectorID") + assert result == [""] + + def test_get_field_missing_field(self): + input_ = schemas.DetectorSchema() + with pytest.raises(DetectorFieldNotFound): + get_field(input_, "other_field") + + def test_get_field_single(self): + input_ = schemas.DetectorSchema(values) + result = get_field(input_, "score") + assert result == [0.0] + + def test_get_field_list(self): + input_ = [schemas.DetectorSchema(values), schemas.DetectorSchema(values2)] + result = get_field(input_, "alertID") + assert result == [0, 1] + + def test_get_list_of_list(self): + input_ = [schemas.DetectorSchema(values), schemas.DetectorSchema(values2)] + result = get_field(input_, "logIDs") + assert result == [0, 0, 0, 1, 1] diff --git a/tests/test_schemas/test_ops.py b/tests/test_schemas/test_ops.py index 917b5ab..e47031b 100644 --- a/tests/test_schemas/test_ops.py +++ b/tests/test_schemas/test_ops.py @@ -176,3 +176,20 @@ def test_get_variables(self) -> None: assert set(vars) == set(expected_vars), f"{vars}" assert len(vars) == len(expected_vars), f"{vars}" + + def test_is_repeated(self) -> None: + values = { + "detectorID": "test id", + "detectorType": "type test", + "alertID": 1, + "detectionTimestamp": 2, + "logIDs": [1, 2, 3], + "score": 0.5, + "extractedTimestamps": [4, 5, 6] + } + schema = op_schemas.initialize(op_schemas.DETECTOR_SCHEMA, **values) + + assert not op_schemas.is_repeated(schema, "detectorID") + assert not op_schemas.is_repeated(schema, "score") + assert op_schemas.is_repeated(schema, "logIDs") + assert op_schemas.is_repeated(schema, "extractedTimestamps") diff --git a/tests/test_schemas/test_schema_class.py b/tests/test_schemas/test_schema_class.py index 859f3a0..882b938 100644 --- a/tests/test_schemas/test_schema_class.py +++ b/tests/test_schemas/test_schema_class.py @@ -156,3 +156,12 @@ def test_check_is_same(self): with pytest.raises(IncorrectSchema): log_schema1.check_is_same(parser_schema) + + def test_is_field_list(self): + detector_schema = DetectorSchema() + assert detector_schema.is_field_list("logIDs") is True + assert detector_schema.is_field_list("detectorID") is False + + # Check that that the memory works + assert detector_schema.is_field_list("logIDs") is True + assert detector_schema.is_field_list("detectorID") is False From 8382dc4ed1d03d95cf027642eb245e157a5f7997 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Wed, 17 Dec 2025 11:03:18 +0100 Subject: [PATCH 09/12] add as_dict method in schemas --- src/detectmatelibrary/schemas/_classes.py | 9 +++++---- tests/test_schemas/test_schema_class.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/detectmatelibrary/schemas/_classes.py b/src/detectmatelibrary/schemas/_classes.py index d8df1e1..0f00349 100644 --- a/src/detectmatelibrary/schemas/_classes.py +++ b/src/detectmatelibrary/schemas/_classes.py @@ -32,12 +32,13 @@ def __init__( def __contains__(self, idx: str) -> bool: return idx in self.var_names + def as_dict(self) -> dict[str, Any]: + """Return the schema variables as a dictionary.""" + return {var: getattr(self, var) for var in self.var_names} + def get_schema(self) -> op.SchemaT: """Retrieve the current schema instance.""" - return _initialize_schema( - schema_id=self.schema_id, - kwargs={var: getattr(self, var) for var in self.var_names} - ) + return _initialize_schema(schema_id=self.schema_id, kwargs=self.as_dict()) def set_schema(self, schema: op.SchemaT) -> None: """Set the schema instance and update attributes.""" diff --git a/tests/test_schemas/test_schema_class.py b/tests/test_schemas/test_schema_class.py index 882b938..29b4e89 100644 --- a/tests/test_schemas/test_schema_class.py +++ b/tests/test_schemas/test_schema_class.py @@ -165,3 +165,19 @@ def test_is_field_list(self): # Check that that the memory works assert detector_schema.is_field_list("logIDs") is True assert detector_schema.is_field_list("detectorID") is False + + def test_as_dict(self): + detector_schema = DetectorSchema() + assert detector_schema.as_dict() == { + "__version__": "1.0.0", + "detectorID": detector_schema.detectorID, + "detectorType": detector_schema.detectorType, + "alertID": detector_schema.alertID, + "detectionTimestamp": detector_schema.detectionTimestamp, + "logIDs": detector_schema.logIDs, + "score": detector_schema.score, + "extractedTimestamps": detector_schema.extractedTimestamps, + "description": detector_schema.description, + "receivedTimestamp": detector_schema.receivedTimestamp, + "alertsObtain": detector_schema.alertsObtain, + } From 14566ec76846c1c3e2d07ca9ae939d999af9a8a3 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Wed, 17 Dec 2025 12:20:32 +0100 Subject: [PATCH 10/12] add tests for the core_output --- src/detectmatelibrary/common/output.py | 3 +- tests/test_common/test_core_output.py | 55 +++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/detectmatelibrary/common/output.py b/src/detectmatelibrary/common/output.py index d87f413..168f8e8 100644 --- a/src/detectmatelibrary/common/output.py +++ b/src/detectmatelibrary/common/output.py @@ -12,7 +12,6 @@ class CoreOutputConfig(CoreConfig): comp_type: str = "outputs" method_type: str = "core_output" - parser: str = "" auto_config: bool = False @@ -60,7 +59,7 @@ def run( output_["detectorIDs"] = get_field(input_, "detectorID") output_["detectorTypes"] = get_field(input_, "detectorType") output_["alertIDs"] = get_field(input_, "alertID") - output_["logIDs"] = get_field(input_, "logID") + output_["logIDs"] = get_field(input_, "logIDs") output_["extractedTimestamps"] = get_field(input_, "extractedTimestamps") do_output = self.do_output(input_, output_) diff --git a/tests/test_common/test_core_output.py b/tests/test_common/test_core_output.py index 0c38d99..545ee7b 100644 --- a/tests/test_common/test_core_output.py +++ b/tests/test_common/test_core_output.py @@ -1,5 +1,6 @@ from detectmatelibrary.common.output import CoreOutput, CoreOutputConfig, get_field, DetectorFieldNotFound +from detectmatelibrary.utils.data_buffer import BufferMode import detectmatelibrary.schemas as schemas import pytest @@ -11,10 +12,13 @@ class MockupConfig(CoreOutputConfig): class MockupOutput(CoreOutput): def __init__(self, name: str, config: CoreOutputConfig) -> None: - super().__init__(name=name, config=config) + super().__init__( + name=name, config=config, buffer_mode=BufferMode.WINDOW, buffer_size=2 + ) def do_output(self, input_, output_): - pass + output_["description"] = "hi" + output_["alertsObtain"] = {"ciao": "bella"} values = { @@ -67,3 +71,50 @@ def test_get_list_of_list(self): input_ = [schemas.DetectorSchema(values), schemas.DetectorSchema(values2)] result = get_field(input_, "logIDs") assert result == [0, 0, 0, 1, 1] + + +class TestCoreOutput: + def test_initialization(self): + config = MockupConfig() + output = MockupOutput(name="TestOutput", config=config) + + assert output.name == "TestOutput" + assert output.config == config + assert output.input_schema == schemas.DetectorSchema + assert output.output_schema == schemas.OutputSchema + + def test_run(self): + config = MockupConfig() + output = MockupOutput(name="TestOutput", config=config) + + input_ = [ + schemas.DetectorSchema(values), + schemas.DetectorSchema(values2) + ] + output_ = schemas.OutputSchema() + + output.run(input_=input_, output_=output_) + + assert output_.detectorIDs == ["test id", "test id2"] + assert output_.detectorTypes == ["type test", "type test2"] + assert output_.alertIDs == [0, 1] + assert output_.logIDs == [0, 0, 0, 1, 1] + assert output_.extractedTimestamps == [0, 0, 0, 1, 1] + assert output_.description == "hi" + assert output_.alertsObtain == {"ciao": "bella"} + + def test_process(self): + config = MockupConfig() + output = MockupOutput(name="TestOutput", config=config) + + assert output.process(schemas.DetectorSchema(values)) is None + + result = output.process(schemas.DetectorSchema(values2)) + + assert result.detectorIDs == ["test id", "test id2"] + assert result.detectorTypes == ["type test", "type test2"] + assert result.alertIDs == [0, 1] + assert result.logIDs == [0, 0, 0, 1, 1] + assert result.extractedTimestamps == [0, 0, 0, 1, 1] + assert result.description == "hi" + assert result.alertsObtain == {"ciao": "bella"} From bd186a219018e6fc328c27dbca537fc424d266b5 Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Wed, 17 Dec 2025 12:24:40 +0100 Subject: [PATCH 11/12] add json output --- src/detectmatelibrary/outputs/json_output.py | 46 ++++++++++++++++++++ tests/test_output/test_json.py | 29 ++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 src/detectmatelibrary/outputs/json_output.py create mode 100644 tests/test_output/test_json.py diff --git a/src/detectmatelibrary/outputs/json_output.py b/src/detectmatelibrary/outputs/json_output.py new file mode 100644 index 0000000..c4cbdb4 --- /dev/null +++ b/src/detectmatelibrary/outputs/json_output.py @@ -0,0 +1,46 @@ + +from detectmatelibrary.common.output import CoreOutput, CoreOutputConfig +from detectmatelibrary.schemas import DetectorSchema, OutputSchema +from detectmatelibrary.utils.data_buffer import BufferMode + +from typing import cast, Any +import json +import os + + +def save_json_file(path_folder: str, id_: str, data: dict[str, Any]) -> None: + if not os.path.exists(path_folder): + os.mkdir(path_folder) + + with open(f"{path_folder}/{id_}.json", "w") as json_file: + json.dump(data, json_file) + + +class JSONOutputConfig(CoreOutputConfig): + method_type: str = "json_output" + path_folder: str = "" + + +class JSONOutput(CoreOutput): + def __init__( + self, name: str = "JsonOutput", config: JSONOutputConfig = JSONOutputConfig() + ) -> None: + + if isinstance(config, dict): + config = JSONOutputConfig.from_dict(config, name) + + super().__init__(name=name, buffer_mode=BufferMode.NO_BUF, config=config) + + self.config = cast(JSONOutputConfig, self.config) + self.test_mode: bool = False + + def do_output(self, input_: DetectorSchema, output_: OutputSchema) -> None: # type: ignore + output_["description"] = f"Alert description: {input_['description']}" + output_["alertsObtain"] = input_["alertsObtain"] + + if not self.test_mode: + save_json_file( + path_folder=self.config.path_folder, # type: ignore + id_=str(input_["alertID"]), + data=output_.as_dict(), + ) diff --git a/tests/test_output/test_json.py b/tests/test_output/test_json.py new file mode 100644 index 0000000..f941ced --- /dev/null +++ b/tests/test_output/test_json.py @@ -0,0 +1,29 @@ + +from detectmatelibrary.outputs.json_output import JSONOutput, JSONOutputConfig + + +class TestJSONOutput: + def test_do_output(self): + config = JSONOutputConfig(path_folder=".") + json_output = JSONOutput(name="TestJsonOutput", config=config) + json_output.test_mode = True + + input_ = { + "detectorID": "detector_1", + "detectorType": "type_A", + "alertID": 123, + "detectionTimestamp": 1625079600, + "logIDs": [1, 2, 3], + "score": 0.95, + "extractedTimestamps": [1625079601, 1625079602], + "description": "Test alert", + "alertsObtain": {"key": "value"}, + "receivedTimestamp": 1625079603, + } + + output_ = {} + + json_output.do_output(input_, output_) + + assert output_["description"] == "Alert description: Test alert" + assert output_["alertsObtain"] == {"key": "value"} From f343e86172bd8e359c89fb968491deb304117faf Mon Sep 17 00:00:00 2001 From: "angre.garcia-gomez@ait.ac.at" Date: Wed, 17 Dec 2025 12:26:51 +0100 Subject: [PATCH 12/12] add timestamp test --- tests/test_common/test_core_output.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_common/test_core_output.py b/tests/test_common/test_core_output.py index 545ee7b..8809401 100644 --- a/tests/test_common/test_core_output.py +++ b/tests/test_common/test_core_output.py @@ -1,6 +1,7 @@ from detectmatelibrary.common.output import CoreOutput, CoreOutputConfig, get_field, DetectorFieldNotFound from detectmatelibrary.utils.data_buffer import BufferMode +from detectmatelibrary.utils.aux import time_test_mode import detectmatelibrary.schemas as schemas import pytest @@ -73,6 +74,9 @@ def test_get_list_of_list(self): assert result == [0, 0, 0, 1, 1] +time_test_mode() + + class TestCoreOutput: def test_initialization(self): config = MockupConfig() @@ -100,6 +104,7 @@ def test_run(self): assert output_.alertIDs == [0, 1] assert output_.logIDs == [0, 0, 0, 1, 1] assert output_.extractedTimestamps == [0, 0, 0, 1, 1] + assert output_.outputTimestamp == 0 assert output_.description == "hi" assert output_.alertsObtain == {"ciao": "bella"} @@ -118,3 +123,4 @@ def test_process(self): assert result.extractedTimestamps == [0, 0, 0, 1, 1] assert result.description == "hi" assert result.alertsObtain == {"ciao": "bella"} + assert result.outputTimestamp == 0