From c45a028b0cc90fc9239e06d883327b082666182b Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Fri, 6 Mar 2026 17:29:32 +0100 Subject: [PATCH 1/7] detector extracts timestamps from human readable formats --- src/detectmatelibrary/common/detector.py | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 18b67b9..4f035b6 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -1,3 +1,5 @@ +from datetime import datetime + from detectmatelibrary.common._config._formats import EventsConfig from detectmatelibrary.common.core import CoreComponent, CoreConfig @@ -10,12 +12,31 @@ from typing import Dict, List, Optional, Any + def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: def format_time(time: str) -> int: - time_ = time.split(":")[0] - return int(float(time_)) + #try Unix timestamp first + try: + return int(float(time)) + except ValueError: + pass + + # human-readable formats + formats = [ + "%d/%b/%Y:%H:%M:%S %z", # 04/Mar/2026:14:18:00 +0000 + "%d/%b/%Y:%H:%M:%S", # 04/Mar/2026:14:18:00 + "%Y-%m-%dT%H:%M:%S%z", # 2026-03-04T14:18:00+0000 + "%Y-%m-%d %H:%M:%S", # 2026-03-04 14:18:00 + ] + for format in formats: + try: + return int(datetime.strptime(time, format).timestamp()) + except ValueError: + continue + + raise ValueError(f"Unrecognised time format: '{time}'") if not isinstance(input_, list): input_ = [input_] From 3c42bb2952e248b70b93d0d28da6be928d31834d Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Fri, 6 Mar 2026 17:43:37 +0100 Subject: [PATCH 2/7] prek --- src/detectmatelibrary/common/detector.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 4f035b6..f3a907a 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -12,12 +12,11 @@ from typing import Dict, List, Optional, Any - def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: def format_time(time: str) -> int: - #try Unix timestamp first + # try Unix timestamp first try: return int(float(time)) except ValueError: From 6a220f28f437571f1affa765f7145d7797194e99 Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Mon, 9 Mar 2026 10:57:57 +0100 Subject: [PATCH 3/7] prek --- src/detectmatelibrary/common/detector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index f3a907a..e1a88f5 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -16,12 +16,12 @@ def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: def format_time(time: str) -> int: - # try Unix timestamp first + # try Unix timestamp first try: return int(float(time)) except ValueError: pass - + # human-readable formats formats = [ "%d/%b/%Y:%H:%M:%S %z", # 04/Mar/2026:14:18:00 +0000 @@ -34,7 +34,7 @@ def format_time(time: str) -> int: return int(datetime.strptime(time, format).timestamp()) except ValueError: continue - + raise ValueError(f"Unrecognised time format: '{time}'") if not isinstance(input_, list): From 936cb69e2c5597c486c87a714747d405a3c2312f Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Mon, 9 Mar 2026 13:35:40 +0100 Subject: [PATCH 4/7] re-add colon separation --- src/detectmatelibrary/common/detector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index e1a88f5..49d1be7 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -18,7 +18,7 @@ def _extract_timestamp( def format_time(time: str) -> int: # try Unix timestamp first try: - return int(float(time)) + return int(float(time.split(":")[0])) except ValueError: pass From 737b2e9e1adde8e5e857db7b3e214ab11e21baae Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Mon, 9 Mar 2026 15:29:41 +0100 Subject: [PATCH 5/7] remove invalid format from test data, use _extract_timestamp in detector.py --- src/detectmatelibrary/common/detector.py | 31 ++--------- .../utils/time_format_handler.py | 2 + tests/test_common/test_core_detector.py | 2 +- tests/test_common/test_extract_timestamp.py | 55 +++++++++++++++++++ 4 files changed, 63 insertions(+), 27 deletions(-) create mode 100644 tests/test_common/test_extract_timestamp.py diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 49d1be7..6b827be 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -1,5 +1,3 @@ -from datetime import datetime - from detectmatelibrary.common._config._formats import EventsConfig from detectmatelibrary.common.core import CoreComponent, CoreConfig @@ -11,36 +9,17 @@ from typing_extensions import override from typing import Dict, List, Optional, Any +from detectmatelibrary.utils.time_format_handler import TimeFormatHandler + + +_time_handler = TimeFormatHandler() def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: - def format_time(time: str) -> int: - # try Unix timestamp first - try: - return int(float(time.split(":")[0])) - except ValueError: - pass - - # human-readable formats - formats = [ - "%d/%b/%Y:%H:%M:%S %z", # 04/Mar/2026:14:18:00 +0000 - "%d/%b/%Y:%H:%M:%S", # 04/Mar/2026:14:18:00 - "%Y-%m-%dT%H:%M:%S%z", # 2026-03-04T14:18:00+0000 - "%Y-%m-%d %H:%M:%S", # 2026-03-04 14:18:00 - ] - for format in formats: - try: - return int(datetime.strptime(time, format).timestamp()) - except ValueError: - continue - - raise ValueError(f"Unrecognised time format: '{time}'") - if not isinstance(input_, list): input_ = [input_] - - return [format_time(i["logFormatVariables"]["Time"]) for i in input_] + return [int(_time_handler.parse_timestamp(i["logFormatVariables"]["Time"])) for i in input_] def _extract_logIDs( diff --git a/src/detectmatelibrary/utils/time_format_handler.py b/src/detectmatelibrary/utils/time_format_handler.py index 5f2fcd0..4616210 100644 --- a/src/detectmatelibrary/utils/time_format_handler.py +++ b/src/detectmatelibrary/utils/time_format_handler.py @@ -21,8 +21,10 @@ class TimeFormatHandler: "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%d/%b/%Y:%H:%M:%S %z", # Apache style: 10/Oct/2000:13:55:36 -0700 + "%d/%b/%Y:%H:%M:%S", # Apache style without timezone "%b %d %H:%M:%S", # syslog without year "%H:%M:%S", + "%A, %B %d, %Y %H:%M:%S", # "Wednesday, March 4, 2026 14:18:00" ] def __init__(self) -> None: diff --git a/tests/test_common/test_core_detector.py b/tests/test_common/test_core_detector.py index d2226ed..226b45f 100644 --- a/tests/test_common/test_core_detector.py +++ b/tests/test_common/test_core_detector.py @@ -79,7 +79,7 @@ def detect(self, input_, output_): "parsedLogID": "22", "parserID": "test", "log": "This is a parsed log.", - "logFormatVariables": {"Time": "12121.12:20"}, + "logFormatVariables": {"Time": "12121.12"}, } diff --git a/tests/test_common/test_extract_timestamp.py b/tests/test_common/test_extract_timestamp.py new file mode 100644 index 0000000..87c50d6 --- /dev/null +++ b/tests/test_common/test_extract_timestamp.py @@ -0,0 +1,55 @@ +from detectmatelibrary.common.detector import _extract_timestamp +import detectmatelibrary.schemas as schemas + +class TestCoreDetector: + def test_various_time_formats(self) -> None: + """Test that _extract_timestamp handles a wide range of realistic time formats.""" + dummy_schema = { + "parserType": "a", + "EventID": 0, + "template": "asd", + "variables": [""], + "logID": "0", + "parsedLogID": "22", + "parserID": "test", + "log": "This is a parsed log.", + "logFormatVariables": {"Time": "12121"}, + } + # Compute expected value for timezone-naive formats at runtime + EXPECTED_UTC = 1772633880 + test_cases = [ + # Unix timestamps + ("0", 0), + ("1772812294", 1772812294), + ("1772812294.5", 1772812294), + # Apache/nginx format + ("04/Mar/2026:14:18:00 +0000", EXPECTED_UTC), + ("04/Mar/2026:14:18:00", EXPECTED_UTC), + # ISO 8601 formats + ("2026-03-04T14:18:00+00:00", EXPECTED_UTC), + ("2026-03-04T14:18:00Z", EXPECTED_UTC), + ("2026-03-04T14:18:00.000Z", EXPECTED_UTC), + ("2026-03-04T14:18:00", EXPECTED_UTC), + # Space-separated + ("2026-03-04 14:18:00", EXPECTED_UTC), + ("2026-03-04 14:18:00.000", EXPECTED_UTC), + ("2026/03/04 14:18:00", EXPECTED_UTC), + # Timezone variations + ("2026-03-04T15:18:00+01:00", EXPECTED_UTC), + ("2026-03-04T13:18:00-01:00", EXPECTED_UTC), + + # High precision and different separators + ("2026-03-04T14:18:00.123Z", EXPECTED_UTC), + ("2026-03-04 14:18:00,000", EXPECTED_UTC), + # Common human-readable variations + ("Wednesday, March 4, 2026 14:18:00", EXPECTED_UTC), + ] + + + + for time_str, expected in test_cases: + schema = schemas.ParserSchema({**dummy_schema, "logFormatVariables": {"Time": time_str}}) + result = _extract_timestamp(schema) + assert result == [expected], ( + f"Format '{time_str}': expected [{expected}], got {result}" + ) \ No newline at end of file From f5163a61dcc7cb6a1e7c137712a00050c7b9bd7c Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Mon, 9 Mar 2026 15:30:01 +0100 Subject: [PATCH 6/7] prek --- tests/test_common/test_extract_timestamp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_common/test_extract_timestamp.py b/tests/test_common/test_extract_timestamp.py index 87c50d6..f67bacb 100644 --- a/tests/test_common/test_extract_timestamp.py +++ b/tests/test_common/test_extract_timestamp.py @@ -1,9 +1,10 @@ from detectmatelibrary.common.detector import _extract_timestamp import detectmatelibrary.schemas as schemas -class TestCoreDetector: +class TestCoreDetector: def test_various_time_formats(self) -> None: - """Test that _extract_timestamp handles a wide range of realistic time formats.""" + """Test that _extract_timestamp handles a wide range of realistic + time formats.""" dummy_schema = { "parserType": "a", "EventID": 0, @@ -22,7 +23,7 @@ def test_various_time_formats(self) -> None: ("0", 0), ("1772812294", 1772812294), ("1772812294.5", 1772812294), - # Apache/nginx format + # Apache/nginx format ("04/Mar/2026:14:18:00 +0000", EXPECTED_UTC), ("04/Mar/2026:14:18:00", EXPECTED_UTC), # ISO 8601 formats @@ -45,11 +46,11 @@ def test_various_time_formats(self) -> None: ("Wednesday, March 4, 2026 14:18:00", EXPECTED_UTC), ] - + for time_str, expected in test_cases: schema = schemas.ParserSchema({**dummy_schema, "logFormatVariables": {"Time": time_str}}) result = _extract_timestamp(schema) assert result == [expected], ( f"Format '{time_str}': expected [{expected}], got {result}" - ) \ No newline at end of file + ) From 448bb780ce3e78a0661acd567ae2d16a9cf5a15e Mon Sep 17 00:00:00 2001 From: thorinaboenke Date: Mon, 9 Mar 2026 15:39:01 +0100 Subject: [PATCH 7/7] prek --- src/detectmatelibrary/common/detector.py | 1 + .../utils/time_format_handler.py | 2 +- tests/test_common/test_extract_timestamp.py | 98 +++++++++---------- 3 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 6b827be..1d33c94 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -14,6 +14,7 @@ _time_handler = TimeFormatHandler() + def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: diff --git a/src/detectmatelibrary/utils/time_format_handler.py b/src/detectmatelibrary/utils/time_format_handler.py index 4616210..66ed748 100644 --- a/src/detectmatelibrary/utils/time_format_handler.py +++ b/src/detectmatelibrary/utils/time_format_handler.py @@ -21,7 +21,7 @@ class TimeFormatHandler: "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%d/%b/%Y:%H:%M:%S %z", # Apache style: 10/Oct/2000:13:55:36 -0700 - "%d/%b/%Y:%H:%M:%S", # Apache style without timezone + "%d/%b/%Y:%H:%M:%S", # Apache style without timezone "%b %d %H:%M:%S", # syslog without year "%H:%M:%S", "%A, %B %d, %Y %H:%M:%S", # "Wednesday, March 4, 2026 14:18:00" diff --git a/tests/test_common/test_extract_timestamp.py b/tests/test_common/test_extract_timestamp.py index f67bacb..77196e5 100644 --- a/tests/test_common/test_extract_timestamp.py +++ b/tests/test_common/test_extract_timestamp.py @@ -1,56 +1,52 @@ from detectmatelibrary.common.detector import _extract_timestamp import detectmatelibrary.schemas as schemas + class TestCoreDetector: def test_various_time_formats(self) -> None: - """Test that _extract_timestamp handles a wide range of realistic - time formats.""" - dummy_schema = { - "parserType": "a", - "EventID": 0, - "template": "asd", - "variables": [""], - "logID": "0", - "parsedLogID": "22", - "parserID": "test", - "log": "This is a parsed log.", - "logFormatVariables": {"Time": "12121"}, - } - # Compute expected value for timezone-naive formats at runtime - EXPECTED_UTC = 1772633880 - test_cases = [ - # Unix timestamps - ("0", 0), - ("1772812294", 1772812294), - ("1772812294.5", 1772812294), - # Apache/nginx format - ("04/Mar/2026:14:18:00 +0000", EXPECTED_UTC), - ("04/Mar/2026:14:18:00", EXPECTED_UTC), - # ISO 8601 formats - ("2026-03-04T14:18:00+00:00", EXPECTED_UTC), - ("2026-03-04T14:18:00Z", EXPECTED_UTC), - ("2026-03-04T14:18:00.000Z", EXPECTED_UTC), - ("2026-03-04T14:18:00", EXPECTED_UTC), - # Space-separated - ("2026-03-04 14:18:00", EXPECTED_UTC), - ("2026-03-04 14:18:00.000", EXPECTED_UTC), - ("2026/03/04 14:18:00", EXPECTED_UTC), - # Timezone variations - ("2026-03-04T15:18:00+01:00", EXPECTED_UTC), - ("2026-03-04T13:18:00-01:00", EXPECTED_UTC), - - # High precision and different separators - ("2026-03-04T14:18:00.123Z", EXPECTED_UTC), - ("2026-03-04 14:18:00,000", EXPECTED_UTC), - # Common human-readable variations - ("Wednesday, March 4, 2026 14:18:00", EXPECTED_UTC), - ] - - - - for time_str, expected in test_cases: - schema = schemas.ParserSchema({**dummy_schema, "logFormatVariables": {"Time": time_str}}) - result = _extract_timestamp(schema) - assert result == [expected], ( - f"Format '{time_str}': expected [{expected}], got {result}" - ) + """Test that _extract_timestamp handles a wide range of realistic time + formats.""" + dummy_schema = { + "parserType": "a", + "EventID": 0, + "template": "asd", + "variables": [""], + "logID": "0", + "parsedLogID": "22", + "parserID": "test", + "log": "This is a parsed log.", + "logFormatVariables": {"Time": "12121"}, + } + # Compute expected value for timezone-naive formats at runtime + EXPECTED_UTC = 1772633880 + test_cases = [ + ("0", 0), + ("1772812294", 1772812294), + ("1772812294.5", 1772812294), + # Apache/nginx format + ("04/Mar/2026:14:18:00 +0000", EXPECTED_UTC), + ("04/Mar/2026:14:18:00", EXPECTED_UTC), + # ISO 8601 formats + ("2026-03-04T14:18:00+00:00", EXPECTED_UTC), + ("2026-03-04T14:18:00Z", EXPECTED_UTC), + ("2026-03-04T14:18:00.000Z", EXPECTED_UTC), + ("2026-03-04T14:18:00", EXPECTED_UTC), + # Space-separated + ("2026-03-04 14:18:00", EXPECTED_UTC), + ("2026-03-04 14:18:00.000", EXPECTED_UTC), + ("2026/03/04 14:18:00", EXPECTED_UTC), + # Timezone variations + ("2026-03-04T15:18:00+01:00", EXPECTED_UTC), + ("2026-03-04T13:18:00-01:00", EXPECTED_UTC), + # High precision and different separators + ("2026-03-04T14:18:00.123Z", EXPECTED_UTC), + ("2026-03-04 14:18:00,000", EXPECTED_UTC), + # Common human-readable variations + ("Wednesday, March 4, 2026 14:18:00", EXPECTED_UTC), + ] + for time_str, expected in test_cases: + schema = schemas.ParserSchema({**dummy_schema, "logFormatVariables": {"Time": time_str}}) + result = _extract_timestamp(schema) + assert result == [expected], ( + f"Format '{time_str}': expected [{expected}], got {result}" + )