diff --git a/src/detectmatelibrary/common/detector.py b/src/detectmatelibrary/common/detector.py index 18b67b9..1d33c94 100644 --- a/src/detectmatelibrary/common/detector.py +++ b/src/detectmatelibrary/common/detector.py @@ -9,18 +9,18 @@ from typing_extensions import override from typing import Dict, List, Optional, Any +from detectmatelibrary.utils.time_format_handler import TimeFormatHandler + + +_time_handler = TimeFormatHandler() + def _extract_timestamp( input_: List[ParserSchema] | ParserSchema ) -> List[int]: - def format_time(time: str) -> int: - time_ = time.split(":")[0] - return int(float(time_)) - if not isinstance(input_, list): input_ = [input_] - - return [format_time(i["logFormatVariables"]["Time"]) for i in input_] + return [int(_time_handler.parse_timestamp(i["logFormatVariables"]["Time"])) for i in input_] def _extract_logIDs( diff --git a/src/detectmatelibrary/utils/time_format_handler.py b/src/detectmatelibrary/utils/time_format_handler.py index 5f2fcd0..66ed748 100644 --- a/src/detectmatelibrary/utils/time_format_handler.py +++ b/src/detectmatelibrary/utils/time_format_handler.py @@ -21,8 +21,10 @@ class TimeFormatHandler: "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%d/%b/%Y:%H:%M:%S %z", # Apache style: 10/Oct/2000:13:55:36 -0700 + "%d/%b/%Y:%H:%M:%S", # Apache style without timezone "%b %d %H:%M:%S", # syslog without year "%H:%M:%S", + "%A, %B %d, %Y %H:%M:%S", # "Wednesday, March 4, 2026 14:18:00" ] def __init__(self) -> None: diff --git a/tests/test_common/test_core_detector.py b/tests/test_common/test_core_detector.py index d2226ed..226b45f 100644 --- a/tests/test_common/test_core_detector.py +++ b/tests/test_common/test_core_detector.py @@ -79,7 +79,7 @@ def detect(self, input_, output_): "parsedLogID": "22", "parserID": "test", "log": "This is a parsed log.", - "logFormatVariables": {"Time": "12121.12:20"}, + "logFormatVariables": {"Time": "12121.12"}, } diff --git a/tests/test_common/test_extract_timestamp.py 
# tests/test_common/test_extract_timestamp.py
"""Tests for ``_extract_timestamp`` across realistic log time formats."""
from datetime import datetime, timezone

from detectmatelibrary.common.detector import _extract_timestamp
import detectmatelibrary.schemas as schemas


class TestCoreDetector:
    def test_various_time_formats(self) -> None:
        """Check that ``_extract_timestamp`` handles many realistic formats.

        Every case wraps one time string in a ``ParserSchema`` and expects a
        single-element list holding the epoch seconds as an ``int``.
        Fractional seconds are expected to be truncated, and timestamps
        without a timezone are expected to be interpreted as UTC.
        """
        # Fields shared by every case; "logFormatVariables" is supplied per
        # case below, so it is not part of the base record.
        base_schema = {
            "parserType": "a",
            "EventID": 0,
            "template": "asd",
            "variables": [""],
            "logID": "0",
            "parsedLogID": "22",
            "parserID": "test",
            "log": "This is a parsed log.",
        }

        # 2026-03-04 14:18:00 UTC as epoch seconds (1772633880). Derived from
        # an explicit UTC datetime so the constant is self-explanatory and
        # does not depend on the machine's local timezone.
        expected_utc = int(
            datetime(2026, 3, 4, 14, 18, 0, tzinfo=timezone.utc).timestamp()
        )

        test_cases = [
            # Plain epoch values (integer and fractional)
            ("0", 0),
            ("1772812294", 1772812294),
            ("1772812294.5", 1772812294),
            # Apache/nginx format
            ("04/Mar/2026:14:18:00 +0000", expected_utc),
            ("04/Mar/2026:14:18:00", expected_utc),
            # ISO 8601 formats
            ("2026-03-04T14:18:00+00:00", expected_utc),
            ("2026-03-04T14:18:00Z", expected_utc),
            ("2026-03-04T14:18:00.000Z", expected_utc),
            ("2026-03-04T14:18:00", expected_utc),
            # Space-separated
            ("2026-03-04 14:18:00", expected_utc),
            ("2026-03-04 14:18:00.000", expected_utc),
            ("2026/03/04 14:18:00", expected_utc),
            # Timezone variations
            ("2026-03-04T15:18:00+01:00", expected_utc),
            ("2026-03-04T13:18:00-01:00", expected_utc),
            # High precision and different separators
            ("2026-03-04T14:18:00.123Z", expected_utc),
            ("2026-03-04 14:18:00,000", expected_utc),
            # Common human-readable variations
            ("Wednesday, March 4, 2026 14:18:00", expected_utc),
        ]

        # Collect every mismatch instead of stopping at the first one, so a
        # single run shows all formats that are handled incorrectly.
        failures = []
        for time_str, expected in test_cases:
            schema = schemas.ParserSchema(
                {**base_schema, "logFormatVariables": {"Time": time_str}}
            )
            result = _extract_timestamp(schema)
            if result != [expected]:
                failures.append(
                    f"Format '{time_str}': expected [{expected}], got {result}"
                )

        assert not failures, "\n".join(failures)