From 6fd74d848a747e8cdff741e3f7ed6d1d4138583a Mon Sep 17 00:00:00 2001 From: Anthony Giuliano Date: Tue, 3 Mar 2026 12:13:35 -0800 Subject: [PATCH 1/2] Chunked regex with overlapping Matcher for boundary-safe data masking (#639) Co-Authored-By: Claude Opus 4.6 --- .../classes/LogEntryEventBuilder.cls | 105 ++++- .../classes/LogEntryEventBuilder_Tests.cls | 434 ++++++++++++++++++ 2 files changed, 537 insertions(+), 2 deletions(-) diff --git a/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls b/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls index d31625ac1..b4af062a3 100644 --- a/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls +++ b/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls @@ -20,6 +20,11 @@ global with sharing class LogEntryEventBuilder { private static final String HTTP_HEADER_FORMAT = '{0}: {1}'; private static final String NEW_LINE_DELIMITER = '\n'; + @TestVisible + private static final Integer DATA_MASK_REGEX_CHUNK_SIZE = 4000; + @TestVisible + private static final Integer DATA_MASK_REGEX_OVERLAP_SIZE = 20; + private static String cachedOrganizationEnvironmentType; @TestVisible @@ -1150,12 +1155,108 @@ global with sharing class LogEntryEventBuilder { for (LogEntryDataMaskRule__mdt dataMaskRule : CACHED_DATA_MASK_RULES.values()) { if (dataMaskRule.IsEnabled__c) { - dataInput = dataInput.replaceAll(dataMaskRule.SensitiveDataRegEx__c, dataMaskRule.ReplacementRegEx__c); + dataInput = applyDataMaskRuleToChunkedText(dataInput, dataMaskRule.SensitiveDataRegEx__c, dataMaskRule.ReplacementRegEx__c); } } return dataInput; } + private static String applyDataMaskRuleToChunkedText(String text, String sensitiveDataRegEx, String replacementRegEx) { + if (text == null || text.length() <= DATA_MASK_REGEX_CHUNK_SIZE) { + return text == null ? 
text : text.replaceAll(sensitiveDataRegEx, replacementRegEx); + } + + List lines = text.split('\n', -1); + if (lines.size() > 1) { + List processedLines = new List(); + for (String line : lines) { + if (line.length() <= DATA_MASK_REGEX_CHUNK_SIZE) { + processedLines.add(line.replaceAll(sensitiveDataRegEx, replacementRegEx)); + } else { + processedLines.add(applyDataMaskRuleToLongLine(line, sensitiveDataRegEx, replacementRegEx)); + } + } + return String.join(processedLines, '\n'); + } + + return applyDataMaskRuleToLongLine(text, sensitiveDataRegEx, replacementRegEx); + } + + private static String applyDataMaskRuleToLongLine(String line, String sensitiveDataRegEx, String replacementRegEx) { + System.Pattern regex = System.Pattern.compile(sensitiveDataRegEx); + Integer step = DATA_MASK_REGEX_CHUNK_SIZE - DATA_MASK_REGEX_OVERLAP_SIZE; + + // Pass 1: Find all matches using overlapping chunks, deduplicating by start position. + // When the same start position is found by multiple chunks, keep the longest match + // (the chunk with more trailing context produces the most accurate match). 
+ Map endByStart = new Map(); + Map> groupsByStart = new Map>(); + + for (Integer i = 0; i < line.length(); i += step) { + Integer chunkEnd = Math.min(i + DATA_MASK_REGEX_CHUNK_SIZE, line.length()); + System.Matcher m = regex.matcher(line.substring(i, chunkEnd)); + while (m.find()) { + Integer absStart = i + m.start(); + Integer absEnd = i + m.end(); + if (!endByStart.containsKey(absStart) || absEnd > endByStart.get(absStart)) { + endByStart.put(absStart, absEnd); + List groups = new List(); + for (Integer g = 0; g <= m.groupCount(); g++) { + groups.add(m.group(g)); + } + groupsByStart.put(absStart, groups); + } + } + } + + if (endByStart.isEmpty()) { + return line; + } + + // Sort match positions to guarantee left-to-right processing + List sortedStarts = new List(endByStart.keySet()); + sortedStarts.sort(); + + // Pass 2: Build result — copy gaps, expand replacements + String result = ''; + Integer pos = 0; + for (Integer start : sortedStarts) { + if (start < pos) { + continue; // Skip match fully consumed by a previous replacement + } + result += line.substring(pos, start); + result += expandReplacement(replacementRegEx, groupsByStart.get(start)); + pos = endByStart.get(start); + } + result += line.substring(pos); + return result; + } + + private static String expandReplacement(String replacement, List groups) { + String result = ''; + for (Integer i = 0; i < replacement.length(); i++) { + if (replacement.substring(i, i + 1) == '$' && i + 1 < replacement.length()) { + // Parse the group number following '$' + Integer j = i + 1; + while (j < replacement.length() && replacement.substring(j, j + 1) >= '0' && replacement.substring(j, j + 1) <= '9') { + j++; + } + if (j > i + 1) { + Integer groupNum = Integer.valueOf(replacement.substring(i + 1, j)); + if (groupNum >= 1 && groupNum < groups.size() && groups[groupNum] != null) { + result += groups[groupNum]; + } else { + result += replacement.substring(i, j); + } + i = j - 1; // -1 because the for loop increments + 
continue; + } + } + result += replacement.substring(i, i + 1); + } + return result; + } + private static String getJson(SObject record, Boolean isRecordFieldStrippingEnabled) { List records = new List{ record }; records = isRecordFieldStrippingEnabled == false ? records : stripInaccessible(records); @@ -1404,7 +1505,7 @@ global with sharing class LogEntryEventBuilder { String maskedTextValue = textValueToMask; for (LogEntryDataMaskRule__mdt dataMaskRule : CACHED_DATA_MASK_RULES.values()) { if (dataMaskRule.IsEnabled__c) { - maskedTextValue = maskedTextValue.replaceAll(dataMaskRule.SensitiveDataRegEx__c, dataMaskRule.ReplacementRegEx__c); + maskedTextValue = applyDataMaskRuleToChunkedText(maskedTextValue, dataMaskRule.SensitiveDataRegEx__c, dataMaskRule.ReplacementRegEx__c); } } diff --git a/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls b/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls index fd416cd53..bfc329cbb 100644 --- a/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls +++ b/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls @@ -2519,6 +2519,413 @@ private class LogEntryEventBuilder_Tests { ); } + @IsTest + static void it_should_apply_data_mask_rule_to_large_multiline_message() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Build a large multi-line string that exceeds the chunk threshold, with SSNs at start, middle, and end + String paddingLine = 'A'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE); + String message = 'SSN at start: 400 11 9999\n' + paddingLine + '\nSSN in middle: 123 45 6789\n' + paddingLine + '\nSSN at end: 987 65 4321'; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, 
System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN at start should be masked'); + System.Assert.isTrue(result.contains('XXX-XX-6789'), 'SSN in middle should be masked'); + System.Assert.isTrue(result.contains('XXX-XX-4321'), 'SSN at end should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN at start should not appear'); + System.Assert.isFalse(result.contains('123 45 6789'), 'Original SSN in middle should not appear'); + System.Assert.isFalse(result.contains('987 65 4321'), 'Original SSN at end should not appear'); + } + + @IsTest + static void it_should_apply_data_mask_rule_to_large_single_line_message() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Build a large single-line string (no newlines) that exceeds the chunk threshold + String padding = 'A'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE); + String message = 'SSN here: 400 11 9999 ' + padding + ' another SSN: 123 45 6789'; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'First SSN should be masked'); + System.Assert.isTrue(result.contains('XXX-XX-6789'), 'Second SSN should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original first SSN should not appear'); + System.Assert.isFalse(result.contains('123 45 6789'), 'Original second SSN should not appear'); + } + + 
@IsTest + static void it_should_apply_data_mask_rule_when_ssn_is_near_chunk_boundary() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Place a space-separated SSN so it straddles a chunk boundary. + // The overlapping-chunk Matcher approach ensures the full pattern is found + // even when it spans two adjacent chunks. + Integer ssnPosition = LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - 5; + String message = 'B'.repeat(ssnPosition) + ' 400 11 9999 ' + 'C'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN near chunk boundary should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN near chunk boundary should not appear'); + } + + @IsTest + static void it_should_apply_data_mask_rule_to_digit_dense_string_without_limit_exception() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + // Register all 4 bundled data mask rules — in production all enabled rules run sequentially + LogEntryEventBuilder.setMockDataMaskRule(getSocialSecurityNumberDataMaskRule()); + LogEntryEventBuilder.setMockDataMaskRule(getVisaCreditCardNumberDataMaskRule()); + LogEntryEventBuilder.setMockDataMaskRule(getMastercardCreditCardNumberDataMaskRule()); + LogEntryEventBuilder.setMockDataMaskRule(getAmericanExpressCreditCardNumberDataMaskRule()); + // Simulate serialized SObject JSON — the scenario from issue #639 where a 35K+ string + // of serialized Asset records triggered System.LimitException: Regex too complicated. 
+ // Each fake record mimics a real Asset with multiple fields, Salesforce IDs, dates, and numbers. + String fakeAssetRecord = '{"attributes":{"type":"Asset","url":"/services/data/v64.0/sobjects/Asset/02iDm00000ABCdEFG"},' + + '"Id":"02iDm00000ABCdEFG","AccountId":"001Dm00000XYZ1234","ContactId":"003Dm00000LMN5678",' + + '"Name":"Asset-2024-09-001","Status":"Installed","Quantity":250.00,"Price":14999.95,' + + '"PurchaseDate":"2024-09-15","InstallDate":"2024-09-20","UsageEndDate":"2025-09-15",' + + '"SerialNumber":"SN-8842719305","Description":"Enterprise license unit 42 of 250"}'; + Integer fieldMaxLength = LogEntryEvent__e.Message__c.getDescribe().getLength(); + Integer repetitions = fieldMaxLength / fakeAssetRecord.length(); + String message = fakeAssetRecord.repeat(repetitions) + ' 400 11 9999'; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'Real SSN in digit-dense string should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN in digit-dense string should not appear'); + } + + @IsTest + static void it_should_not_double_mask_when_match_is_inside_overlap_zone() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Place compact SSN "400119999" (9 chars) starting at position 3985 + // — fully inside overlap zone [3980, 4000), found by both chunk 0 and chunk 1. + // The deduplication by absolute match start position must prevent it from being masked twice. 
+ Integer ssnPosition = LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - LogEntryEventBuilder.DATA_MASK_REGEX_OVERLAP_SIZE + 5; + String message = 'B'.repeat(ssnPosition) + ' 400119999 ' + 'C'.repeat(100); + Integer originalLength = message.length(); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN inside overlap zone should be masked'); + System.Assert.isFalse(result.contains('400119999'), 'Original SSN inside overlap zone should not appear'); + // If the SSN were double-masked, the result length would differ from a single replacement + Integer expectedLengthDelta = 'XXX-XX-9999'.length() - '400119999'.length(); + System.Assert.areEqual(originalLength + expectedLengthDelta, result.length(), 'Result length should reflect exactly one replacement (no double-masking)'); + } + + @IsTest + static void it_should_mask_multiple_ssns_near_same_chunk_boundary() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Place first SSN ending just before the overlap zone, second SSN starting inside the overlap zone. + // Both are near the chunk boundary — verifies multiple matches in the boundary region are all masked. 
+ Integer firstSsnPosition = LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - LogEntryEventBuilder.DATA_MASK_REGEX_OVERLAP_SIZE - 12; + String message = 'B'.repeat(firstSsnPosition) + ' 400119999 ' + ' 123456789 ' + 'C'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'First SSN near chunk boundary should be masked'); + System.Assert.isTrue(result.contains('XXX-XX-6789'), 'Second SSN near chunk boundary should be masked'); + System.Assert.isFalse(result.contains('400119999'), 'Original first SSN should not appear'); + System.Assert.isFalse(result.contains('123456789'), 'Original second SSN should not appear'); + } + + @IsTest + static void it_should_mask_ssn_at_end_of_long_string() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Build string just over chunk size, SSN at the very end with no trailing chars. + // The final chunk will be shorter than DATA_MASK_REGEX_CHUNK_SIZE — verifies the tail chunk processes correctly. 
+ String message = 'B'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE + 100) + ' 400119999'; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN at end of long string should be masked'); + System.Assert.isFalse(result.contains('400119999'), 'Original SSN at end should not appear'); + System.Assert.isTrue(result.endsWith('XXX-XX-9999'), 'Masked SSN should be at the very end of the result'); + } + + @IsTest + static void it_should_mask_ssn_starting_exactly_at_chunk_step_position() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Place SSN starting exactly at position step (3980) — the exact start of chunk 1 + // and the exact start of the overlap zone for chunk 0. + // Tests the boundary arithmetic at the most sensitive position. 
+ Integer step = LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - LogEntryEventBuilder.DATA_MASK_REGEX_OVERLAP_SIZE; + String message = 'B'.repeat(step) + ' 400119999 ' + 'C'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN at exact chunk step position should be masked'); + System.Assert.isFalse(result.contains('400119999'), 'Original SSN at chunk step position should not appear'); + } + + @IsTest + static void it_should_mask_credit_card_straddling_chunk_boundary() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getVisaCreditCardNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Place a Visa CC " 4111-1111-1111-1111" (20 chars with leading space) starting at position 3981, + // so it ends at 4001 — beyond chunk 0's boundary. Chunk 0 can't match it (only 19 chars visible). + // Chunk 1 finds it correctly. 
+ Integer ccPosition = LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - LogEntryEventBuilder.DATA_MASK_REGEX_OVERLAP_SIZE + 1; + String message = 'B'.repeat(ccPosition) + ' 4111-1111-1111-1111 ' + 'C'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('****-****-****-1111'), 'Visa CC straddling chunk boundary should be masked'); + System.Assert.isFalse(result.contains('4111-1111-1111-1111'), 'Original Visa CC should not appear'); + } + + @IsTest + static void it_should_mask_ssn_in_string_of_exactly_chunk_size() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // 4000-char string with SSN — verifies the <= guard routes to replaceAll directly (no chunking) + String ssn = ' 400 11 9999 '; + String message = 'A'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - ssn.length()) + ssn; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN in exactly chunk-size string should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN should not appear'); + } + + @IsTest + static void it_should_mask_ssn_in_string_of_chunk_size_plus_one() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); 
+ rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // 4001-char string with SSN — verifies the chunking path activates at the minimum qualifying length + String ssn = ' 400 11 9999 '; + String message = 'A'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE + 1 - ssn.length()) + ssn; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN in chunk-size+1 string should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN should not appear'); + } + + @IsTest + static void it_should_mask_ssn_in_multiline_where_one_line_is_exactly_chunk_size() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Multi-line input where one line is exactly 4000 chars with an SSN. + // Verifies the per-line <= check works at the boundary. 
+ String ssn = ' 400 11 9999 '; + String exactLine = 'A'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE - ssn.length()) + ssn; + String message = 'First line\n' + exactLine + '\nLast line'; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN in exactly chunk-size line should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN should not appear'); + } + + @IsTest + static void it_should_mask_ssn_at_start_of_long_string() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = getSocialSecurityNumberDataMaskRule(); + rule.IsEnabled__c = true; + LogEntryEventBuilder.setMockDataMaskRule(rule); + // SSN at the very start of a string that exceeds chunk size. + // Tests the first-chunk handling when pos=0 and the first match starts at a small offset. 
+ String message = '400 11 9999 ' + 'B'.repeat(LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'SSN at start of long string should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN at start should not appear'); + System.Assert.isTrue(result.startsWith('XXX-XX-9999'), 'Masked SSN should be at the very start of the result'); + } + + @IsTest + static void it_should_keep_longer_match_when_same_start_found_by_two_chunks() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = new LogEntryDataMaskRule__mdt( + DeveloperName = 'SyntheticLongerMatch', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(X+)', + ReplacementRegEx__c = '[MASKED]' + ); + LogEntryEventBuilder.setMockDataMaskRule(rule); + // 30 X's starting at position 3985 straddle the chunk boundary (chunk size = 4000). + // Chunk 0 [0,4000) sees 15 X's → match (3985,4000). + // Chunk 1 [3980,...) sees all 30 → match (3985,4015). + // The longer-match branch (absEnd > endByStart.get(absStart)) keeps the 30-char match. 
+ String message = 'A'.repeat(3985) + 'X'.repeat(30) + 'B'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isFalse(result.contains('X'), 'All X chars should be masked — longer match must win'); + System.Assert.areEqual(3985 + '[MASKED]'.length() + 100, result.length(), 'Result length should reflect single replacement'); + } + + @IsTest + static void it_should_apply_matches_from_multiple_chunks_in_left_to_right_order() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = new LogEntryDataMaskRule__mdt( + DeveloperName = 'SyntheticSortOrder', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(X{5})', + ReplacementRegEx__c = '[M]' + ); + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Match A at position 100 (chunk 0 only), match B at position 4080 (chunk 1 only). + // Map.keySet() has no guaranteed order in Apex — the sort ensures left-to-right processing. + String message = 'A'.repeat(100) + 'XXXXX' + 'A'.repeat(3975) + 'XXXXX' + 'B'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isFalse(result.contains('XXXXX'), 'Both XXXXX runs should be replaced'); + // Original: 100 + 5 + 3975 + 5 + 100 = 4185. Two replacements: 5→3 each, saving 4 total. 
+ System.Assert.areEqual(4181, result.length(), 'Result length should reflect two replacements'); + System.Assert.isTrue(result.startsWith('A'.repeat(100) + '[M]'), 'First match should be at position 100'); + System.Assert.isTrue(result.endsWith('[M]' + 'B'.repeat(100)), 'Second match should be near the end'); + } + + @IsTest + static void it_should_skip_overlapping_match_consumed_by_previous_replacement() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = new LogEntryDataMaskRule__mdt( + DeveloperName = 'SyntheticOverlapSkip', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(X{10,})', + ReplacementRegEx__c = '[MASKED]' + ); + LogEntryEventBuilder.setMockDataMaskRule(rule); + // 30 X's starting at position 3970. Chunk 0 matches (3970,4000)=30 X's. + // Chunk 1 matches (3980,4000)=20 X's — a subset. After processing the first match + // (pos=4000), the second match's start (3980) < pos, triggering the `continue` skip. 
+ String message = 'A'.repeat(3970) + 'X'.repeat(30) + 'B'.repeat(200); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isFalse(result.contains('X'), 'All X chars should be masked'); + Integer maskedCount = result.split('\\[MASKED\\]', -1).size() - 1; + System.Assert.areEqual(1, maskedCount, 'Exactly one [MASKED] token should appear'); + System.Assert.areEqual(3970 + '[MASKED]'.length() + 200, result.length(), 'Result length should reflect single replacement'); + } + + @IsTest + static void it_should_not_reinterpret_dollar_signs_in_captured_group_values() { + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = new LogEntryDataMaskRule__mdt( + DeveloperName = 'SyntheticDollarSign', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(\\w+:\\$\\d+)(DONE)( ?)', + ReplacementRegEx__c = '[$1/$2]' + ); + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Group 1 = 'PRICE:$3', group 2 = 'DONE', group 3 = ' '. + // The old iterative String.replace() would process $3 inside group 1's value, + // replacing it with group 3 (space) → '[PRICE: /DONE]'. The new single-pass + // expandReplacement processes $N only in the template, preserving the literal $3. 
+ String fakeAssetRecord = '{"attributes":{"type":"Asset","url":"/services/data/v64.0/sobjects/Asset/02iDm00000ABCdEFG"},' + + '"Id":"02iDm00000ABCdEFG","AccountId":"001Dm00000XYZ1234","ContactId":"003Dm00000LMN5678",' + + '"Name":"Asset-2024-09-001","Status":"Installed","Quantity":250.00,"Price":14999.95,' + + '"PurchaseDate":"2024-09-15","InstallDate":"2024-09-20","UsageEndDate":"2025-09-15",' + + '"SerialNumber":"SN-8842719305","Description":"Enterprise license unit 42 of 250"}'; + Integer repetitions = (LogEntryEventBuilder.DATA_MASK_REGEX_CHUNK_SIZE / fakeAssetRecord.length()) + 1; + String message = fakeAssetRecord.repeat(repetitions) + ' PRICE:$3DONE '; + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('[PRICE:$3/DONE]'), 'Literal $3 in captured value should be preserved'); + System.Assert.isFalse(result.contains('[PRICE: /DONE]'), '$3 in captured value must not be reinterpreted as group 3'); + } + static String getMessage() { return 'Hello, world'; } @@ -2543,6 +2950,33 @@ private class LogEntryEventBuilder_Tests { ); } + static LogEntryDataMaskRule__mdt getVisaCreditCardNumberDataMaskRule() { + return new LogEntryDataMaskRule__mdt( + DeveloperName = 'VisaCreditCardNumber', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(^|[^0-9])(4\\d{3})([- ]?)\\d{4}\\3\\d{4}\\3(\\d{4})(?!\\d)', + ReplacementRegEx__c = '$1****-****-****-$4' + ); + } + + static LogEntryDataMaskRule__mdt getMastercardCreditCardNumberDataMaskRule() { + return new LogEntryDataMaskRule__mdt( + DeveloperName = 'MastercardCreditCardNumber', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(^|[^0-9])(5[1-5]\\d{2}|222[1-9]|22[3-9]\\d|2[3-6]\\d{2}|27[01]\\d|2720)([- ]?)\\d{4}\\3\\d{4}\\3(\\d{4})(?!\\d)', + ReplacementRegEx__c = 
'$1****-****-****-$4' + ); + } + + static LogEntryDataMaskRule__mdt getAmericanExpressCreditCardNumberDataMaskRule() { + return new LogEntryDataMaskRule__mdt( + DeveloperName = 'AmericanExpressCreditCardNumber', + IsEnabled__c = true, + SensitiveDataRegEx__c = '(^|[^0-9A-Za-z])(3[47]\\d{2})([- ]?)\\d{6}\\3(\\d{5})(?=[^0-9A-Za-z]|$)', + ReplacementRegEx__c = '$1****-******-$4' + ); + } + static LoggerSettings__c getUserSettings() { LoggerSettings__c userSettings = (LoggerSettings__c) Schema.LoggerSettings__c.SObjectType.newSObject(null, true); userSettings.SetupOwnerId = System.UserInfo.getUserId(); From f314cd701e3f25557e9503779182f766f12f466e Mon Sep 17 00:00:00 2001 From: Anthony Giuliano Date: Sun, 17 May 2026 16:16:37 -0700 Subject: [PATCH 2/2] refactor: address PR #959 review feedback for chunked data masking - Document DATA_MASK_REGEX_CHUNK_SIZE / OVERLAP_SIZE constants with the measured regex-limit findings (un-chunked cliff ~110K-220K chars depending on content shape; chunk size is a safety knob, not a performance lever; the LimitException is uncatchable). - Split the null / length guard in applyDataMaskRuleToChunkedText into two distinct if blocks. - Add explanatory docblocks + worked examples to applyDataMaskRuleToLongLine and inline comments for the overlapping-chunk match-finding logic. - Rewrite expandReplacement using a precompiled Matcher on $(\d+) instead of a hand-rolled index scanner, preserving the property that $N inside captured group values is not re-expanded. - Make overlap and chunk size overridable at runtime via optional LoggerParameter__mdt records (DataMaskRegexOverlapSize, DataMaskRegexChunkSize) with tradeoff guidance in their descriptions. - Add a test for the overlap override; rename the serialized-record-JSON regression test to match what it actually exercises. 
Co-Authored-By: Claude Opus 4.7 --- ...rameter.DataMaskRegexChunkSize.md-meta.xml | 17 ++ ...meter.DataMaskRegexOverlapSize.md-meta.xml | 17 ++ .../classes/LogEntryEventBuilder.cls | 182 ++++++++++++++---- .../classes/LogEntryEventBuilder_Tests.cls | 38 +++- 4 files changed, 218 insertions(+), 36 deletions(-) create mode 100644 nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexChunkSize.md-meta.xml create mode 100644 nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexOverlapSize.md-meta.xml diff --git a/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexChunkSize.md-meta.xml b/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexChunkSize.md-meta.xml new file mode 100644 index 000000000..12b0f1739 --- /dev/null +++ b/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexChunkSize.md-meta.xml @@ -0,0 +1,17 @@ + + + + false + + Comments__c + + + + Description__c + When data masking is applied to a very long string, the value is processed in chunks of this many characters to avoid the Apex 'System.LimitException: Regex too complicated' error (which Salesforce raises when a single regex evaluation is too expensive). Tradeoffs: a LARGER chunk size means fewer chunks and slightly less overlap re-scanning, but each regex evaluation runs against more text and is therefore more likely to hit the LimitException; a SMALLER chunk size is safer against the limit but increases chunk count and overlap re-scan overhead. The chunk size must also be larger than DataMaskRegexOverlapSize plus the longest value any enabled rule can match, or boundary values can be missed. The default (4000) is a deliberately conservative value, roughly 27x below even the worst-case measured failure point. 
With all four shipped rules applied single-pass (no chunking), the limit was hit between ~110K characters (realistic log-shaped text) and ~220K characters (dense structured input) — diluting matches with ordinary text makes it fail sooner, not later, because the limit is a regex-engine step budget rather than a character count. Note: that LimitException is uncatchable, so without chunking a single oversized log message fails the whole logging call. Benchmarking found chunk size to be a safety knob rather than a performance lever: processing CPU was effectively flat across chunk sizes from 1K to 64K, so raising this value yields no measurable speedup while moving closer to the failure point. Lower this if a custom rule still throws 'Regex too complicated' at the default; only raise it after testing your specific rule regexes against representative data. When no record is configured, Nebula Logger falls back to its built-in default of 4000. + + + Value__c + 4000 + + diff --git a/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexOverlapSize.md-meta.xml b/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexOverlapSize.md-meta.xml new file mode 100644 index 000000000..4a1a86c59 --- /dev/null +++ b/nebula-logger/core/main/configuration/customMetadata/LoggerParameter.DataMaskRegexOverlapSize.md-meta.xml @@ -0,0 +1,17 @@ + + + + false + + Comments__c + + + + Description__c + When data masking is applied to a very long string, the value is processed in overlapping chunks to avoid the Apex 'System.LimitException: Regex too complicated' error. This integer controls how many characters adjacent chunks overlap, which guarantees that a sensitive value sitting on a chunk boundary is still fully contained within at least one chunk. This value MUST be greater than or equal to the longest value that any enabled LogEntryDataMaskRule__mdt regex can match. 
The default (20) covers the built-in rules (SSN ~11 chars, credit card ~19 chars with separators); increase it if you add custom rules that match longer values. When no record is configured, Nebula Logger falls back to its built-in default of 20. + + + Value__c + 20 + + diff --git a/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls b/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls index b4af062a3..39097ac62 100644 --- a/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls +++ b/nebula-logger/core/main/logger-engine/classes/LogEntryEventBuilder.cls @@ -20,10 +20,58 @@ global with sharing class LogEntryEventBuilder { private static final String HTTP_HEADER_FORMAT = '{0}: {1}'; private static final String NEW_LINE_DELIMITER = '\n'; + // Data-masking regex is applied in overlapping chunks to avoid Apex's + // `System.LimitException: Regex too complicated`, which Salesforce throws when a single + // regex evaluation exceeds an internal step budget. See issue #639. Salesforce does not + // document the threshold; it depends on the input length and the specific rule's + // pattern. Each enabled rule is an independent `replaceAll` with its own step budget, + // so the single most expensive rule sets the cliff — running more rules does not lower + // it (rule count only adds cumulative CPU, a separate limit). Of the four shipped rules + // the Mastercard pattern (long alternation + `\3` backreference) is by far the worst; + // measured single-pass it alone throws at the same size as all four together. + // Critically, this LimitException is UNCATCHABLE — a try/catch around `replaceAll` + // does not trap it — so without chunking a single large log message makes the entire + // logging call fail unrecoverably. + // + // DATA_MASK_REGEX_CHUNK_SIZE: the max number of characters fed to a single + // `replaceAll`/`Matcher` evaluation. 
4000 is a deliberately conservative fixed value, + // trading a higher chunk count for a wide safety margin below the measured failure point. + // + // Measured (Nebula Logger v4.17.3, current Apex regex engine, all four shipped rules + // applied single-pass to the whole blob — i.e. the pre-chunking `applyDataMaskRules` + // path; reproduced identically on a scratch org and a sandbox). The un-chunked cliff + // depends on content shape, not just length, because the limit is a regex-engine STEP + // budget (CPU at the cliff was a steady ~25-45 ms across every shape tested): + // - dense, structured near-miss tokens (best case): throws at ~220K chars + // - tokens diluted with inert text (realistic log shape, worst case): throws at ~110K + // i.e. diluting matches with ordinary text makes it fail SOONER (more engine steps per + // character), not later. The original #639 report at ~35K was an older, lower-threshold + // engine. The default chunk size of 4000 is ~27x below even the worst-case cliff. + // + // Chunk size is a SAFETY knob, not a performance lever: with chunking enabled, CPU was + // flat (<6 ms variance) across chunk sizes 1K-64K and roughly linear in input length + // (200K chars masked in single-digit ms). Raising the chunk size yields no measurable + // speedup and only moves toward the cliff; lowering it adds margin at negligible cost. + // Lower it (via the override below) if custom rules push the failure point down. + // Overridable at runtime via the optional `LoggerParameter__mdt.DataMaskRegexChunkSize` + // record (no deploy required); the constant below is only the default. + // + // DATA_MASK_REGEX_OVERLAP_SIZE: adjacent chunks overlap by this many characters so a + // sensitive value that straddles a chunk boundary is still fully contained within at + // least one chunk. 
This value MUST be >= the longest sensitive value any data-mask rule + // can match; 20 covers the built-in rules (SSN ~11 chars, credit card ~19 chars with + // separators). It cannot be derived from the rule regexes (a pattern's max match length + // is not generally computable), so for orgs whose custom rules match longer values it is + // overridable at runtime via the optional `LoggerParameter__mdt.DataMaskRegexOverlapSize` + // record (no deploy required); the constant below is only the default. @TestVisible private static final Integer DATA_MASK_REGEX_CHUNK_SIZE = 4000; @TestVisible private static final Integer DATA_MASK_REGEX_OVERLAP_SIZE = 20; + // Matches a `$N` capture-group reference (N = one or more digits) inside a replacement + // template. Used by expandReplacement(); safe to regex directly since replacement + // templates are short config values, never the long log payload. + private static final System.Pattern DATA_MASK_REPLACEMENT_TOKEN_PATTERN = System.Pattern.compile('\\$([0-9]+)'); private static String cachedOrganizationEnvironmentType; @@ -1161,43 +1209,96 @@ global with sharing class LogEntryEventBuilder { return dataInput; } + // Chunk size defaults to DATA_MASK_REGEX_CHUNK_SIZE but can be tuned without a deploy via + // the optional LoggerParameter__mdt.DataMaskRegexChunkSize record. Lower it if a custom + // rule's regex still throws `Regex too complicated` at the default; raise it (carefully) + // to reduce chunk count. Resolved once per masking pass and threaded through so a single + // consistent value is used for every boundary calculation in that pass. + private static Integer getDataMaskRegexChunkSize() { + return LoggerParameter.getInteger('DataMaskRegexChunkSize', DATA_MASK_REGEX_CHUNK_SIZE); + } + private static String applyDataMaskRuleToChunkedText(String text, String sensitiveDataRegEx, String replacementRegEx) { - if (text == null || text.length() <= DATA_MASK_REGEX_CHUNK_SIZE) { - return text == null ? 
text : text.replaceAll(sensitiveDataRegEx, replacementRegEx); + if (text == null) { + return text; + } + + Integer chunkSize = getDataMaskRegexChunkSize(); + + // Short enough to mask in a single pass — no chunking needed. + if (text.length() <= chunkSize) { + return text.replaceAll(sensitiveDataRegEx, replacementRegEx); } List lines = text.split('\n', -1); if (lines.size() > 1) { List processedLines = new List(); for (String line : lines) { - if (line.length() <= DATA_MASK_REGEX_CHUNK_SIZE) { + if (line.length() <= chunkSize) { processedLines.add(line.replaceAll(sensitiveDataRegEx, replacementRegEx)); } else { - processedLines.add(applyDataMaskRuleToLongLine(line, sensitiveDataRegEx, replacementRegEx)); + processedLines.add(applyDataMaskRuleToLongLine(line, sensitiveDataRegEx, replacementRegEx, chunkSize)); } } return String.join(processedLines, '\n'); } - return applyDataMaskRuleToLongLine(text, sensitiveDataRegEx, replacementRegEx); + return applyDataMaskRuleToLongLine(text, sensitiveDataRegEx, replacementRegEx, chunkSize); } - private static String applyDataMaskRuleToLongLine(String line, String sensitiveDataRegEx, String replacementRegEx) { + /** + * Applies a single data-mask rule to one line that is too long to regex in a single pass. + * + * `String.replaceAll` cannot be called on the whole line (it would throw the + * `Regex too complicated` LimitException), so the line is scanned in overlapping + * windows of `chunkSize` characters (the caller-resolved value of + * DATA_MASK_REGEX_CHUNK_SIZE / its LoggerParameter override), advancing by `step` + * (= chunk size - overlap) each iteration. The overlap guarantees that any sensitive + * value sitting on a chunk boundary is fully visible in at least one window. + * + * Because windows overlap, the same match can be discovered more than once, and + * `Matcher` indexes are window-relative — so matches are collected with absolute + * positions, deduplicated, sorted, then applied left-to-right in a second pass. 
+ * + * Worked example (chunk size 15, overlap 11 = SSN length, step 4) masking the SSN `123-45-6789` + * with replacement `***`: + * + * line = "name 123-45-6789 end" (length 20) + * chunk0 = line[0..15) = "name 123-45-678" -> SSN truncated at the right edge, no match + * chunk1 = line[4..19) = " 123-45-6789 en" -> matches at window index 1 => absStart 5 + * chunk2 = line[8..20) = "-45-6789 end" -> no match (likewise for the windows at 12 and 16) + * collected: { start 5 -> end 16 } + * result = line[0..5) + "***" + line[16..20) = "name *** end" + * + * Keeping the *longest* match for a given start (rather than the first one found) + * matters because an earlier window may truncate the value at its right edge, + * yielding a shorter, less accurate match than a later window with more context. + */ + private static String applyDataMaskRuleToLongLine(String line, String sensitiveDataRegEx, String replacementRegEx, Integer chunkSize) { System.Pattern regex = System.Pattern.compile(sensitiveDataRegEx); - Integer step = DATA_MASK_REGEX_CHUNK_SIZE - DATA_MASK_REGEX_OVERLAP_SIZE; - - // Pass 1: Find all matches using overlapping chunks, deduplicating by start position. - // When the same start position is found by multiple chunks, keep the longest match - // (the chunk with more trailing context produces the most accurate match). + // Overlap defaults to DATA_MASK_REGEX_OVERLAP_SIZE but can be raised without a deploy + // via the optional LoggerParameter__mdt.DataMaskRegexOverlapSize record, for orgs whose + // custom data-mask rules match values longer than the built-in rules. + Integer overlapSize = LoggerParameter.getInteger('DataMaskRegexOverlapSize', DATA_MASK_REGEX_OVERLAP_SIZE); + Integer step = chunkSize - overlapSize; + + // Pass 1: scan overlapping windows and record every match by its ABSOLUTE start + // position. endByStart maps an absolute start index -> absolute end index; groupsByStart + // keeps that match's capture groups (group 0 = full match) so the replacement template + // can be expanded later without re-running the regex. 
Map endByStart = new Map(); Map> groupsByStart = new Map>(); for (Integer i = 0; i < line.length(); i += step) { - Integer chunkEnd = Math.min(i + DATA_MASK_REGEX_CHUNK_SIZE, line.length()); + Integer chunkEnd = Math.min(i + chunkSize, line.length()); System.Matcher m = regex.matcher(line.substring(i, chunkEnd)); while (m.find()) { + // Matcher indexes are window-relative; add the window offset `i` to get + // absolute positions within the full line. Integer absStart = i + m.start(); Integer absEnd = i + m.end(); + // First time we see this start, OR a later (overlapping) window found a longer + // match starting at the same place — keep the longer one, it has more context. if (!endByStart.containsKey(absStart) || absEnd > endByStart.get(absStart)) { endByStart.put(absStart, absEnd); List groups = new List(); @@ -1213,16 +1314,21 @@ global with sharing class LogEntryEventBuilder { return line; } - // Sort match positions to guarantee left-to-right processing + // Apex Map.keySet() has no guaranteed iteration order, so explicitly sort the start + // positions to process matches strictly left-to-right in Pass 2. List sortedStarts = new List(endByStart.keySet()); sortedStarts.sort(); - // Pass 2: Build result — copy gaps, expand replacements + // Pass 2: walk the matches left-to-right, copying the untouched text between matches + // ("gaps") verbatim and substituting each match with its expanded replacement. + // `pos` tracks how far into the original line has been consumed. String result = ''; Integer pos = 0; for (Integer start : sortedStarts) { + // This match starts inside a region already replaced by an earlier (longer) + // match — skip it to avoid double-masking overlapping hits. 
if (start < pos) { - continue; // Skip match fully consumed by a previous replacement + continue; } result += line.substring(pos, start); result += expandReplacement(replacementRegEx, groupsByStart.get(start)); @@ -1232,28 +1338,38 @@ global with sharing class LogEntryEventBuilder { return result; } + /** + * Expands `$N` capture-group references in a replacement template, similar to + * Java's `Matcher.appendReplacement` (but without its `$0` whole-match expansion or + * backslash-escape handling). + * + * Only `$N` tokens that appear in the original `replacement` template are expanded; + * a `$N` sequence that happens to occur *inside a captured group's value* is copied + * through verbatim (this is why the result is built from the template, not produced by + * `String.replace` on the group values). An unresolvable token (`$0`, an out-of-range + * group, or a null group) is left as the literal text `$N`. + * + * Example: replacement `"[$1]-$2"`, groups [full, "A", "B"] -> `"[A]-B"`. + * Example: replacement `"$1"`, group 1 = `"price=$3"` -> `"price=$3"` (the `$3` in the + * captured value is NOT re-expanded). 
+ */ private static String expandReplacement(String replacement, List groups) { + System.Matcher tokenMatcher = DATA_MASK_REPLACEMENT_TOKEN_PATTERN.matcher(replacement); String result = ''; - for (Integer i = 0; i < replacement.length(); i++) { - if (replacement.substring(i, i + 1) == '$' && i + 1 < replacement.length()) { - // Parse the group number following '$' - Integer j = i + 1; - while (j < replacement.length() && replacement.substring(j, j + 1) >= '0' && replacement.substring(j, j + 1) <= '9') { - j++; - } - if (j > i + 1) { - Integer groupNum = Integer.valueOf(replacement.substring(i + 1, j)); - if (groupNum >= 1 && groupNum < groups.size() && groups[groupNum] != null) { - result += groups[groupNum]; - } else { - result += replacement.substring(i, j); - } - i = j - 1; // -1 because the for loop increments - continue; - } + Integer pos = 0; + while (tokenMatcher.find()) { + // Copy the literal template text preceding this `$N` token. + result += replacement.substring(pos, tokenMatcher.start()); + Integer groupNum = Integer.valueOf(tokenMatcher.group(1)); + if (groupNum >= 1 && groupNum < groups.size() && groups[groupNum] != null) { + result += groups[groupNum]; + } else { + // Not a resolvable group reference — preserve the token text literally. + result += tokenMatcher.group(); } - result += replacement.substring(i, i + 1); + pos = tokenMatcher.end(); } + // Copy any literal template text after the last token. 
+ result += replacement.substring(pos); return result; } diff --git a/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls b/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls index bfc329cbb..718c2bda0 100644 --- a/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls +++ b/nebula-logger/core/tests/logger-engine/classes/LogEntryEventBuilder_Tests.cls @@ -2588,7 +2588,7 @@ private class LogEntryEventBuilder_Tests { } @IsTest - static void it_should_apply_data_mask_rule_to_digit_dense_string_without_limit_exception() { + static void it_should_mask_serialized_record_json_without_limit_exception() { LoggerSettings__c userSettings = getUserSettings(); userSettings.IsDataMaskingEnabled__c = true; // Register all 4 bundled data mask rules — in production all enabled rules run sequentially @@ -2613,8 +2613,8 @@ private class LogEntryEventBuilder_Tests { String result = builder.getLogEntryEvent().Message__c; System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); - System.Assert.isTrue(result.contains('XXX-XX-9999'), 'Real SSN in digit-dense string should be masked'); - System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN in digit-dense string should not appear'); + System.Assert.isTrue(result.contains('XXX-XX-9999'), 'Real SSN in serialized record JSON should be masked'); + System.Assert.isFalse(result.contains('400 11 9999'), 'Original SSN in serialized record JSON should not appear'); } @IsTest @@ -2731,6 +2731,38 @@ private class LogEntryEventBuilder_Tests { System.Assert.isFalse(result.contains('4111-1111-1111-1111'), 'Original Visa CC should not appear'); } + @IsTest + static void it_should_mask_long_value_straddling_chunk_boundary_when_overlap_is_overridden() { + // A custom rule whose match is 40 chars long — longer than the default + // DATA_MASK_REGEX_OVERLAP_SIZE (20). 
Positioned so it straddles the chunk-0 boundary + // and begins before the default chunk-1 start (4000 - 20 = 3980): with the default + // overlap neither chunk contains it whole, so it could not be masked. Raising the + // overlap via the LoggerParameter__mdt override widens chunk 1 enough to capture it. + LoggerParameter.setMock(new LoggerParameter__mdt(DeveloperName = 'DataMaskRegexOverlapSize', Value__c = '60')); + LoggerSettings__c userSettings = getUserSettings(); + userSettings.IsDataMaskingEnabled__c = true; + LogEntryDataMaskRule__mdt rule = new LogEntryDataMaskRule__mdt( + DeveloperName = 'LongCustomSecret', + IsEnabled__c = true, + SensitiveDataRegEx__c = 'SECRET\\d{34}', + ReplacementRegEx__c = 'REDACTED' + ); + LogEntryEventBuilder.setMockDataMaskRule(rule); + // Secret occupies [3970, 4010): truncated in chunk 0 and in the default-overlap + // chunk 1 (starts 3980), but fully inside chunk 1 once overlap is raised to 60 + // (chunk 1 then starts at 3940). + String secret = 'SECRET' + '0'.repeat(34); + String message = 'B'.repeat(3970) + secret + 'C'.repeat(100); + + LogEntryEventBuilder builder = new LogEntryEventBuilder(userSettings, System.LoggingLevel.INFO, true); + builder.setMessage(message); + + String result = builder.getLogEntryEvent().Message__c; + System.Assert.isTrue(builder.getLogEntryEvent().MessageMasked__c); + System.Assert.isTrue(result.contains('REDACTED'), 'Long value should be masked once overlap override widens the chunk'); + System.Assert.isFalse(result.contains(secret), 'Original long secret should not appear'); + } + @IsTest static void it_should_mask_ssn_in_string_of_exactly_chunk_size() { LoggerSettings__c userSettings = getUserSettings();