diff --git a/CHANGELOG.md b/CHANGELOG.md index 63c171ea..eced7a56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 7.8.4 - 2026-02-09 + +fix: do not pattern match long values in code variables + # 7.8.3 - 2026-02-06 fix: openAI input image sanitization diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 30af736f..dfc92ccf 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -43,26 +43,29 @@ DEFAULT_MAX_VALUE_LENGTH = 1024 DEFAULT_CODE_VARIABLES_MASK_PATTERNS = [ - r"(?i).*password.*", - r"(?i).*secret.*", - r"(?i).*passwd.*", - r"(?i).*pwd.*", - r"(?i).*api_key.*", - r"(?i).*apikey.*", - r"(?i).*auth.*", - r"(?i).*credentials.*", - r"(?i).*privatekey.*", - r"(?i).*private_key.*", - r"(?i).*token.*", - r"(?i).*aws_access_key_id.*", - r"(?i).*_pass", - r"(?i)sk_.*", - r"(?i).*jwt.*", + r"(?i)password", + r"(?i)secret", + r"(?i)passwd", + r"(?i)pwd", + r"(?i)api_key", + r"(?i)apikey", + r"(?i)auth", + r"(?i)credentials", + r"(?i)privatekey", + r"(?i)private_key", + r"(?i)token", + r"(?i)aws_access_key_id", + r"(?i)_pass", + r"(?i)sk_", + r"(?i)jwt", ] DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS = [r"^__.*"] CODE_VARIABLES_REDACTED_VALUE = "$$_posthog_redacted_based_on_masking_rules_$$" +CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$" + +_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000 DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 @@ -945,23 +948,37 @@ def _pattern_matches(name, patterns): return False -def _mask_sensitive_data(value, compiled_mask): +def _mask_sensitive_data(value, compiled_mask, _seen=None): if not compiled_mask: return value + if isinstance(value, (dict, list, tuple)): + if _seen is None: + _seen = set() + obj_id = id(value) + if obj_id in _seen: + return "" + _seen.add(obj_id) + if isinstance(value, dict): result = {} for k, v in value.items(): key_str = str(k) if not isinstance(k, str) else k - if _pattern_matches(key_str, compiled_mask): + if len(key_str) > 
_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + result[k] = CODE_VARIABLES_TOO_LONG_VALUE + elif _pattern_matches(key_str, compiled_mask): result[k] = CODE_VARIABLES_REDACTED_VALUE else: - result[k] = _mask_sensitive_data(v, compiled_mask) + result[k] = _mask_sensitive_data(v, compiled_mask, _seen) return result elif isinstance(value, (list, tuple)): - masked_items = [_mask_sensitive_data(item, compiled_mask) for item in value] + masked_items = [ + _mask_sensitive_data(item, compiled_mask, _seen) for item in value + ] return type(value)(masked_items) elif isinstance(value, str): + if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + return CODE_VARIABLES_TOO_LONG_VALUE if _pattern_matches(value, compiled_mask): return CODE_VARIABLES_REDACTED_VALUE return value @@ -982,7 +999,9 @@ def _serialize_variable_value(value, limiter, max_length=1024, compiled_mask=Non limiter.add(result_size) return value elif isinstance(value, str): - if compiled_mask and _pattern_matches(value, compiled_mask): + if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + result = CODE_VARIABLES_TOO_LONG_VALUE + elif compiled_mask and _pattern_matches(value, compiled_mask): result = CODE_VARIABLES_REDACTED_VALUE else: result = value diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index 7c89abe4..24d84935 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -450,3 +450,143 @@ def trigger_error(): assert "" in output assert "" in output assert "" + + # Circular list + circular_list = ["item"] + circular_list.append(circular_list) + + result = _mask_sensitive_data(circular_list, compiled_mask) + assert result[0] == "item" + assert result[1] == "" diff --git a/posthog/version.py b/posthog/version.py index d7aee14a..9b7cad35 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.8.3" +VERSION = "7.8.4" if __name__ == "__main__": print(VERSION, end="") # noqa: T201