From 31ef81ffe9f4ad4650094384dc651132ef0aa410 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 14:32:28 +0100 Subject: [PATCH 1/9] feat: initial --- posthog/exception_utils.py | 39 ++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 30af736f..e7bb2f80 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -43,26 +43,29 @@ DEFAULT_MAX_VALUE_LENGTH = 1024 DEFAULT_CODE_VARIABLES_MASK_PATTERNS = [ - r"(?i).*password.*", - r"(?i).*secret.*", - r"(?i).*passwd.*", - r"(?i).*pwd.*", - r"(?i).*api_key.*", - r"(?i).*apikey.*", - r"(?i).*auth.*", - r"(?i).*credentials.*", - r"(?i).*privatekey.*", - r"(?i).*private_key.*", - r"(?i).*token.*", - r"(?i).*aws_access_key_id.*", - r"(?i).*_pass", - r"(?i)sk_.*", - r"(?i).*jwt.*", + r"(?i)password", + r"(?i)secret", + r"(?i)passwd", + r"(?i)pwd", + r"(?i)api_key", + r"(?i)apikey", + r"(?i)auth", + r"(?i)credentials", + r"(?i)privatekey", + r"(?i)private_key", + r"(?i)token", + r"(?i)aws_access_key_id", + r"(?i)_pass", + r"(?i)sk_", + r"(?i)jwt", ] DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS = [r"^__.*"] CODE_VARIABLES_REDACTED_VALUE = "$$_posthog_redacted_based_on_masking_rules_$$" +CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$" + +_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 500 DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 @@ -962,6 +965,8 @@ def _mask_sensitive_data(value, compiled_mask): masked_items = [_mask_sensitive_data(item, compiled_mask) for item in value] return type(value)(masked_items) elif isinstance(value, str): + if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + return CODE_VARIABLES_TOO_LONG_VALUE if _pattern_matches(value, compiled_mask): return CODE_VARIABLES_REDACTED_VALUE return value @@ -982,7 +987,9 @@ def _serialize_variable_value(value, limiter, max_length=1024, compiled_mask=Non limiter.add(result_size) return value elif isinstance(value, str): - 
if compiled_mask and _pattern_matches(value, compiled_mask): + if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + result = CODE_VARIABLES_TOO_LONG_VALUE + elif compiled_mask and _pattern_matches(value, compiled_mask): result = CODE_VARIABLES_REDACTED_VALUE else: result = value From 1cc0ed6d59d4484dddeaf1028ce9cd4c7aaf944b Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 15:13:39 +0100 Subject: [PATCH 2/9] feat: bump limit to 5k and add tests --- posthog/exception_utils.py | 2 +- posthog/test/test_exception_capture.py | 100 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index e7bb2f80..7caef2be 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -65,7 +65,7 @@ CODE_VARIABLES_REDACTED_VALUE = "$$_posthog_redacted_based_on_masking_rules_$$" CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$" -_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 500 +_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000 DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024 diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index 7c89abe4..ff541b22 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -450,3 +450,103 @@ def trigger_error(): assert "" in output assert "" in output assert " Date: Mon, 9 Feb 2026 15:15:37 +0100 Subject: [PATCH 3/9] fix: copy --- posthog/test/test_exception_capture.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index ff541b22..bce760df 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -489,14 +489,10 @@ def trigger_error(): assert "ZeroDivisionError" in output assert "code_variables" in output - # Short string should appear as-is assert "'short_value': 'I am short'" in output - # Long strings should be replaced 
with the too-long constant assert "$$_posthog_value_too_long_$$" in output - # Long string whose content contains "password" still gets the too-long constant - # (length check fires before pattern matching) assert "'long_blob': '$$_posthog_value_too_long_$$'" in output @@ -542,11 +538,9 @@ def trigger_error(): assert "ZeroDivisionError" in output assert "code_variables" in output - # Short values survive assert "short_val" in output assert "ok" in output - # Long values inside dicts get replaced assert "$$_posthog_value_too_long_$$" in output assert "y" * 1000 not in output assert "z" * 1000 not in output From d1cb1e6a5b69257ec93e3c8a7884858efa02f191 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:06:11 +0100 Subject: [PATCH 4/9] feat: changelog --- CHANGELOG.md | 4 ++++ posthog/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63c171ea..eced7a56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 7.8.4 - 2026-02-09 + +fix: do not pattern match long values in code variables + # 7.8.3 - 2026-02-06 fix: openAI input image sanitization diff --git a/posthog/version.py b/posthog/version.py index d7aee14a..9b7cad35 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.8.3" +VERSION = "7.8.4" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From fa032b0e3be0bee00b7352a4459f102592fba6ba Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:21:22 +0100 Subject: [PATCH 5/9] feat: add guard to keys --- posthog/exception_utils.py | 4 ++- posthog/test/test_exception_capture.py | 45 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 7caef2be..f288b178 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -956,7 +956,9 @@ def _mask_sensitive_data(value, compiled_mask): result = {} for k, v in 
value.items(): key_str = str(k) if not isinstance(k, str) else k - if _pattern_matches(key_str, compiled_mask): + if len(key_str) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: + result[k] = CODE_VARIABLES_TOO_LONG_VALUE + elif _pattern_matches(key_str, compiled_mask): result[k] = CODE_VARIABLES_REDACTED_VALUE else: result[k] = _mask_sensitive_data(v, compiled_mask) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index bce760df..cdd7b89a 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -544,3 +544,48 @@ def trigger_error(): assert "$$_posthog_value_too_long_$$" in output assert "y" * 1000 not in output assert "z" * 1000 not in output + + +def test_code_variables_too_long_dict_key(tmpdir): + app = tmpdir.join("app.py") + app.write( + dedent( + """ + import os + from posthog import Posthog + + posthog = Posthog( + 'phc_x', + host='https://eu.i.posthog.com', + debug=True, + enable_exception_autocapture=True, + capture_exception_code_variables=True, + project_root=os.path.dirname(os.path.abspath(__file__)) + ) + + def trigger_error(): + my_data = { + "short": "visible", + "k" * 20000: "should_be_replaced", + } + + 1/0 + + trigger_error() + """ + ) + ) + + with pytest.raises(subprocess.CalledProcessError) as excinfo: + subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT) + + output = excinfo.value.output.decode("utf-8") + + assert "ZeroDivisionError" in output + assert "code_variables" in output + + assert "short" in output + assert "visible" in output + + assert "$$_posthog_value_too_long_$$" in output + assert "should_be_replaced" not in output From dec73b67752d82d061a8588f2d48ad0ebad3edf4 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:28:13 +0100 Subject: [PATCH 6/9] feat: add test for key guard --- posthog/test/test_exception_capture.py | 54 ++++++++------------------ 1 file changed, 16 insertions(+), 38 deletions(-) diff --git 
a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index cdd7b89a..7b1bc793 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -546,46 +546,24 @@ def trigger_error(): assert "z" * 1000 not in output -def test_code_variables_too_long_dict_key(tmpdir): - app = tmpdir.join("app.py") - app.write( - dedent( - """ - import os - from posthog import Posthog - - posthog = Posthog( - 'phc_x', - host='https://eu.i.posthog.com', - debug=True, - enable_exception_autocapture=True, - capture_exception_code_variables=True, - project_root=os.path.dirname(os.path.abspath(__file__)) +def test_mask_sensitive_data_too_long_dict_key(): + from posthog.exception_utils import ( + CODE_VARIABLES_TOO_LONG_VALUE, + _compile_patterns, + _mask_sensitive_data, ) - def trigger_error(): - my_data = { - "short": "visible", - "k" * 20000: "should_be_replaced", - } - - 1/0 + compiled_mask = _compile_patterns([r"(?i)password"]) - trigger_error() - """ - ) + result = _mask_sensitive_data( + { + "short": "visible", + "k" * 20000: "hidden_val", + "password": "secret", + }, + compiled_mask, ) - with pytest.raises(subprocess.CalledProcessError) as excinfo: - subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT) - - output = excinfo.value.output.decode("utf-8") - - assert "ZeroDivisionError" in output - assert "code_variables" in output - - assert "short" in output - assert "visible" in output - - assert "$$_posthog_value_too_long_$$" in output - assert "should_be_replaced" not in output + assert result["short"] == "visible" + assert result["k" * 20000] == CODE_VARIABLES_TOO_LONG_VALUE + assert result["password"] == "$$_posthog_redacted_based_on_masking_rules_$$" From bbef1adfe2a8f3e41a06fe6431a99d43faccf562 Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:30:13 +0100 Subject: [PATCH 7/9] feat: add circular refs guard and test --- posthog/exception_utils.py | 14 +++++++++++--- 
posthog/test/test_exception_capture.py | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index f288b178..5e435b50 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -948,10 +948,18 @@ def _pattern_matches(name, patterns): return False -def _mask_sensitive_data(value, compiled_mask): +def _mask_sensitive_data(value, compiled_mask, _seen=None): if not compiled_mask: return value + if isinstance(value, (dict, list, tuple)): + if _seen is None: + _seen = set() + obj_id = id(value) + if obj_id in _seen: + return "" + _seen.add(obj_id) + if isinstance(value, dict): result = {} for k, v in value.items(): @@ -961,10 +969,10 @@ def _mask_sensitive_data(value, compiled_mask): elif _pattern_matches(key_str, compiled_mask): result[k] = CODE_VARIABLES_REDACTED_VALUE else: - result[k] = _mask_sensitive_data(v, compiled_mask) + result[k] = _mask_sensitive_data(v, compiled_mask, _seen) return result elif isinstance(value, (list, tuple)): - masked_items = [_mask_sensitive_data(item, compiled_mask) for item in value] + masked_items = [_mask_sensitive_data(item, compiled_mask, _seen) for item in value] return type(value)(masked_items) elif isinstance(value, str): if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index 7b1bc793..82488172 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -567,3 +567,25 @@ def test_mask_sensitive_data_too_long_dict_key(): assert result["short"] == "visible" assert result["k" * 20000] == CODE_VARIABLES_TOO_LONG_VALUE assert result["password"] == "$$_posthog_redacted_based_on_masking_rules_$$" + + +def test_mask_sensitive_data_circular_ref(): + from posthog.exception_utils import _compile_patterns, _mask_sensitive_data + + compiled_mask = _compile_patterns([r"(?i)password"]) + + # Circular 
dict + circular_dict = {"key": "value"} + circular_dict["self"] = circular_dict + + result = _mask_sensitive_data(circular_dict, compiled_mask) + assert result["key"] == "value" + assert result["self"] == "" + + # Circular list + circular_list = ["item"] + circular_list.append(circular_list) + + result = _mask_sensitive_data(circular_list, compiled_mask) + assert result[0] == "item" + assert result[1] == "" From 8345c57b0a93af95d75e6f9131aaefa238b56f7c Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:39:21 +0100 Subject: [PATCH 8/9] fix: format --- posthog/exception_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/posthog/exception_utils.py b/posthog/exception_utils.py index 5e435b50..dfc92ccf 100644 --- a/posthog/exception_utils.py +++ b/posthog/exception_utils.py @@ -972,7 +972,9 @@ def _mask_sensitive_data(value, compiled_mask, _seen=None): result[k] = _mask_sensitive_data(v, compiled_mask, _seen) return result elif isinstance(value, (list, tuple)): - masked_items = [_mask_sensitive_data(item, compiled_mask, _seen) for item in value] + masked_items = [ + _mask_sensitive_data(item, compiled_mask, _seen) for item in value + ] return type(value)(masked_items) elif isinstance(value, str): if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH: From ac4d7ae659fd68e4db2ae7108a1d47153264a5bb Mon Sep 17 00:00:00 2001 From: ablaszkiewicz Date: Mon, 9 Feb 2026 16:41:57 +0100 Subject: [PATCH 9/9] fix: add comment --- posthog/test/test_exception_capture.py | 1 + 1 file changed, 1 insertion(+) diff --git a/posthog/test/test_exception_capture.py b/posthog/test/test_exception_capture.py index 82488172..24d84935 100644 --- a/posthog/test/test_exception_capture.py +++ b/posthog/test/test_exception_capture.py @@ -565,6 +565,7 @@ def test_mask_sensitive_data_too_long_dict_key(): ) assert result["short"] == "visible" + # This then gets shortened by the JSON truncation at 1024 chars anyways so no worries assert result["k" * 20000] == 
CODE_VARIABLES_TOO_LONG_VALUE assert result["password"] == "$$_posthog_redacted_based_on_masking_rules_$$"