Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# 7.8.3 - 2026-02-09
# 7.8.5 - 2026-02-09

fix: further optimize code variables pattern matching

# 7.8.4 - 2026-02-09

fix: do not pattern match long values in code variables

Expand Down
44 changes: 37 additions & 7 deletions posthog/exception_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
# Sentinel string; presumably substituted for code-variable values that are
# too long to capture -- TODO confirm against its call sites.
CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$"

# NOTE(review): presumably values longer than this many characters are not
# pattern matched at all -- the usage is not visible here, confirm.
_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000
# Characters with special meaning in a regular expression. Each character of
# the raw string is one member of the set. _extract_plain_substring() refuses
# the plain-substring fast path for any pattern containing one of them.
_REGEX_METACHARACTERS = frozenset(r"\.^$*+?{}[]|()")

# 20 * 1024 = 20480; presumably a size cap in bytes (20 KiB) on captured
# variables -- units are not visible here, confirm.
DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024

Expand Down Expand Up @@ -931,18 +932,47 @@ def strip_string(value, max_length=None):
)


def _extract_plain_substring(pattern):
    """Return the lowercase literal matched by a simple ``(?i)text`` pattern.

    A pattern qualifies only when it consists of an inline flag group that
    includes ``i`` followed by text containing no regex metacharacters, for
    example ``(?i)password`` -> ``"password"``. Such a pattern can be checked
    with a case-insensitive substring test instead of a compiled regex.

    Args:
        pattern: The regex source to inspect.

    Returns:
        The lowercased literal, or ``None`` when the pattern is not a string,
        has no qualifying flag group, has an empty remainder, or contains
        metacharacters. ``None`` tells the caller to use ``re.compile``.
    """
    if not isinstance(pattern, str):
        # re.match would raise TypeError on non-strings; let the caller's
        # regex path decide what to do with such a value.
        return None
    # Accept only flag groups that include ``i`` and otherwise use flags that
    # cannot change how literal text matches (m=MULTILINE, s=DOTALL,
    # u=UNICODE). ``x`` (VERBOSE) makes whitespace and ``#`` non-literal,
    # ``a`` (ASCII) restricts case folding to ASCII letters, and ``L``
    # (LOCALE) is rejected by ``re`` for str patterns, so groups containing
    # them are left to ``re.compile``.
    inline_flags = re.match(r"\(\?[imsu]*i[imsu]*\)", pattern)
    if not inline_flags:
        return None
    remainder = pattern[inline_flags.end():]
    # NOTE(review): any metacharacter disqualifies the pattern, including
    # ``[`` and ``]``. Patterns such as ``(?i)api[_-]?key`` therefore never
    # take the substring fast path and are still compiled as regexes.
    if not remainder or not _REGEX_METACHARACTERS.isdisjoint(remainder):
        return None
    return remainder.lower()


def _compile_patterns(patterns):
    """Split regex sources into plain substrings and compiled regexes.

    Patterns that ``_extract_plain_substring`` recognises are stored as
    lowercase substrings; every other pattern is compiled with
    ``re.compile``.

    Args:
        patterns: Iterable of regex source strings; may be ``None`` or empty.

    Returns:
        ``None`` when ``patterns`` is empty or nothing usable remains,
        otherwise a ``(substrings, regexes)`` tuple of two lists.
    """
    if not patterns:
        return None
    substrings = []
    regexes = []
    for pattern in patterns:
        # The fast-path probe sits inside the try as well, so a non-string
        # pattern is skipped like an invalid regex instead of raising.
        try:
            simple = _extract_plain_substring(pattern)
            if simple is not None:
                substrings.append(simple)
            else:
                regexes.append(re.compile(pattern))
        except Exception:
            # Deliberate best effort: a bad pattern must not break the rest.
            continue
    if not substrings and not regexes:
        return None
    return (substrings, regexes)


def _pattern_matches(name, patterns):
    """Report whether ``name`` matches any of the prepared patterns.

    Args:
        name: The string to test.
        patterns: ``None``, or a ``(substrings, regexes)`` pair where
            ``substrings`` holds lowercase literals and ``regexes`` holds
            compiled pattern objects.

    Returns:
        ``True`` if any substring occurs in the lowercased name or any regex
        finds a match in ``name``; ``False`` otherwise, including when
        ``patterns`` is ``None`` or empty.
    """
    if not patterns:
        return False
    substrings, regexes = patterns
    if substrings:
        # Lowercase once; the stored substrings are already lowercase.
        name_lower = name.lower()
        if any(s in name_lower for s in substrings):
            return True
    return any(regex.search(name) for regex in regexes)
Expand Down
49 changes: 49 additions & 0 deletions posthog/test/test_exception_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,52 @@ def test_mask_sensitive_data_circular_ref():
result = _mask_sensitive_data(circular_list, compiled_mask)
assert result[0] == "item"
assert result[1] == "<circular ref>"


def test_compile_patterns_fast_path_and_regex_fallback():
    """Check the substring fast path, the regex fallback, and a mix of both."""
    from posthog.exception_utils import _compile_patterns, _pattern_matches

    def check(compiled, expectations):
        # Each entry is (variable name, expected match result).
        for variable_name, expected in expectations:
            assert _pattern_matches(variable_name, compiled) is expected

    # Plain "(?i)literal" patterns are stored as lowercase substrings.
    simple_only = _compile_patterns([r"(?i)password", r"(?i)token", r"(?i)jwt"])
    plain, compiled_regexes = simple_only
    assert plain == ["password", "token", "jwt"]
    assert compiled_regexes == []
    check(
        simple_only,
        [
            ("my_password_var", True),
            ("MY_TOKEN", True),
            ("safe_variable", False),
        ],
    )

    # Patterns using regex syntax remain compiled regexes.
    complex_only = _compile_patterns([r"^__.*", r"\d{3,}", r"^sk_live_"])
    plain, compiled_regexes = complex_only
    assert plain == []
    assert len(compiled_regexes) == 3
    check(
        complex_only,
        [
            ("__dunder", True),
            ("has_999_numbers", True),
            ("sk_live_abc123", True),
            ("normal_var", False),
        ],
    )

    # A mixed list is split between the two representations.
    mixed = _compile_patterns(
        [
            r"(?i)secret",  # simple
            r"(?i)api_key",  # simple
            r"^__.*",  # regex
            r"\btoken_\w+",  # regex
        ]
    )
    plain, compiled_regexes = mixed
    assert plain == ["secret", "api_key"]
    assert len(compiled_regexes) == 2
    check(
        mixed,
        [
            # Substring matches
            ("my_secret", True),
            ("API_KEY_VALUE", True),
            # Regex matches
            ("__private", True),
            ("token_abc", True),
            # No match
            ("safe_var", False),
        ],
    )
2 changes: 1 addition & 1 deletion posthog/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Package version string; printed without a trailing newline when this module
# is run as a script.
VERSION = "7.8.5"

if __name__ == "__main__":
    print(VERSION, end="")  # noqa: T201
Loading