Skip to content

Commit 3c28543

Browse files
blackwell-systems authored and SamMorrowDrums committed
fix: prevent sentinel collision XSS and document closing-fence deviation
Security fix: Add NUL byte (0x00) to shouldRemoveRune so that FilterInvisibleCharacters strips NUL bytes before protectCodeAngleBrackets runs. Without this, an attacker can inject literal sentinel strings (\x00LT\x00script\x00GT\x00) that bypass FilterHTMLTags and get restored to <script> by restoreCodeAngleBrackets.

Also add a comment documenting the CommonMark closing-fence deviation: the implementation treats any run of >= fenceLen backticks as a closing fence even mid-line, which is more permissive than CommonMark (which requires the closing fence to be on its own line, with no info string). This is a soft-fail (some angle brackets may be unprotected) rather than a security issue.

Tests added:
- sentinel collision: verifies NUL-byte injection does not produce <script>
- NUL bytes in code blocks: verifies code content is preserved after stripping
- NUL byte in shouldRemoveRune: verifies 0x00 is in the removal set
1 parent ac2718a commit 3c28543

2 files changed

Lines changed: 19 additions & 2 deletions

File tree

pkg/sanitize/sanitize.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,12 @@ func protectCodeAngleBrackets(input string) string {
191191
b.WriteRune(runes[i]) // newline
192192
i++
193193
}
194-
// Inside fence: protect angle brackets until closing fence
194+
// Inside fence: protect angle brackets until closing fence.
195+
// NOTE: CommonMark requires the closing fence to be on its own line
196+
// with no info string. This implementation is more permissive: any
197+
// run of >= fenceLen backticks ends the block, even mid-line. This
198+
// is a soft-fail (some angle brackets may be unprotected) rather
199+
// than a security issue.
195200
for i < n {
196201
// Check for closing fence
197202
if runes[i] == '`' {
@@ -333,7 +338,8 @@ func getPolicy() *bluemonday.Policy {
333338

334339
func shouldRemoveRune(r rune) bool {
335340
switch r {
336-
case 0x200B, // ZERO WIDTH SPACE
341+
case 0x0000, // NUL — stripped to prevent sentinel collision in protectCodeAngleBrackets
342+
0x200B, // ZERO WIDTH SPACE
337343
0x200C, // ZERO WIDTH NON-JOINER
338344
0x200E, // LEFT-TO-RIGHT MARK
339345
0x200F, // RIGHT-TO-LEFT MARK

pkg/sanitize/sanitize_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ func TestShouldRemoveRune(t *testing.T) {
129129
expected bool
130130
}{
131131
// Individual characters that should be removed
132+
{name: "NUL byte", rune: 0x0000, expected: true},
132133
{name: "zero width space", rune: 0x200B, expected: true},
133134
{name: "zero width non-joiner", rune: 0x200C, expected: true},
134135
{name: "left-to-right mark", rune: 0x200E, expected: true},
@@ -337,6 +338,16 @@ func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
337338
input: "No code here, just text.",
338339
expected: "No code here, just text.",
339340
},
341+
{
342+
name: "sentinel collision does not bypass sanitizer",
343+
input: "\x00LT\x00script\x00GT\x00alert(1)\x00LT\x00/script\x00GT\x00",
344+
expected: "LTscriptGTalert(1)LT/scriptGT", // NUL bytes stripped; sentinels don't match; no <script> injected
345+
},
346+
{
347+
name: "NUL bytes stripped from input with code blocks",
348+
input: "```\nfunc Foo\x00[T any]()\n```",
349+
expected: "```\nfunc Foo[T any]()\n```",
350+
},
340351
}
341352

342353
for _, tt := range tests {

0 commit comments

Comments
 (0)