Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions config/blasp.php
Original file line number Diff line number Diff line change
Expand Up @@ -156,32 +156,32 @@
|--------------------------------------------------------------------------
*/
'substitutions' => [
'/a/' => ['a', '4', '@', 'Á', 'á', 'À', 'Â', 'à', 'Â', 'â', 'Ä', 'ä', 'Ã', 'ã', 'Å', 'å', 'æ', 'Æ', 'α', 'Δ', 'Λ', 'λ'],
'/b/' => ['b', '8', '\\', '3', 'ß', 'Β', 'β'],
'/c/' => ['c', 'Ç', 'ç', 'ć', 'Ć', 'č', 'Č', '¢', '€', '<', '(', '{', '©'],
'/d/' => ['d', '\\', ')', 'Þ', 'þ', 'Ð', 'ð'],
'/e/' => ['e', '3', '€', 'È', 'è', 'É', 'é', 'Ê', 'ê', 'ë', 'Ë', 'ē', 'Ē', 'ė', 'Ė', 'ę', 'Ę', '∑'],
'/f/' => ['f', 'ƒ'],
'/g/' => ['g', '6', '9'],
'/h/' => ['h', 'Η'],
'/i/' => ['i', '!', '|', ']', '[', '1', '∫', 'Ì', 'Í', 'Î', 'Ï', 'ì', 'í', 'î', 'ï', 'ī', 'Ī', 'į', 'Į'],
'/j/' => ['j'],
'/k/' => ['k', 'Κ', 'κ'],
'/l/' => ['l', '!', '|', ']', '[', '£', '∫', 'Ì', 'Í', 'Î', 'Ï', 'ł', 'Ł'],
'/m/' => ['m'],
'/n/' => ['n', 'η', 'Ν', 'Π', 'ñ', 'Ñ', 'ń', 'Ń'],
'/o/' => ['o', '0', 'Ο', 'ο', 'Φ', '¤', '°', 'ø', 'ô', 'Ô', 'ö', 'Ö', 'ò', 'Ò', 'ó', 'Ó', 'œ', 'Œ', 'ø', 'Ø', 'ō', 'Ō', 'õ', 'Õ'],
'/p/' => ['p', 'ρ', 'Ρ', '¶', 'þ'],
'/q/' => ['q'],
'/r/' => ['r', '®'],
'/s/' => ['s', '5', '\$', '§', 'ß', 'Ś', 'ś', 'Š', 'š'],
'/t/' => ['t', 'Τ', 'τ'],
'/a/' => ['a', '4', '@', '*', 'Á', 'á', 'À', 'Â', 'à', 'Â', 'â', 'Ä', 'ä', 'Ã', 'ã', 'Å', 'å', 'æ', 'Æ', 'α', 'Δ', 'Λ', 'λ'],
'/b/' => ['b', '8', '\\', '3', '*', 'ß', 'Β', 'β'],
'/c/' => ['c', '*', 'Ç', 'ç', 'ć', 'Ć', 'č', 'Č', '¢', '€', '<', '(', '{', '©'],
'/d/' => ['d', '*', '\\', ')', 'Þ', 'þ', 'Ð', 'ð'],
'/e/' => ['e', '3', '*', '€', 'È', 'è', 'É', 'é', 'Ê', 'ê', 'ë', 'Ë', 'ē', 'Ē', 'ė', 'Ė', 'ę', 'Ę', '∑'],
'/f/' => ['f', '*', 'ƒ'],
'/g/' => ['g', '6', '9', '*'],
'/h/' => ['h', '*', 'Η'],
'/i/' => ['i', '!', '|', ']', '[', '1', '*', '∫', 'Ì', 'Í', 'Î', 'Ï', 'ì', 'í', 'î', 'ï', 'ī', 'Ī', 'į', 'Į'],
'/j/' => ['j', '*'],
'/k/' => ['k', '*', 'Κ', 'κ'],
'/l/' => ['l', '!', '|', ']', '[', '*', '£', '∫', 'Ì', 'Í', 'Î', 'Ï', 'ł', 'Ł'],
'/m/' => ['m', '*'],
'/n/' => ['n', '*', 'η', 'Ν', 'Π', 'ñ', 'Ñ', 'ń', 'Ń'],
'/o/' => ['o', '0', '*', 'Ο', 'ο', 'Φ', '¤', '°', 'ø', 'ô', 'Ô', 'ö', 'Ö', 'ò', 'Ò', 'ó', 'Ó', 'œ', 'Œ', 'ø', 'Ø', 'ō', 'Ō', 'õ', 'Õ'],
'/p/' => ['p', '*', 'ρ', 'Ρ', '¶', 'þ'],
'/q/' => ['q', '*'],
'/r/' => ['r', '*', '®'],
'/s/' => ['s', '5', '*', '\$', '§', 'ß', 'Ś', 'ś', 'Š', 'š'],
'/t/' => ['t', '*', 'Τ', 'τ'],
'/u/' => ['u', 'υ', 'µ', 'û', 'ü', 'ù', 'ú', 'ū', 'Û', 'Ü', 'Ù', 'Ú', 'Ū', '@', '*'],
'/v/' => ['v', 'υ', 'ν'],
'/w/' => ['w', 'ω', 'ψ', 'Ψ'],
'/x/' => ['x', 'Χ', 'χ'],
'/y/' => ['y', '¥', 'γ', 'ÿ', 'ý', 'Ÿ', 'Ý'],
'/z/' => ['z', 'Ζ', 'ž', 'Ž', 'ź', 'Ź', 'ż', 'Ż'],
'/v/' => ['v', '*', 'υ', 'ν'],
'/w/' => ['w', '*', 'ω', 'ψ', 'Ψ'],
'/x/' => ['x', '*', 'Χ', 'χ'],
'/y/' => ['y', '*', '¥', 'γ', 'ÿ', 'ý', 'Ÿ', 'Ý'],
'/z/' => ['z', '*', 'Ζ', 'ž', 'Ž', 'ź', 'Ź', 'ż', 'Ż'],
],

/*
Expand Down
4 changes: 4 additions & 0 deletions src/Core/Analyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ public function analyze(
): Result {
$mask = $mask ?? new CharacterMask(config('blasp.mask', config('blasp.mask_character', '*')));

// Strip invisible Unicode format characters (zero-width spaces, invisible separators, etc.)
// before any driver sees the text, ensuring consistent positions across pipeline drivers
$text = preg_replace('/\p{Cf}/u', '', $text) ?? $text;

return $driver->detect($text, $dictionary, $mask, $options);
}
}
4 changes: 3 additions & 1 deletion src/Drivers/RegexDriver.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa
$continue = true;

// Mask in normalizedString only (needed for loop termination)
$normalizedString = mb_substr($normalizedString, 0, $start) . str_repeat('*', $length) .
// Use SOH control char internally to avoid re-matching when '*' is
// a valid substitution character in profanity patterns
$normalizedString = mb_substr($normalizedString, 0, $start) . str_repeat("\x01", $length) .
mb_substr($normalizedString, $start + $length);

// Record masked range using character positions from immutable string
Expand Down
91 changes: 91 additions & 0 deletions tests/BypassVulnerabilityTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php

namespace Blaspsoft\Blasp\Tests;

use Blaspsoft\Blasp\Facades\Blasp;

/**
 * Regression tests for two ways of disguising profanity in input text:
 * invisible Unicode characters inserted inside a word, and asterisks used
 * in place of letters.
 *
 * Every test passes a string to Blasp::check() and asserts on the result's
 * isOffensive(), uniqueWords() or clean() output.
 */
class BypassVulnerabilityTest extends TestCase
{
    // -------------------------------------------------------
    // Invisible Unicode Characters (U+2063, U+200B, etc.)
    // -------------------------------------------------------

    /**
     * A U+2063 (invisible separator) inside "fuck" must still be reported as "fuck".
     */
    public function test_invisible_separator_in_fuck(): void
    {
        $result = Blasp::check("f\u{2063}uck");

        $this->assertTrue($result->isOffensive());
        $this->assertContains('fuck', $result->uniqueWords());
    }

    /**
     * A U+200B (zero-width space) inside "shit" must still be reported as "shit".
     */
    public function test_zero_width_space_in_shit(): void
    {
        $result = Blasp::check("s\u{200B}hit");

        $this->assertTrue($result->isOffensive());
        $this->assertContains('shit', $result->uniqueWords());
    }

    /**
     * Two consecutive invisible characters (U+200B then U+2063) must not hide the word.
     */
    public function test_multiple_invisible_chars_in_profanity(): void
    {
        $result = Blasp::check("f\u{200B}\u{2063}uck");

        $this->assertTrue($result->isOffensive());
        $this->assertContains('fuck', $result->uniqueWords());
    }

    /**
     * An invisible character inside a harmless word must not produce a match.
     */
    public function test_invisible_chars_in_clean_text_no_false_positive(): void
    {
        $result = Blasp::check("he\u{2063}llo");

        $this->assertFalse($result->isOffensive());
    }

    /**
     * The cleaned output masks exactly the four visible letters; the invisible
     * separator contributes no extra mask character and the rest of the text
     * is left untouched.
     */
    public function test_invisible_separator_clean_output_masks_profanity(): void
    {
        $result = Blasp::check("f\u{2063}uck this");

        $this->assertTrue($result->isOffensive());
        $this->assertSame('**** this', $result->clean());
    }

    // -------------------------------------------------------
    // Censored Profanity (asterisk as letter replacement)
    // -------------------------------------------------------

    /**
     * A single asterisk replacing one letter: "f*g" must be reported as "fag".
     */
    public function test_asterisk_censored_fag(): void
    {
        $result = Blasp::check('f*g');

        $this->assertTrue($result->isOffensive());
        $this->assertContains('fag', $result->uniqueWords());
    }

    /**
     * Two asterisks replacing the middle letters: "f**k" must be flagged.
     */
    public function test_asterisk_censored_fuck(): void
    {
        $result = Blasp::check('f**k');

        $this->assertTrue($result->isOffensive());
    }

    /**
     * Two asterisks replacing the middle letters: "s**t" must be flagged.
     */
    public function test_asterisk_censored_shit(): void
    {
        $result = Blasp::check('s**t');

        $this->assertTrue($result->isOffensive());
    }

    /**
     * Only the first letter left visible: "f***" must be flagged.
     */
    public function test_asterisk_fully_censored_fuck(): void
    {
        $result = Blasp::check('f***');

        $this->assertTrue($result->isOffensive());
    }

    /**
     * Guard against false positives: "b*g" must not be flagged.
     */
    public function test_asterisk_in_non_profane_word_no_false_positive(): void
    {
        $result = Blasp::check('b*g');

        $this->assertFalse($result->isOffensive());
    }

    // -------------------------------------------------------
    // Combined: invisible + wildcard
    // -------------------------------------------------------

    /**
     * Both techniques at once: an invisible separator followed by an asterisk
     * standing in for a letter must still be flagged.
     */
    public function test_invisible_char_plus_asterisk_censoring(): void
    {
        $result = Blasp::check("f\u{2063}*g");

        $this->assertTrue($result->isOffensive());
    }
}
Loading