diff --git a/config/languages/french.php b/config/languages/french.php index cd4923b..fb8dc2a 100644 --- a/config/languages/french.php +++ b/config/languages/french.php @@ -4,7 +4,7 @@ 'severity' => [ 'mild' => [ 'crotte', 'crottes', 'caca', 'cacas', 'zut', - 'mince', 'flûte', 'flute', 'punaise', + 'punaise', 'idiot', 'idiots', 'idiote', 'idiotes', 'bête', 'bete', 'bêtes', 'betes', 'sot', 'sots', 'sotte', 'sottes', @@ -1537,9 +1537,6 @@ 'réfrigérations', 'refrigerations', 'zut', - 'mince', - 'flûte', - 'flute', 'punaise', ], diff --git a/config/languages/german.php b/config/languages/german.php index 31e9a5c..98706ef 100644 --- a/config/languages/german.php +++ b/config/languages/german.php @@ -19,6 +19,7 @@ 'bekloppt', 'bekloppte', 'bekloppter', 'beklopptes', 'schwanz', 'pimmel', 'hintern', 'po', 'popo', + 'schwul', 'schwuler', 'schwule', 'schwules', ], 'high' => [ 'scheiße', 'scheisse', 'ficken', 'fick', 'gefickt', @@ -27,7 +28,6 @@ 'vögeln', 'voegeln', 'bumsen', ], 'extreme' => [ - 'schwul', 'schwuler', 'schwule', 'schwules', 'tunte', 'tuntig', 'kampflesbe', 'kampflesben', 'kanake', 'kanaken', diff --git a/src/BlaspManager.php b/src/BlaspManager.php index 45b0248..1eadbab 100644 --- a/src/BlaspManager.php +++ b/src/BlaspManager.php @@ -78,6 +78,20 @@ public function createPipelineDriver(): DriverInterface $config = $this->app['config']->get('blasp.drivers.pipeline', []); $driverNames = $config['drivers'] ?? ['regex', 'phonetic']; + if (!is_array($driverNames)) { + throw new InvalidArgumentException('blasp.drivers.pipeline.drivers must be an array of driver names.'); + } + + foreach ($driverNames as $name) { + if (!is_string($name) || trim($name) === '') { + throw new InvalidArgumentException('Each pipeline driver name must be a non-empty string.'); + } + + if (strtolower(trim($name)) === 'pipeline') { + throw new InvalidArgumentException('Pipeline driver cannot contain itself. Remove "pipeline" from blasp.drivers.pipeline.drivers.'); + } + } + $resolvedDrivers = array_map( fn (string $name) => $this->resolveDriver($name), $driverNames, diff --git a/src/BlaspServiceProvider.php b/src/BlaspServiceProvider.php index dcb3bc7..172f5db 100644 --- a/src/BlaspServiceProvider.php +++ b/src/BlaspServiceProvider.php @@ -6,8 +6,6 @@ use Illuminate\Support\ServiceProvider; use Illuminate\Support\Str; use Illuminate\Support\Stringable; -use Blaspsoft\Blasp\Core\Dictionary; - class BlaspServiceProvider extends ServiceProvider { public function boot(): void @@ -53,6 +51,10 @@ public function register(): void protected function registerValidationRule(): void { $this->app['validator']->extend('blasp_check', function ($attribute, $value, $parameters) { + if (!is_string($value) || $value === '') { + return true; + } + $language = $parameters[0] ?? config('blasp.language', config('blasp.default_language', 'english')); $manager = $this->app->make('blasp'); diff --git a/src/Blaspable.php b/src/Blaspable.php index 6e16db0..cc6ecc1 100644 --- a/src/Blaspable.php +++ b/src/Blaspable.php @@ -92,12 +92,13 @@ public function blaspResult(string $attribute): ?Result public static function withoutBlaspChecking(Closure $callback): mixed { + $previousState = static::$blaspCheckingDisabled; static::$blaspCheckingDisabled = true; try { return $callback(); } finally { - static::$blaspCheckingDisabled = false; + static::$blaspCheckingDisabled = $previousState; } } } diff --git a/src/Core/Matchers/PhoneticMatcher.php b/src/Core/Matchers/PhoneticMatcher.php index 4ee80be..60ca537 100644 --- a/src/Core/Matchers/PhoneticMatcher.php +++ b/src/Core/Matchers/PhoneticMatcher.php @@ -14,14 +14,14 @@ public function __construct( private float $maxDistanceRatio = 0.6, private array $phoneticFalsePositives = [], ) { - $this->phoneticFalsePositives = array_map('strtolower', $this->phoneticFalsePositives); + $this->phoneticFalsePositives = array_map(fn($fp) => mb_strtolower($fp, 'UTF-8'), $this->phoneticFalsePositives); $this->buildIndex($profanities); } private function buildIndex(array $profanities): void { foreach ($profanities as $word) { - $lower = strtolower($word); + $lower = mb_strtolower($word, 'UTF-8'); if (mb_strlen($lower, 'UTF-8') < $this->minWordLength) { continue; } @@ -62,7 +62,7 @@ public function match(string $word): ?string foreach ($this->index[$code] as $profanity) { $distance = levenshtein($lower, $profanity); - $maxLen = max(strlen($lower), strlen($profanity)); + $maxLen = max(mb_strlen($lower, 'UTF-8'), mb_strlen($profanity, 'UTF-8')); $threshold = (int) ceil($this->maxDistanceRatio * $maxLen); if ($distance <= $threshold && $distance < $bestDistance) { diff --git a/src/Drivers/PatternDriver.php b/src/Drivers/PatternDriver.php index 6bcce7a..20cc4ee 100644 --- a/src/Drivers/PatternDriver.php +++ b/src/Drivers/PatternDriver.php @@ -57,6 +57,15 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa } } + // Apply severity filter before dedup so shorter high-severity matches aren't swallowed + $minimumSeverity = $options['severity'] ?? null; + if ($minimumSeverity instanceof Severity) { + $matchedWords = array_values(array_filter( + $matchedWords, + fn(MatchedWord $w) => $w->severity->isAtLeast($minimumSeverity) + )); + } + // Deduplicate overlapping matches (longest-first already recorded) usort($matchedWords, fn($a, $b) => $a->position - $b->position ?: $b->length - $a->length); $deduplicated = []; @@ -69,15 +78,6 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa } $matchedWords = $deduplicated; - // Apply severity filter - $minimumSeverity = $options['severity'] ?? null; - if ($minimumSeverity instanceof Severity) { - $matchedWords = array_values(array_filter( - $matchedWords, - fn(MatchedWord $w) => $w->severity->isAtLeast($minimumSeverity) - )); - } - // Rebuild cleanText from surviving matches (right-to-left) $cleanText = $text; $sorted = $matchedWords; diff --git a/src/Drivers/PipelineDriver.php b/src/Drivers/PipelineDriver.php index ab82b0e..f68be23 100644 --- a/src/Drivers/PipelineDriver.php +++ b/src/Drivers/PipelineDriver.php @@ -65,7 +65,7 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa $reversed = array_reverse($kept); foreach ($reversed as $match) { $replacement = $mask->mask($match->text, $match->length); - $cleanText = mb_substr($cleanText, 0, $match->position) . $replacement . mb_substr($cleanText, $match->position + $match->length); + $cleanText = mb_substr($cleanText, 0, $match->position, 'UTF-8') . $replacement . mb_substr($cleanText, $match->position + $match->length, null, 'UTF-8'); } // 5. Recalculate score from merged matches diff --git a/src/Drivers/RegexDriver.php b/src/Drivers/RegexDriver.php index c25c46c..810bf17 100644 --- a/src/Drivers/RegexDriver.php +++ b/src/Drivers/RegexDriver.php @@ -39,11 +39,17 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa $normalizedString = $normalizer->normalize($text); $originalNormalized = preg_replace('/\s+/', ' ', $normalizedString); + // Immutable copy for position lookups — never mutated + $immutableNormalized = $originalNormalized; + $matchedWords = []; $uniqueMap = []; $profanitiesCount = 0; $continue = true; + // Track masked character ranges so we don't re-match them + $maskedRanges = []; + while ($continue) { $continue = false; $normalizedString = preg_replace('/\s+/', ' ', $normalizedString); @@ -59,6 +65,19 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa $length = mb_strlen($match[0], 'UTF-8'); $matchedText = $match[0]; + // Skip if this range overlaps with an already-masked range + $matchEnd = $start + $length; + $alreadyMasked = false; + foreach ($maskedRanges as [$mStart, $mEnd]) { + if ($start < $mEnd && $matchEnd > $mStart) { + $alreadyMasked = true; + break; + } + } + if ($alreadyMasked) { + continue; + } + // Check word boundary spanning (filter uses byte-level operations) if ($this->filter->isSpanningWordBoundary($matchedText, $normalizedString, $byteStart)) { continue; @@ -73,7 +92,7 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa $fullWord = $this->filter->getFullWordContext($normalizedString, $byteStart, $byteLength); // Check pure alpha substring against original (unmasked) normalized - $originalFullWord = $this->filter->getFullWordContext($originalNormalized, $byteStart, $byteLength); + $originalFullWord = $this->filter->getFullWordContext($immutableNormalized, $byteStart, $byteLength); if ($this->compoundDetector->isPureAlphaSubstring($matchedText, $originalFullWord, $profanity, $profanityExpressions)) { continue; } @@ -86,14 +105,20 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa $continue = true; // Mask in normalizedString only (needed for loop termination) - $normalizedString = mb_substr($normalizedString, 0, $start) . str_repeat('*', mb_strlen($match[0], 'UTF-8')) . - mb_substr($normalizedString, $start + mb_strlen($match[0], 'UTF-8')); + $normalizedString = mb_substr($normalizedString, 0, $start) . str_repeat('*', $length) . + mb_substr($normalizedString, $start + $length); - // Track match + // Record masked range using character positions from immutable string + $maskedRanges[] = [$start, $matchEnd]; + + // Track match — use position derived from immutable normalized string $profanitiesCount++; + // Get the original text at this position from the original input + $originalMatchText = mb_substr($text, $start, $length); + $matchedWords[] = new MatchedWord( - text: $matchedText, + text: $originalMatchText, base: $profanity, severity: $dictionary->getSeverity($profanity), position: $start, @@ -109,7 +134,7 @@ public function detect(string $text, Dictionary $dictionary, MaskStrategyInterfa } } - // Apply severity filter if set + // Apply severity filter before masking so low-severity matches don't suppress overlapping ones $minimumSeverity = $options['severity'] ?? null; if ($minimumSeverity instanceof Severity) { $matchedWords = array_values(array_filter( diff --git a/src/Middleware/CheckProfanity.php b/src/Middleware/CheckProfanity.php index ba4af4a..c032eda 100644 --- a/src/Middleware/CheckProfanity.php +++ b/src/Middleware/CheckProfanity.php @@ -22,10 +22,10 @@ public function handle(Request $request, Closure $next, ?string $action = null, $fields = config('blasp.middleware.fields', ['*']); $except = config('blasp.middleware.except', ['password', 'email', '_token']); - $input = $request->except($except); - if ($fields !== ['*']) { - $input = $request->only($fields); + $input = collect($request->only($fields))->except($except)->all(); + } else { + $input = $request->except($except); } $textFields = $this->extractTextFields($input); diff --git a/src/PendingCheck.php b/src/PendingCheck.php index 97f20a4..65108eb 100644 --- a/src/PendingCheck.php +++ b/src/PendingCheck.php @@ -318,6 +318,14 @@ protected function trackCacheKey(string $key): void $cache = $this->getCache(); $keys = $cache->get('blasp_result_cache_keys', []); $keys[] = $key; - $cache->forever('blasp_result_cache_keys', array_unique($keys)); + $keys = array_unique($keys); + + // Evict oldest keys when exceeding the configured limit + $maxKeys = config('blasp.cache.max_tracked_keys', 1000); + if (count($keys) > $maxKeys) { + $keys = array_slice($keys, -$maxKeys); + } + + $cache->forever('blasp_result_cache_keys', $keys); } } diff --git a/tests/StrMacroTest.php b/tests/StrMacroTest.php index 95ebf3f..7b62a31 100644 --- a/tests/StrMacroTest.php +++ b/tests/StrMacroTest.php @@ -51,6 +51,6 @@ public function test_stringable_clean_profanity_returns_stringable_instance() public function test_stringable_clean_profanity_returns_clean_text_unchanged() { - $this->assertSame('hello', Str::of('hello')->cleanProfanity()->toString()); + $this->assertSame('hello', (string) Str::of('hello')->cleanProfanity()); } }