From 7ea219128dd7b36c7284cce7238ebf3a97e743f4 Mon Sep 17 00:00:00 2001
From: Dom Morgan <dom@d3r.com>
Date: Tue, 26 May 2026 11:14:37 +0100
Subject: [PATCH 1/3] Revert "Removing dependency on voku/portable-utf8 (#33)"

This reverts commit d96509294ea843b4b86e4900df27424a6ea0ace8.
---
 composer.json              |   2 +-
 src/Stemmer/Catalan.php    |  25 ++--
 src/Stemmer/Danish.php     |  33 +++--
 src/Stemmer/Dutch.php      |  63 ++++-----
 src/Stemmer/English.php    |  79 ++++++------
 src/Stemmer/Finnish.php    | 115 +++++++++--------
 src/Stemmer/French.php     |  93 +++++++-------
 src/Stemmer/German.php     |  49 +++----
 src/Stemmer/Italian.php    |  49 +++----
 src/Stemmer/Norwegian.php  |  23 ++--
 src/Stemmer/Portuguese.php |  49 +++----
 src/Stemmer/Romanian.php   |  29 +++--
 src/Stemmer/Russian.php    |  45 ++++---
 src/Stemmer/Spanish.php    |  72 ++++++-----
 src/Stemmer/Stem.php       |  30 ++---
 src/Stemmer/Swedish.php    |  25 ++--
 src/StemmerFactory.php     |   4 +-
 src/Transliterate.php      | 253 -------------------------------------
 18 files changed, 427 insertions(+), 611 deletions(-)
 delete mode 100644 src/Transliterate.php

diff --git a/composer.json b/composer.json
index d58a45c..b190dda 100644
--- a/composer.json
+++ b/composer.json
@@ -11,7 +11,7 @@
 	],
 	"require": {
 		"php": ">=7.3",
-		"joomla/string": ">=2.0.1"
+		"voku/portable-utf8": "^5.4|^6.0"
 	},
 	"require-dev":{
 		"phpunit/phpunit": "^9.0"
diff --git a/src/Stemmer/Catalan.php b/src/Stemmer/Catalan.php
index 8a5c7d3..d52e4fc 100644
--- a/src/Stemmer/Catalan.php
+++ b/src/Stemmer/Catalan.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -86,7 +86,12 @@ class Catalan extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // Catalan stemmer does not use Rv
         $this->r1();
@@ -122,7 +127,7 @@ private function step0()
     {
         if (($position = $this->search(static::$attached_pronoun)) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -141,7 +146,7 @@ private function step1a()
         //      delete if in R2
         if (($position = $this->search(['acions', 'ada', 'ades'])) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -157,11 +162,11 @@ private function step1a()
         // atius atives ativa ativitat ativitats ible ibles assa asses assos ent ents íssim íssima íssims íssimes
         // ìssem ìsseu ìssin ims ima imes isme ista ismes istes inia inies íinia ínies ita ites triu trius oses osos
         // ient otes ots
         //
         //      delete if in R1
         if (($position = $this->search(self::$standard_suffix_1a)) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -236,7 +241,7 @@ private function step1b()
         //      delete if in R1
         if (($position = $this->search(static::$verb_suffixes)) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -246,7 +251,7 @@ private function step1b()
         //      delete if in R2
         if (($position = $this->search(['ando'])) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -265,7 +270,7 @@ private function step2()
         //      delete if in R1
         if (($position = $this->search(static::$residual_suffixes)) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -289,7 +294,7 @@ private function step2()
      */
     private function finish()
     {
-        $this->word = str_replace(
+        $this->word = UTF8::str_replace(
             ['á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ì', 'ò', 'ï', 'ü', '·'],
             ['a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'i', 'u', '.'],
             $this->word
diff --git a/src/Stemmer/Danish.php b/src/Stemmer/Danish.php
index 5fc7507..c539fdb 100644
--- a/src/Stemmer/Danish.php
+++ b/src/Stemmer/Danish.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,7 +22,12 @@ class Danish extends Stem
      */
     public function stem($word): string
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // R2 is not used: R1 is defined in the same way as in the German stemmer
         $this->r1();
@@ -30,7 +35,7 @@ public function stem($word): string
         // then R1 is adjusted so that the region before it contains at least 3 letters.
         if ($this->r1Index < 3) {
             $this->r1Index = 3;
-            $this->r1 = StringHelper::substr($this->word, 3);
+            $this->r1 = UTF8::substr($this->word, 3);
         }
 
         // Do each of steps 1, 2 3 and 4.
@@ -51,7 +56,7 @@ public function stem($word): string
      */
     private function hasValidSEnding($word)
     {
-        $lastLetter = StringHelper::substr($word, -1, 1);
+        $lastLetter = UTF8::substr($word, -1, 1);
         return in_array($lastLetter, array('a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å'));
     }
 
@@ -69,14 +74,14 @@ private function step1()
             'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds',
             'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e'
         ))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         // s
         //      delete if preceded by a valid s-ending
         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
+            $word = UTF8::substr($this->word, 0, $position);
             if ($this->hasValidSEnding($word)) {
                 $this->word = $word;
             }
@@ -92,7 +97,7 @@ private function step1()
     private function step2()
     {
         if ($this->searchIfInR1(array('gd', 'dt', 'gt', 'kt')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -103,14 +108,14 @@ private function step3()
     {
         // If the word ends igst, remove the final st.
         if ($this->search(array('igst')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -2);
+            $this->word = UTF8::substr($this->word, 0, -2);
         }
 
         // Search for the longest among the following suffixes in R1, and perform the action indicated.
         //  ig   lig   elig   els
         //      delete, and then repeat step 2
         if ( ($position = $this->searchIfInR1(array('elig', 'lig', 'ig', 'els'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             $this->step2();
             return true;
         }
@@ -118,7 +123,7 @@ private function step3()
         //  løst
         //      replace with løs
         if ($this->searchIfInR1(array('løst')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -128,19 +133,19 @@ private function step3()
      */
     private function step4()
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
         if (!$this->inR1(($length-1))) {
             return false;
         }
 
-        $lastLetter = StringHelper::substr($this->word, -1, 1);
+        $lastLetter = UTF8::substr($this->word, -1, 1);
         if (in_array($lastLetter, self::$vowels)) {
             return false;
         }
-        $beforeLastLetter = StringHelper::substr($this->word, -2, 1);
+        $beforeLastLetter = UTF8::substr($this->word, -2, 1);
 
         if ($lastLetter == $beforeLastLetter) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
         return true;
     }
diff --git a/src/Stemmer/Dutch.php b/src/Stemmer/Dutch.php
index 6a2b563..fc7c1af 100644
--- a/src/Stemmer/Dutch.php
+++ b/src/Stemmer/Dutch.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,10 +22,15 @@ class Dutch extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // First, remove all umlaut and acute accents.
-        $this->word = str_replace(
+        $this->word = UTF8::str_replace(
             array('ä', 'ë', 'ï', 'ö', 'ü', 'á', 'é', 'í', 'ó', 'ú'),
             array('a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'),
             $this->word);
@@ -45,7 +50,7 @@ public function stem($word)
         // but then R1 is adjusted so that the region before it contains at least 3 letters.
         if ($this->r1Index < 3) {
             $this->r1Index = 3;
-            $this->r1 = StringHelper::substr($this->word, 3);
+            $this->r1 = UTF8::substr($this->word, 3);
         }
 
         // Do each of steps 1, 2 3 and 4.
@@ -66,7 +71,7 @@ public function stem($word)
      */
     private function hasValidSEnding($word)
     {
-        $lastLetter = StringHelper::substr($word, -1, 1);
+        $lastLetter = UTF8::substr($word, -1, 1);
         return !in_array($lastLetter, array_merge(self::$vowels, array('j')));
     }
 
@@ -77,12 +82,12 @@ private function hasValidSEnding($word)
      */
     private function hasValidEnEnding($word)
     {
-        $lastLetter = StringHelper::substr($word, -1, 1);
+        $lastLetter = UTF8::substr($word, -1, 1);
         if (in_array($lastLetter, self::$vowels)) {
             return false;
         }
 
-        $threeLastLetters = StringHelper::substr($word, -3, 3);
+        $threeLastLetters = UTF8::substr($word, -3, 3);
         if ($threeLastLetters == 'gem') {
             return false;
         }
@@ -95,7 +100,7 @@ private function hasValidEnEnding($word)
     private function unDoubling()
     {
         if ($this->search(array('kk', 'dd', 'tt')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -118,7 +123,7 @@ private function step1()
         //      delete if in R1 and preceded by a valid en-ending, and then undouble the ending
         if ( ($position = $this->search(array('ene', 'en'))) !== false) {
             if ($this->inR1($position)) {
-                $word = StringHelper::substr($this->word, 0, $position);
+                $word = UTF8::substr($this->word, 0, $position);
                 if ($this->hasValidEnEnding($word)) {
                     $this->word = $word;
                     $this->unDoubling();
@@ -131,7 +136,7 @@ private function step1()
         //      delete if in R1 and preceded by a valid s-ending
         if ( ($position = $this->search(array('se', 's'))) !== false) {
             if ($this->inR1($position)) {
-                $word = StringHelper::substr($this->word, 0, $position);
+                $word = UTF8::substr($this->word, 0, $position);
                 if ($this->hasValidSEnding($word)) {
                     $this->word = $word;
                 }
@@ -150,9 +155,9 @@ private function step2()
     {
         if ( ($position = $this->search(array('e'))) !== false) {
             if ($this->inR1($position)) {
-                $letter = StringHelper::substr($this->word, -2, 1);
+                $letter = UTF8::substr($this->word, -2, 1);
                 if (!in_array($letter, self::$vowels)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     $this->unDoubling();
 
                     return true;
@@ -171,13 +176,13 @@ private function step3a()
     {
         if ( ($position = $this->search(array('heid'))) !== false) {
             if ($this->inR2($position)) {
-                $letter = StringHelper::substr($this->word, -5, 1);
+                $letter = UTF8::substr($this->word, -5, 1);
                 if ($letter !== 'c') {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
 
                     if ( ($position = $this->search(array('en'))) !== false) {
                         if ($this->inR1($position)) {
-                            $word = StringHelper::substr($this->word, 0, $position);
+                            $word = UTF8::substr($this->word, 0, $position);
                             if ($this->hasValidEnEnding($word)) {
                                 $this->word = $word;
                                 $this->unDoubling();
@@ -201,12 +206,12 @@ private function step3b($removedE)
         //      if preceded by ig, delete if in R2 and not preceded by e, otherwise undouble the ending
         if ( ($position = $this->search(array('end', 'ing'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 if ( ($position2 = $this->searchIfInR2(array('ig'))) !== false) {
-                    $letter = StringHelper::substr($this->word, -3, 1);
+                    $letter = UTF8::substr($this->word, -3, 1);
                     if ($letter !== 'e') {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                     }
                 } else {
                     $this->unDoubling();
@@ -221,9 +226,9 @@ private function step3b($removedE)
         //      delete if in R2 and not preceded by e
         if ( ($position = $this->search(array('ig'))) !== false) {
             if ($this->inR2($position)) {
-                $letter = StringHelper::substr($this->word, -3, 1);
+                $letter = UTF8::substr($this->word, -3, 1);
                 if ($letter !== 'e') {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 }
             }
             return true;
@@ -233,7 +238,7 @@ private function step3b($removedE)
         //      delete if in R2, and then repeat step 2
         if ( ($position = $this->search(array('lijk'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->step2();
             }
             return true;
@@ -243,7 +248,7 @@ private function step3b($removedE)
         //      delete if in R2
         if ( ($position = $this->search(array('baar'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -252,7 +257,7 @@ private function step3b($removedE)
         //      delete if in R2 and if step 2 actually removed an e
         if ( ($position = $this->search(array('bar'))) !== false) {
             if ($this->inR2($position) && $removedE) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -268,25 +273,25 @@ private function step3b($removedE)
     private function step4()
     {
         // D is a non-vowel other than I
-        $d = StringHelper::substr($this->word, -1, 1);
+        $d = UTF8::substr($this->word, -1, 1);
         if (in_array($d, array_merge(self::$vowels, array('I')))) {
             return false;
         }
 
         // V is double a, e, o or u
-        $v = StringHelper::substr($this->word, -3, 2);
+        $v = UTF8::substr($this->word, -3, 2);
         if (!in_array($v, array('aa', 'ee', 'oo', 'uu'))) {
             return false;
         }
-        $singleV = StringHelper::substr($v, 0, 1);
+        $singleV = UTF8::substr($v, 0, 1);
 
         // C is a non-vowel
-        $c = StringHelper::substr($this->word, -4, 1);
+        $c = UTF8::substr($this->word, -4, 1);
         if (in_array($c, self::$vowels)) {
             return false;
         }
 
-        $this->word = StringHelper::substr($this->word, 0, -4);
+        $this->word = UTF8::substr($this->word, 0, -4);
         $this->word .= $c . $singleV  .$d;
     }
 
@@ -296,6 +301,6 @@ private function step4()
      */
     private function finish()
     {
-        $this->word = str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
+        $this->word = UTF8::str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
     }
 }
diff --git a/src/Stemmer/English.php b/src/Stemmer/English.php
index f0e1f2c..fe5f186 100644
--- a/src/Stemmer/English.php
+++ b/src/Stemmer/English.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  * English Porter 2
@@ -27,11 +27,16 @@ class English extends Stem
      */
     public function stem($word)
     {
-        if (StringHelper::strlen($word) < 3) {
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        if (UTF8::strlen($word) < 3) {
             return $word;
         }
 
-        $this->word = StringHelper::strtolower($word);
+        $this->word = UTF8::strtolower($word);
 
         // exceptions
         if (null !== ($word = $this->exception1())) {
@@ -42,9 +47,9 @@ public function stem($word)
         $this->plainVowels = implode('', self::$vowels);
 
         // Remove initial ', if present.
-        $first = StringHelper::substr($this->word, 0, 1);
+        $first = UTF8::substr($this->word, 0, 1);
         if ($first == "'") {
-            $this->word = StringHelper::substr($this->word, 1);
+            $this->word = UTF8::substr($this->word, 1);
         }
 
         // Set initial y, or y after a vowel, to Y
@@ -83,7 +88,7 @@ public function stem($word)
     private function step0()
     {
         if ( ($position = $this->search(array("'s'", "'s", "'"))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
         }
     }
 
@@ -118,10 +123,10 @@ private function step1a()
         //      delete if the preceding word part contains a vowel not immediately before the s (so gas and this retain the s, gaps and kiwis lose it)
         if ( ($position = $this->search(array('s'))) !== false) {
             for ($i=0; $i<$position-1; $i++) {
-                $letter = StringHelper::substr($this->word, $i, 1);
+                $letter = UTF8::substr($this->word, $i, 1);
 
                 if (in_array($letter, self::$vowels)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     return true;
                 }
             }
@@ -152,16 +157,16 @@ private function step1b()
         //      if the word is short, add e (so hop -> hope)
         if ( ($position = $this->search(array('edly', 'ingly', 'ed', 'ing'))) !== false) {
             for ($i=0; $i<$position; $i++) {
-                $letter = StringHelper::substr($this->word, $i, 1);
+                $letter = UTF8::substr($this->word, $i, 1);
 
                 if (in_array($letter, self::$vowels)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
 
                     if ($this->search(array('at', 'bl', 'iz')) !== false) {
                         $this->word .= 'e';
 
                     } elseif ( ($position2 = $this->search(self::$doubles)) !== false) {
-                        $this->word = StringHelper::substr($this->word, 0, ($position2+1));
+                        $this->word = UTF8::substr($this->word, 0, ($position2+1));
 
                     } elseif ($this->isShort()) {
                         $this->word .= 'e';
@@ -183,7 +188,7 @@ private function step1c()
     {
         // replace suffix y or Y by i if preceded by a non-vowel
         // which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
 
         if ($length < 3) {
             return true;
@@ -191,7 +196,7 @@ private function step1c()
 
         if ( ($position = $this->search(array('y', 'Y'))) !== false) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
 
             if (! in_array($letter, self::$vowels)) {
                 $this->word = preg_replace('#(y|Y)$#u', 'i', $this->word);
@@ -318,7 +323,7 @@ private function step2()
 
             if ($this->inR1($position)) {
                 $before = $position - 1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
 
                 if ($letter == 'l') {
                     $this->word = preg_replace('#(ogi)$#u', 'og', $this->word);
@@ -333,10 +338,10 @@ private function step2()
 
             if ($this->inR1($position)) {
                 // a letter for you
-                $letter = StringHelper::substr($this->word, ($position-1), 1);
+                $letter = UTF8::substr($this->word, ($position-1), 1);
 
                 if (in_array($letter, self::$liEnding)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 }
             }
 
@@ -378,13 +383,13 @@ private function step3()
 
         // ful   ness:   delete
         if ( ($position = $this->searchIfInR1(array('ful', 'ness'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         // ative*:   delete if in R2
         if ( (($position = $this->searchIfInR1(array('ative'))) !== false) && ($this->inR2($position)) )  {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
@@ -404,7 +409,7 @@ private function step4()
             'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic'))) !== false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -413,10 +418,10 @@ private function step4()
         //      delete if preceded by s or t
         if ( ($position = $this->searchIfInR2(array('ion'))) !== false) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
 
             if ($letter == 's' || $letter == 't') {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return true;
@@ -435,11 +440,11 @@ private function step5()
         //      delete if in R2, or in R1 and not preceded by a short syllable
         if ( ($position = $this->search(array('e'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
             } elseif ($this->inR1($position)) {
                 if ( (! $this->searchShortSyllabe(-4, 3)) && (! $this->searchShortSyllabe(-3, 2)) ) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 }
             }
 
@@ -450,10 +455,10 @@ private function step5()
         //      delete if in R2 and preceded by l
         if ( ($position = $this->searchIfInR2(array('l'))) !== false) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
 
             if ($letter == 'l') {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return true;
@@ -464,21 +469,21 @@ private function step5()
 
     private function finish()
     {
-        $this->word = str_replace('Y', 'y', $this->word);
+        $this->word = UTF8::str_replace('Y', 'y', $this->word);
     }
 
     private function exceptionR1()
     {
-        if (StringHelper::strpos($this->word, 'gener') === 0) {
-            $this->r1 = StringHelper::substr($this->word, 5);
+        if (UTF8::strpos($this->word, 'gener') === 0) {
+            $this->r1 = UTF8::substr($this->word, 5);
             $this->r1Index = 5;
 
-        } elseif (StringHelper::strpos($this->word, 'commun') === 0) {
-            $this->r1 = StringHelper::substr($this->word, 6);
+        } elseif (UTF8::strpos($this->word, 'commun') === 0) {
+            $this->r1 = UTF8::substr($this->word, 6);
             $this->r1Index = 6;
 
-        } elseif (StringHelper::strpos($this->word, 'arsen') === 0) {
-            $this->r1 = StringHelper::substr($this->word, 5);
+        } elseif (UTF8::strpos($this->word, 'arsen') === 0) {
+            $this->r1 = UTF8::substr($this->word, 5);
             $this->r1Index = 5;
         }
     }
@@ -549,7 +554,7 @@ private function exception2()
      */
     private function isShort()
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
         return ( ($this->searchShortSyllabe(-3, 3) || $this->searchShortSyllabe(-2, 2)) && ($length == $this->r1Index) );
     }
 
@@ -562,7 +567,7 @@ private function isShort()
      */
     private function searchShortSyllabe($from, $nbLetters)
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
 
         if ($from < 0) {
             $from = $length + $from;
@@ -576,8 +581,8 @@ private function searchShortSyllabe($from, $nbLetters)
             return false;
         }
 
-        $first = StringHelper::substr($this->word, $from, 1);
-        $second = StringHelper::substr($this->word, ($from+1), 1);
+        $first = UTF8::substr($this->word, $from, 1);
+        $second = UTF8::substr($this->word, ($from+1), 1);
 
         if ($nbLetters == 2) {
             if ( (in_array($first, self::$vowels)) && (!in_array($second, self::$vowels)) ) {
@@ -585,7 +590,7 @@ private function searchShortSyllabe($from, $nbLetters)
             }
         }
 
-        $third = StringHelper::substr($this->word, ($from+2), 1);
+        $third = UTF8::substr($this->word, ($from+2), 1);
 
         if ( (!in_array($first, self::$vowels)) && (in_array($second, self::$vowels))
             && (!in_array($third, array_merge(self::$vowels, array('x', 'Y', 'w'))))) {
diff --git a/src/Stemmer/Finnish.php b/src/Stemmer/Finnish.php
index c6487b5..25539b2 100644
--- a/src/Stemmer/Finnish.php
+++ b/src/Stemmer/Finnish.php
@@ -6,7 +6,7 @@
  */
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  * Finnish Snowball Stemmer.
@@ -38,7 +38,12 @@ class Finnish extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (! UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = Utf8::strtolower($word);
 
         // R1 and R2 are then defined in the usual way
         $this->r1();
@@ -69,10 +74,10 @@ private function step1()
         // (a) kin   kaan   kään   ko   kö   han   hän   pa   pä
         //      delete if preceded by n, t or a vowel
         if (($position = $this->searchIfInR1(array('kaan', 'kään', 'kin', 'han', 'hän', 'ko', 'kö', 'pa', 'pä'))) !== false) {
-            $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
+            $lastLetter = Utf8::substr($this->word, ($position-1), 1);
 
             if (in_array($lastLetter, array_merge(['t', 'n'], self::$vowels))) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = Utf8::substr($this->word, 0, $position);
                 $this->r1();
                 $this->r2();
             }
@@ -84,7 +89,7 @@ private function step1()
         //  delete if in R2
         if (($position = $this->searchIfInR1(array('sti'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = Utf8::substr($this->word, 0, $position);
                 $this->r1();
                 $this->r2();
             }
@@ -106,10 +111,10 @@ private function step2()
         // si
         //  delete if not preceded by k
         if (($position = $this->searchIfInR1(array('si'))) !== false) {
-            $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
+            $lastLetter = UTF8::substr($this->word, ($position-1), 1);
 
             if ($lastLetter !== 'k') {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->r1();
                 $this->r2();
                 return true;
@@ -119,7 +124,7 @@ private function step2()
         // ni
         //  delete
         if (($position = $this->searchIfInR1(array('ni'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             // if preceded by kse, replace with ksi
             if ( ($position = $this->search(array('kse'))) !== false) {
                 $this->word = preg_replace('#(kse)$#u', 'ksi', $this->word);
@@ -132,7 +137,7 @@ private function step2()
         // nsa   nsä   mme   nne
         //  delete
         if (($position = $this->searchIfInR1(array('nsa', 'nsä', 'mme', 'nne'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             $this->r1();
             $this->r2();
             return true;
@@ -141,9 +146,9 @@ private function step2()
         // an
         //  delete if preceded by one of   ta   ssa   sta   lla   lta   na
         if (($position = $this->searchIfInR1(array('an'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
-            $lastThreeLetters = StringHelper::substr($word, -3, 3);
-            $lastTwoLetters = StringHelper::substr($word, -2, 2);
+            $word = UTF8::substr($this->word, 0, $position);
+            $lastThreeLetters = UTF8::substr($word, -3, 3);
+            $lastTwoLetters = UTF8::substr($word, -2, 2);
             if (in_array($lastThreeLetters, array('ssa', 'sta', 'lla', 'lta'), true) || in_array($lastTwoLetters, array('na', 'ta'), true)) {
                 $this->word = $word;
                 $this->r1();
@@ -155,9 +160,9 @@ private function step2()
         // än
         // delete if preceded by one of   tä   ssä   stä   llä   ltä   nä
         if (($position = $this->searchIfInR1(array('än'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
-            $lastThreeLetters = StringHelper::substr($word, -3, 3);
-            $lastTwoLetters = StringHelper::substr($word, -2, 2);
+            $word = UTF8::substr($this->word, 0, $position);
+            $lastThreeLetters = UTF8::substr($word, -3, 3);
+            $lastTwoLetters = UTF8::substr($word, -2, 2);
             if (in_array($lastThreeLetters, array('ssä', 'stä', 'llä', 'ltä'), true) || in_array($lastTwoLetters, array('nä', 'tä'), true)) {
                 $this->word = $word;
                 $this->r1();
@@ -169,9 +174,9 @@ private function step2()
         // en
         // delete if preceded by one of   lle   ine
         if (($position = $this->searchIfInR1(array('en'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
-            if (StringHelper::strlen($this->word) > 4) {
-                $lastThreeLetters = StringHelper::substr($this->word, -5, 3);
+            $word = UTF8::substr($this->word, 0, $position);
+            if (UTF8::strlen($this->word) > 4) {
+                $lastThreeLetters = UTF8::substr($this->word, -5, 3);
                 if (in_array($lastThreeLetters, array('lle', 'ine'), true)) {
                     $this->word = $word;
                     $this->r1();
@@ -199,9 +204,9 @@ private function step3()
                 continue;
             }
             if (($position = $this->searchIfInR1(array('h' . $vowel . 'n'))) !== false) {
-                $lastLetter = StringHelper::substr($this->word, $position-1, 1);
+                $lastLetter = UTF8::substr($this->word, $position-1, 1);
                 if ($lastLetter === $vowel) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     $this->_removedInStep3 = true;
                     $this->r1();
                     $this->r2();
@@ -213,11 +218,11 @@ private function step3()
         // siin   den   tten
         // delete if preceded by Vi
         if (($position = $this->searchIfInR1(array('siin', 'den', 'tten'))) !== false) {
-            $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
+            $lastLetter = UTF8::substr($this->word, ($position-1), 1);
             if ($lastLetter === 'i') {
-                $nextLastLetter = StringHelper::substr($this->word, ($position-2), 1);
+                $nextLastLetter = UTF8::substr($this->word, ($position-2), 1);
                 if (in_array($nextLastLetter, self::$restrictedVowels, true)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     $this->_removedInStep3 = true;
                     $this->r1();
                     $this->r2();
@@ -229,10 +234,10 @@ private function step3()
         // seen
         // delete if preceded by LV
         if (($position = $this->searchIfInR1(array('seen'))) !== false) {
-            $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
+            $lastLetters = UTF8::substr($this->word, ($position-2), 2);
 
             if (in_array($lastLetters, self::$longVowels, true)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->_removedInStep3 = true;
                 $this->r1();
                 $this->r2();
@@ -243,10 +248,10 @@ private function step3()
         // tta    ttä
         // delete if preceded by e
         if (($position = $this->searchIfInR1(array('tta', 'ttä'))) !== false) {
-            $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
+            $lastLetter = UTF8::substr($this->word, ($position-1), 1);
 
             if ($lastLetter === 'e') {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->_removedInStep3 = true;
                 $this->r1();
                 $this->r2();
@@ -257,7 +262,7 @@ private function step3()
         // ta  tä  ssa  ssä  sta  stä  lla  llä  lta  ltä  lle  na  nä  ksi  ine
         // delete
         if (($position = $this->searchIfInR1(array('ssa', 'ssä', 'sta', 'stä', 'lla', 'llä', 'lta', 'ltä', 'lle', 'ksi', 'na', 'nä', 'ine', 'ta', 'tä'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             $this->_removedInStep3 = true;
             $this->r1();
             $this->r2();
@@ -267,11 +272,11 @@ private function step3()
         // a    ä
         // delete if preceded by cv
         if (($position = $this->searchIfInR1(array('a', 'ä'))) !== false) {
-            $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
-            $nextLastLetter = StringHelper::substr($this->word, ($position-2), 1);
+            $lastLetter = UTF8::substr($this->word, ($position-1), 1);
+            $nextLastLetter = UTF8::substr($this->word, ($position-2), 1);
 
             if (in_array($lastLetter, self::$vowels, true) && in_array($nextLastLetter, self::$consonants, true)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->_removedInStep3 = true;
                 $this->r1();
                 $this->r2();
@@ -282,12 +287,12 @@ private function step3()
         // n
         // delete, and if preceded by LV or ie, delete the last vowel
         if (($position = $this->searchIfInR1(array('n'))) !== false) {
-            $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
+            $lastLetters = UTF8::substr($this->word, ($position-2), 2);
 
             if (in_array($lastLetters, self::$longVowels, true) || $lastLetters === 'ie') {
-                $this->word = StringHelper::substr($this->word, 0, $position-1);
+                $this->word = UTF8::substr($this->word, 0, $position-1);
             } else {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             $this->r1();
             $this->r2();
@@ -309,9 +314,9 @@ private function step4()
         // mpi   mpa   mpä   mmi   mma   mmä
         // delete if not preceded by po
         if (($position = $this->searchIfInR2(array('mpi', 'mpa', 'mpä', 'mmi', 'mma', 'mmä'))) !== false) {
-            $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
+            $lastLetters = UTF8::substr($this->word, ($position-2), 2);
             if ($lastLetters !== 'po') {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->r1();
                 $this->r2();
                 return true;
@@ -321,7 +326,7 @@ private function step4()
         // impi   impa   impä   immi   imma   immä   eja   ejä
         // delete
         if (($position = $this->searchIfInR2(array('impi', 'impa', 'impä', 'immi', 'imma', 'immä', 'eja', 'ejä'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             $this->r1();
             $this->r2();
             return true;
@@ -342,27 +347,27 @@ private function step5()
     {
         if ($this->_removedInStep3) {
             if (($position = $this->searchIfInR1(array('i', 'j'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 $this->r1();
                 $this->r2();
                 return true;
             }
         } else {
             if (($position = $this->searchIfInR1(array('t'))) !== false) {
-                $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
+                $lastLetter = UTF8::substr($this->word, ($position-1), 1);
                 if (in_array($lastLetter, self::$vowels, true)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     $this->r1();
                     $this->r2();
                     if (($position2 = $this->searchIfInR2(array('imma'))) !== false) {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                         $this->r1();
                         $this->r2();
                         return true;
                     } elseif (($position2 = $this->searchIfInR2(array('mma'))) !== false) {
-                        $lastLetters = StringHelper::substr($this->word, ($position2-2), 2);
+                        $lastLetters = UTF8::substr($this->word, ($position2-2), 2);
                         if ($lastLetters !== 'po') {
-                            $this->word = StringHelper::substr($this->word, 0, $position2);
+                            $this->word = UTF8::substr($this->word, 0, $position2);
                             $this->r1();
                             $this->r2();
                             return true;
@@ -385,35 +390,35 @@ private function step6()
         // a) If R1 ends LV
         // delete the last letter
         if (($position = $this->searchIfInR1(self::$longVowels)) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position+1);
+            $this->word = UTF8::substr($this->word, 0, $position+1);
             $this->r1();
             $this->r2();
         }
 
         // b) If R1 ends cX, c a consonant and X one of   a   ä   e   i,
         // delete the last letter
-        $lastLetter = StringHelper::substr($this->r1, -1, 1);
-        $secondToLastLetter = StringHelper::substr($this->r1, -2, 1);
+        $lastLetter = UTF8::substr($this->r1, -1, 1);
+        $secondToLastLetter = UTF8::substr($this->r1, -2, 1);
         if (in_array($secondToLastLetter, self::$consonants, true) && in_array($lastLetter, array('a', 'e', 'i', 'ä'))) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             $this->r1();
             $this->r2();
         }
 
         // c) If R1 ends oj or uj
         // delete the last letter
-        $twoLastLetters = StringHelper::substr($this->r1, -2, 2);
+        $twoLastLetters = UTF8::substr($this->r1, -2, 2);
         if (in_array($twoLastLetters, array('oj', 'uj'))) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             $this->r1();
             $this->r2();
         }
 
         // d) If R1 ends jo
         // delete the last letter
-        $twoLastLetters = StringHelper::substr($this->r1, -2, 2);
+        $twoLastLetters = UTF8::substr($this->r1, -2, 2);
         if ($twoLastLetters === 'jo') {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             $this->r1();
             $this->r2();
         }
@@ -422,15 +427,15 @@ private function step6()
         // vowels, remove the last consonant (so eläkk -> eläk,
         // aatonaatto -> aatonaato)
         $endVowels = '';
-        for ($i = StringHelper::strlen($this->word) - 1; $i > 0; $i--) {
-            $letter = StringHelper::substr($this->word, $i, 1);
+        for ($i = UTF8::strlen($this->word) - 1; $i > 0; $i--) {
+            $letter = UTF8::substr($this->word, $i, 1);
             if (in_array($letter, self::$vowels, true)) {
                 $endVowels = $letter . $endVowels;
             } else {
                 // check for double consonant
-                $prevLetter = StringHelper::substr($this->word, $i-1, 1);
+                $prevLetter = UTF8::substr($this->word, $i-1, 1);
                 if ($prevLetter === $letter) {
-                    $this->word = StringHelper::substr($this->word, 0, $i) . $endVowels;
+                    $this->word = UTF8::substr($this->word, 0, $i) . $endVowels;
                 }
                 break;
             }
diff --git a/src/Stemmer/French.php b/src/Stemmer/French.php
index 2bc53ca..8e1ee96 100644
--- a/src/Stemmer/French.php
+++ b/src/Stemmer/French.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,7 +22,12 @@ class French extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         $this->plainVowels = implode('', self::$vowels);
 
@@ -91,7 +96,7 @@ private function step1()
         //     delete if in R2
         if ( ($position = $this->search(array('ances', 'iqUes', 'ismes', 'ables', 'istes', 'ance', 'iqUe','isme', 'able', 'iste', 'eux'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return 3;
         }
@@ -101,10 +106,10 @@ private function step1()
         //      if preceded by ic, delete if in R2, else replace by iqU
         if ( ($position = $this->search(array('atrices', 'ateurs', 'ations', 'atrice', 'ateur', 'ation'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 if ( ($position2 = $this->searchIfInR2(array('ic'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                 } else {
                     $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
                 }
@@ -145,9 +150,9 @@ private function step1()
         if ( ($position = $this->search(array('issements', 'issement'))) != false) {
             if ($this->inR1($position)) {
                 $before = $position - 1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
                 if (! in_array($letter, self::$vowels)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 }
             }
             return 3;
@@ -163,20 +168,20 @@ private function step1()
 
             // delete if in RV
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
             if ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                 }
 
             // if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
             } elseif ( ($position = $this->search(array('eus'))) !== false) {
                 if ($this->inR2($position)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
 
                 } elseif ($this->inR1($position)) {
                     $this->word = preg_replace('#(eus)$#u', 'eux', $this->word);
@@ -184,7 +189,7 @@ private function step1()
 
             // if preceded by abl or iqU, delete if in R2, otherwise,
             } elseif ( ($position = $this->searchIfInR2(array('abl', 'iqU'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
             // if preceded by ièr or Ièr, replace by i if in RV
             } elseif ( ($position = $this->searchIfInRv(array('ièr', 'Ièr'))) !== false) {
@@ -202,13 +207,13 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by abil, delete if in R2, else replace by abl, otherwise,
             if ( ($position = $this->search(array('abil'))) !== false) {
                 if ($this->inR2($position)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 } else {
                     $this->word = preg_replace('#(abil)$#u', 'abl', $this->word);
                 }
@@ -216,14 +221,14 @@ private function step1()
             // if preceded by ic, delete if in R2, else replace by iqU, otherwise,
             } elseif ( ($position = $this->search(array('ic'))) !== false) {
                 if ($this->inR2($position)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 } else {
                     $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
                 }
 
             // if preceded by iv, delete if in R2
             } elseif ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return 3;
@@ -235,15 +240,15 @@ private function step1()
         if ( ($position = $this->search(array('ifs', 'ives', 'if', 'ive'))) !== false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position = $this->searchIfInR2(array('at'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 if ( ($position2 = $this->search(array('ic'))) !== false) {
                     if ($this->inR2($position2)) {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                     } else {
                         $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
                     }
@@ -273,7 +278,7 @@ private function step1()
         //      delete if in R2, else replace by eux if in R1
         if ( ($position = $this->search(array('euses', 'euse'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
             } elseif ($this->inR1($position)) {
                 $this->word = preg_replace('#(euses|euse)$#u', 'eux', $this->word);
@@ -304,9 +309,9 @@ private function step1()
         //      delete if preceded by a vowel in RV
         if ( ($position = $this->search(array('ments', 'ment'))) != false) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
             if ( $this->inRv($before) && (in_array($letter, self::$vowels)) ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return 2;
@@ -332,9 +337,9 @@ private function step2a()
             'issent', 'isses', 'issez', 'isse', 'issiez', 'issions', 'issons', 'is', 'it', 'i'))) !== false) {
 
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
             if ( $this->inRv($before) && (!in_array($letter, self::$vowels)) ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 return true;
             }
@@ -355,7 +360,7 @@ private function step2b()
             'ées', 'èrent', 'erais', 'erait', 'erai', 'eraIent', 'eras', 'erez', 'eriez',
             'erions', 'erons', 'eront', 'era', 'er', 'iez', 'ez','és', 'ée', 'é'))) !== false) {
 
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
 
             return true;
         }
@@ -368,12 +373,12 @@ private function step2b()
             'assent', 'asses', 'assiez', 'assions', 'asse', 'as', 'ai', 'a'))) !== false) {
 
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
             if ( $this->inRv($before) && ($letter == 'e') ) {
-                $this->word = StringHelper::substr($this->word, 0, $before);
+                $this->word = UTF8::substr($this->word, 0, $before);
 
             } else {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return true;
@@ -383,7 +388,7 @@ private function step2b()
         //      delete if in R2
         if ( ($position = $this->searchIfInRv(array('ions'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             return true;
@@ -408,7 +413,7 @@ private function step4()
     {
         //If the word ends s, not preceded by a, i, o, u, è or s, delete it.
         if (preg_match('#[^aiouès]s$#', $this->word)) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
 
         // In the rest of step 4, all tests are confined to the RV region.
@@ -416,9 +421,9 @@ private function step4()
         //      delete if in R2 and preceded by s or t
         if ( (($position = $this->searchIfInRv(array('ion'))) !== false) && ($this->inR2($position)) ) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
             if ( $this->inRv($before) && (($letter == 's') || ($letter == 't')) ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -433,7 +438,7 @@ private function step4()
         // e
         //      delete
         if ( ($this->searchIfInRv(array('e'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             return true;
         }
 
@@ -441,7 +446,7 @@ private function step4()
         //      if preceded by gu, delete
         if ( ($position = $this->searchIfInRv(array('guë'))) !== false) {
             if ($this->inRv($position+2)) {
-                $this->word = StringHelper::substr($this->word, 0, -1);
+                $this->word = UTF8::substr($this->word, 0, -1);
                 return true;
             }
         }
@@ -456,7 +461,7 @@ private function step4()
     private function step5()
     {
         if ($this->search(array('enn', 'onn', 'ett', 'ell', 'eill')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -475,7 +480,7 @@ private function step6()
      */
     private function finish()
     {
-        $this->word = str_replace(array('I','U','Y'), array('i', 'u', 'y'), $this->word);
+        $this->word = UTF8::str_replace(array('I','U','Y'), array('i', 'u', 'y'), $this->word);
     }
 
     /**
@@ -486,7 +491,7 @@ private function finish()
      */
     protected function rv()
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
 
         $this->rv = '';
         $this->rvIndex = $length;
@@ -496,28 +501,28 @@ protected function rv()
         }
 
         // If the word begins with two vowels, RV is the region after the third letter
-        $first = StringHelper::substr($this->word, 0, 1);
-        $second = StringHelper::substr($this->word, 1, 1);
+        $first = UTF8::substr($this->word, 0, 1);
+        $second = UTF8::substr($this->word, 1, 1);
 
         if ( (in_array($first, self::$vowels)) && (in_array($second, self::$vowels)) ) {
-            $this->rv = StringHelper::substr($this->word, 3);
+            $this->rv = UTF8::substr($this->word, 3);
             $this->rvIndex = 3;
             return true;
         }
 
         // (Exceptionally, par, col or tap, at the begining of a word is also taken to define RV as the region to their right.)
-        $begin3 = StringHelper::substr($this->word, 0, 3);
+        $begin3 = UTF8::substr($this->word, 0, 3);
         if (in_array($begin3, array('par', 'col', 'tap'))) {
-            $this->rv = StringHelper::substr($this->word, 3);
+            $this->rv = UTF8::substr($this->word, 3);
             $this->rvIndex = 3;
             return true;
         }
 
         //  otherwise the region after the first vowel not at the beginning of the word,
         for ($i=1; $i<$length; $i++) {
-            $letter = StringHelper::substr($this->word, $i, 1);
+            $letter = UTF8::substr($this->word, $i, 1);
             if (in_array($letter, self::$vowels)) {
-                $this->rv = StringHelper::substr($this->word, ($i + 1));
+                $this->rv = UTF8::substr($this->word, ($i + 1));
                 $this->rvIndex = $i + 1;
                 return true;
             }
diff --git a/src/Stemmer/German.php b/src/Stemmer/German.php
index 11dc733..4dc81a3 100644
--- a/src/Stemmer/German.php
+++ b/src/Stemmer/German.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -26,12 +26,17 @@ class German extends Stem
      */
     public function stem($word)
     {
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
         $this->plainVowels = implode('', self::$vowels);
 
-        $this->word = StringHelper::strtolower($word);
+        $this->word = UTF8::strtolower($word);
 
         // First, replace ß by ss
-        $this->word = str_replace('ß', 'ss', $this->word);
+        $this->word = UTF8::str_replace('ß', 'ss', $this->word);
 
         // put u and y between vowels into upper case
         $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
@@ -44,7 +49,7 @@ public function stem($word)
         // but then R1 is adjusted so that the region before it contains at least 3 letters.
         if ($this->r1Index < 3) {
             $this->r1Index = 3;
-            $this->r1 = StringHelper::substr($this->word, 3);
+            $this->r1 = UTF8::substr($this->word, 3);
         }
 
         $this->step1();
@@ -63,7 +68,7 @@ private function step1()
         // delete if in R1
         if ( ($position = $this->search(array('em', 'ern', 'er'))) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -71,11 +76,11 @@ private function step1()
         // delete if in R1
         if ( ($position = $this->search(array('es', 'en', 'e'))) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 //If an ending of group (b) is deleted, and the ending is preceded by niss, delete the final s
                 if ($this->search(array('niss')) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, -1);
+                    $this->word = UTF8::substr($this->word, 0, -1);
                 }
             }
             return true;
@@ -85,10 +90,10 @@ private function step1()
         if ( ($position = $this->search(array('s'))) !== false) {
             if ($this->inR1($position)) {
                 $before = $position - 1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
 
                 if (in_array($letter, self::$sEndings)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                 }
             }
             return true;
@@ -106,7 +111,7 @@ private function step2()
         //      delete if in R1
         if ( ($position = $this->search(array('en', 'er', 'est'))) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -117,10 +122,10 @@ private function step2()
             if ($this->inR1($position)) {
                 $before = $position - 1;
                 if ($before >= 3) {
-                    $letter = StringHelper::substr($this->word, $before, 1);
+                    $letter = UTF8::substr($this->word, $before, 1);
 
                     if (in_array($letter, self::$stEndings)) {
-                        $this->word = StringHelper::substr($this->word, 0, $position);
+                        $this->word = UTF8::substr($this->word, 0, $position);
                     }
                 }
             }
@@ -139,15 +144,15 @@ private function step3()
         //      if preceded by ig, delete if in R2 and not preceded by e
         if ( ($position = $this->search(array('end', 'ung'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->search(array('ig'))) !== false) {
                 $before = $position2 - 1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
 
                 if ( ($this->inR2($position2)) && ($letter != 'e') ) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                 }
             }
             return true;
@@ -157,10 +162,10 @@ private function step3()
         //      delete if in R2 and not preceded by e
         if ( ($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
             $before = $position - 1;
-            $letter = StringHelper::substr($this->word, $before, 1);
+            $letter = UTF8::substr($this->word, $before, 1);
 
             if ( ($this->inR2($position)) && ($letter != 'e') ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -170,12 +175,12 @@ private function step3()
         //      if preceded by er or en, delete if in R1
         if ( ($position = $this->search(array('lich', 'heit'))) != false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->search(array('er', 'en'))) !== false) {
                 if ($this->inR1($position2)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                 }
             }
             return true;
@@ -186,12 +191,12 @@ private function step3()
         //      if preceded by lich or ig, delete if in R2
         if ( ($position = $this->search(array('keit'))) != false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->search(array('lich', 'ig'))) !== false) {
                 if ($this->inR2($position2)) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                 }
             }
             return true;
@@ -206,6 +211,6 @@ private function step3()
     private function finish()
     {
         // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
-        $this->word = str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
+        $this->word = UTF8::str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
     }
 }
diff --git a/src/Stemmer/Italian.php b/src/Stemmer/Italian.php
index 4bb2004..bb09dee 100644
--- a/src/Stemmer/Italian.php
+++ b/src/Stemmer/Italian.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,12 +22,17 @@ class Italian extends Stem
      */
     public function stem($word)
     {
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
         $this->plainVowels = implode('', self::$vowels);
 
-        $this->word = StringHelper::strtolower($word);
+        $this->word = UTF8::strtolower($word);
 
         // First, replace all acute accents by grave accents.
-        $this->word = str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
+        $this->word = UTF8::str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
 
         //And, as in French, put u after q, and u, i between vowels into upper case. (See note on vowel marking.) The vowels are then
         $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
@@ -67,7 +72,7 @@ private function step0()
             'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
             'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'ci'))) !== false) {
 
-            $suffixe = StringHelper::substr($this->word, $position);
+            $suffixe = UTF8::substr($this->word, $position);
 
             // following one of (in RV)
              // a
@@ -77,7 +82,7 @@ private function step0()
             }, $a);
             // In case of (a) the suffix is deleted
             if ($this->searchIfInRv($a) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             //b
@@ -107,19 +112,19 @@ private function step1()
         //      if preceded by os, ic or abil, delete if in R2
         if ( ($position = $this->search(array('amente'))) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position3);
+                    $this->word = UTF8::substr($this->word, 0, $position3);
                 }
 
-                // if preceded by os, ic or ad, delete if in R2
+                // if preceded by os, ic or abil, delete if in R2
             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'abil'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position4);
+                $this->word = UTF8::substr($this->word, 0, $position4);
             }
             return true;
         }
@@ -132,7 +137,7 @@ private function step1()
         ))) !== false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -142,11 +147,11 @@ private function step1()
         //      if preceded by ic, delete if in R2
         if ( ($position = $this->search(array('azione', 'azioni', 'atore', 'atori'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 if ( ($position2 = $this->search(array('ic'))) !== false) {
                     if ($this->inR2($position2)) {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                     }
                 }
             }
@@ -184,7 +189,7 @@ private function step1()
         //      delete if in RV
         if ( ($position = $this->search(array('amento', 'amenti', 'imento', 'imenti'))) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -194,11 +199,11 @@ private function step1()
         //      if preceded by abil, ic or iv, delete if in R2
         if ( ($position = $this->search(array('ità'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -208,13 +213,13 @@ private function step1()
         //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2)
         if ( ($position = $this->search(array('ivo', 'ivi', 'iva', 'ive'))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
                 if ( ($position3 = $this->searchIfInR2(array('ic'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position3);
+                    $this->word = UTF8::substr($this->word, 0, $position3);
                 }
             }
             return true;
@@ -238,7 +243,7 @@ private function step2()
             'ano', 'are', 'ata', 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', 'erà', 'ere', 'erò', 'ete', 'eva',
             'evi', 'evo', 'ire', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', 'irò', 'ar', 'ir'))) !== false) {
 
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
         }
     }
 
@@ -249,10 +254,10 @@ private function step2()
     private function step3a()
     {
         if ($this->searchIfInRv(array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
 
             if ($this->searchIfInRv(array('i')) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, -1);
+                $this->word = UTF8::substr($this->word, 0, -1);
             }
             return true;
         }
@@ -279,6 +284,6 @@ private function step3b()
      */
     private function finish()
     {
-        $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
+        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
     }
 }
diff --git a/src/Stemmer/Norwegian.php b/src/Stemmer/Norwegian.php
index 627a578..b44b722 100644
--- a/src/Stemmer/Norwegian.php
+++ b/src/Stemmer/Norwegian.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,7 +22,12 @@ class Norwegian extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // R2 is not used: R1 is defined in the same way as in the German stemmer
         $this->r1();
@@ -30,7 +35,7 @@ public function stem($word)
         // then R1 is adjusted so that the region before it contains at least 3 letters.
         if ($this->r1Index < 3) {
             $this->r1Index = 3;
-            $this->r1 = StringHelper::substr($this->word, 3);
+            $this->r1 = UTF8::substr($this->word, 3);
         }
 
         // Do each of steps 1, 2 3 and 4.
@@ -51,12 +56,12 @@ public function stem($word)
      */
     private function hasValidSEnding($word)
     {
-        $lastLetter = StringHelper::substr($word, -1, 1);
+        $lastLetter = UTF8::substr($word, -1, 1);
         if (in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z'))) {
             return true;
         }
         if ($lastLetter == 'k') {
-            $beforeLetter = StringHelper::substr($word, -2, 1);
+            $beforeLetter = UTF8::substr($word, -2, 1);
             if (!in_array($beforeLetter, self::$vowels)) {
                 return true;
             }
@@ -83,14 +88,14 @@ private function step1()
             'hetenes', 'hetene', 'hetens', 'heten', 'endes', 'heter', 'ande', 'ende', 'enes', 'edes', 'ede', 'ane',
             'ene', 'het', 'ers', 'ets', 'ast', 'ens', 'en', 'ar', 'er', 'as', 'es', 'et', 'a', 'e'
         ))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         //  s
         //      delete if preceded by a valid s-ending
         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
+            $word = UTF8::substr($this->word, 0, $position);
             if ($this->hasValidSEnding($word)) {
                 $this->word = $word;
             }
@@ -105,7 +110,7 @@ private function step1()
     private function step2()
     {
         if ($this->searchIfInR1(array('dt', 'vt')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -119,7 +124,7 @@ private function step3()
         if ( ($position = $this->searchIfInR1(array(
             'hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig'
         ))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
         }
     }
 }
diff --git a/src/Stemmer/Portuguese.php b/src/Stemmer/Portuguese.php
index c5f3aae..c71cc59 100644
--- a/src/Stemmer/Portuguese.php
+++ b/src/Stemmer/Portuguese.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,9 +22,14 @@ class Portuguese extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
-        $this->word = str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
+        $this->word = UTF8::str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
 
         $this->rv();
         $this->r1();
@@ -61,7 +66,7 @@ private function step1()
             'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es' , 'ante'))) !== false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -101,19 +106,19 @@ private function step1()
 
             // delete if in R1
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position3);
+                    $this->word = UTF8::substr($this->word, 0, $position3);
                 }
 
                 // if preceded by os, ic or ad, delete if in R2
             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position4);
+                $this->word = UTF8::substr($this->word, 0, $position4);
             }
             return true;
         }
@@ -125,12 +130,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by ante, avel or ível, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('ante', 'avel', 'ível'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -142,12 +147,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by abil, ic or iv, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -159,12 +164,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by at, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -175,7 +180,7 @@ private function step1()
 
             if ($this->inRv($position)) {
                 $before = $position -1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
 
                 if ($letter == 'e') {
                     $this->word = preg_replace('#(iras|ira)$#u', 'ir', $this->word);
@@ -208,7 +213,7 @@ private function step2()
             'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou',
         ))) !== false) {
 
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
         return false;
@@ -222,10 +227,10 @@ private function step3()
     {
         // Delete suffix i if in RV and preceded by c
         if ($this->searchIfInRv(array('i')) !== false) {
-            $letter = StringHelper::substr($this->word, -2, 1);
+            $letter = UTF8::substr($this->word, -2, 1);
 
             if ($letter == 'c') {
-                $this->word = StringHelper::substr($this->word, 0, -1);
+                $this->word = UTF8::substr($this->word, 0, -1);
             }
             return true;
         }
@@ -239,7 +244,7 @@ private function step4()
     {
         // If the word ends with one of the suffixes "os   a   i   o   á   í   ó" in RV, delete it
         if ( ($position = $this->searchIfInRv(array('os', 'a', 'i', 'o','á', 'í', 'ó'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
         return false;
@@ -252,11 +257,11 @@ private function step5()
     {
         // If the word ends with one of "e   é   ê" in RV, delete it, and if preceded by gu (or ci) with the u (or i) in RV, delete the u (or i).
         if ($this->searchIfInRv(array('e', 'é', 'ê')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
 
             if ( ($position2 = $this->search(array('gu', 'ci'))) !== false) {
                 if ($this->inRv(($position2+1))) {
-                    $this->word = StringHelper::substr($this->word, 0, -1);
+                    $this->word = UTF8::substr($this->word, 0, -1);
                 }
             }
             return true;
@@ -273,6 +278,6 @@ private function step5()
     private function finish()
     {
-        // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
+        // restore the nasal vowels: turn the a~ and o~ placeholders back into ã and õ
-        $this->word = str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
+        $this->word = UTF8::str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
     }
 }
diff --git a/src/Stemmer/Romanian.php b/src/Stemmer/Romanian.php
index 87047dc..5da8744 100644
--- a/src/Stemmer/Romanian.php
+++ b/src/Stemmer/Romanian.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,7 +22,12 @@ class Romanian extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         $this->plainVowels = implode('', self::$vowels);
 
@@ -68,7 +73,7 @@ private function step0()
         //      delete
         if ( ($position = $this->search(array('ul', 'ului'))) !== false) {
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -104,7 +109,7 @@ private function step0()
         //      replace with i if not preceded by ab
         if ( ($position = $this->search(array('ile'))) !== false) {
             if ($this->inR1($position)) {
-                $before = StringHelper::substr($this->word, ($position-2), 2);
+                $before = UTF8::substr($this->word, ($position-2), 2);
 
                 if ($before != 'ab') {
                     $this->word = preg_replace('#(ile)$#u', 'i', $this->word);
@@ -221,7 +226,7 @@ private function step2()
             'at', 'os', 'iv', 'ut', 'it', 'ic'
         ))) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -231,9 +236,9 @@ private function step2()
         if ( ($position = $this->search(array('iune', 'iuni'))) !== false) {
             if ($this->inR2($position)) {
                 $before = $position - 1;
-                $letter = StringHelper::substr($this->word, $before, 1);
+                $letter = UTF8::substr($this->word, $before, 1);
                 if ($letter == 'ţ') {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     $this->word = preg_replace('#(ţ)$#u', 't', $this->word);
                 }
             }
@@ -277,10 +282,10 @@ private function step3()
             if ($this->inRv($position)) {
                 $before = $position - 1;
                 if ($this->inRv($before)) {
-                    $letter = StringHelper::substr($this->word, $before, 1);
+                    $letter = UTF8::substr($this->word, $before, 1);
 
                     if ( (!in_array($letter, self::$vowels)) || ($letter == 'u') ) {
-                        $this->word = StringHelper::substr($this->word, 0, $position);
+                        $this->word = UTF8::substr($this->word, 0, $position);
                     }
                 }
             }
@@ -296,7 +301,7 @@ private function step3()
             'aţi', 'eţi', 'iţi', 'âţi', 'sei', 'se', 'ăm', 'âm', 'em', 'im'
         ))) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -310,7 +315,7 @@ private function step4()
         // Search for the longest among the suffixes "a   e   i   ie   ă " and, if it is in RV, delete it.
         if ( ($position = $this->search(array('a', 'ie', 'e', 'i', 'ă'))) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
         }
 
@@ -324,6 +329,6 @@ private function step4()
     private function finish()
     {
         // Turn I, U back into i, u
-        $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
+        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
     }
 }
diff --git a/src/Stemmer/Russian.php b/src/Stemmer/Russian.php
index 3949a45..cd18dbf 100644
--- a/src/Stemmer/Russian.php
+++ b/src/Stemmer/Russian.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -56,7 +56,12 @@ class Russian extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // R2 is not used: R1 is defined in the same way as in the German stemmer
         $this->r1();
@@ -83,7 +88,7 @@ private function step1()
         // group 1
         if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[0])) !== false) {
             if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -91,7 +96,7 @@ private function step1()
         // group 2
         if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[1])) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -99,7 +104,7 @@ private function step1()
         // Otherwise try and remove a REFLEXIVE ending
         if ( ($position = $this->searchIfInRv(self::$reflexive)) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
         }
 
@@ -107,18 +112,18 @@ private function step1()
         // As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
         if ( ($position = $this->searchIfInRv(self::$adjective)) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
 
                 if ( ($position2 = $this->search(self::$participle[0])) !== false) {
                     if ( ($this->inRv($position2)) && ($this->checkGroup1($position2)) ) {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                         return true;
                     }
                 }
 
                 if ( ($position2 = $this->search(self::$participle[1])) !== false) {
                     if ($this->inRv($position2)) {
-                        $this->word = StringHelper::substr($this->word, 0, $position2);
+                        $this->word = UTF8::substr($this->word, 0, $position2);
                         return true;
                     }
                 }
@@ -129,21 +134,21 @@ private function step1()
 
         if ( ($position = $this->searchIfInRv(self::$verb[0])) !== false) {
             if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
 
         if ( ($position = $this->searchIfInRv(self::$verb[1])) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
 
         if ( ($position = $this->searchIfInRv(self::$noun)) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -158,7 +163,7 @@ private function step2()
     {
         if ( ($position = $this->searchIfInRv(array('и'))) !== false) {
             if ($this->inRv($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -173,7 +178,7 @@ private function step3()
     {
         if ( ($position = $this->searchIfInRv(self::$derivational)) !== false) {
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -187,18 +192,18 @@ private function step4()
     {
         // (2) if the word ends with a SUPERLATIVE ending, remove it
         if ( ($position = $this->searchIfInRv(self::$superlative)) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
         }
 
         // (1) Undouble н (n)
         if ( ($position = $this->searchIfInRv(array('нн'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, ($position+1));
+            $this->word = UTF8::substr($this->word, 0, ($position+1));
             return true;
         }
 
         // (3) if the word ends ь (') (soft sign) remove it
         if ( ($position = $this->searchIfInRv(array('ь'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
     }
@@ -208,15 +213,15 @@ private function step4()
      */
     protected function rv()
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
 
         $this->rv = '';
         $this->rvIndex = $length;
 
         for ($i=0; $i<$length; $i++) {
-            $letter = StringHelper::substr($this->word, $i, 1);
+            $letter = UTF8::substr($this->word, $i, 1);
             if (in_array($letter, self::$vowels)) {
-                $this->rv = StringHelper::substr($this->word, ($i+1));
+                $this->rv = UTF8::substr($this->word, ($i+1));
                 $this->rvIndex = $i + 1;
                 return true;
             }
@@ -237,7 +242,7 @@ private function checkGroup1($position)
             return false;
         }
 
-        $letter = StringHelper::substr($this->word, ($position - 1), 1);
+        $letter = UTF8::substr($this->word, ($position - 1), 1);
 
         if ($letter == 'а' || $letter == 'я') {
             return true;
diff --git a/src/Stemmer/Spanish.php b/src/Stemmer/Spanish.php
index b83c040..4f6f2c8 100644
--- a/src/Stemmer/Spanish.php
+++ b/src/Stemmer/Spanish.php
@@ -2,8 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
-use Wamania\Snowball\Transliterate;
+use voku\helper\UTF8;
 
 /**
  *
@@ -23,7 +22,12 @@ class Spanish extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         $this->rv();
         $this->r1();
@@ -67,7 +71,7 @@ public function stem($word)
     private function step0()
     {
         if ( ($position = $this->searchIfInRv(array('selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo' ))) != false) {
-            $suffixe = StringHelper::substr($this->word, $position);
+            $suffixe = UTF8::substr($this->word, $position);
 
             // a
             $a = array('iéndo', 'ándo', 'ár', 'ér', 'ír');
@@ -76,11 +80,11 @@ private function step0()
             }, $a);
 
             if ( ($position2 = $this->searchIfInRv($a)) !== false) {
-                $suffixe2 = StringHelper::substr($this->word, $position2);
-                $suffixe2 = Transliterate::utf8_latin_to_ascii($suffixe2); // unaccent
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $suffixe2 = UTF8::substr($this->word, $position2);
+                $suffixe2 = UTF8::to_utf8(UTF8::to_ascii($suffixe2)); // unaccent
+                $this->word = UTF8::substr($this->word, 0, $position2);
                 $this->word .= $suffixe2;
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
 
@@ -91,15 +95,15 @@ private function step0()
             }, $b);
 
             if ( ($position2 = $this->searchIfInRv($b)) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
 
             // c
             if ( ($position2 = $this->searchIfInRv(array('yendo' . $suffixe))) != false) {
-                $before = StringHelper::substr($this->word, ($position2-1), 1);
+                $before = UTF8::substr($this->word, ($position2-1), 1);
                 if ( (isset($before)) && ($before == 'u') ) {
-                    $this->word = StringHelper::substr($this->word, 0, $position);
+                    $this->word = UTF8::substr($this->word, 0, $position);
                     return true;
                 }
             }
@@ -121,7 +125,7 @@ private function step1()
             'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza'))) != false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
             return true;
         }
@@ -133,11 +137,11 @@ private function step1()
             'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia'))) != false) {
 
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             if ( ($position2 = $this->searchIfInR2(array('ic')))) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -177,19 +181,19 @@ private function step1()
 
             // delete if in R1
             if ($this->inR1($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
-                    $this->word = StringHelper::substr($this->word, 0, $position3);
+                    $this->word = UTF8::substr($this->word, 0, $position3);
                 }
 
             // if preceded by os, ic or ad, delete if in R2
             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position4);
+                $this->word = UTF8::substr($this->word, 0, $position4);
             }
             return true;
         }
@@ -201,12 +205,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by ante, able or ible, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -218,12 +222,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by abil, ic or iv, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -235,12 +239,12 @@ private function step1()
 
             // delete if in R2
             if ($this->inR2($position)) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
             }
 
             // if preceded by at, delete if in R2
             if ( ($position2 = $this->searchIfInR2(array('at'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, $position2);
+                $this->word = UTF8::substr($this->word, 0, $position2);
             }
             return true;
         }
@@ -258,9 +262,9 @@ private function step2a()
         if ( ($position = $this->searchIfInRv(array(
             'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye'))) != false) {
 
-            $before = StringHelper::substr($this->word, ($position-1), 1);
+            $before = UTF8::substr($this->word, ($position-1), 1);
             if ( (isset($before)) && ($before == 'u') ) {
-                $this->word = StringHelper::substr($this->word, 0, $position);
+                $this->word = UTF8::substr($this->word, 0, $position);
                 return true;
             }
         }
@@ -285,17 +289,17 @@ private function step2b()
             'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida', 'ado', 'ido', 'ará',
             'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an'
         ))) != false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         // en   es   éis   emos
         //      delete, and if preceded by gu delete the u (the gu need not be in RV)
         if ( ($position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'))) != false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
 
             if ( ($position2 = $this->search(array('gu'))) != false) {
-                $this->word = StringHelper::substr($this->word, 0, ($position2+1));
+                $this->word = UTF8::substr($this->word, 0, ($position2+1));
             }
 
 
@@ -312,19 +316,19 @@ private function step3()
         // os   a   o   á   í   ó
         //      delete if in RV
         if ( ($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) != false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         // e   é
         //      delete if in RV, and if preceded by gu with the u in RV delete the u
         if ( ($position = $this->searchIfInRv(array('e', 'é'))) != false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
 
             if ( ($position2 = $this->searchIfInRv(array('u'))) != false) {
-                $before = StringHelper::substr($this->word, ($position2-1), 1);
+                $before = UTF8::substr($this->word, ($position2-1), 1);
                 if ( (isset($before)) && ($before == 'g') ) {
-                    $this->word = StringHelper::substr($this->word, 0, $position2);
+                    $this->word = UTF8::substr($this->word, 0, $position2);
                     return true;
                 }
             }
@@ -339,6 +343,6 @@ private function step3()
      */
     private function finish()
     {
-        $this->word = str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
+        $this->word = UTF8::str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
     }
 }
diff --git a/src/Stemmer/Stem.php b/src/Stemmer/Stem.php
index 1ce7274..0c6f148 100644
--- a/src/Stemmer/Stem.php
+++ b/src/Stemmer/Stem.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 abstract class Stem implements Stemmer
 {
@@ -94,12 +94,12 @@ protected function searchIfInR2($suffixes)
 
     protected function search($suffixes, $offset = 0)
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
         if ($offset > $length) {
             return false;
         }
         foreach ($suffixes as $suffixe) {
-            if ( (($position = StringHelper::strrpos($this->word, $suffixe, $offset)) !== false) && ((StringHelper::strlen($suffixe)+$position) == $length) ) {
+            if ( (($position = UTF8::strrpos($this->word, $suffixe, $offset)) !== false) && ((UTF8::strlen($suffixe)+$position) == $length) ) {
                 return $position;
             }
         }
@@ -134,7 +134,7 @@ protected function r2()
      */
     protected function rx($in)
     {
-        $length = StringHelper::strlen($in);
+        $length = UTF8::strlen($in);
 
         // defaults
         $value = '';
@@ -143,7 +143,7 @@ protected function rx($in)
         // we search all vowels
         $vowels = array();
         for ($i=0; $i<$length; $i++) {
-            $letter = StringHelper::substr($in, $i, 1);
+            $letter = UTF8::substr($in, $i, 1);
             if (in_array($letter, static::$vowels)) {
                 $vowels[] = $i;
             }
@@ -152,11 +152,11 @@ protected function rx($in)
         // search the non-vowel following a vowel
         foreach ($vowels as $position) {
             $after = $position + 1;
-            $letter = StringHelper::substr($in, $after, 1);
+            $letter = UTF8::substr($in, $after, 1);
 
             if (! in_array($letter, static::$vowels)) {
                 $index = $after + 1;
-                $value = StringHelper::substr($in, ($after+1));
+                $value = UTF8::substr($in, ($after+1));
 
                 break;
             }
@@ -175,7 +175,7 @@ protected function rx($in)
      */
     protected function rv()
     {
-        $length = StringHelper::strlen($this->word);
+        $length = UTF8::strlen($this->word);
 
         $this->rv = '';
         $this->rvIndex = $length;
@@ -184,16 +184,16 @@ protected function rv()
             return true;
         }
 
-        $first = StringHelper::substr($this->word, 0, 1);
-        $second = StringHelper::substr($this->word, 1, 1);
+        $first = UTF8::substr($this->word, 0, 1);
+        $second = UTF8::substr($this->word, 1, 1);
 
         // If the second letter is a consonant, RV is the region after the next following vowel,
         if (!in_array($second, static::$vowels)) {
             for ($i=2; $i<$length; $i++) {
-                $letter = StringHelper::substr($this->word, $i, 1);
+                $letter = UTF8::substr($this->word, $i, 1);
                 if (in_array($letter, static::$vowels)) {
                     $this->rvIndex = $i + 1;
-                    $this->rv = StringHelper::substr($this->word, ($i+1));
+                    $this->rv = UTF8::substr($this->word, ($i+1));
                     return true;
                 }
             }
@@ -202,10 +202,10 @@ protected function rv()
         // or if the first two letters are vowels, RV is the region after the next consonant,
         if ( (in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
             for ($i=2; $i<$length; $i++) {
-                $letter = StringHelper::substr($this->word, $i, 1);
+                $letter = UTF8::substr($this->word, $i, 1);
                 if (! in_array($letter, static::$vowels)) {
                     $this->rvIndex = $i + 1;
-                    $this->rv = StringHelper::substr($this->word, ($i+1));
+                    $this->rv = UTF8::substr($this->word, ($i+1));
                     return true;
                 }
             }
@@ -213,7 +213,7 @@ protected function rv()
 
         // and otherwise (consonant-vowel case) RV is the region after the third letter.
         if ( (! in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
-            $this->rv = StringHelper::substr($this->word, 3);
+            $this->rv = UTF8::substr($this->word, 3);
             $this->rvIndex = 3;
             return true;
         }
diff --git a/src/Stemmer/Swedish.php b/src/Stemmer/Swedish.php
index ed8103c..32352ef 100644
--- a/src/Stemmer/Swedish.php
+++ b/src/Stemmer/Swedish.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball\Stemmer;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 
 /**
  *
@@ -22,7 +22,12 @@ class Swedish extends Stem
      */
     public function stem($word)
     {
-        $this->word = StringHelper::strtolower($word);
+        // we do ALL in UTF-8
+        if (!UTF8::is_utf8($word)) {
+            throw new \Exception('Word must be in UTF-8');
+        }
+
+        $this->word = UTF8::strtolower($word);
 
         // R2 is not used: R1 is defined in the same way as in the German stemmer
         $this->r1();
@@ -30,7 +35,7 @@ public function stem($word)
         // then R1 is adjusted so that the region before it contains at least 3 letters.
         if ($this->r1Index < 3) {
             $this->r1Index = 3;
-            $this->r1 = StringHelper::substr($this->word, 3);
+            $this->r1 = UTF8::substr($this->word, 3);
         }
 
         // Do each of steps 1, 2 3 and 4.
@@ -50,7 +55,7 @@ public function stem($word)
      */
     private function hasValidSEnding($word)
     {
-        $lastLetter = StringHelper::substr($word, -1, 1);
+        $lastLetter = UTF8::substr($word, -1, 1);
         return in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y'));
     }
 
@@ -69,14 +74,14 @@ private function step1()
             'orna', 'arna', 'erna', 'aren', 'ande', 'ades', 'arne', 'erns', 'aste', 'ade', 'ern', 'het',
             'ast', 'are', 'ens', 'or', 'es', 'ad', 'en', 'at', 'ar', 'as', 'er', 'a', 'e'
         ))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         //  s
         //      delete if preceded by a valid s-ending
         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
-            $word = StringHelper::substr($this->word, 0, $position);
+            $word = UTF8::substr($this->word, 0, $position);
             if ($this->hasValidSEnding($word)) {
                 $this->word = $word;
             }
@@ -91,7 +96,7 @@ private function step2()
     {
         // dd   gd   nn   dt   gt   kt   tt
         if ($this->searchIfInR1(array('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt')) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
         }
     }
 
@@ -104,21 +109,21 @@ private function step3()
         // lig   ig   els
         //      delete
         if ( ($position = $this->searchIfInR1(array('lig', 'ig', 'els'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, $position);
+            $this->word = UTF8::substr($this->word, 0, $position);
             return true;
         }
 
         // löst
         //      replace with lös
         if ( ($this->searchIfInR1(array('löst'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             return true;
         }
 
         // fullt
         //      replace with full
         if ( ($this->searchIfInR1(array('fullt'))) !== false) {
-            $this->word = StringHelper::substr($this->word, 0, -1);
+            $this->word = UTF8::substr($this->word, 0, -1);
             return true;
         }
     }
diff --git a/src/StemmerFactory.php b/src/StemmerFactory.php
index b8c487a..d60a8c6 100644
--- a/src/StemmerFactory.php
+++ b/src/StemmerFactory.php
@@ -2,7 +2,7 @@
 
 namespace Wamania\Snowball;
 
-use Joomla\String\StringHelper;
+use voku\helper\UTF8;
 use Wamania\Snowball\Stemmer\Catalan;
 use Wamania\Snowball\Stemmer\Danish;
 use Wamania\Snowball\Stemmer\Dutch;
@@ -43,7 +43,7 @@ class StemmerFactory
      */
     public static function create(string $code): Stemmer
     {
-        $code = StringHelper::strtolower($code);
+        $code = UTF8::strtolower($code);
 
         foreach (self::LANGS as $classname => $isoCodes) {
             if (in_array($code, $isoCodes)) {
diff --git a/src/Transliterate.php b/src/Transliterate.php
deleted file mode 100644
index 3399f6b..0000000
--- a/src/Transliterate.php
+++ /dev/null
@@ -1,253 +0,0 @@
-<?php
-
-namespace Wamania\Snowball;
-
-/**
- * Class to transliterate strings
- *
- * @note   Copy of Joomlas transliterate class which is a port of phputf8's utf8_accents_to_ascii()
- */
-class Transliterate
-{
-    /**
-     * Returns strings transliterated from UTF-8 to Latin
-     *
-     * @param   string   $string  String to transliterate
-     * @param   integer  $case    Optionally specify upper or lower case. Default to null.
-     *
-     * @return  string  Transliterated string
-     */
-    public static function utf8_latin_to_ascii($string, $case = 0)
-    {
-        static $UTF8_LOWER_ACCENTS = null;
-        static $UTF8_UPPER_ACCENTS = null;
-
-        if ($case <= 0) {
-            if (\is_null($UTF8_LOWER_ACCENTS)) {
-                $UTF8_LOWER_ACCENTS = [
-                    'à' => 'a',
-                    'ô' => 'o',
-                    'ď' => 'd',
-                    'ḟ' => 'f',
-                    'ë' => 'e',
-                    'š' => 's',
-                    'ơ' => 'o',
-                    'ß' => 'ss',
-                    'ă' => 'a',
-                    'ř' => 'r',
-                    'ț' => 't',
-                    'ň' => 'n',
-                    'ā' => 'a',
-                    'ķ' => 'k',
-                    'ŝ' => 's',
-                    'ỳ' => 'y',
-                    'ņ' => 'n',
-                    'ĺ' => 'l',
-                    'ħ' => 'h',
-                    'ṗ' => 'p',
-                    'ó' => 'o',
-                    'ú' => 'u',
-                    'ě' => 'e',
-                    'é' => 'e',
-                    'ç' => 'c',
-                    'ẁ' => 'w',
-                    'ċ' => 'c',
-                    'õ' => 'o',
-                    'ṡ' => 's',
-                    'ø' => 'o',
-                    'ģ' => 'g',
-                    'ŧ' => 't',
-                    'ș' => 's',
-                    'ė' => 'e',
-                    'ĉ' => 'c',
-                    'ś' => 's',
-                    'î' => 'i',
-                    'ű' => 'u',
-                    'ć' => 'c',
-                    'ę' => 'e',
-                    'ŵ' => 'w',
-                    'ṫ' => 't',
-                    'ū' => 'u',
-                    'č' => 'c',
-                    'ö' => 'oe',
-                    'è' => 'e',
-                    'ŷ' => 'y',
-                    'ą' => 'a',
-                    'ł' => 'l',
-                    'ų' => 'u',
-                    'ů' => 'u',
-                    'ş' => 's',
-                    'ğ' => 'g',
-                    'ļ' => 'l',
-                    'ƒ' => 'f',
-                    'ž' => 'z',
-                    'ẃ' => 'w',
-                    'ḃ' => 'b',
-                    'å' => 'a',
-                    'ì' => 'i',
-                    'ï' => 'i',
-                    'ḋ' => 'd',
-                    'ť' => 't',
-                    'ŗ' => 'r',
-                    'ä' => 'ae',
-                    'í' => 'i',
-                    'ŕ' => 'r',
-                    'ê' => 'e',
-                    'ü' => 'ue',
-                    'ò' => 'o',
-                    'ē' => 'e',
-                    'ñ' => 'n',
-                    'ń' => 'n',
-                    'ĥ' => 'h',
-                    'ĝ' => 'g',
-                    'đ' => 'd',
-                    'ĵ' => 'j',
-                    'ÿ' => 'y',
-                    'ũ' => 'u',
-                    'ŭ' => 'u',
-                    'ư' => 'u',
-                    'ţ' => 't',
-                    'ý' => 'y',
-                    'ő' => 'o',
-                    'â' => 'a',
-                    'ľ' => 'l',
-                    'ẅ' => 'w',
-                    'ż' => 'z',
-                    'ī' => 'i',
-                    'ã' => 'a',
-                    'ġ' => 'g',
-                    'ṁ' => 'm',
-                    'ō' => 'o',
-                    'ĩ' => 'i',
-                    'ù' => 'u',
-                    'į' => 'i',
-                    'ź' => 'z',
-                    'á' => 'a',
-                    'û' => 'u',
-                    'þ' => 'th',
-                    'ð' => 'dh',
-                    'æ' => 'ae',
-                    'µ' => 'u',
-                    'ĕ' => 'e',
-                    'œ' => 'oe',
-                ];
-            }
-
-            $string = str_replace(array_keys($UTF8_LOWER_ACCENTS), array_values($UTF8_LOWER_ACCENTS), $string);
-        }
-
-        if ($case >= 0) {
-            if (\is_null($UTF8_UPPER_ACCENTS)) {
-                $UTF8_UPPER_ACCENTS = [
-                    'À' => 'A',
-                    'Ô' => 'O',
-                    'Ď' => 'D',
-                    'Ḟ' => 'F',
-                    'Ë' => 'E',
-                    'Š' => 'S',
-                    'Ơ' => 'O',
-                    'Ă' => 'A',
-                    'Ř' => 'R',
-                    'Ț' => 'T',
-                    'Ň' => 'N',
-                    'Ā' => 'A',
-                    'Ķ' => 'K',
-                    'Ŝ' => 'S',
-                    'Ỳ' => 'Y',
-                    'Ņ' => 'N',
-                    'Ĺ' => 'L',
-                    'Ħ' => 'H',
-                    'Ṗ' => 'P',
-                    'Ó' => 'O',
-                    'Ú' => 'U',
-                    'Ě' => 'E',
-                    'É' => 'E',
-                    'Ç' => 'C',
-                    'Ẁ' => 'W',
-                    'Ċ' => 'C',
-                    'Õ' => 'O',
-                    'Ṡ' => 'S',
-                    'Ø' => 'O',
-                    'Ģ' => 'G',
-                    'Ŧ' => 'T',
-                    'Ș' => 'S',
-                    'Ė' => 'E',
-                    'Ĉ' => 'C',
-                    'Ś' => 'S',
-                    'Î' => 'I',
-                    'Ű' => 'U',
-                    'Ć' => 'C',
-                    'Ę' => 'E',
-                    'Ŵ' => 'W',
-                    'Ṫ' => 'T',
-                    'Ū' => 'U',
-                    'Č' => 'C',
-                    'Ö' => 'Oe',
-                    'È' => 'E',
-                    'Ŷ' => 'Y',
-                    'Ą' => 'A',
-                    'Ł' => 'L',
-                    'Ų' => 'U',
-                    'Ů' => 'U',
-                    'Ş' => 'S',
-                    'Ğ' => 'G',
-                    'Ļ' => 'L',
-                    'Ƒ' => 'F',
-                    'Ž' => 'Z',
-                    'Ẃ' => 'W',
-                    'Ḃ' => 'B',
-                    'Å' => 'A',
-                    'Ì' => 'I',
-                    'Ï' => 'I',
-                    'Ḋ' => 'D',
-                    'Ť' => 'T',
-                    'Ŗ' => 'R',
-                    'Ä' => 'Ae',
-                    'Í' => 'I',
-                    'Ŕ' => 'R',
-                    'Ê' => 'E',
-                    'Ü' => 'Ue',
-                    'Ò' => 'O',
-                    'Ē' => 'E',
-                    'Ñ' => 'N',
-                    'Ń' => 'N',
-                    'Ĥ' => 'H',
-                    'Ĝ' => 'G',
-                    'Đ' => 'D',
-                    'Ĵ' => 'J',
-                    'Ÿ' => 'Y',
-                    'Ũ' => 'U',
-                    'Ŭ' => 'U',
-                    'Ư' => 'U',
-                    'Ţ' => 'T',
-                    'Ý' => 'Y',
-                    'Ő' => 'O',
-                    'Â' => 'A',
-                    'Ľ' => 'L',
-                    'Ẅ' => 'W',
-                    'Ż' => 'Z',
-                    'Ī' => 'I',
-                    'Ã' => 'A',
-                    'Ġ' => 'G',
-                    'Ṁ' => 'M',
-                    'Ō' => 'O',
-                    'Ĩ' => 'I',
-                    'Ù' => 'U',
-                    'Į' => 'I',
-                    'Ź' => 'Z',
-                    'Á' => 'A',
-                    'Û' => 'U',
-                    'Þ' => 'Th',
-                    'Ð' => 'Dh',
-                    'Æ' => 'Ae',
-                    'Ĕ' => 'E',
-                    'Œ' => 'Oe',
-                ];
-            }
-
-            $string = str_replace(array_keys($UTF8_UPPER_ACCENTS), array_values($UTF8_UPPER_ACCENTS), $string);
-        }
-
-        return $string;
-    }
-}

From 73d7331e375f3075a91068dc9bbad281d9d1b4ea Mon Sep 17 00:00:00 2001
From: Dom Morgan <dom@d3r.com>
Date: Tue, 26 May 2026 11:33:32 +0100
Subject: [PATCH 2/3] Remove deprecations

---
 .gitignore                      |  1 +
 test/CatalanTest.php            |  2 +-
 test/CsvFileIterator.php        |  5 +++++
 test/CsvFileVerboseIterator.php | 28 ----------------------------
 4 files changed, 7 insertions(+), 29 deletions(-)
 delete mode 100644 test/CsvFileVerboseIterator.php

diff --git a/.gitignore b/.gitignore
index 1fc9c34..d2895e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 # PHPUnit
 /app/phpunit.xml
 /phpunit.xml
+.phpunit.result.cache
 
 # Build data
 /build/
diff --git a/test/CatalanTest.php b/test/CatalanTest.php
index 2512c48..b410590 100644
--- a/test/CatalanTest.php
+++ b/test/CatalanTest.php
@@ -20,6 +20,6 @@ public function testStem($word, $stem)
 
     public function load()
     {
-        return new CsvFileVerboseIterator('test/files/ca.txt');
+        return new CsvFileIterator('test/files/ca.txt');
     }
 }
diff --git a/test/CsvFileIterator.php b/test/CsvFileIterator.php
index ddc0b23..bf1ed17 100644
--- a/test/CsvFileIterator.php
+++ b/test/CsvFileIterator.php
@@ -19,6 +19,7 @@ public function __destruct()
         fclose($this->file);
     }
 
+    #[\ReturnTypeWillChange]
     public function rewind()
     {
         rewind($this->file);
@@ -32,21 +33,25 @@ public function rewind()
         $this->key = 0;
     }
 
+    #[\ReturnTypeWillChange]
     public function valid()
     {
         return !feof($this->file);
     }
 
+    #[\ReturnTypeWillChange]
     public function key()
     {
         return $this->key;
     }
 
+    #[\ReturnTypeWillChange]
     public function current()
     {
         return $this->current;
     }
 
+    #[\ReturnTypeWillChange]
     public function next()
     {
         $line = fgets($this->file);
diff --git a/test/CsvFileVerboseIterator.php b/test/CsvFileVerboseIterator.php
deleted file mode 100644
index 25314b6..0000000
--- a/test/CsvFileVerboseIterator.php
+++ /dev/null
@@ -1,28 +0,0 @@
-<?php
-namespace Wamania\Snowball\Tests;
-
-class CsvFileVerboseIterator extends CsvFileIterator
-{
-    public function rewind()
-    {
-        parent::rewind();
-        $this->_updateKey($this->current());
-    }
-
-    public function next()
-    {
-        parent::next();
-        if ($this->valid()) {
-            $this->_updateKey($this->current());
-        }
-    }
-
-    protected function _updateKey($value)
-    {
-        if ($value && sizeof($value)) {
-            $this->key = $value[0];
-        } elseif (sizeof($this->current)) {
-            $this->key = $this->current[0];
-        }
-    }
-}

From fcc2652d0107421e8a987be7fbd3ccfccdd608ca Mon Sep 17 00:00:00 2001
From: Dom Morgan <dom@d3r.com>
Date: Tue, 26 May 2026 17:38:09 +0100
Subject: [PATCH 3/3] Replace deprecated UTF8::str_replace with str_replace

---
 src/Stemmer/Catalan.php    | 4 ++--
 src/Stemmer/Dutch.php      | 4 ++--
 src/Stemmer/English.php    | 2 +-
 src/Stemmer/French.php     | 2 +-
 src/Stemmer/German.php     | 4 ++--
 src/Stemmer/Italian.php    | 4 ++--
 src/Stemmer/Portuguese.php | 4 ++--
 src/Stemmer/Romanian.php   | 2 +-
 src/Stemmer/Spanish.php    | 2 +-
 9 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/Stemmer/Catalan.php b/src/Stemmer/Catalan.php
index d52e4fc..b1de0ab 100644
--- a/src/Stemmer/Catalan.php
+++ b/src/Stemmer/Catalan.php
@@ -162,7 +162,7 @@ private function step1a()
         // atius atives ativa ativitat ativitats ible ibles assa asses assos ent ents íssim íssima íssims íssimes
         // ìssem ìsseu ìssin ims ima imes isme ista ismes istes inia inies íinia ínies ita ites triu trius oses osos
         // ient otes ots
-        // 
+        //
         //      delete if in R1
         if (($position = $this->search(self::$standard_suffix_1a)) !== false) {
             if ($this->inR1($position)) {
@@ -294,7 +294,7 @@ private function step2()
      */
     private function finish()
     {
-        $this->word = UTF8::str_replace(
+        $this->word = str_replace(
             ['á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ì', 'ò', 'ï', 'ü', '·'],
             ['a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'i', 'u', '.'],
             $this->word
diff --git a/src/Stemmer/Dutch.php b/src/Stemmer/Dutch.php
index fc7c1af..8fba0d6 100644
--- a/src/Stemmer/Dutch.php
+++ b/src/Stemmer/Dutch.php
@@ -30,7 +30,7 @@ public function stem($word)
         $this->word = UTF8::strtolower($word);
 
         // First, remove all umlaut and acute accents.
-        $this->word = UTF8::str_replace(
+        $this->word = str_replace(
             array('ä', 'ë', 'ï', 'ö', 'ü', 'á', 'é', 'í', 'ó', 'ú'),
             array('a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'),
             $this->word);
@@ -301,6 +301,6 @@ private function step4()
      */
     private function finish()
     {
-        $this->word = UTF8::str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
+        $this->word = str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
     }
 }
diff --git a/src/Stemmer/English.php b/src/Stemmer/English.php
index fe5f186..0e747d0 100644
--- a/src/Stemmer/English.php
+++ b/src/Stemmer/English.php
@@ -469,7 +469,7 @@ private function step5()
 
     private function finish()
     {
-        $this->word = UTF8::str_replace('Y', 'y', $this->word);
+        $this->word = str_replace('Y', 'y', $this->word);
     }
 
     private function exceptionR1()
diff --git a/src/Stemmer/French.php b/src/Stemmer/French.php
index 8e1ee96..cef305e 100644
--- a/src/Stemmer/French.php
+++ b/src/Stemmer/French.php
@@ -480,7 +480,7 @@ private function step6()
      */
     private function finish()
     {
-        $this->word = UTF8::str_replace(array('I','U','Y'), array('i', 'u', 'y'), $this->word);
+        $this->word = str_replace(array('I','U','Y'), array('i', 'u', 'y'), $this->word);
     }
 
     /**
diff --git a/src/Stemmer/German.php b/src/Stemmer/German.php
index 4dc81a3..2410ee7 100644
--- a/src/Stemmer/German.php
+++ b/src/Stemmer/German.php
@@ -36,7 +36,7 @@ public function stem($word)
         $this->word = UTF8::strtolower($word);
 
         // First, replace ß by ss
-        $this->word = UTF8::str_replace('ß', 'ss', $this->word);
+        $this->word = str_replace('ß', 'ss', $this->word);
 
         // put u and y between vowels into upper case
         $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
@@ -211,6 +211,6 @@ private function step3()
     private function finish()
     {
         // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
-        $this->word = UTF8::str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
+        $this->word = str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
     }
 }
diff --git a/src/Stemmer/Italian.php b/src/Stemmer/Italian.php
index bb09dee..40c9d86 100644
--- a/src/Stemmer/Italian.php
+++ b/src/Stemmer/Italian.php
@@ -32,7 +32,7 @@ public function stem($word)
         $this->word = UTF8::strtolower($word);
 
         // First, replace all acute accents by grave accents.
-        $this->word = UTF8::str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
+        $this->word = str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
 
         //And, as in French, put u after q, and u, i between vowels into upper case. (See note on vowel marking.) The vowels are then
         $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
@@ -284,6 +284,6 @@ private function step3b()
      */
     private function finish()
     {
-        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
+        $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
     }
 }
diff --git a/src/Stemmer/Portuguese.php b/src/Stemmer/Portuguese.php
index c71cc59..485aba0 100644
--- a/src/Stemmer/Portuguese.php
+++ b/src/Stemmer/Portuguese.php
@@ -29,7 +29,7 @@ public function stem($word)
 
         $this->word = UTF8::strtolower($word);
 
-        $this->word = UTF8::str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
+        $this->word = str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
 
         $this->rv();
         $this->r1();
@@ -278,6 +278,6 @@ private function step5()
     private function finish()
     {
         // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
-        $this->word = UTF8::str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
+        $this->word = str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
     }
 }
diff --git a/src/Stemmer/Romanian.php b/src/Stemmer/Romanian.php
index 5da8744..3e9edd1 100644
--- a/src/Stemmer/Romanian.php
+++ b/src/Stemmer/Romanian.php
@@ -329,6 +329,6 @@ private function step4()
     private function finish()
     {
         // Turn I, U back into i, u
-        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
+        $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
     }
 }
diff --git a/src/Stemmer/Spanish.php b/src/Stemmer/Spanish.php
index 4f6f2c8..190f761 100644
--- a/src/Stemmer/Spanish.php
+++ b/src/Stemmer/Spanish.php
@@ -343,6 +343,6 @@ private function step3()
      */
     private function finish()
     {
-        $this->word = UTF8::str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
+        $this->word = str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
     }
 }