From 5394968c86920fbc1748f3a67f726b9d2ed55e4e Mon Sep 17 00:00:00 2001 From: Shinsuke Sugaya Date: Sun, 15 Mar 2026 16:43:46 +0900 Subject: [PATCH] fix(index): correct analyzer filters and dynamic template names in index mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix English analyzer filter ordering and add missing stemmer/stopword filters. Add missing keyword filters for German, Persian, Romanian, and Thai analyzers. Correct dynamic template names (lang_ca→lang_ckb-iq, lang_en→lang_en-ie, lang_hu→lang_hy) and fix Hindi analyzer from empty_analyzer to hindi_analyzer. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main/resources/fess_indices/_aws/fess.json | 12 +++++++++--- src/main/resources/fess_indices/_aws/fess/doc.json | 8 ++++---- src/main/resources/fess_indices/_cloud/fess.json | 12 +++++++++--- src/main/resources/fess_indices/_cloud/fess/doc.json | 8 ++++---- src/main/resources/fess_indices/fess.json | 12 +++++++++--- src/main/resources/fess_indices/fess/doc.json | 8 ++++---- 6 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/main/resources/fess_indices/_aws/fess.json b/src/main/resources/fess_indices/_aws/fess.json index 5125553ed6..225352689c 100644 --- a/src/main/resources/fess_indices/_aws/fess.json +++ b/src/main/resources/fess_indices/_aws/fess.json @@ -761,10 +761,12 @@ "tokenizer": "standard", "filter": [ "truncate20_filter", + "possessive_stemmer_en_filter", "lowercase", + "stopword_en_filter", "english_keywords", "english_override", - "possessive_stemmer_en_filter" + "stemmer_en_filter" ] }, "finnish_analyzer": { @@ -811,6 +813,7 @@ "lowercase", "german_stop", "german_normalization", + "german_keywords", "german_override", "german_stemmer" ] @@ -960,7 +963,8 @@ "lowercase", "arabic_normalization", "persian_normalization", - "persian_stop" + "persian_stop", + "persian_keywords" ] }, "portuguese_analyzer": { @@ -982,6 +986,7 @@ "truncate20_filter", "lowercase", "romanian_stop", + "romanian_keywords", "romanian_override", "romanian_stemmer" ] @@ -1048,7 +1053,8 @@ "filter": [ "truncate20_filter", "lowercase", - "thai_stop" + "thai_stop", + "thai_keywords" ] }, "turkish_analyzer": { diff --git a/src/main/resources/fess_indices/_aws/fess/doc.json b/src/main/resources/fess_indices/_aws/fess/doc.json index 32d9bee3c5..8e9ff8b96d 100644 --- a/src/main/resources/fess_indices/_aws/fess/doc.json +++ b/src/main/resources/fess_indices/_aws/fess/doc.json @@ -37,7 +37,7 @@ } }, { - "lang_ca": { + "lang_ckb-iq": { "match": "*_ckb-iq", "mapping": { "type": "text", @@ -91,7 +91,7 @@ } }, { - "lang_en": { + "lang_en-ie": { "match": "*_en-ie", "mapping": { "type": "text", @@ -185,7 +185,7 @@ "match": "*_hi", "mapping": { "type": "text", - "analyzer": "empty_analyzer" + "analyzer": "hindi_analyzer" } } }, @@ -208,7 +208,7 @@ } }, { - "lang_hu": { + "lang_hy": { "match": "*_hy", "mapping": { "type": "text", diff --git a/src/main/resources/fess_indices/_cloud/fess.json b/src/main/resources/fess_indices/_cloud/fess.json index 5125553ed6..225352689c 100644 --- a/src/main/resources/fess_indices/_cloud/fess.json +++ b/src/main/resources/fess_indices/_cloud/fess.json @@ -761,10 +761,12 @@ "tokenizer": "standard", "filter": [ "truncate20_filter", + "possessive_stemmer_en_filter", "lowercase", + "stopword_en_filter", "english_keywords", "english_override", - "possessive_stemmer_en_filter" + "stemmer_en_filter" ] }, "finnish_analyzer": { @@ -811,6 +813,7 @@ "lowercase", "german_stop", "german_normalization", + "german_keywords", "german_override", "german_stemmer" ] @@ -960,7 +963,8 @@ "lowercase", "arabic_normalization", "persian_normalization", - "persian_stop" + "persian_stop", + "persian_keywords" ] }, "portuguese_analyzer": { @@ -982,6 +986,7 @@ "truncate20_filter", "lowercase", "romanian_stop", + "romanian_keywords", "romanian_override", "romanian_stemmer" ] @@ -1048,7 +1053,8 @@ "filter": [ "truncate20_filter", "lowercase", - "thai_stop" + "thai_stop", + "thai_keywords" ] }, "turkish_analyzer": { diff --git a/src/main/resources/fess_indices/_cloud/fess/doc.json b/src/main/resources/fess_indices/_cloud/fess/doc.json index 32d9bee3c5..8e9ff8b96d 100644 --- a/src/main/resources/fess_indices/_cloud/fess/doc.json +++ b/src/main/resources/fess_indices/_cloud/fess/doc.json @@ -37,7 +37,7 @@ } }, { - "lang_ca": { + "lang_ckb-iq": { "match": "*_ckb-iq", "mapping": { "type": "text", @@ -91,7 +91,7 @@ } }, { - "lang_en": { + "lang_en-ie": { "match": "*_en-ie", "mapping": { "type": "text", @@ -185,7 +185,7 @@ "match": "*_hi", "mapping": { "type": "text", - "analyzer": "empty_analyzer" + "analyzer": "hindi_analyzer" } } }, @@ -208,7 +208,7 @@ } }, { - "lang_hu": { + "lang_hy": { "match": "*_hy", "mapping": { "type": "text", diff --git a/src/main/resources/fess_indices/fess.json b/src/main/resources/fess_indices/fess.json index 74396ff6c4..65cd8a7379 100644 --- a/src/main/resources/fess_indices/fess.json +++ b/src/main/resources/fess_indices/fess.json @@ -806,10 +806,12 @@ "tokenizer": "standard", "filter": [ "truncate20_filter", + "possessive_stemmer_en_filter", "lowercase", + "stopword_en_filter", "english_keywords", "english_override", - "possessive_stemmer_en_filter" + "stemmer_en_filter" ] }, "finnish_analyzer": { @@ -856,6 +858,7 @@ "lowercase", "german_stop", "german_normalization", + "german_keywords", "german_override", "german_stemmer" ] @@ -1005,7 +1008,8 @@ "lowercase", "arabic_normalization", "persian_normalization", - "persian_stop" + "persian_stop", + "persian_keywords" ] }, "portuguese_analyzer": { @@ -1027,6 +1031,7 @@ "truncate20_filter", "lowercase", "romanian_stop", + "romanian_keywords", "romanian_override", "romanian_stemmer" ] @@ -1093,7 +1098,8 @@ "filter": [ "truncate20_filter", "lowercase", - "thai_stop" + "thai_stop", + "thai_keywords" ] }, "traditional_chinese_analyzer": { diff --git a/src/main/resources/fess_indices/fess/doc.json b/src/main/resources/fess_indices/fess/doc.json index 987dd2aa8d..4281982a31 100644 --- a/src/main/resources/fess_indices/fess/doc.json +++ b/src/main/resources/fess_indices/fess/doc.json @@ -37,7 +37,7 @@ } }, { - "lang_ca": { + "lang_ckb-iq": { "match": "*_ckb-iq", "mapping": { "type": "text", @@ -91,7 +91,7 @@ } }, { - "lang_en": { + "lang_en-ie": { "match": "*_en-ie", "mapping": { "type": "text", @@ -185,7 +185,7 @@ "match": "*_hi", "mapping": { "type": "text", - "analyzer": "empty_analyzer" + "analyzer": "hindi_analyzer" } } }, @@ -208,7 +208,7 @@ } }, { - "lang_hu": { + "lang_hy": { "match": "*_hy", "mapping": { "type": "text",