diff --git a/README.md b/README.md index afbbe36..52ad3dd 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ https://unicode-org.github.io/icu/userguide/format_parse/messages/ var mf = new MessageFormatter(); var str = @"You have {notifications, plural, - zero {no notifications} + =0 {no notifications} one {one notification} =42 {a universal amount of notifications} other {# notifications} @@ -86,7 +86,8 @@ and about 3 seconds (3236ms) without it. **These results are with a debug build, MessageFormat.NET supports the most commonly used formats: * Select Format: `{gender, select, male{He likes} female{She likes} other{They like}} cheeseburgers` -* Plural Format: `There {msgCount, plural, zero {are no unread messages} one {is 1 unread message} other{are # unread messages}}.` (where `#` is the actual number, with the offset (if any) subtracted). +* Plural Format: `There {msgCount, plural, =0 {are no unread messages} one {is 1 unread message} other{are # unread messages}}.` (where `#` is the actual number, with the offset (if any) subtracted). +* Ordinal Format: `You are the {position, selectordinal, one {#st} two {#nd} few {#rd} other {#th}} person in line.` * Simple variable replacement: `Your name is {name}` * Numbers: `Your age is {age, number}` * Dates: `You were born {birthday, date}` @@ -136,15 +137,18 @@ var message = formatter.FormatMessage("{value, number, $0.0}", new { value = 23 ## Adding your own pluralizer functions > Since MessageFormat 5.0, pluralizers based on the [official CLDR data][plural-cldr] ship -> with the package, so this is no longer needed. +> with the package, so this is no longer needed except when overriding specific custom locales. Same thing as with [MessageFormat.js][0], you can add your own pluralizer function. -The `Pluralizers` property is a `IDictionary`, so you can remove the built-in -ones if you want. 
+The `CardinalPluralizers` property is an `IDictionary` that starts empty, along +with `OrdinalPluralizers` for ordinal numbers. + +Entries you add to these dictionaries take precedence over the CLDR data when the input +locale matches exactly. ````csharp var mf = new MessageFormatter(); -mf.Pluralizers.Add("", n => { +mf.CardinalPluralizers.Add("", n => { // ´n´ is the number being pluralized. if(n == 0) return "zero"; @@ -159,11 +163,9 @@ you may use in your pluralization block. ````csharp var mf = new MessageFormatter(true, "en"); // true = use cache -mf.Pluralizers["en"] = n => +mf.CardinalPluralizers["en"] = n => { // ´n´ is the number being pluralized. - if (n == 0) - return "zero"; if (n == 1) return "one"; if (n > 1000) diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Jeffijoe.MessageFormat.MetadataGenerator.csproj b/src/Jeffijoe.MessageFormat.MetadataGenerator/Jeffijoe.MessageFormat.MetadataGenerator.csproj index 57486f9..4a55968 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Jeffijoe.MessageFormat.MetadataGenerator.csproj +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Jeffijoe.MessageFormat.MetadataGenerator.csproj @@ -11,6 +11,7 @@ + @@ -19,6 +20,10 @@ runtime; build; native; contentfiles; analyzers; buildtransitive + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/Condition.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/Condition.cs index b3c0bea..05b0ae6 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/Condition.cs +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/Condition.cs @@ -3,6 +3,19 @@ namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; +/// +/// Represents the 'condition' part of the LDML grammar. 
+/// +/// +/// Given the following 'pluralRule' tag: +/// <pluralRule count="one">i = 1 and v = 0 @integer 1</pluralRule> +/// +/// A Condition instance would represent 'i = 1 and v = 0' as a single . +/// +/// +/// The grammar defines a condition as a union of 'and_conditions', which we model as a +/// list of that each internally tracks . +/// [DebuggerDisplay("{{RuleDescription}}")] public class Condition { @@ -13,9 +26,22 @@ public Condition(string count, string ruleDescription, IReadOnlyList + /// The plural form this condition or rule defines, e.g., "one", "two", "few", "many", "other". + /// public string Count { get; } + /// + /// The original text of this rule, e.g., "i = 1 and v = 0 @integer 1". + /// + /// + /// Note - this includes the sample text ('@integer 1') which gets stripped out + /// when parsing the rule's conditional logic. + /// public string RuleDescription { get; } + /// + /// Parsed representation of . + /// public IReadOnlyList OrConditions { get; } } \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/PluralRule.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/PluralRule.cs index 54ba99e..c37efeb 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/PluralRule.cs +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/AST/PluralRule.cs @@ -2,6 +2,16 @@ namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; +/// +/// Corresponds to a pluralRules tag in CLDR XML (not to be confused with pluralRule). +/// Each instance of this class represents multiple individual rules for a set of locales. +/// +/// +/// <pluralRules locales="ast de en et fi fy gl ia ie io ji lij nl sc sv sw ur yi"> +/// <pluralRule count = "one"> i = 1 and v = 0 @integer 1</pluralRule> +/// ... 
+/// </pluralRules> +/// public class PluralRule { public PluralRule(string[] locales, IReadOnlyList conditions) diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralParser.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralParser.cs index 38f02cc..1656357 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralParser.cs +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralParser.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Xml; using System.Linq; using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; @@ -16,17 +17,41 @@ public PluralParser(XmlDocument rulesDocument, string[] excludedLocales) _excludedLocales = new HashSet(excludedLocales); } - public IEnumerable Parse() + /// + /// Parses the represented XML document into a new , and returns it. + /// + /// A containing the parsed plural rules of a single type. + public PluralRuleSet Parse() + { + var index = new PluralRuleSet(); + ParseInto(index); + return index; + } + + /// + /// Parses the represented XML document and merges the rules into the given . + /// + /// + /// If the CLDR XML is missing expected attributes. 
+ public void ParseInto(PluralRuleSet ruleIndex) { var root = _rulesDocument.DocumentElement!; - + foreach(XmlNode dataElement in root.ChildNodes) { if (dataElement.Name != "plurals") { continue; } - + + var typeAttr = dataElement.Attributes?["type"]; // the indexer yields null (it does not throw) when the attribute is absent + if (typeAttr is null || !typeAttr.Specified) + { + throw new ArgumentException("CLDR ruleset document is unexpectedly missing 'type' attribute on 'plurals' element."); + } + + string pluralType = typeAttr.Value; + foreach (XmlNode rule in dataElement.ChildNodes) { if(rule.Name == "pluralRules") @@ -34,7 +59,7 @@ public IEnumerable Parse() var parsed = ParseSingleRule(rule); if (parsed != null) { - yield return parsed; + ruleIndex.Add(pluralType, parsed); } } } diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralRuleSet.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralRuleSet.cs new file mode 100644 index 0000000..16eacd2 --- /dev/null +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/Parsing/PluralRuleSet.cs @@ -0,0 +1,133 @@ +using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; +using System; +using System.Collections.Generic; + +namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing; + +/// +/// Represents multiple fully parsed documents of instances, each with a given type (i.e., 'cardinal' vs 'ordinals'). +/// +public class PluralRuleSet +{ + /// + /// Special CLDR locale ID to use as the default for inheritance. All locales can chain to this + /// during lookups. + /// + public const string RootLocale = "root"; + + /// + /// CLDR plural type attribute for the counting number ruleset. + /// Used to translate strings that contain a count, e.g., to pluralize nouns. + /// + public const string CardinalType = "cardinal"; + + /// + /// CLDR plural type attribute for the ordered number ruleset. + /// Used to translate strings containing ordinal numbers, e.g., "1st", "2nd", "3rd". 
+ /// + public const string OrdinalType = "ordinal"; + + // Backing fields for the public properties below. + private readonly List _allRules = []; + private readonly Dictionary _indicesByLocale = new(StringComparer.OrdinalIgnoreCase); + + /// + /// Gets the unique conditions that have been indexed. Can be used to generate unique helper functions + /// to match specific rules based on an input number. + /// + public IReadOnlyList UniqueRules => this._allRules; + + /// + /// Maps normalized CLDR locale IDs to indices within + /// for their cardinal and ordinal rules, if defined. + /// + public IReadOnlyDictionary RuleIndicesByLocale => this._indicesByLocale; + + /// + /// Adds the given rule to our indices under the given plural type. + /// + /// e.g., 'cardinal' or 'ordinal'. + /// The parsed rule. + /// Thrown when a nonstandard plural type is provided. + public void Add(string pluralType, PluralRule rule) + { + // Index the rule will occupy once appended; the append is deferred until the plural type is validated. + int newRuleIndex = this._allRules.Count; + + int? cardinalIndex = null; + int? ordinalIndex = null; + if (pluralType == CardinalType) + { + cardinalIndex = newRuleIndex; + } + else if (pluralType == OrdinalType) + { + ordinalIndex = newRuleIndex; + } + else + { + throw new ArgumentOutOfRangeException(nameof(pluralType), pluralType, "Unexpected plural type"); + } + this._allRules.Add(rule); // appended only now, so a rejected plural type leaves no orphaned rule behind + // Loop over each locale for this rule and update our indices with the new value. + // If we've seen it before (for a different plural type), we'll update it in-place. + foreach (var locale in rule.Locales) + { + var normalized = this.NormalizeCldrLocale(locale); + + PluralRuleIndices newIndices; + if (this._indicesByLocale.TryGetValue(normalized, out var existingIndices)) + { + // Merge any previous indices we've observed for this locale + newIndices = existingIndices with + { + CardinalRuleIndex = cardinalIndex ?? existingIndices.CardinalRuleIndex, + OrdinalRuleIndex = ordinalIndex ?? 
existingIndices.OrdinalRuleIndex + }; + } + else + { + newIndices = new PluralRuleIndices( + CardinalRuleIndex: cardinalIndex, + OrdinalRuleIndex: ordinalIndex + ); + + } + + this._indicesByLocale[normalized] = newIndices; + if (normalized != locale) + { + this._indicesByLocale[locale] = newIndices; + } + } + } + + /// + /// Converts a CLDR locale ID to a normalized form for indexing. + /// + /// See the LDML spec + /// for an explanation of the forms that Unicode locale IDs can take. + /// + /// Notably, CLDR locale IDs use underscores as separators, while BCP 47 (which is the primary form + /// we expect as inputs at runtime) use dashes. + /// + /// + /// The return string is intended to be used for case-insensitive runtime lookup of input locales, + /// but the string itself is not strictly BCP 47 or CLDR compliant. For example, the CLDR 'root' + /// language is passed through instead of being remapped to 'und'. + /// + private string NormalizeCldrLocale(string cldrLocaleId) + { + return cldrLocaleId.Replace('_', '-'); + } + + /// + /// Helper type to represent the pluralization rules for a given locale, which may include both + /// cardinal and ordinal rules, or just one of the two. + /// + /// + /// For example, in CLDR 48.1, "pt_PT" has a defined plural rule but is expected to chain to "pt" + /// for ordinal lookup. + /// + public record PluralRuleIndices(int? CardinalRuleIndex, int? 
OrdinalRuleIndex); +} \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/PluralLanguagesGenerator.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/PluralLanguagesGenerator.cs index 9d1e439..d47015c 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/PluralLanguagesGenerator.cs +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/PluralLanguagesGenerator.cs @@ -1,13 +1,10 @@ using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing; -using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; using Jeffijoe.MessageFormat.MetadataGenerator.Plural.SourceGeneration; using Microsoft.CodeAnalysis; using System; -using System.Collections.Generic; using System.IO; -using System.Linq; using System.Xml; namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural; @@ -36,20 +33,26 @@ private string[] ReadExcludeLocales(GeneratorExecutionContext context) return Array.Empty(); } - private IReadOnlyList GetRules(string[] excludedLocales) + private PluralRuleSet GetRules(string[] excludedLocales) { - using var rulesStream = GetRulesContentStream(); - var xml = new XmlDocument(); - xml.Load(rulesStream); + PluralRuleSet ruleIndex = new(); + foreach (var ruleset in new[] { "plurals.xml", "ordinals.xml" }) + { + using var rulesStream = GetRulesContentStream(ruleset); + var xml = new XmlDocument(); + xml.Load(rulesStream); + + var parser = new PluralParser(xml, excludedLocales); + parser.ParseInto(ruleIndex); + } - var parser = new PluralParser(xml, excludedLocales); - return parser.Parse().ToArray(); + return ruleIndex; } - private Stream GetRulesContentStream() + private Stream GetRulesContentStream(string cldrFileName) { - return typeof(PluralLanguagesGenerator).Assembly.GetManifestResourceStream("Jeffijoe.MessageFormat.MetadataGenerator.data.plurals.xml")!; + return typeof(PluralLanguagesGenerator).Assembly.GetManifestResourceStream($"Jeffijoe.MessageFormat.MetadataGenerator.data.{cldrFileName}")!; } 
public void Initialize(GeneratorInitializationContext context) diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/SourceGeneration/PluralRulesMetadataGenerator.cs b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/SourceGeneration/PluralRulesMetadataGenerator.cs index e797b99..caf0dec 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/SourceGeneration/PluralRulesMetadataGenerator.cs +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/Plural/SourceGeneration/PluralRulesMetadataGenerator.cs @@ -1,16 +1,15 @@ -using System.Collections.Generic; -using System.Text; -using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; +using System.Text; +using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing; namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.SourceGeneration; public class PluralRulesMetadataGenerator { - private readonly IReadOnlyList _rules; + private readonly PluralRuleSet _rules; private readonly StringBuilder _sb; private int _indent; - public PluralRulesMetadataGenerator(IReadOnlyList rules) + public PluralRulesMetadataGenerator(PluralRuleSet rules) { _rules = rules; _sb = new StringBuilder(); @@ -18,8 +17,10 @@ public PluralRulesMetadataGenerator(IReadOnlyList rules) public string GenerateClass() { + WriteLine("#nullable enable"); WriteLine("using System;"); WriteLine("using System.Collections.Generic;"); + WriteLine("using System.Diagnostics.CodeAnalysis;"); WriteLine("namespace Jeffijoe.MessageFormat.Formatting.Formatters"); WriteLine("{"); @@ -29,18 +30,20 @@ public string GenerateClass() WriteLine("{"); AddIndent(); - for (var ruleIdx = 0; ruleIdx < _rules.Count; ruleIdx++) - { - var rule = _rules[ruleIdx]; + // Export a constant for the normalized root locale to match the logic we're using internally. + // This way the rest of the lib's locale chaining can continue to work if we swap out + // normalization internally. 
+ var rootRules = _rules.RuleIndicesByLocale[PluralRuleSet.RootLocale]; + WriteLine($"public static readonly string RootLocale = \"{PluralRuleSet.RootLocale}\";"); + // Generate a method for each unique rule, by index, that chooses the plural form + // for a given input source number (the PluralContext) according to that rule. + var uniqueRules = _rules.UniqueRules; + for (var ruleIdx = 0; ruleIdx < uniqueRules.Count; ruleIdx++) + { + var rule = uniqueRules[ruleIdx]; var ruleGenerator = new RuleGenerator(rule); - foreach(var locale in rule.Locales) - { - WriteLine($"public static string Locale_{locale.ToUpper()}(PluralContext context) => Rule{ruleIdx}(context);"); - WriteLine(string.Empty); - } - WriteLine($"private static string Rule{ruleIdx}(PluralContext context)"); WriteLine("{"); AddIndent(); @@ -52,33 +55,76 @@ public string GenerateClass() WriteLine(string.Empty); } - WriteLine("private static readonly Dictionary Pluralizers = new Dictionary()"); + // Generate a static lookup dictionary of locale (case-insensitive) to LocalePluralizers for that locale. + // e.g., + // en -> { + // Cardinal = Rule0, + // Ordinal = Rule1, + // }, + // [etc for other locales, with some null values for unmapped locales] + WriteLine("private static readonly Dictionary Pluralizers = new(StringComparer.OrdinalIgnoreCase)"); WriteLine("{"); AddIndent(); - for (int ruleIdx = 0; ruleIdx < _rules.Count; ruleIdx++) + foreach (var kvp in _rules.RuleIndicesByLocale) { - PluralRule rule = _rules[ruleIdx]; - foreach (var locale in rule.Locales) - { - WriteLine($"{{\"{locale}\", Rule{ruleIdx}}},"); - } + string locale = kvp.Key; - WriteLine(string.Empty); + // When index is defined, we want "Rule#" as a reference to the delegate generated above; + // otherwise we want null. + int? cardinalIdx = kvp.Value.CardinalRuleIndex; + int? ordinalIdx = kvp.Value.OrdinalRuleIndex; + string cardinalValue = cardinalIdx is not null ? 
$"Rule{cardinalIdx}" : "null"; + string ordinalValue = ordinalIdx is not null ? $"Rule{ordinalIdx}" : "null"; + + WriteLine($"{{\"{locale}\", new LocalePluralizers(Cardinal: {cardinalValue}, Ordinal: {ordinalValue})}},"); } DecreaseIndent(); WriteLine("};"); WriteLine(string.Empty); - WriteLine("public static partial bool TryGetRuleByLocale(string locale, out ContextPluralizer contextPluralizer)"); + // Finally generate our public API to the rest of the library, that takes a locale and pluralType + // and tries to retrieve an appropriate localizer to map an input source number to the form for the request. + WriteLine("public static partial bool TryGetCardinalRuleByLocale(string locale, [NotNullWhen(true)] out ContextPluralizer? contextPluralizer)"); WriteLine("{"); AddIndent(); - WriteLine("return Pluralizers.TryGetValue(locale, out contextPluralizer);"); + WriteLine("if (!Pluralizers.TryGetValue(locale, out var pluralizersForLocale))"); + WriteLine("{"); + AddIndent(); + WriteLine("contextPluralizer = null;"); + WriteLine("return false;"); + DecreaseIndent(); + WriteLine("}"); + WriteLine("contextPluralizer = pluralizersForLocale.Cardinal;"); + WriteLine("return contextPluralizer != null;"); DecreaseIndent(); WriteLine("}"); + WriteLine(string.Empty); + + // Repeat the above for ordinal rules. + WriteLine("public static partial bool TryGetOrdinalRuleByLocale(string locale, [NotNullWhen(true)] out ContextPluralizer? contextPluralizer)"); + WriteLine("{"); + AddIndent(); + + WriteLine("if (!Pluralizers.TryGetValue(locale, out var pluralizersForLocale))"); + WriteLine("{"); + AddIndent(); + WriteLine("contextPluralizer = null;"); + WriteLine("return false;"); + DecreaseIndent(); + WriteLine("}"); + WriteLine("contextPluralizer = pluralizersForLocale.Ordinal;"); + WriteLine("return contextPluralizer != null;"); + + DecreaseIndent(); + WriteLine("}"); + + // Generate the helper record and then clean up. 
+ WriteLine(string.Empty); + WriteLine("private record LocalePluralizers(ContextPluralizer? Cardinal, ContextPluralizer? Ordinal);"); DecreaseIndent(); WriteLine("}"); diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/data/README.md b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/README.md new file mode 100644 index 0000000..75ddf78 --- /dev/null +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/README.md @@ -0,0 +1,3 @@ +CLDR supplemental data files obtained from https://cldr.unicode.org/downloads/cldr-48 + +CLDR v48.1 was released 2025-01-08; refer to https://cldr.unicode.org/index/downloads \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/data/ordinals.xml b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/ordinals.xml new file mode 100644 index 0000000..c8ea54b --- /dev/null +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/ordinals.xml @@ -0,0 +1,176 @@ + + + + + + + + + + + + @integer 0~15, 100, 1000, 10000, 100000, 1000000, … + + + + + + n % 10 = 1,2 and n % 100 != 11,12 @integer 1, 2, 21, 22, 31, 32, 41, 42, 51, 52, 61, 62, 71, 72, 81, 82, 101, 1001, … + @integer 0, 3~17, 100, 1000, 10000, 100000, 1000000, … + + + n = 1 @integer 1 + @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … + + + n = 1,5 @integer 1, 5 + @integer 0, 2~4, 6~17, 100, 1000, 10000, 100000, 1000000, … + + + n = 1..4 @integer 1~4 + @integer 0, 5~19, 100, 1000, 10000, 100000, 1000000, … + + + + + + n % 10 = 2,3 and n % 100 != 12,13 @integer 2, 3, 22, 23, 32, 33, 42, 43, 52, 53, 62, 63, 72, 73, 82, 83, 102, 1002, … + @integer 0, 1, 4~17, 100, 1000, 10000, 100000, 1000000, … + + + n % 10 = 3 and n % 100 != 13 @integer 3, 23, 33, 43, 53, 63, 73, 83, 103, 1003, … + @integer 0~2, 4~16, 100, 1000, 10000, 100000, 1000000, … + + + n % 10 = 6,9 or n = 10 @integer 6, 9, 10, 16, 19, 26, 29, 36, 39, 106, 1006, … + @integer 0~5, 7, 8, 11~15, 17, 18, 20, 100, 1000, 10000, 100000, 1000000, … + + + + + + n % 10 = 6 or n % 10 = 9 or n % 10 = 
0 and n != 0 @integer 6, 9, 10, 16, 19, 20, 26, 29, 30, 36, 39, 40, 100, 1000, 10000, 100000, 1000000, … + @integer 0~5, 7, 8, 11~15, 17, 18, 21, 101, 1001, … + + + n = 11,8,80,800 @integer 8, 11, 80, 800 + @integer 0~7, 9, 10, 12~17, 100, 1000, 10000, 100000, 1000000, … + + + n = 11,8,80..89,800..899 @integer 8, 11, 80~89, 800~803 + @integer 0~7, 9, 10, 12~17, 100, 1000, 10000, 100000, 1000000, … + + + + + + i = 1 @integer 1 + i = 0 or i % 100 = 2..20,40,60,80 @integer 0, 2~16, 102, 1002, … + @integer 21~36, 100, 1000, 10000, 100000, 1000000, … + + + n = 1 @integer 1 + n % 10 = 4 and n % 100 != 14 @integer 4, 24, 34, 44, 54, 64, 74, 84, 104, 1004, … + @integer 0, 2, 3, 5~17, 100, 1000, 10000, 100000, 1000000, … + + + n = 1..4 or n % 100 = 1..4,21..24,41..44,61..64,81..84 @integer 1~4, 21~24, 41~44, 61~64, 101, 1001, … + n = 5 or n % 100 = 5 @integer 5, 105, 205, 305, 405, 505, 605, 705, 1005, … + @integer 0, 6~20, 100, 1000, 10000, 100000, 1000000, … + + + + + + i = 0 @integer 0 + i = 1 @integer 1 + i = 2,3,4,5,6 @integer 2~6 + @integer 7~22, 100, 1000, 10000, 100000, 1000000, … + + + + + + n % 10 = 1 and n % 100 != 11 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, … + n % 10 = 2 and n % 100 != 12 @integer 2, 22, 32, 42, 52, 62, 72, 82, 102, 1002, … + n % 10 = 3 and n % 100 != 13 @integer 3, 23, 33, 43, 53, 63, 73, 83, 103, 1003, … + @integer 0, 4~18, 100, 1000, 10000, 100000, 1000000, … + + + n = 1 @integer 1 + n = 2,3 @integer 2, 3 + n = 4 @integer 4 + @integer 0, 5~19, 100, 1000, 10000, 100000, 1000000, … + + + n = 1,11 @integer 1, 11 + n = 2,12 @integer 2, 12 + n = 3,13 @integer 3, 13 + @integer 0, 4~10, 14~21, 100, 1000, 10000, 100000, 1000000, … + + + n = 1,3 @integer 1, 3 + n = 2 @integer 2 + n = 4 @integer 4 + @integer 0, 5~19, 100, 1000, 10000, 100000, 1000000, … + + + + + + i % 10 = 1 and i % 100 != 11 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, … + i % 10 = 2 and i % 100 != 12 @integer 2, 22, 32, 42, 52, 62, 72, 82, 102, 1002, … + i % 10 = 
7,8 and i % 100 != 17,18 @integer 7, 8, 27, 28, 37, 38, 47, 48, 57, 58, 67, 68, 77, 78, 87, 88, 107, 1007, … + @integer 0, 3~6, 9~19, 100, 1000, 10000, 100000, 1000000, … + + + + + + i % 10 = 1,2,5,7,8 or i % 100 = 20,50,70,80 @integer 1, 2, 5, 7, 8, 11, 12, 15, 17, 18, 20~22, 25, 101, 1001, … + i % 10 = 3,4 or i % 1000 = 100,200,300,400,500,600,700,800,900 @integer 3, 4, 13, 14, 23, 24, 33, 34, 43, 44, 53, 54, 63, 64, 73, 74, 100, 1003, … + i = 0 or i % 10 = 6 or i % 100 = 40,60,90 @integer 0, 6, 16, 26, 36, 40, 46, 56, 106, 1006, … + @integer 9, 10, 19, 29, 30, 39, 49, 59, 69, 79, 109, 1000, 10000, 100000, 1000000, … + + + + + + n = 1 @integer 1 + n = 2,3 @integer 2, 3 + n = 4 @integer 4 + n = 6 @integer 6 + @integer 0, 5, 7~20, 100, 1000, 10000, 100000, 1000000, … + + + n = 1,5,7,8,9,10 @integer 1, 5, 7~10 + n = 2,3 @integer 2, 3 + n = 4 @integer 4 + n = 6 @integer 6 + @integer 0, 11~25, 100, 1000, 10000, 100000, 1000000, … + + + n = 1,5,7..9 @integer 1, 5, 7~9 + n = 2,3 @integer 2, 3 + n = 4 @integer 4 + n = 6 @integer 6 + @integer 0, 10~24, 100, 1000, 10000, 100000, 1000000, … + + + + + + n = 0,7,8,9 @integer 0, 7~9 + n = 1 @integer 1 + n = 2 @integer 2 + n = 3,4 @integer 3, 4 + n = 5,6 @integer 5, 6 + @integer 10~25, 100, 1000, 10000, 100000, 1000000, … + + + diff --git a/src/Jeffijoe.MessageFormat.MetadataGenerator/data/plurals.xml b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/plurals.xml index 49c49ea..26cca25 100644 --- a/src/Jeffijoe.MessageFormat.MetadataGenerator/data/plurals.xml +++ b/src/Jeffijoe.MessageFormat.MetadataGenerator/data/plurals.xml @@ -1,9 +1,9 @@ @@ -19,7 +19,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic - + i = 0 or n = 1 @integer 0, 1 @decimal 0.0~1.0, 0.00~0.04 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 1.1~2.6, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … @@ -27,7 +27,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic i = 0,1 
@integer 0, 1 @decimal 0.0~1.5 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … - + i = 1 and v = 0 @integer 1 @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … @@ -35,7 +35,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic n = 0,1 or i = 0 and f = 1 @integer 0, 1 @decimal 0.0, 0.1, 1.0, 0.00, 0.01, 1.00, 0.000, 0.001, 1.000, 0.0000, 0.0001, 1.0000 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 0.2~0.9, 1.1~1.8, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … - + n = 0..1 @integer 0, 1 @decimal 0.0, 1.0, 0.00, 1.00, 0.000, 1.000, 0.0000, 1.0000 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … @@ -76,7 +76,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic i = 0,1 and n != 0 @integer 1 @decimal 0.1~1.6 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … - + n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000 n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000 @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … @@ -125,7 +125,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, … @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, … - + i = 1 and v = 0 @integer 1 e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 
4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, … @integer 0, 2~16, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, … @@ -192,6 +192,13 @@ CLDR data files are interpreted according to the LDML specification (http://unic + + n % 10 = 1 and n % 100 != 11 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, … @decimal 1.0, 21.0, 31.0, 41.0, 51.0, 61.0, 71.0, 81.0, 101.0, 1001.0, … + n = 2 @integer 2 @decimal 2.0, 2.00, 2.000, 2.0000 + n != 2 and n % 10 = 2..9 and n % 100 != 11..19 @integer 3~9, 22~29, 32, 102, 1002, … @decimal 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 22.0, 102.0, 1002.0, … + f != 0 @decimal 0.1~0.9, 1.1~1.7, 10.1, 100.1, 1000.1, … + @integer 0, 10~20, 30, 40, 50, 60, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … + n % 10 = 1 and n % 100 != 11,71,91 @integer 1, 21, 31, 41, 51, 61, 81, 101, 1001, … @decimal 1.0, 21.0, 31.0, 41.0, 51.0, 61.0, 81.0, 101.0, 1001.0, … n % 10 = 2 and n % 100 != 12,72,92 @integer 2, 22, 32, 42, 52, 62, 82, 102, 1002, … @decimal 2.0, 22.0, 32.0, 42.0, 52.0, 62.0, 82.0, 102.0, 1002.0, … diff --git a/src/Jeffijoe.MessageFormat.Tests/Formatting/Formatters/PluralFormatterTests.cs b/src/Jeffijoe.MessageFormat.Tests/Formatting/Formatters/PluralFormatterTests.cs index e870bde..9ad7cc7 100644 --- a/src/Jeffijoe.MessageFormat.Tests/Formatting/Formatters/PluralFormatterTests.cs +++ b/src/Jeffijoe.MessageFormat.Tests/Formatting/Formatters/PluralFormatterTests.cs @@ -42,13 +42,13 @@ public void Pluralize(double n, string expected) new ParsedArguments( new[] { - new KeyedBlock("zero", "nothing"), + new KeyedBlock("=0", "nothing"), new KeyedBlock("one", "just one"), new KeyedBlock("other", "wow") }, Array.Empty()); var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", 
"plural", null); - var actual = subject.Pluralize("en", arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); + var actual = subject.Pluralize("en", PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); Assert.Equal(expected, actual); } @@ -56,7 +56,7 @@ public void Pluralize(double n, string expected) /// The pluralize_defaults_to_en_locale_when_specified_locale_is_not_found /// [Fact] - public void Pluralize_defaults_to_en_locale_when_specified_locale_is_not_found() + public void Pluralize_defaults_to_root_locale_when_specified_locale_is_not_found() { var subject = new PluralFormatter(); var args = new Dictionary { { "test", 1 } }; @@ -64,14 +64,14 @@ public void Pluralize_defaults_to_en_locale_when_specified_locale_is_not_found() new ParsedArguments( new[] { - new KeyedBlock("zero", "nothing"), + new KeyedBlock("=0", "nothing"), new KeyedBlock("one", "just one"), - new KeyedBlock("other", "wow") + new KeyedBlock("other", "wow") // Root locale should resolve "1" to "other" }, Array.Empty()); var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", "plural", null); - var actual = subject.Pluralize("unknown", arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); - Assert.Equal("just one", actual); + var actual = subject.Pluralize("unknown", PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); + Assert.Equal("wow", actual); } /// @@ -86,12 +86,12 @@ public void Pluralize_throws_when_missing_other_block() new ParsedArguments( new[] { - new KeyedBlock("zero", "nothing"), + new KeyedBlock("=0", "nothing"), new KeyedBlock("one", "just one") }, Array.Empty()); var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", "plural", 
null); - Assert.Throws(() => subject.Pluralize("unknown", arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0)); + Assert.Throws(() => subject.Pluralize(PluralRulesMetadata.RootLocale, PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0)); } /// diff --git a/src/Jeffijoe.MessageFormat.Tests/Helpers/LocaleHelperTests.cs b/src/Jeffijoe.MessageFormat.Tests/Helpers/LocaleHelperTests.cs new file mode 100644 index 0000000..e785d5f --- /dev/null +++ b/src/Jeffijoe.MessageFormat.Tests/Helpers/LocaleHelperTests.cs @@ -0,0 +1,61 @@ +using Jeffijoe.MessageFormat.Formatting.Formatters; +using Jeffijoe.MessageFormat.Helpers; +using System.Linq; +using Xunit; + +namespace Jeffijoe.MessageFormat.Tests.Helpers; + +/// +/// The locale helper tests. +/// +public class LocaleHelperTests +{ + /// + /// Tests that both '-' and '_' are supported when extracting the base language. + /// + [Fact] + public void GetInheritanceChain_HandlesBothSeparators() + { + Assert.Equal( + ["en-US", "en", PluralRulesMetadata.RootLocale], + LocaleHelper.GetInheritanceChain("en-US").ToList() + ); + + Assert.Equal( + ["en_US", "en", PluralRulesMetadata.RootLocale], + LocaleHelper.GetInheritanceChain("en_US").ToList() + ); + } + + /// + /// Confirms that our implementation only returns the original locale, + /// the language, and the root. + /// + /// + /// This is a perf optimization given the CLDR data set we're using. 
+ /// + [Fact] + public void GetInheritanceChain_SkipsIntermediateTags() + { + Assert.Equal( + ["th-TH-u-nu-thai", "th", PluralRulesMetadata.RootLocale], + LocaleHelper.GetInheritanceChain("th-TH-u-nu-thai").ToList() + ); + } + + [Theory] + [InlineData("")] + [InlineData("-")] + [InlineData("_")] + [InlineData("x")] + [InlineData("x-")] + [InlineData("x-test")] + [InlineData("i-test")] + public void GetInheritanceChain_HandlesBadInput(string input) + { + Assert.Equal( + [PluralRulesMetadata.RootLocale], + LocaleHelper.GetInheritanceChain(input).ToList() + ); + } +} \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat.Tests/MessageFormatterFullIntegrationTests.cs b/src/Jeffijoe.MessageFormat.Tests/MessageFormatterFullIntegrationTests.cs index 6651ff3..ddb6497 100644 --- a/src/Jeffijoe.MessageFormat.Tests/MessageFormatterFullIntegrationTests.cs +++ b/src/Jeffijoe.MessageFormat.Tests/MessageFormatterFullIntegrationTests.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using Jeffijoe.MessageFormat.Formatting; +using Jeffijoe.MessageFormat.Formatting.Formatters; using Jeffijoe.MessageFormat.Tests.TestHelpers; using Xunit; using Xunit.Abstractions; @@ -202,6 +203,14 @@ public static IEnumerable Tests other {and # others added this to their profiles} }."; + const string Case7 = @"Your {count, selectordinal, + =0 {nonexistent} + one {#st} + two {#nd} + few {#rd} + other {#th} + } notification is the most recent one."; + yield return new object[] { @@ -348,6 +357,20 @@ public static IEnumerable Tests new Dictionary { { "count", 3 } }, "You and 2 others added this to their profiles." }; + yield return + new object[] + { + Case7, + new Dictionary { { "count", 0 } }, + "Your nonexistent notification is the most recent one." + }; + yield return + new object[] + { + Case7, + new Dictionary { { "count", 2 } }, + "Your 2nd notification is the most recent one." 
+ }; yield return new object[] { @@ -380,6 +403,24 @@ public void FormatMessage(string source, Dictionary args, strin { var subject = new MessageFormatter(false); + // Historically these tests relied on a default English pluralizer that mapped + // 0 to "zero"; adding that back in manually to ensure we maintain test coverage + // for multiple forms. + subject.CardinalPluralizers!.Add("en", (number) => + { + if (number == 0) + { + return "zero"; + } else if (number == 1) + { + return "one"; + } + else + { + return "other"; + } + }); + // Warmup subject.FormatMessage(source, args); Benchmark.Start("Formatting", this.outputHelper); @@ -463,7 +504,7 @@ public void FormatMessage_with_reflection_overload() { var subject = new MessageFormatter(false); const string Pattern = "You have {UnreadCount, plural, " - + "zero {no unread messages}" + + "=0 {no unread messages}" + "one {just one unread message}" + "other {# unread messages}" + "} today."; var actual = subject.FormatMessage(Pattern, new { UnreadCount = 0 }); Assert.Equal("You have no unread messages today.", actual); @@ -490,7 +531,7 @@ public void ReadMe_test_to_make_sure_I_dont_look_like_a_fool() { var mf = new MessageFormatter(false); const string Str = @"You have {notifications, plural, - zero {no notifications} + =0 {no notifications} one {one notification} =42 {a universal amount of notifications} other {# notifications} @@ -505,7 +546,7 @@ public void ReadMe_test_to_make_sure_I_dont_look_like_a_fool() var mf = new MessageFormatter(false); const string Str = @"You {NUM_ADDS, plural, offset:1 =0{didnt add this to your profile} - zero{added this to your profile} + =1{added this to your profile} one{and one other person added this to their profile} other{and # others added this to their profiles} }."; @@ -576,6 +617,16 @@ public void ReadMe_test_to_make_sure_I_dont_look_like_a_fool() Assert.Equal("Your messages go here.", formatted); } + { + var mf = new MessageFormatter(false); + const string Str = @"You are the 
{position, selectordinal, one {#st} two {#nd} few {#rd} other {#th}} person in line."; + var formatted = mf.FormatMessage(Str, new Dictionary { { "position", 23 } }); + Assert.Equal("You are the 23rd person in line.", formatted); + + formatted = mf.FormatMessage(Str, new Dictionary { { "position", 1 } }); + Assert.Equal("You are the 1st person in line.", formatted); + } + { var mf = new MessageFormatter(false); const string Str = @"His name is {LAST_NAME}... {FIRST_NAME} {LAST_NAME}"; @@ -603,7 +654,7 @@ public void ReadMe_test_to_make_sure_I_dont_look_like_a_fool() { var mf = new MessageFormatter(useCache: true, locale: "en"); - mf.Pluralizers!["en"] = n => + mf.CardinalPluralizers!["en"] = n => { // ´n´ is the number being pluralized. // ReSharper disable once CompareOfFloatsByEqualityOperator diff --git a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/GeneratedPluralRulesTests.cs b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/GeneratedPluralRulesTests.cs index 0db8c6b..09f56bc 100644 --- a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/GeneratedPluralRulesTests.cs +++ b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/GeneratedPluralRulesTests.cs @@ -29,8 +29,8 @@ public void Uk_PluralizerTests(double n, string expected) new KeyedBlock("other", "дня") }, new FormatterExtension[0]); - var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", "plural", null); - var actual = subject.Pluralize("uk", arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); + var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", PluralFormatter.PluralFunction, null); + var actual = subject.Pluralize("uk", PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(Convert.ToDouble(args[request.Variable]))), 0); Assert.Equal(expected, actual); } @@ -54,8 +54,8 @@ public void Ru_PluralizerTests(double n, string expected) new 
KeyedBlock("other", "дня") }, new FormatterExtension[0]); - var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", "plural", null); - var actual = subject.Pluralize("ru", arguments, new PluralContext(Convert.ToDecimal(args[request.Variable])), 0); + var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", PluralFormatter.PluralFunction, null); + var actual = subject.Pluralize("ru", PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(args[request.Variable])), 0); Assert.Equal(expected, actual); } @@ -65,7 +65,7 @@ public void Ru_PluralizerTests(double n, string expected) [InlineData(101, "days")] [InlineData(102, "days")] [InlineData(105, "days")] - public void En_PluralizerTests(double n, string expected) + public void EnUS_Cardinal_PluralizerTests(double n, string expected) { var subject = new PluralFormatter(); var args = new Dictionary { { "test", n } }; @@ -73,12 +73,67 @@ public void En_PluralizerTests(double n, string expected) new ParsedArguments( new[] { + // Regression test to ensure 0 does not match 'zero' for English + new KeyedBlock("zero", "FAIL"), new KeyedBlock("one", "day"), new KeyedBlock("other", "days") }, new FormatterExtension[0]); - var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", "plural", null); - var actual = subject.Pluralize("en", arguments, new PluralContext(Convert.ToDecimal(args[request.Variable])), 0); + var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", PluralFormatter.PluralFunction, null); + var actual = subject.Pluralize("en_US", PluralRulesMetadata.TryGetCardinalRuleByLocale, subject.CardinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(args[request.Variable])), 0); Assert.Equal(expected, actual); } + + [Theory] + [InlineData(0, "0th")] + [InlineData(1, "1st")] + [InlineData(2, "2nd")] + [InlineData(3, "3rd")] + [InlineData(4, "4th")] + [InlineData(9, "9th")] + 
[InlineData(11, "11th")] + [InlineData(21, "21st")] + public void EnUS_Ordinal_PluralizerTests(double n, string expected) + { + var subject = new PluralFormatter(); + var args = new Dictionary { { "test", n } }; + var arguments = + new ParsedArguments( + new[] + { + new KeyedBlock("one", "#st"), + new KeyedBlock("two", "#nd"), + new KeyedBlock("few", "#rd"), + new KeyedBlock("other", "#th"), + }, + new FormatterExtension[0]); + var request = new FormatterRequest(new Literal(1, 1, 1, 1, ""), "test", PluralFormatter.OrdinalFunction, null); + var pluralized = subject.Pluralize("en-US", PluralRulesMetadata.TryGetOrdinalRuleByLocale, subject.OrdinalPluralizers, arguments, new PluralContext(Convert.ToDecimal(args[request.Variable])), 0); + var actual = subject.ReplaceNumberLiterals(pluralized, n); + Assert.Equal(expected, actual); + } + + [Fact] + public void RootLocale_MatchesRules() + { + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale(PluralRulesMetadata.RootLocale, out _)); + Assert.True(PluralRulesMetadata.TryGetOrdinalRuleByLocale(PluralRulesMetadata.RootLocale, out _)); + } + + /// + /// Tests to confirm that separators normalize properly in the data, + /// and that language lookups are case insensitive. 
+ /// + [Fact] + public void Fallback_PluralizerTests() + { + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale("kok_Latn", out _)); + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale("pt-PT", out _)); + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale("pt-pt", out _)); + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale("PT_PT", out _)); + Assert.True(PluralRulesMetadata.TryGetCardinalRuleByLocale("pT", out _)); + + Assert.True(PluralRulesMetadata.TryGetOrdinalRuleByLocale("kok_Latn", out _)); + Assert.False(PluralRulesMetadata.TryGetOrdinalRuleByLocale("pt-PT", out _)); + } } \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/ParserTests.cs b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/ParserTests.cs index 4cecb75..a17f8a5 100644 --- a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/ParserTests.cs +++ b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/ParserTests.cs @@ -23,7 +23,7 @@ public void CanParseLocales() "); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var expected = new[] { "am", "as", "bn", "doi", "fa", "gu", "hi", "kn", "pcm", "zu" @@ -44,7 +44,7 @@ public void OtherCountIsIgnored() "); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); Assert.Empty(rule.Conditions); } @@ -53,7 +53,7 @@ public void CanParseSingleCount_RuleDescription_WithoutRelations() { var rules = ParseRules(GenerateXmlWithRuleContent("@integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var expected = "@integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …"; Assert.Equal(expected, condition.RuleDescription); @@ -64,7 +64,7 @@ public void CanParseSingleCount_VisibleDigitsNumber() { var rules = ParseRules( GenerateXmlWithRuleContent(@"v = 0 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - 
var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -78,7 +78,7 @@ public void CanParseSingleCount_IntegerDigits() { var rules = ParseRules( GenerateXmlWithRuleContent(@"i = 0 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -92,7 +92,7 @@ public void CanParseSingleCount_AbsoluteNumber() { var rules = ParseRules( GenerateXmlWithRuleContent("n = 1 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -107,7 +107,7 @@ public void CanParseSingleCount_AbsoluteNumber() public void CanParseVariousRelations(string ruleText, Relation expectedRelation) { var rules = ParseRules(GenerateXmlWithRuleContent(ruleText)); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -120,7 +120,7 @@ public void CanParseVariousRelations(string ruleText, Relation expectedRelation) public void CanParseOrRules() { var rules = ParseRules(GenerateXmlWithRuleContent("n = 2 or n = 1 or n = 0 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); Assert.Equal(3, condition.OrConditions.Count); @@ -142,7 
+142,7 @@ public void CanParseOrRules() public void CanParseAndRules() { var rules = ParseRules(GenerateXmlWithRuleContent("n = 2 and n = 1 and n = 0 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); @@ -166,7 +166,7 @@ public void CanParseModuloInLeftOperator() { var rules = ParseRules( GenerateXmlWithRuleContent("n % 5 = 3 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -181,7 +181,7 @@ public void CanParseRangeInRightOperator() { var rules = ParseRules( GenerateXmlWithRuleContent("n = 3..5 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -196,7 +196,7 @@ public void CanParseCommaSeparatedInRightOperator() { var rules = ParseRules( GenerateXmlWithRuleContent("n = 3,5,8, 10 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -211,7 +211,7 @@ public void CanParseMixedCommaSeparatedAndRangeInRightOperator() { var rules = ParseRules( GenerateXmlWithRuleContent("n = 3,5..7,12,15 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = 
Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -234,7 +234,7 @@ public void MapsVariable_ToCorrectOperator(char variable, OperandSymbol symbol) { var rules = ParseRules( GenerateXmlWithRuleContent($"{variable} = 3")); - var rule = Assert.Single(rules); + var rule = Assert.Single(rules.UniqueRules); var condition = Assert.Single(rule.Conditions); var orCondition = Assert.Single(condition.OrConditions); var actual = Assert.Single(orCondition.AndConditions); @@ -264,7 +264,7 @@ private static void AssertOperationEqual(Operation expected, Operation actual) Assert.Equal(expected.OperandRight, actual.OperandRight); } - private static IEnumerable ParseRules(string xmlText) + private static PluralRuleSet ParseRules(string xmlText) { var xml = new XmlDocument(); xml.LoadXml(xmlText); diff --git a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/PluralMetadataClassGeneratorTests.cs b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/PluralMetadataClassGeneratorTests.cs index b10ed33..7060ae2 100644 --- a/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/PluralMetadataClassGeneratorTests.cs +++ b/src/Jeffijoe.MessageFormat.Tests/MetadataGenerator/PluralMetadataClassGeneratorTests.cs @@ -1,4 +1,5 @@ -using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; +using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing; +using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST; using Jeffijoe.MessageFormat.MetadataGenerator.Plural.SourceGeneration; using Xunit; @@ -12,7 +13,7 @@ public void CanGenerateClassFromRules() { var rules = new[] { - new PluralRule(new[] {"en", "uk"}, + new PluralRule(new[] {"root", "en", "uk"}, new[] { new Condition("one", string.Empty, new [] @@ -22,23 +23,38 @@ public void CanGenerateClassFromRules() new Operation(new VariableOperand(OperandSymbol.AbsoluteValue), Relation.Equals, new[] {new NumberOperand(3) }) }) }) - }) + }), + 
new PluralRule(new[] {"root", "en", "pt_PT"}, + new[] + { + new Condition("many", string.Empty, new [] + { + new OrCondition(new[] + { + new Operation(new VariableOperand(OperandSymbol.AbsoluteValue), Relation.Equals, new[] {new NumberOperand(120) }) + }) + }) + }), }; - var generator = new PluralRulesMetadataGenerator(rules); + + var ruleSet = new PluralRuleSet(); + ruleSet.Add("cardinal", rules[0]); + ruleSet.Add("ordinal", rules[1]); + + var generator = new PluralRulesMetadataGenerator(ruleSet); var actual = generator.GenerateClass(); var expected = @" +#nullable enable using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; namespace Jeffijoe.MessageFormat.Formatting.Formatters { internal static partial class PluralRulesMetadata { - public static string Locale_EN(PluralContext context) => Rule0(context); - - public static string Locale_UK(PluralContext context) => Rule0(context); - + public static readonly string RootLocale = ""root""; private static string Rule0(PluralContext context) { if ((context.N == 3)) @@ -47,17 +63,46 @@ private static string Rule0(PluralContext context) return ""other""; } - private static readonly Dictionary Pluralizers = new Dictionary() + private static string Rule1(PluralContext context) { - {""en"", Rule0}, - {""uk"", Rule0}, + if ((context.N == 120)) + return ""many""; + return ""other""; + } + + private static readonly Dictionary Pluralizers = new(StringComparer.OrdinalIgnoreCase) + { + {""root"", new LocalePluralizers(Cardinal: Rule0, Ordinal: Rule1)}, + {""en"", new LocalePluralizers(Cardinal: Rule0, Ordinal: Rule1)}, + {""uk"", new LocalePluralizers(Cardinal: Rule0, Ordinal: null)}, + {""pt-PT"", new LocalePluralizers(Cardinal: null, Ordinal: Rule1)}, + {""pt_PT"", new LocalePluralizers(Cardinal: null, Ordinal: Rule1)}, }; - public static partial bool TryGetRuleByLocale(string locale, out ContextPluralizer contextPluralizer) + public static partial bool TryGetCardinalRuleByLocale(string locale, 
[NotNullWhen(true)] out ContextPluralizer? contextPluralizer) { - return Pluralizers.TryGetValue(locale, out contextPluralizer); + if (!Pluralizers.TryGetValue(locale, out var pluralizersForLocale)) + { + contextPluralizer = null; + return false; + } + contextPluralizer = pluralizersForLocale.Cardinal; + return contextPluralizer != null; } + + public static partial bool TryGetOrdinalRuleByLocale(string locale, [NotNullWhen(true)] out ContextPluralizer? contextPluralizer) + { + if (!Pluralizers.TryGetValue(locale, out var pluralizersForLocale)) + { + contextPluralizer = null; + return false; + } + contextPluralizer = pluralizersForLocale.Ordinal; + return contextPluralizer != null; + } + + private record LocalePluralizers(ContextPluralizer? Cardinal, ContextPluralizer? Ordinal); } } ".TrimStart(); diff --git a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralContext.cs b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralContext.cs index ca7a49c..1c60f70 100644 --- a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralContext.cs +++ b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralContext.cs @@ -3,6 +3,9 @@ namespace Jeffijoe.MessageFormat.Formatting.Formatters; +/// +/// Represents the 'operations' for a given source number, as defined by Unicode TR35/LDML. +/// internal readonly struct PluralContext { public PluralContext(int number) @@ -26,10 +29,24 @@ public PluralContext(double number) : this(number.ToString(CultureInfo.Invariant { } + /// + /// Represents operands for a source number in string format. + /// This library treats the input as a stringified double and does not currently parse out + /// compact decimal forms (e.g., "1.25c4"). + /// public PluralContext(string number) : this(number, double.Parse(number, CultureInfo.InvariantCulture)) { } + /// + /// Common constructor for parsing out operands from a stringified number. 
+ /// + /// + /// The values of <see cref="V"/>, <see cref="W"/>, <see cref="F"/>, and <see cref="T"/> are all derived + /// from the fractional part of the number, so it's important that <paramref name="number"/> be parsable as a number. + /// + /// The number in string form, as a decimal (not scientific/compact form). + /// The number pre-parsed as a double. private PluralContext(string number, double parsed) { Number = parsed; @@ -60,26 +77,77 @@ private PluralContext(string number, double parsed) W = fractionSpanWithoutZeroes.Length; F = int.Parse(fractionSpan); T = int.Parse(fractionSpanWithoutZeroes); + + // The compact decimal exponent representations are not used in this library as operands are + // always assumed to be parsable numbers. C = 0; E = 0; } } + /// + /// The 'source number' being evaluated for pluralization. + /// public double Number { get; } + /// + /// The absolute value of <see cref="Number"/>. + /// public double N { get; } + /// + /// The integer digits of <see cref="Number"/>. + /// + /// + /// 22.6 -> I = 22 + /// public int I { get; } + /// + /// The count of visible fraction digits of <see cref="Number"/>, with trailing zeroes. + /// + /// + /// 1.450 -> V = 3 + /// public int V { get; } + /// + /// The count of visible fraction digits of <see cref="Number"/>, without trailing zeroes. + /// + /// + /// 1.450 -> W = 2 + /// public int W { get; } + /// + /// The visible fraction digits of <see cref="Number"/>, with trailing zeroes, as an integer. + /// + /// + /// 1.450 -> F = 450 + /// public int F { get; } + /// + /// The visible fraction digits of <see cref="Number"/>, without trailing zeroes, as an integer. + /// + /// + /// 1.450 -> T = 45 + /// public int T { get; } + /// + /// The compact decimal exponent of <see cref="Number"/>, in such cases where <see cref="Number"/> + /// is represented as "[x]cC" such that <see cref="Number"/> == x * 10^C. + /// + /// + /// 1.25c4 -> C = 4 + /// 125c2 -> C = 2 + /// 12500 -> C = 0, as the number is not represented in compact decimal form. + /// public int C { get; } + /// + /// Deprecated (in LDML) synonym for <see cref="C"/>, reserved for future use by the standard. 
+ /// public int E { get; } } \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralFormatter.cs b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralFormatter.cs index 844f854..3d005ff 100644 --- a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralFormatter.cs +++ b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralFormatter.cs @@ -3,6 +3,7 @@ // Author: Jeff Hansen // Copyright (C) Jeff Hansen 2014. All rights reserved. +using Jeffijoe.MessageFormat.Helpers; using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; @@ -15,6 +16,21 @@ namespace Jeffijoe.MessageFormat.Formatting.Formatters; /// public class PluralFormatter : BaseFormatter, IFormatter { + /// + /// ICU MessageFormat function name for "default" pluralization, based on cardinal numbers. + /// + internal const string PluralFunction = "plural"; + + /// + /// ICU MessageFormat function name for ordinal pluralization. + /// + internal const string OrdinalFunction = "selectordinal"; + + /// + /// Delegate type to try to look up a specific plural rule for a given locale. + /// + internal delegate bool TryGetRuleForLocale(string locale, [NotNullWhen(true)] out ContextPluralizer? contextPluralizer); + #region Constructors and Destructors /// @@ -22,8 +38,8 @@ public class PluralFormatter : BaseFormatter, IFormatter /// public PluralFormatter() { - this.Pluralizers = new Dictionary(); - this.AddStandardPluralizers(); + this.CardinalPluralizers = new Dictionary(); + this.OrdinalPluralizers = new Dictionary(); } #endregion @@ -35,14 +51,21 @@ public PluralFormatter() /// public bool VariableMustExist => true; - /// - /// Gets the pluralizers dictionary. Key is the locale. + /// Gets the pluralizers dictionary to use for cardinal numbers. Key is the locale. /// /// /// The pluralizers. 
/// - public IDictionary Pluralizers { get; private set; } + public IDictionary CardinalPluralizers { get; } + + /// + /// Gets the pluralizers dictionary to use for ordinal numbers. Key is the locale. + /// + /// + /// The ordinal pluralizers. + /// + public IDictionary OrdinalPluralizers { get; } #endregion @@ -59,7 +82,12 @@ public PluralFormatter() /// public bool CanFormat(FormatterRequest request) { - return request.FormatterName == "plural"; + if (request.FormatterName is null) + { + return false; + } + + return request.FormatterName == PluralFunction || request.FormatterName == OrdinalFunction; } /// @@ -84,6 +112,9 @@ public bool CanFormat(FormatterRequest request) /// /// The . /// + /// + /// If <paramref name="request"/> does not specify a formatter name supported by <see cref="PluralFormatter"/>. + /// public string Format(string locale, FormatterRequest request, IReadOnlyDictionary args, @@ -98,8 +129,33 @@ public string Format(string locale, offset = Convert.ToDouble(offsetExtension.Value); } + // Get CLDR plural ruleset from request. + // CanFormat() should have guaranteed this is valid, but we'll be defensive just in case. 
+ TryGetRuleForLocale cldrPluralLookup; + IDictionary customLookup; + if (request.FormatterName == PluralFunction) + { + cldrPluralLookup = PluralRulesMetadata.TryGetCardinalRuleByLocale; + customLookup = this.CardinalPluralizers; + } + else if (request.FormatterName == OrdinalFunction) + { + cldrPluralLookup = PluralRulesMetadata.TryGetOrdinalRuleByLocale; + customLookup = this.OrdinalPluralizers; + } + else + { + throw new MessageFormatterException($"Unsupported plural formatter name: {request.FormatterName}"); + } + var ctx = CreatePluralContext(value, offset); - var pluralized = this.Pluralize(locale, arguments, ctx, offset); + var pluralized = this.Pluralize( + locale, + cldrPluralLookup, + customLookup, + arguments, + ctx, + offset); var result = this.ReplaceNumberLiterals(pluralized, ctx.Number); var formatted = messageFormatter.FormatMessage(result, args); return formatted; @@ -115,6 +171,13 @@ public string Format(string locale, /// /// The locale. /// + /// + /// Delegate to retrieve a <see cref="ContextPluralizer"/> for a given locale. + /// + /// + /// Dictionary to retrieve a <see cref="Pluralizer"/> for a given locale, to be evaluated + /// before resolving against <paramref name="cldrPluralLookup"/>. + /// /// /// The parsed arguments string. /// @@ -128,26 +191,42 @@ public string Format(string locale, /// The . /// /// - /// The 'other' option was not found in pattern. + /// The 'other' option was not found in pattern, or <paramref name="cldrPluralLookup"/> is missing + /// both the provided locale and the CLDR root locale. /// [SuppressMessage("StyleCop.CSharp.ReadabilityRules", "SA1126:PrefixCallsCorrectly", Justification = "Reviewed. Suppression is OK here.")] - internal string Pluralize(string locale, ParsedArguments arguments, PluralContext context, double offset) + internal string Pluralize( + string locale, + TryGetRuleForLocale cldrPluralLookup, + IDictionary customLookup, + ParsedArguments arguments, + PluralContext context, + double offset) { - string pluralForm; - if (this.Pluralizers.TryGetValue(locale, out var pluralizer)) + string? 
pluralForm = null; + if (customLookup.TryGetValue(locale, out var pluralizer)) { pluralForm = pluralizer(context.Number); } - else if (PluralRulesMetadata.TryGetRuleByLocale(locale, out var contextPluralizer)) + else { - pluralForm= contextPluralizer(context); + foreach (var candidate in LocaleHelper.GetInheritanceChain(locale)) + { + if (cldrPluralLookup(candidate, out var contextPluralizer)) + { + pluralForm = contextPluralizer(context); + break; + } + } } - else + + if (pluralForm is null) { - pluralForm = this.Pluralizers["en"](context.Number); + // GetInheritanceChain should resolve the root CLDR locale as a last attempt, so this should never happen... + throw new MessageFormatterException($"Could not find locale {locale} in specified plural rule lookup"); } - + KeyedBlock? other = null; foreach (var keyedBlock in arguments.KeyedBlocks) { @@ -284,31 +363,6 @@ internal string ReplaceNumberLiterals(string pluralized, double n) } } - /// - /// Adds the standard pluralizers. - /// - private void AddStandardPluralizers() - { - this.Pluralizers.Add( - "en", - n => - { - // ReSharper disable CompareOfFloatsByEqualityOperator - if (n == 0) - { - return "zero"; - } - - if (n == 1) - { - return "one"; - } - - // ReSharper restore CompareOfFloatsByEqualityOperator - return "other"; - }); - } - /// /// Creates a for the specified value. 
/// diff --git a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralRulesMetadata.cs b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralRulesMetadata.cs index 7c98e93..3c18382 100644 --- a/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralRulesMetadata.cs +++ b/src/Jeffijoe.MessageFormat/Formatting/Formatters/PluralRulesMetadata.cs @@ -1,7 +1,11 @@ -namespace Jeffijoe.MessageFormat.Formatting.Formatters; +using System.Diagnostics.CodeAnalysis; + +namespace Jeffijoe.MessageFormat.Formatting.Formatters; [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage] internal static partial class PluralRulesMetadata { - public static partial bool TryGetRuleByLocale(string locale, out ContextPluralizer contextPluralizer); + public static partial bool TryGetCardinalRuleByLocale(string locale, out ContextPluralizer? contextPluralizer); + + public static partial bool TryGetOrdinalRuleByLocale(string locale, out ContextPluralizer? contextPluralizer); } \ No newline at end of file diff --git a/src/Jeffijoe.MessageFormat/Helpers/LocaleHelper.cs b/src/Jeffijoe.MessageFormat/Helpers/LocaleHelper.cs new file mode 100644 index 0000000..a5a31cc --- /dev/null +++ b/src/Jeffijoe.MessageFormat/Helpers/LocaleHelper.cs @@ -0,0 +1,57 @@ +using Jeffijoe.MessageFormat.Formatting.Formatters; +using System.Collections.Generic; + +namespace Jeffijoe.MessageFormat.Helpers; + +/// +/// Helpers for working with locale strings. +/// +internal class LocaleHelper +{ + /// + /// Partial implementation of locale inheritance + /// from the LDML spec. + /// + /// Given an input locale in BCP 47 format, yields back various strings to use as lookups in CLDR data. + /// + /// + /// This function doesn't perform any canonicalization of input or fully implement the LDML spec. + /// It first yields the input as-is, then the base language tag, then the CLDR "root" value. 
+ /// + /// This is because at the time of authorship, the only lookups needed by this library are for CLDR plurals, + /// which almost exclusively use languages without subtags. + /// + /// + /// Given "language-Script-REGION", yields: + /// - language-Script-REGION + /// - language + /// - root + /// + /// A BCP 47 locale tag + public static IEnumerable GetInheritanceChain(string locale) + { + // 0 or 1 characters do not form a valid language ID, so we can skip those + // Also skip x- and i- as those BCP 47 tags will never match CLDR and should + // only resolve to 'root'. + if (locale.Length >= 2 && locale[1] != '-') + { + yield return locale; + } + + // If the length is 2, we don't have any subtags for valid input + if (locale.Length >= 3 && locale[1] != '-') + { + // Find the first separator character, Substring to that, and break + for (int i = 2; i < locale.Length; i++) + { + if (locale[i] == '_' || locale[i] == '-') + { + yield return locale.Substring(0, i); + break; + } + } + } + + yield return PluralRulesMetadata.RootLocale; + } +} diff --git a/src/Jeffijoe.MessageFormat/MessageFormatter.cs b/src/Jeffijoe.MessageFormat/MessageFormatter.cs index f258efe..3e90160 100644 --- a/src/Jeffijoe.MessageFormat/MessageFormatter.cs +++ b/src/Jeffijoe.MessageFormat/MessageFormatter.cs @@ -145,17 +145,42 @@ public IFormatterLibrary Formatters public string Locale { get; set; } /// - /// Gets the pluralizers dictionary from the , if set. Key is the locale. + /// Gets the custom cardinal pluralizers dictionary from the , if set. Key is the locale. + /// These are the pluralizers used to translate e.g., {count, plural, one {1 book} other {# books}} /// + /// + /// The library relies on Unicode CLDR rules for locales by default, and any values in this dictionary override those behaviors + /// for the specified locales. + /// + /// + /// The pluralizers, or null if the plural formatter has not been added. + /// + public IDictionary? 
CardinalPluralizers + { + get + { + var pluralFormatter = this.Formatters.OfType().FirstOrDefault(); + return pluralFormatter?.CardinalPluralizers; + } + } + + /// + /// Gets the custom ordinal number pluralizers dictionary from the , if set. Key is the locale. + /// These are the pluralizers used to translate e.g., {count, selectordinal, one {#st} two {#nd} few {#rd} other {#th}} + /// + /// + /// The library relies on Unicode CLDR rules for locales by default, and any values in this dictionary override those behaviors + /// for the specified locales. + /// /// /// The pluralizers, or null if the plural formatter has not been added. /// - public IDictionary? Pluralizers + public IDictionary? OrdinalPluralizers { get { var pluralFormatter = this.Formatters.OfType().FirstOrDefault(); - return pluralFormatter?.Pluralizers; + return pluralFormatter?.OrdinalPluralizers; } }