Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ https://unicode-org.github.io/icu/userguide/format_parse/messages/
var mf = new MessageFormatter();

var str = @"You have {notifications, plural,
zero {no notifications}
=0 {no notifications}
one {one notification}
=42 {a universal amount of notifications}
other {# notifications}
Expand Down Expand Up @@ -86,7 +86,8 @@ and about 3 seconds (3236ms) without it. **These results are with a debug build,
MessageFormat.NET supports the most commonly used formats:

* Select Format: `{gender, select, male{He likes} female{She likes} other{They like}} cheeseburgers`
* Plural Format: `There {msgCount, plural, zero {are no unread messages} one {is 1 unread message} other{are # unread messages}}.` (where `#` is the actual number, with the offset (if any) subtracted).
* Plural Format: `There {msgCount, plural, =0 {are no unread messages} one {is 1 unread message} other{are # unread messages}}.` (where `#` is the actual number, with the offset (if any) subtracted).
* Ordinal Format: `You are the {position, selectordinal, one {#st} two {#nd} few {#rd} other {#th}} person in line.`
* Simple variable replacement: `Your name is {name}`
* Numbers: `Your age is {age, number}`
* Dates: `You were born {birthday, date}`
Expand Down Expand Up @@ -136,15 +137,18 @@ var message = formatter.FormatMessage("{value, number, $0.0}", new { value = 23
## Adding your own pluralizer functions

> Since MessageFormat 5.0, pluralizers based on the [official CLDR data][plural-cldr] ship
> with the package, so this is no longer needed.
> with the package, so this is only needed when you want to override the rules for specific locales.

Same thing as with [MessageFormat.js][0], you can add your own pluralizer function.
The `Pluralizers` property is a `IDictionary<string, Pluralizer>`, so you can remove the built-in
ones if you want.
The `CardinalPluralizers` property is an `IDictionary<string, Pluralizer>` that starts empty, as does
the `OrdinalPluralizers` property, which is used for ordinal numbers.

Pluralizers added to these dictionaries take precedence over the built-in CLDR data whenever
the input locale exactly matches the dictionary key.

````csharp
var mf = new MessageFormatter();
mf.Pluralizers.Add("<locale>", n => {
mf.CardinalPluralizers.Add("<locale>", n => {
// ´n´ is the number being pluralized.
if(n == 0)
return "zero";
Expand All @@ -159,11 +163,9 @@ you may use in your pluralization block.

````csharp
var mf = new MessageFormatter(true, "en"); // true = use cache
mf.Pluralizers["en"] = n =>
mf.CardinalPluralizers["en"] = n =>
{
// ´n´ is the number being pluralized.
if (n == 0)
return "zero";
if (n == 1)
return "one";
if (n > 1000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

<ItemGroup>
<EmbeddedResource Include="data/plurals.xml" />
<EmbeddedResource Include="data/ordinals.xml" />
</ItemGroup>

<ItemGroup>
Expand All @@ -19,6 +20,10 @@
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="4.9.2" />
<PackageReference Include="PolySharp" Version="1.15.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@

namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST;

/// <summary>
/// Represents the 'condition' part of the <see href="https://unicode.org/reports/tr35/tr35-numbers.html#plural-rules-syntax">LDML grammar</see>.
/// </summary>
/// <example>
/// Given the following 'pluralRule' tag:
/// &lt;pluralRule count="one"&gt;i = 1 and v = 0 @integer 1&lt;/pluralRule&gt;
///
/// A Condition instance would represent 'i = 1 and v = 0' as a single <see cref="OrCondition"/>.
/// </example>
/// <remarks>
/// The grammar defines a condition as a union of 'and_conditions', which we model as a
/// list of <see cref="OrCondition"/> that each internally tracks <see cref="OrCondition.AndConditions"/>.
/// </remarks>
[DebuggerDisplay("{{RuleDescription}}")]
public class Condition
{
Expand All @@ -13,9 +26,22 @@ public Condition(string count, string ruleDescription, IReadOnlyList<OrCondition
OrConditions = orConditions;
}

/// <summary>
/// The plural form this condition or rule defines, e.g., "one", "two", "few", "many", "other".
/// </summary>
public string Count { get; }

/// <summary>
/// The original text of this rule, e.g., "i = 1 and v = 0 @integer 1".
/// </summary>
/// <remarks>
/// Note - this includes the sample text ('@integer 1') which gets stripped out
/// when parsing the rule's conditional logic.
/// </remarks>
public string RuleDescription { get; }

/// <summary>
/// Parsed representation of <see cref="RuleDescription"/>.
/// </summary>
public IReadOnlyList<OrCondition> OrConditions { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST;

/// <summary>
/// Corresponds to a pluralRules tag in CLDR XML (not to be confused with pluralRule).
/// Each instance of this class represents multiple individual rules for a set of locales.
/// </summary>
/// <example>
/// &lt;pluralRules locales="ast de en et fi fy gl ia ie io ji lij nl sc sv sw ur yi"&gt;
/// &lt;pluralRule count = "one"&gt; i = 1 and v = 0 @integer 1&lt;/pluralRule&gt;
/// ...
/// &lt;/pluralRules&gt;
/// </example>
public class PluralRule
{
public PluralRule(string[] locales, IReadOnlyList<Condition> conditions)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Xml;
using System.Linq;
using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST;
Expand All @@ -16,25 +17,49 @@ public PluralParser(XmlDocument rulesDocument, string[] excludedLocales)
_excludedLocales = new HashSet<string>(excludedLocales);
}

public IEnumerable<PluralRule> Parse()
/// <summary>
/// Parses the represented XML document into a new <see cref="PluralRuleSet"/>, and returns it.
/// </summary>
/// <returns>A <see cref="PluralRuleSet"/> containing the parsed plural rules of a single type.</returns>
public PluralRuleSet Parse()
{
var index = new PluralRuleSet();
ParseInto(index);
return index;
}

/// <summary>
/// Parses the represented XML document and merges the rules into the given <see cref="PluralRuleSet"/>.
/// </summary>
/// <param name="ruleIndex">The rule set that each successfully parsed rule is added to.</param>
/// <exception cref="ArgumentException">If the CLDR XML is missing expected attributes.</exception>
public void ParseInto(PluralRuleSet ruleIndex)
{
var root = _rulesDocument.DocumentElement!;

foreach(XmlNode dataElement in root.ChildNodes)
{
if (dataElement.Name != "plurals")
{
continue;
}


var typeAttr = dataElement.Attributes["type"];
if (!typeAttr.Specified)
{
throw new ArgumentException("CLDR ruleset document is unexpectedly missing 'type' attribute on 'plurals' element.");
}

string pluralType = typeAttr.Value;

foreach (XmlNode rule in dataElement.ChildNodes)
{
if(rule.Name == "pluralRules")
{
var parsed = ParseSingleRule(rule);
if (parsed != null)
{
yield return parsed;
ruleIndex.Add(pluralType, parsed);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST;
using System;
using System.Collections.Generic;

namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing;

/// <summary>
/// Represents multiple fully parsed documents of <see cref="PluralRule"/> instances, each with a given type (i.e., 'cardinal' vs 'ordinal').
/// </summary>
public class PluralRuleSet
{
    /// <summary>
    /// Special CLDR locale ID to use as the default for inheritance. All locales can chain to this
    /// during lookups.
    /// </summary>
    public const string RootLocale = "root";

    /// <summary>
    /// CLDR plural type attribute for the counting number ruleset.
    /// Used to translate strings that contain a count, e.g., to pluralize nouns.
    /// </summary>
    public const string CardinalType = "cardinal";

    /// <summary>
    /// CLDR plural type attribute for the ordered number ruleset.
    /// Used to translate strings containing ordinal numbers, e.g., "1st", "2nd", "3rd".
    /// </summary>
    public const string OrdinalType = "ordinal";

    // Backing fields for the public properties below.
    private readonly List<PluralRule> _allRules = [];

    // Keys are compared case-insensitively so runtime lookups do not depend on locale casing.
    private readonly Dictionary<string, PluralRuleIndices> _indicesByLocale = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Gets every rule that has been added, in the order it was added. Can be used to generate
    /// helper functions to match specific rules based on an input number.
    /// </summary>
    /// <remarks>
    /// Each call to <see cref="Add(string, PluralRule)"/> appends one entry; no de-duplication is performed here.
    /// </remarks>
    public IReadOnlyList<PluralRule> UniqueRules => _allRules;

    /// <summary>
    /// Maps <see cref="NormalizeCldrLocale(string)">normalized CLDR locale IDs</see> to indices within
    /// <see cref="UniqueRules"/> for their cardinal and ordinal rules, if defined.
    /// </summary>
    /// <remarks>
    /// When the normalized ID differs from the original CLDR ID, both spellings are present as keys.
    /// Key lookups are case-insensitive.
    /// </remarks>
    public IReadOnlyDictionary<string, PluralRuleIndices> RuleIndicesByLocale => _indicesByLocale;

    /// <summary>
    /// Adds the given rule to our indices under the given plural type.
    /// </summary>
    /// <param name="pluralType">e.g., 'cardinal' or 'ordinal'.</param>
    /// <param name="rule">The parsed rule.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when a nonstandard plural type is provided. The rule set is left unmodified in that case.
    /// </exception>
    public void Add(string pluralType, PluralRule rule)
    {
        // Validate before mutating any state, so a rejected plural type cannot leave an
        // orphaned entry behind in the rule list.
        bool isCardinal = pluralType == CardinalType;
        if (!isCardinal && pluralType != OrdinalType)
        {
            throw new ArgumentOutOfRangeException(nameof(pluralType), pluralType, "Unexpected plural type");
        }

        _allRules.Add(rule);
        int newRuleIndex = _allRules.Count - 1;

        // Exactly one of these is set; the other stays null so that merging below
        // preserves whatever was previously recorded for the other plural type.
        int? cardinalIndex = isCardinal ? newRuleIndex : (int?)null;
        int? ordinalIndex = isCardinal ? (int?)null : newRuleIndex;

        // Loop over each locale for this rule and update our indices with the new value.
        // If we've seen it before (for a different plural type), we merge with the existing entry.
        foreach (var locale in rule.Locales)
        {
            var normalized = NormalizeCldrLocale(locale);

            PluralRuleIndices merged = _indicesByLocale.TryGetValue(normalized, out var existing)
                ? existing with
                {
                    CardinalRuleIndex = cardinalIndex ?? existing.CardinalRuleIndex,
                    OrdinalRuleIndex = ordinalIndex ?? existing.OrdinalRuleIndex
                }
                : new PluralRuleIndices(
                    CardinalRuleIndex: cardinalIndex,
                    OrdinalRuleIndex: ordinalIndex);

            _indicesByLocale[normalized] = merged;

            // Also index the original CLDR spelling (e.g. with underscores) when it differs.
            if (!string.Equals(normalized, locale, StringComparison.Ordinal))
            {
                _indicesByLocale[locale] = merged;
            }
        }
    }

    /// <summary>
    /// Converts a CLDR locale ID to a normalized form for indexing.
    ///
    /// See <see href="https://unicode.org/reports/tr35/#unicode-locale-identifier">the LDML spec</see>
    /// for an explanation of the forms that Unicode locale IDs can take.
    ///
    /// Notably, CLDR locale IDs use underscores as separators, while BCP 47 (which is the primary form
    /// we expect as inputs at runtime) uses dashes.
    /// </summary>
    /// <remarks>
    /// The return string is intended to be used for case-insensitive runtime lookup of input locales,
    /// but the string itself is not strictly BCP 47 or CLDR compliant. For example, the CLDR 'root'
    /// language is passed through instead of being remapped to 'und'.
    /// </remarks>
    /// <param name="cldrLocaleId">The locale ID as written in the CLDR data.</param>
    /// <returns>The same ID with every underscore replaced by a dash.</returns>
    private static string NormalizeCldrLocale(string cldrLocaleId)
    {
        return cldrLocaleId.Replace('_', '-');
    }

    /// <summary>
    /// Helper type to represent the pluralization rules for a given locale, which may include both
    /// cardinal and ordinal rules, or just one of the two.
    /// </summary>
    /// <remarks>
    /// For example, in CLDR 48.1, "pt_PT" has a defined plural rule but is expected to chain to "pt"
    /// for ordinal lookup.
    /// </remarks>
    /// <param name="CardinalRuleIndex">Index into <see cref="UniqueRules"/> of the cardinal rule, or null if none was added.</param>
    /// <param name="OrdinalRuleIndex">Index into <see cref="UniqueRules"/> of the ordinal rule, or null if none was added.</param>
    public record PluralRuleIndices(int? CardinalRuleIndex, int? OrdinalRuleIndex);
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing;
using Jeffijoe.MessageFormat.MetadataGenerator.Plural.Parsing.AST;
using Jeffijoe.MessageFormat.MetadataGenerator.Plural.SourceGeneration;

using Microsoft.CodeAnalysis;

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml;

namespace Jeffijoe.MessageFormat.MetadataGenerator.Plural;
Expand Down Expand Up @@ -36,20 +33,26 @@ private string[] ReadExcludeLocales(GeneratorExecutionContext context)
return Array.Empty<string>();
}

private IReadOnlyList<PluralRule> GetRules(string[] excludedLocales)
private PluralRuleSet GetRules(string[] excludedLocales)
{
using var rulesStream = GetRulesContentStream();
var xml = new XmlDocument();
xml.Load(rulesStream);
PluralRuleSet ruleIndex = new();
foreach (var ruleset in new[] { "plurals.xml", "ordinals.xml" })
{
using var rulesStream = GetRulesContentStream(ruleset);
var xml = new XmlDocument();
xml.Load(rulesStream);

var parser = new PluralParser(xml, excludedLocales);
parser.ParseInto(ruleIndex);
}

var parser = new PluralParser(xml, excludedLocales);
return parser.Parse().ToArray();
return ruleIndex;
}


private Stream GetRulesContentStream()
private Stream GetRulesContentStream(string cldrFileName)
{
return typeof(PluralLanguagesGenerator).Assembly.GetManifestResourceStream("Jeffijoe.MessageFormat.MetadataGenerator.data.plurals.xml")!;
return typeof(PluralLanguagesGenerator).Assembly.GetManifestResourceStream($"Jeffijoe.MessageFormat.MetadataGenerator.data.{cldrFileName}")!;
}

public void Initialize(GeneratorInitializationContext context)
Expand Down
Loading