From 35facd7143002c76e88a8e26d1f4d30a2d3ca2dd Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Wed, 10 Dec 2025 16:40:43 +0000 Subject: [PATCH 1/6] Add WordList::filter and impl Clone for WordList --- static-lang-word-lists/src/word_lists.rs | 38 +++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index f7ad642..aeb1344 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -16,7 +16,7 @@ use crate::newline_delimited_words; pub(crate) type Word = String; pub(crate) type WordSource = Box<[Word]>; -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct WordListMetadata { name: Cow<'static, str>, @@ -216,6 +216,42 @@ impl WordList { pub fn is_empty(&self) -> bool { self.words.is_empty() } + + /// Create a new word list by removing words from an existing one, according + /// to the `predicate`. + /// + /// You can think of this similar to calling [`Vec::retain`], except it + /// returns a new list instead of modifying the old one in-place. + pub fn filter(&self, mut predicate: F) -> Self + where + F: FnMut(&str) -> bool, + { + let reduced_words = self + .words + .iter() + .filter(|word| predicate(word)) + .cloned() + .collect::>(); + let reduced_words = + EagerOrLazy::Eager(reduced_words.into_boxed_slice()); + Self { + metadata: self.metadata.clone(), + words: reduced_words, + } + } +} + +impl Clone for WordList { + /// Returns a duplicate of the value. + /// + /// Note: this will load the word list for `&self` and the newly returned + /// word list. 
+ fn clone(&self) -> Self { + Self { + metadata: self.metadata.clone(), + words: EagerOrLazy::Eager(self.words.deref().clone()), + } + } } impl Index for WordList { From 266d7507ccabe15e8625099c4cca85b54ef65a8c Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Wed, 10 Dec 2025 17:20:29 +0000 Subject: [PATCH 2/6] Add (undocumented) builder for WordListMetadata and make it public Make WordList::define support anything that can be transformed into WordListMetadata --- static-lang-word-lists/src/lib.rs | 4 +- static-lang-word-lists/src/metadata.rs | 133 +++++++++++++++++++++++ static-lang-word-lists/src/word_lists.rs | 81 ++++---------- 3 files changed, 158 insertions(+), 60 deletions(-) create mode 100644 static-lang-word-lists/src/metadata.rs diff --git a/static-lang-word-lists/src/lib.rs b/static-lang-word-lists/src/lib.rs index 01b40df..d8dbcb9 100644 --- a/static-lang-word-lists/src/lib.rs +++ b/static-lang-word-lists/src/lib.rs @@ -66,9 +66,10 @@ //! download by setting the environment variable `STATIC_LANG_WORD_LISTS_LOCAL`. //! Otherwise, you're welcome to audit the [build script](https://github.com/googlefonts/fontheight/blob/main/static-lang-word-lists/build.rs). +mod metadata; mod word_lists; -pub(crate) use word_lists::WordListMetadata; +pub use metadata::*; #[cfg(feature = "rayon")] pub use word_lists::rayon::ParWordListIter; pub use word_lists::{WordList, WordListError, WordListIter}; @@ -124,4 +125,5 @@ macro_rules! 
word_list { // Module declaration has to be below macro definition to be able to use it mod declarations; + pub use declarations::*; diff --git a/static-lang-word-lists/src/metadata.rs b/static-lang-word-lists/src/metadata.rs new file mode 100644 index 0000000..3e73af4 --- /dev/null +++ b/static-lang-word-lists/src/metadata.rs @@ -0,0 +1,133 @@ +use std::{borrow::Cow, fs, path::Path}; + +use serde::Deserialize; + +use crate::WordListError; + +#[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct WordListMetadata { + pub(crate) name: Cow<'static, str>, + pub(crate) script: Option>, + pub(crate) language: Option>, +} + +impl WordListMetadata { + // Used by word_list! + // Library users should use the Builder struct + #[must_use] + pub(crate) const fn new( + name: &'static str, + script: Option<&'static str>, + language: Option<&'static str>, + ) -> Self { + // Can't use Option::map in const context + let script = match script { + Some(script) => Some(Cow::Borrowed(script)), + None => None, + }; + let language = match language { + Some(language) => Some(Cow::Borrowed(language)), + None => None, + }; + WordListMetadata { + name: Cow::Borrowed(name), + script, + language, + } + } + + #[allow(clippy::result_large_err)] + pub(crate) fn load( + metadata_path: impl AsRef, + ) -> Result { + let path = metadata_path.as_ref(); + let metadata_content = fs::read_to_string(path).map_err(|io_err| { + WordListError::FailedToRead(path.to_owned(), io_err) + })?; + let metadata: WordListMetadata = toml::from_str(&metadata_content) + .map_err(|json_err| { + WordListError::MetadataError(path.to_owned(), json_err) + })?; + Ok(metadata) + } + + pub(crate) fn new_from_name(name: impl Into) -> Self { + WordListMetadata { + name: Cow::Owned(name.into()), + script: None, + language: None, + } + } + + #[must_use] + pub fn name(&self) -> &str { + self.name.as_ref() + } + + #[must_use] + pub fn script(&self) -> Option<&str> { + 
self.script.as_deref() + } + + #[must_use] + pub fn language(&self) -> Option<&str> { + self.language.as_deref() + } +} + +impl From for WordListMetadata +where + S: Into>, +{ + fn from(word_list_name: S) -> Self { + WordListMetadata { + name: word_list_name.into(), + script: None, + language: None, + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct WordListMetadataBuilder(WordListMetadata); + +impl WordListMetadataBuilder { + pub fn new(word_list_name: impl Into>) -> Self { + Self(WordListMetadata { + name: word_list_name.into(), + script: None, + language: None, + }) + } + + pub fn script(self, script: impl Into>) -> Self { + Self(WordListMetadata { + script: Some(script.into()), + ..self.0 + }) + } + + pub fn language(self, language: impl Into>) -> Self { + Self(WordListMetadata { + language: Some(language.into()), + ..self.0 + }) + } + + pub fn build(self) -> WordListMetadata { + self.into() + } +} + +impl From for WordListMetadata { + fn from(builder: WordListMetadataBuilder) -> Self { + builder.0 + } +} + +impl From for WordListMetadataBuilder { + fn from(metadata: WordListMetadata) -> Self { + Self(metadata) + } +} diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index aeb1344..5ee3084 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -7,69 +7,14 @@ use std::{ sync::LazyLock, }; -use serde::Deserialize; use thiserror::Error; -use crate::newline_delimited_words; +use crate::{metadata::WordListMetadata, newline_delimited_words}; // TODO: this can be Box pub(crate) type Word = String; pub(crate) type WordSource = Box<[Word]>; -#[derive(Debug, Clone, Deserialize)] -#[serde(deny_unknown_fields)] -pub(crate) struct WordListMetadata { - name: Cow<'static, str>, - script: Option>, - language: Option>, -} - -impl WordListMetadata { - // Used by word_list! 
- #[must_use] - pub(crate) const fn new( - name: &'static str, - script: Option<&'static str>, - language: Option<&'static str>, - ) -> Self { - // Can't use Option::map in const context - let script = match script { - Some(script) => Some(Cow::Borrowed(script)), - None => None, - }; - let language = match language { - Some(language) => Some(Cow::Borrowed(language)), - None => None, - }; - WordListMetadata { - name: Cow::Borrowed(name), - script, - language, - } - } - - #[allow(clippy::result_large_err)] - fn load(metadata_path: impl AsRef) -> Result { - let path = metadata_path.as_ref(); - let metadata_content = fs::read_to_string(path).map_err(|io_err| { - WordListError::FailedToRead(path.to_owned(), io_err) - })?; - let metadata: WordListMetadata = toml::from_str(&metadata_content) - .map_err(|json_err| { - WordListError::MetadataError(path.to_owned(), json_err) - })?; - Ok(metadata) - } - - fn new_from_name(name: impl Into) -> Self { - WordListMetadata { - name: Cow::Owned(name.into()), - script: None, - language: None, - } - } -} - /// A list of words, with optional additional metadata. #[derive(Debug)] pub struct WordList { @@ -132,14 +77,18 @@ impl WordList { /// Create a new word list from an iterable. /// - /// Metadata is unspecified. 
+ /// Types that `impl Into`: + /// - [`&str`] (used as name of word list) + /// - [`String`] (used as name of word list) + /// - [`WordListMetadata`] + /// - [`WordListMetadataBuilder`](crate::WordListMetadataBuilder) #[must_use] pub fn define( - name: impl Into, + name_or_metadata: impl Into, words: impl IntoIterator>, ) -> Self { WordList { - metadata: WordListMetadata::new_from_name(name.into()), + metadata: name_or_metadata.into(), words: words.into_iter().map(Into::into).collect::>().into(), } } @@ -239,6 +188,20 @@ impl WordList { words: reduced_words, } } + + /// Override the existing metadata for a word list + /// + /// Doing this for a built-in word list will require you to clone it first: + /// + /// ``` + /// let mut word_list = static_lang_word_lists::AOSP_ARABIC.clone(); + /// word_list.set_metadata("not AOSP Arabic nyehehehe"); + /// // Step 3: world domination! + /// ``` + #[inline] + pub fn set_metadata(&mut self, metadata: WordListMetadata) { + self.metadata = metadata; + } } impl Clone for WordList { From c108bd2e954956d90eaac919bdf7b7c023008faf Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Thu, 11 Dec 2025 10:52:53 +0000 Subject: [PATCH 3/6] Add documentation & attrs to new methods Add WordList::metadata --- static-lang-word-lists/src/metadata.rs | 37 ++++++++++++++++++++++++ static-lang-word-lists/src/word_lists.rs | 21 ++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/static-lang-word-lists/src/metadata.rs b/static-lang-word-lists/src/metadata.rs index 3e73af4..01ecc89 100644 --- a/static-lang-word-lists/src/metadata.rs +++ b/static-lang-word-lists/src/metadata.rs @@ -4,6 +4,16 @@ use serde::Deserialize; use crate::WordListError; +/// Metadata about a [`WordList`](crate::WordList). +/// +/// Contains: +/// - [name](Self::name) +/// - [script](Self::script) (optional) +/// - [language](Self::language) (optional) +/// +/// `WordListMetadata` is an immutable structure. 
+/// If you need to edit one, you must first convert it to a +/// [`WordListMetadataBuilder`] first using the [`From`]/[`Into`] impl. #[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] #[serde(deny_unknown_fields)] pub struct WordListMetadata { @@ -60,16 +70,29 @@ impl WordListMetadata { } } + /// Get the name of the word list. + #[inline] #[must_use] pub fn name(&self) -> &str { self.name.as_ref() } + /// Get the script of the word list, if known. + /// + /// The script is expected to be an [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924) + /// four-letter capitalised code, but this is only guaranteed for built-in + /// word lists. + #[inline] #[must_use] pub fn script(&self) -> Option<&str> { self.script.as_deref() } + /// Get the language of the word list, if known. + /// + /// The language is expected to be an [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) + /// two-letter code, but this is only guaranteed for built-in word lists. + #[inline] #[must_use] pub fn language(&self) -> Option<&str> { self.language.as_deref() @@ -89,10 +112,13 @@ where } } +/// An editable [`WordListMetadata`]. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct WordListMetadataBuilder(WordListMetadata); impl WordListMetadataBuilder { + /// Start creating a new metadata struct from scratch. + #[inline] pub fn new(word_list_name: impl Into>) -> Self { Self(WordListMetadata { name: word_list_name.into(), @@ -101,6 +127,10 @@ impl WordListMetadataBuilder { }) } + /// Set the [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924) script of the word list. + /// + /// ⚠️ The value value isn't checked to be a valid ISO 15924 tag. + #[inline] pub fn script(self, script: impl Into>) -> Self { Self(WordListMetadata { script: Some(script.into()), @@ -108,6 +138,10 @@ impl WordListMetadataBuilder { }) } + /// Set the [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) language of the word list. + /// + /// ⚠️ The value value isn't checked to be a valid ISO 639-1 tag. 
+ #[inline] pub fn language(self, language: impl Into>) -> Self { Self(WordListMetadata { language: Some(language.into()), @@ -115,6 +149,9 @@ impl WordListMetadataBuilder { }) } + /// Convert the builder into an immutable [`WordListMetadata`]. + #[inline] + #[must_use] pub fn build(self) -> WordListMetadata { self.into() } diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index 5ee3084..23f8be5 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -122,7 +122,7 @@ impl WordList { #[inline] #[must_use] pub fn name(&self) -> &str { - &self.metadata.name + self.metadata.name() } /// Get the script of the word list, if known. @@ -133,7 +133,7 @@ impl WordList { #[inline] #[must_use] pub fn script(&self) -> Option<&str> { - self.metadata.script.as_deref() + self.metadata.script() } /// Get the language of the word list, if known. @@ -143,7 +143,21 @@ impl WordList { #[inline] #[must_use] pub fn language(&self) -> Option<&str> { - self.metadata.language.as_deref() + self.metadata.language() + } + + /// Access the word list's metadata. + /// + /// You usually only need to do this if you plan to clone & edit the + /// metadata, as otherwise you can access metadata from the `WordList` + /// directly: + /// - [`WordList::name`] + /// - [`WordList::script`] + /// - [`WordList::language`] + #[inline] + #[must_use] + pub const fn metadata(&self) -> &WordListMetadata { + &self.metadata } /// Iterate through the word list. @@ -171,6 +185,7 @@ impl WordList { /// /// You can think of this similar to calling [`Vec::retain`], except it /// returns a new list instead of modifying the old one in-place. + /// Metadata isn't modified. 
pub fn filter(&self, mut predicate: F) -> Self where F: FnMut(&str) -> bool, From 03572edec09b0d945a1ad2d4d778fb6d9ac9afa2 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Thu, 11 Dec 2025 10:57:18 +0000 Subject: [PATCH 4/6] Loosen type requirements for WordList::set_metadata --- static-lang-word-lists/src/word_lists.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index 23f8be5..4def1a5 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -204,7 +204,13 @@ impl WordList { } } - /// Override the existing metadata for a word list + /// Override the existing metadata for a word list. + /// + /// Types that `impl Into`: + /// - [`&str`] (used as name of word list) + /// - [`String`] (used as name of word list) + /// - [`WordListMetadata`] + /// - [`WordListMetadataBuilder`](crate::WordListMetadataBuilder) /// /// Doing this for a built-in word list will require you to clone it first: /// @@ -214,8 +220,8 @@ impl WordList { /// // Step 3: world domination! 
/// ``` #[inline] - pub fn set_metadata(&mut self, metadata: WordListMetadata) { - self.metadata = metadata; + pub fn set_metadata(&mut self, metadata: impl Into) { + self.metadata = metadata.into(); } } From 1bd2d44fc22305c66ff334eef7d98ee3f404e841 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Mon, 26 Jan 2026 16:15:30 +0000 Subject: [PATCH 5/6] Simplify: remove builder If the builder isn't abstracting away the Cows, I might as well just give direct access to them --- static-lang-word-lists/src/metadata.rs | 89 +++++------------------- static-lang-word-lists/src/word_lists.rs | 2 - 2 files changed, 19 insertions(+), 72 deletions(-) diff --git a/static-lang-word-lists/src/metadata.rs b/static-lang-word-lists/src/metadata.rs index 01ecc89..fb3509c 100644 --- a/static-lang-word-lists/src/metadata.rs +++ b/static-lang-word-lists/src/metadata.rs @@ -6,25 +6,30 @@ use crate::WordListError; /// Metadata about a [`WordList`](crate::WordList). /// -/// Contains: -/// - [name](Self::name) -/// - [script](Self::script) (optional) -/// - [language](Self::language) (optional) -/// -/// `WordListMetadata` is an immutable structure. -/// If you need to edit one, you must first convert it to a -/// [`WordListMetadataBuilder`] first using the [`From`]/[`Into`] impl. +/// If you don't want to mess around with the 🐄s, convenience methods are +/// provided for reading fields: +/// - [`WordListMetadata::name`] +/// - [`WordListMetadata::script`] +/// - [`WordListMetadata::language`] #[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] #[serde(deny_unknown_fields)] pub struct WordListMetadata { - pub(crate) name: Cow<'static, str>, - pub(crate) script: Option>, - pub(crate) language: Option>, + /// The cosmetic name for the word list + pub name: Cow<'static, str>, + /// The script of the word list, if known. + /// + /// The script is expected to be an [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924) + /// four-letter capitalised code. 
+ pub script: Option>, + /// The language of the word list, if known. + /// + /// The language is expected to be an [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) + /// two-letter code. + pub language: Option>, } impl WordListMetadata { // Used by word_list! - // Library users should use the Builder struct #[must_use] pub(crate) const fn new( name: &'static str, @@ -47,8 +52,9 @@ impl WordListMetadata { } } + /// Load metadata from an on-disk TOML file #[allow(clippy::result_large_err)] - pub(crate) fn load( + pub fn load( metadata_path: impl AsRef, ) -> Result { let path = metadata_path.as_ref(); @@ -111,60 +117,3 @@ where } } } - -/// An editable [`WordListMetadata`]. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct WordListMetadataBuilder(WordListMetadata); - -impl WordListMetadataBuilder { - /// Start creating a new metadata struct from scratch. - #[inline] - pub fn new(word_list_name: impl Into>) -> Self { - Self(WordListMetadata { - name: word_list_name.into(), - script: None, - language: None, - }) - } - - /// Set the [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924) script of the word list. - /// - /// ⚠️ The value value isn't checked to be a valid ISO 15924 tag. - #[inline] - pub fn script(self, script: impl Into>) -> Self { - Self(WordListMetadata { - script: Some(script.into()), - ..self.0 - }) - } - - /// Set the [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) language of the word list. - /// - /// ⚠️ The value value isn't checked to be a valid ISO 639-1 tag. - #[inline] - pub fn language(self, language: impl Into>) -> Self { - Self(WordListMetadata { - language: Some(language.into()), - ..self.0 - }) - } - - /// Convert the builder into an immutable [`WordListMetadata`]. 
- #[inline] - #[must_use] - pub fn build(self) -> WordListMetadata { - self.into() - } -} - -impl From for WordListMetadata { - fn from(builder: WordListMetadataBuilder) -> Self { - builder.0 - } -} - -impl From for WordListMetadataBuilder { - fn from(metadata: WordListMetadata) -> Self { - Self(metadata) - } -} diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index 4def1a5..b7e4e75 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -81,7 +81,6 @@ impl WordList { /// - [`&str`] (used as name of word list) /// - [`String`] (used as name of word list) /// - [`WordListMetadata`] - /// - [`WordListMetadataBuilder`](crate::WordListMetadataBuilder) #[must_use] pub fn define( name_or_metadata: impl Into, @@ -210,7 +209,6 @@ impl WordList { /// - [`&str`] (used as name of word list) /// - [`String`] (used as name of word list) /// - [`WordListMetadata`] - /// - [`WordListMetadataBuilder`](crate::WordListMetadataBuilder) /// /// Doing this for a built-in word list will require you to clone it first: /// From 9623d5245f31a02c9754c5314207c79a558d70ac Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Mon, 26 Jan 2026 16:32:45 +0000 Subject: [PATCH 6/6] Make WordList.metadata public --- static-lang-word-lists/src/word_lists.rs | 46 +++++++----------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/static-lang-word-lists/src/word_lists.rs b/static-lang-word-lists/src/word_lists.rs index b7e4e75..43da153 100644 --- a/static-lang-word-lists/src/word_lists.rs +++ b/static-lang-word-lists/src/word_lists.rs @@ -19,7 +19,18 @@ pub(crate) type WordSource = Box<[Word]>; #[derive(Debug)] pub struct WordList { words: EagerOrLazy, - metadata: WordListMetadata, + /// Metadata associated with this word list. + /// + /// Includes the word list's name, script (if known), and language (if + /// known). 
+ /// + /// You usually only need to access this directly if you plan to edit the + /// metadata, as otherwise you can access metadata from the `WordList` + /// directly: + /// - [`WordList::name`] + /// - [`WordList::script`] + /// - [`WordList::language`] + pub metadata: WordListMetadata, } impl WordList { @@ -145,20 +156,6 @@ impl WordList { self.metadata.language() } - /// Access the word list's metadata. - /// - /// You usually only need to do this if you plan to clone & edit the - /// metadata, as otherwise you can access metadata from the `WordList` - /// directly: - /// - [`WordList::name`] - /// - [`WordList::script`] - /// - [`WordList::language`] - #[inline] - #[must_use] - pub const fn metadata(&self) -> &WordListMetadata { - &self.metadata - } - /// Iterate through the word list. #[must_use] pub fn iter(&self) -> WordListIter<'_> { @@ -202,25 +199,6 @@ impl WordList { words: reduced_words, } } - - /// Override the existing metadata for a word list. - /// - /// Types that `impl Into`: - /// - [`&str`] (used as name of word list) - /// - [`String`] (used as name of word list) - /// - [`WordListMetadata`] - /// - /// Doing this for a built-in word list will require you to clone it first: - /// - /// ``` - /// let mut word_list = static_lang_word_lists::AOSP_ARABIC.clone(); - /// word_list.set_metadata("not AOSP Arabic nyehehehe"); - /// // Step 3: world domination! - /// ``` - #[inline] - pub fn set_metadata(&mut self, metadata: impl Into) { - self.metadata = metadata.into(); - } } impl Clone for WordList {