Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion static-lang-word-lists/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@
//! download by setting the environment variable `STATIC_LANG_WORD_LISTS_LOCAL`.
//! Otherwise, you're welcome to audit the [build script](https://github.com/googlefonts/fontheight/blob/main/static-lang-word-lists/build.rs).

mod metadata;
mod word_lists;

pub(crate) use word_lists::WordListMetadata;
pub use metadata::*;
#[cfg(feature = "rayon")]
pub use word_lists::rayon::ParWordListIter;
pub use word_lists::{WordList, WordListError, WordListIter};
Expand Down Expand Up @@ -124,4 +125,5 @@ macro_rules! word_list {

// Module declaration has to be below macro definition to be able to use it
mod declarations;

pub use declarations::*;
119 changes: 119 additions & 0 deletions static-lang-word-lists/src/metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use std::{borrow::Cow, fs, path::Path};

use serde::Deserialize;

use crate::WordListError;

/// Metadata about a [`WordList`](crate::WordList).
///
/// Every field is stored as a [`Cow<'static, str>`](std::borrow::Cow) (the
/// 🐄s mentioned below), so a value can either borrow `'static` strings or own
/// its strings.
///
/// If you don't want to mess around with the 🐄s, convenience methods are
/// provided for reading fields as plain `&str`s:
/// - [`WordListMetadata::name`]
/// - [`WordListMetadata::script`]
/// - [`WordListMetadata::language`]
// `deny_unknown_fields`: deserializing input that contains any key other than
// the fields declared below is an error rather than being silently ignored.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct WordListMetadata {
/// The cosmetic name for the word list.
pub name: Cow<'static, str>,
/// The script of the word list, if known.
///
/// The script is expected to be an [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924)
/// four-letter capitalised code.
pub script: Option<Cow<'static, str>>,
/// The language of the word list, if known.
///
/// The language is expected to be an [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1)
/// two-letter code.
pub language: Option<Cow<'static, str>>,
}

impl WordListMetadata {
    /// Build metadata out of `'static` strings in a const context.
    ///
    /// Used by `word_list!`. Every field borrows the string it was given.
    #[must_use]
    pub(crate) const fn new(
        name: &'static str,
        script: Option<&'static str>,
        language: Option<&'static str>,
    ) -> Self {
        // `Option::map` can't be called in a const context, so each optional
        // field is wrapped by hand.
        let script = if let Some(code) = script {
            Some(Cow::Borrowed(code))
        } else {
            None
        };
        let language = if let Some(code) = language {
            Some(Cow::Borrowed(code))
        } else {
            None
        };
        Self {
            name: Cow::Borrowed(name),
            script,
            language,
        }
    }

    /// Load metadata from an on-disk TOML file.
    ///
    /// # Errors
    ///
    /// - [`WordListError::FailedToRead`] if the file at `metadata_path`
    ///   can't be read into a string.
    /// - [`WordListError::MetadataError`] if the file's contents can't be
    ///   deserialized from TOML into a `WordListMetadata`.
    ///
    /// Both variants carry the offending path alongside the underlying error.
    #[allow(clippy::result_large_err)]
    pub fn load(
        metadata_path: impl AsRef<Path>,
    ) -> Result<Self, WordListError> {
        let path = metadata_path.as_ref();
        let raw_toml = match fs::read_to_string(path) {
            Ok(text) => text,
            Err(io_err) => {
                return Err(WordListError::FailedToRead(
                    path.to_owned(),
                    io_err,
                ));
            },
        };
        toml::from_str::<Self>(&raw_toml).map_err(|toml_err| {
            WordListError::MetadataError(path.to_owned(), toml_err)
        })
    }

    /// Build metadata that only carries an owned name; the script and
    /// language are left unknown.
    pub(crate) fn new_from_name(name: impl Into<String>) -> Self {
        let owned_name: String = name.into();
        Self {
            name: Cow::Owned(owned_name),
            script: None,
            language: None,
        }
    }

    /// The word list's name, as a plain string slice.
    #[inline]
    #[must_use]
    pub fn name(&self) -> &str {
        &self.name
    }

    /// The word list's script, or `None` if it isn't known.
    ///
    /// The script is expected to be an [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924)
    /// four-letter capitalised code, but this is only guaranteed for built-in
    /// word lists.
    #[inline]
    #[must_use]
    pub fn script(&self) -> Option<&str> {
        self.script.as_deref()
    }

    /// The word list's language, or `None` if it isn't known.
    ///
    /// The language is expected to be an [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1)
    /// two-letter code, but this is only guaranteed for built-in word lists.
    #[inline]
    #[must_use]
    pub fn language(&self) -> Option<&str> {
        self.language.as_deref()
    }
}

impl<S> From<S> for WordListMetadata
where
S: Into<Cow<'static, str>>,
{
fn from(word_list_name: S) -> Self {
WordListMetadata {
name: word_list_name.into(),
script: None,
language: None,
}
}
}
122 changes: 59 additions & 63 deletions static-lang-word-lists/src/word_lists.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,74 +7,30 @@ use std::{
sync::LazyLock,
};

use serde::Deserialize;
use thiserror::Error;

use crate::newline_delimited_words;
use crate::{metadata::WordListMetadata, newline_delimited_words};

// TODO: this can be Box<str>
/// A single word of a word list.
pub(crate) type Word = String;
/// The storage for a word list's words: a boxed slice of [`Word`]s.
pub(crate) type WordSource = Box<[Word]>;

/// Metadata attached to a word list: its name, plus its script and language
/// when those are known.
// `deny_unknown_fields`: deserializing input that contains any key other than
// the three fields below is an error rather than being silently ignored.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub(crate) struct WordListMetadata {
/// The name of the word list.
name: Cow<'static, str>,
/// The script of the word list, if known.
script: Option<Cow<'static, str>>,
/// The language of the word list, if known.
language: Option<Cow<'static, str>>,
}

impl WordListMetadata {
    /// Build metadata out of `'static` strings in a const context.
    ///
    /// Used by `word_list!`. Every field borrows the string it was given.
    #[must_use]
    pub(crate) const fn new(
        name: &'static str,
        script: Option<&'static str>,
        language: Option<&'static str>,
    ) -> Self {
        // `Option::map` can't be called in a const context, so each optional
        // field is wrapped by hand.
        let script = if let Some(code) = script {
            Some(Cow::Borrowed(code))
        } else {
            None
        };
        let language = if let Some(code) = language {
            Some(Cow::Borrowed(code))
        } else {
            None
        };
        Self {
            name: Cow::Borrowed(name),
            script,
            language,
        }
    }

    /// Read the file at `metadata_path` and deserialize it from TOML.
    ///
    /// Fails with `WordListError::FailedToRead` when the file can't be read,
    /// and with `WordListError::MetadataError` when its contents can't be
    /// deserialized; both carry the offending path.
    #[allow(clippy::result_large_err)]
    fn load(metadata_path: impl AsRef<Path>) -> Result<Self, WordListError> {
        let path = metadata_path.as_ref();
        let raw_toml = match fs::read_to_string(path) {
            Ok(text) => text,
            Err(io_err) => {
                return Err(WordListError::FailedToRead(
                    path.to_owned(),
                    io_err,
                ));
            },
        };
        toml::from_str::<Self>(&raw_toml).map_err(|toml_err| {
            WordListError::MetadataError(path.to_owned(), toml_err)
        })
    }

    /// Build metadata that only carries an owned name; the script and
    /// language are left unknown.
    fn new_from_name(name: impl Into<String>) -> Self {
        let owned_name: String = name.into();
        Self {
            name: Cow::Owned(owned_name),
            script: None,
            language: None,
        }
    }
}

/// A list of words, with optional additional metadata.
#[derive(Debug)]
pub struct WordList {
words: EagerOrLazy<WordSource>,
metadata: WordListMetadata,
/// Metadata associated with this word list.
///
/// Includes the word list's name, script (if known), and language (if
/// known).
///
/// You usually only need to access this directly if you plan to edit the
/// metadata, as otherwise you can access metadata from the `WordList`
/// directly:
/// - [`WordList::name`]
/// - [`WordList::script`]
/// - [`WordList::language`]
pub metadata: WordListMetadata,
}

impl WordList {
Expand Down Expand Up @@ -132,14 +88,17 @@ impl WordList {

/// Create a new word list from an iterable.
///
/// Metadata is unspecified.
/// Types that `impl Into<WordListMetadata>`:
/// - [`&str`] (used as name of word list)
/// - [`String`] (used as name of word list)
/// - [`WordListMetadata`]
#[must_use]
pub fn define(
name: impl Into<String>,
name_or_metadata: impl Into<WordListMetadata>,
words: impl IntoIterator<Item = impl Into<String>>,
) -> Self {
WordList {
metadata: WordListMetadata::new_from_name(name.into()),
metadata: name_or_metadata.into(),
words: words.into_iter().map(Into::into).collect::<Vec<_>>().into(),
}
}
Expand Down Expand Up @@ -173,7 +132,7 @@ impl WordList {
#[inline]
#[must_use]
pub fn name(&self) -> &str {
&self.metadata.name
self.metadata.name()
}

/// Get the script of the word list, if known.
Expand All @@ -184,7 +143,7 @@ impl WordList {
#[inline]
#[must_use]
pub fn script(&self) -> Option<&str> {
self.metadata.script.as_deref()
self.metadata.script()
}

/// Get the language of the word list, if known.
Expand All @@ -194,7 +153,7 @@ impl WordList {
#[inline]
#[must_use]
pub fn language(&self) -> Option<&str> {
self.metadata.language.as_deref()
self.metadata.language()
}

/// Iterate through the word list.
Expand All @@ -216,6 +175,43 @@ impl WordList {
pub fn is_empty(&self) -> bool {
self.words.is_empty()
}

/// Create a new word list by removing words from an existing one, according
/// to the `predicate`.
///
/// Words for which `predicate` returns `true` are kept; all others are left
/// out of the new list. The kept words are cloned, and `self` is not
/// modified.
///
/// You can think of this similar to calling [`Vec::retain`], except it
/// returns a new list instead of modifying the old one in-place.
/// Metadata isn't modified: the new list carries a clone of this list's
/// metadata.
// The result is the only effect of calling this method, so discarding it is
// almost certainly a mistake.
#[must_use]
pub fn filter<F>(&self, mut predicate: F) -> Self
where
    F: FnMut(&str) -> bool,
{
    let kept_words = self
        .words
        .iter()
        .filter(|word| predicate(word))
        // Clone after filtering so only the surviving words are copied
        .cloned()
        .collect::<Vec<_>>()
        .into_boxed_slice();
    Self {
        metadata: self.metadata.clone(),
        words: EagerOrLazy::Eager(kept_words),
    }
}
}

impl Clone for WordList {
/// Returns a duplicate of the value.
///
/// Note: this will load the word list for `&self` and the newly returned
/// word list.
fn clone(&self) -> Self {
Self {
metadata: self.metadata.clone(),
words: EagerOrLazy::Eager(self.words.deref().clone()),
}
}
}

impl Index<usize> for WordList {
Expand Down