From 591726ecb593864a19c8a2c32c4978970aee0400 Mon Sep 17 00:00:00 2001 From: Adam Israel Date: Sun, 22 Mar 2026 13:36:17 -0400 Subject: [PATCH 1/2] fix: improve word count calculation Switched to a new crate (md-word-count) that I wrote to count words specifically in Markdown files, ignoring symbols and comments. --- Cargo.lock | 34 ++++++++++++++-------------------- Cargo.toml | 2 +- src/main.rs | 19 +++++++++++++------ src/markdown.rs | 8 +++++--- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09540a7..286e298 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -853,12 +853,22 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "md-word-count" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89662c916910fb8a7beca6708d08c3e5875fea3eb4b4394581603ff13a2adf91" +dependencies = [ + "regex", +] + [[package]] name = "md2ms" version = "0.1.0" dependencies = [ "clap", "docx-rs", + "md-word-count", "obsidian-rs", "pulldown-cmark", "rand", @@ -868,7 +878,6 @@ dependencies = [ "shellexpand", "thiserror 2.0.12", "thousands", - "words-count", "yaml-front-matter", ] @@ -1157,9 +1166,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -1169,9 +1178,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -1723,12 +1732,6 @@ version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" -[[package]] -name = "unicode-blocks" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b12e05d9e06373163a9bb6bb8c263c261b396643a99445fe6b9811fd376581b" - [[package]] name = "unicode-ident" version = "1.0.18" @@ -2091,15 +2094,6 @@ dependencies = [ "bitflags 2.9.1", ] -[[package]] -name = "words-count" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d28653ddaede5475c44a03e4014ae19f35aa9b231c423228b28963cb873e4869" -dependencies = [ - "unicode-blocks", -] - [[package]] name = "writeable" version = "0.6.1" diff --git a/Cargo.toml b/Cargo.toml index e690b5f..7ba69f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ readme = "README.md" docx-rs = { git = "https://github.com/bokuweb/docx-rs.git" } clap = { version = "4.0", features = ["derive"] } serde = "1.0.218" -words-count = "0.1.6" yaml-front-matter = "0.1.0" regex = "1.11.1" pulldown-cmark = { version = "0.13.0", default-features = false } @@ -27,3 +26,4 @@ obsidian-rs = { git = "https://github.com/adamisrael/obsidian-rs.git" } rand = "0.9.1" thiserror = "2.0.12" thousands = "0.2.0" +md-word-count = "0.1.1" diff --git a/src/main.rs b/src/main.rs index 1773cc6..db20ca9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,15 @@ // Syntax: md2ms [options] // md2ms --output-dir -use md2ms::constants; use clap::Parser; +use docx_rs::*; use thousands::Separable; use yaml_front_matter::Document; +use md_word_count::count_words; use std::path::PathBuf; -use docx_rs::*; +use md2ms::constants; use md2ms::context::Context; use md2ms::error::Md2msError; use md2ms::markdown::flatten_markdown; @@ -174,17 +175,23 @@ fn compile(ctx: &mut Context) -> Result<(), Md2msError> { match flatten_markdown(ctx, mddoc) { Ok(md) => { - // Using this crate for now, but maybe convert this to my own code - let wc = words_count::count(md.iter().map(|p| p.raw_text()).collect::()); + // Calculate the word count by iterating through the raw Markdown files. + let mut wc = 0; + for (f, markdown) in ctx.clone().files { + if f == "metadata.md" { + continue; + } + wc += count_words(markdown.content.as_str()); + } // If the author wants the word count, give them the exact count, not the approximate value. if ctx.word_count { - println!("Exact word count: {}", wc.words.separate_with_commas()); + println!("Exact word count: {}", wc.separate_with_commas()); return Ok(()); } // Round up for the manuscript - let nwc = round_up(wc.words); + let nwc = round_up(wc); // A PathBuf to build the path to the output file let output_dir = shellexpand::tilde(&ctx.output_dir.to_string_lossy()).to_string(); diff --git a/src/markdown.rs b/src/markdown.rs index c205485..59e4d3b 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -1,14 +1,16 @@ // use std::collections::HashMap; +use docx_rs::*; +use regex::Regex; +use yaml_front_matter::{Document, YamlFrontMatter}; + use crate::cmark::parse_paragraph; use crate::constants; use crate::context::Context; use crate::error::Md2msError; use crate::metadata::Metadata; use crate::pii::PII; -use docx_rs::*; -use regex::Regex; -use yaml_front_matter::{Document, YamlFrontMatter}; + /// Strip Markdown comments out of the content fn strip_comments(mut content: String) -> String { From f63931750b730dac587025a2034ac9badfb065f1 Mon Sep 17 00:00:00 2001 From: Adam Israel Date: Sun, 22 Mar 2026 13:38:59 -0400 Subject: [PATCH 2/2] fix: rustfmt --- src/main.rs | 2 +- src/markdown.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index db20ca9..709324b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,9 +3,9 @@ use clap::Parser; use docx_rs::*; +use md_word_count::count_words; use thousands::Separable; use yaml_front_matter::Document; -use md_word_count::count_words; use std::path::PathBuf; diff --git a/src/markdown.rs b/src/markdown.rs index 59e4d3b..22fc929 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -11,7 +11,6 @@ use crate::error::Md2msError; use crate::metadata::Metadata; use crate::pii::PII; - /// Strip Markdown comments out of the content fn strip_comments(mut content: String) -> String { // Add support single and multi-line %% comment blocks %%