diff --git a/.gitignore b/.gitignore index 446fe69..dd7eee6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ # @help: https://git-scm.com/docs/gitignore + +target/ diff --git a/s3_dsa/p01/.gitignore b/s3_dsa/p01/.gitignore new file mode 100644 index 0000000..e40c306 --- /dev/null +++ b/s3_dsa/p01/.gitignore @@ -0,0 +1,3 @@ +# @help: https://git-scm.com/docs/gitignore + +*.bin diff --git a/s3_dsa/p01/Cargo.lock b/s3_dsa/p01/Cargo.lock new file mode 100644 index 0000000..54f07f9 --- /dev/null +++ b/s3_dsa/p01/Cargo.lock @@ -0,0 +1,69 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" + +[[package]] +name = "fern" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ff9c9d5fb3e6da8ac2f77ab76fe7e8087d512ce095200f8f29ac5b656cf6dc" +dependencies = [ + "log", +] + +[[package]] +name = "gen" +version = "0.1.0" +dependencies = [ + "nanorand", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "nacci" +version = "0.1.0" +dependencies = [ + "log", +] + +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" + +[[package]] +name = "polyphase" +version = "0.1.0" +dependencies = [ + "bytemuck", + "log", + "nacci", +] + +[[package]] +name = "sort" +version = "0.1.0" 
+dependencies = [
+ "fern",
+ "humantime",
+ "log",
+ "polyphase",
+]
diff --git a/s3_dsa/p01/Cargo.toml b/s3_dsa/p01/Cargo.toml
new file mode 100644
index 0000000..d627abf
--- /dev/null
+++ b/s3_dsa/p01/Cargo.toml
@@ -0,0 +1,23 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/cargo.json
+# @help: https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[workspace]
+default-members = ["./src/bin/sort/"]
+members = [
+    "./src/bin/gen/",
+    "./src/bin/sort/",
+    "./src/lib/nacci/",
+    "./src/lib/polyphase/",
+]
+resolver = "2"
+
+[workspace.dependencies]
+log = "0.4.22"
+
+[workspace.package]
+authors = ["fruzitent "]
+description = "s3_dsa/p01"
+edition = "2021"
+license-file = "license.md"
+readme = "readme.md"
+repository = "git+https://github.com/fruzitent/kpi.git"
diff --git a/s3_dsa/p01/rust-toolchain.toml b/s3_dsa/p01/rust-toolchain.toml
new file mode 100644
index 0000000..15badae
--- /dev/null
+++ b/s3_dsa/p01/rust-toolchain.toml
@@ -0,0 +1,5 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/rust-toolchain.json
+# @help: https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file
+
+[toolchain]
+channel = "stable"
diff --git a/s3_dsa/p01/rustfmt.toml b/s3_dsa/p01/rustfmt.toml
new file mode 100644
index 0000000..0192a88
--- /dev/null
+++ b/s3_dsa/p01/rustfmt.toml
@@ -0,0 +1,4 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/rustfmt.json
+# @help: https://rust-lang.github.io/rustfmt
+
+max_width = 120
diff --git a/s3_dsa/p01/src/bin/gen/Cargo.toml b/s3_dsa/p01/src/bin/gen/Cargo.toml
new file mode 100644
index 0000000..2567458
--- /dev/null
+++ b/s3_dsa/p01/src/bin/gen/Cargo.toml
@@ -0,0 +1,21 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/cargo.json
+# @help: https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[build-dependencies]
+
+[dependencies]
+nanorand = "0.7.0"
+
+[dev-dependencies]
+
+[features]
+
+[package]
+authors.workspace = true
+description.workspace = true
+edition.workspace = true
+license-file.workspace = true
+name = "gen"
+readme.workspace = true
+repository.workspace = true
+version = "0.1.0"
diff --git a/s3_dsa/p01/src/bin/gen/src/main.rs b/s3_dsa/p01/src/bin/gen/src/main.rs
new file mode 100644
index 0000000..deafbac
--- /dev/null
+++ b/s3_dsa/p01/src/bin/gen/src/main.rs
@@ -0,0 +1,28 @@
+//! Generates `./input.bin`, a file of random bytes used as input for the `sort` binary.
+
+/// Total number of random bytes written to the output file (1 GiB).
+const TOTAL_BYTES: usize = 1 << 30;
+
+/// Number of bytes generated and written per iteration.
+const BLOCK_BYTES: usize = 1 << 20;
+
+// The loop below writes whole blocks only, so the sizes must divide evenly.
+const _: () = assert!(TOTAL_BYTES % BLOCK_BYTES == 0);
+
+/// Writes `TOTAL_BYTES` random bytes to `./input.bin` one block at a time, so the
+/// whole payload never has to be held in memory at once.
+///
+/// # Panics
+/// Panics if the file cannot be created or written.
+fn main() {
+    let path = std::path::Path::new("./input.bin");
+
+    let mut block = vec![0u8; BLOCK_BYTES];
+    let mut rng = nanorand::tls_rng();
+
+    let mut file = std::fs::File::create(path).expect("failed to create ./input.bin");
+    for _ in 0..TOTAL_BYTES / BLOCK_BYTES {
+        nanorand::Rng::fill_bytes(&mut rng, &mut block);
+        std::io::Write::write_all(&mut file, &block).expect("failed to write ./input.bin");
+    }
+}
diff --git a/s3_dsa/p01/src/bin/sort/Cargo.toml b/s3_dsa/p01/src/bin/sort/Cargo.toml
new file mode 100644
index 0000000..58dc07b
--- /dev/null
+++ b/s3_dsa/p01/src/bin/sort/Cargo.toml
@@ -0,0 +1,24 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/cargo.json
+# @help: https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[build-dependencies]
+
+[dependencies]
+fern = "0.7.0"
+humantime = "2.1.0"
+log.workspace = true
+polyphase = { path = "../../lib/polyphase/" }
+
+[dev-dependencies]
+
+[features]
+
+[package]
+authors.workspace = true
+description.workspace = true
+edition.workspace = true
+license-file.workspace = true
+name = "sort"
+readme.workspace = true
+repository.workspace = true
+version = "0.1.0"
diff --git a/s3_dsa/p01/src/bin/sort/src/main.rs b/s3_dsa/p01/src/bin/sort/src/main.rs
new file mode 100644
index 0000000..0801382
--- /dev/null
+++ b/s3_dsa/p01/src/bin/sort/src/main.rs
@@ -0,0 +1,36 @@
+/// Installs a global `fern` logger that writes every record at `Debug` level or above to stdout.
+///
+/// Records are rendered as `[<RFC 3339 timestamp> <level> <file>:<line>] <message>`.
+///
+/// # Errors
+/// Propagates the error returned by `fern::Dispatch::apply`.
+fn setup_logger() -> Result<(), fern::InitError> {
+    fern::Dispatch::new()
+        .format(|out, message, record| {
+            out.finish(format_args!(
+                "[{} {} {}:{}] {}",
+                humantime::format_rfc3339_seconds(std::time::SystemTime::now()),
+                record.level(),
+                // The source location is optional on a record; fall back instead of panicking.
+                record.file().unwrap_or("<unknown>"),
+                record.line().unwrap_or(0),
+                message
+            ))
+        })
+        .level(log::LevelFilter::Debug)
+        .chain(std::io::stdout())
+        .apply()?;
+    Ok(())
+}
+
+/// Sets up logging, then runs the polyphase sort with `./input.bin` as input and `./output.bin`
+/// as output path, using `reverse = false` and three tapes.
+///
+/// # Panics
+/// Panics if the logger cannot be installed.
+fn main() {
+    setup_logger().expect("failed to initialise the logger");
+    let input_path = std::path::Path::new("./input.bin");
+    let output_path = std::path::Path::new("./output.bin");
+    polyphase::sort(input_path, output_path, false, 3);
+}
diff --git a/s3_dsa/p01/src/lib/nacci/Cargo.toml b/s3_dsa/p01/src/lib/nacci/Cargo.toml
new file mode 100644
index 0000000..d8f0d95
--- /dev/null
+++ b/s3_dsa/p01/src/lib/nacci/Cargo.toml
@@ -0,0 +1,21 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/cargo.json
+# @help: https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[build-dependencies]
+
+[dependencies]
+log.workspace = true
+
+[dev-dependencies]
+
+[features]
+
+[package]
+authors.workspace = true
+description.workspace = true
+edition.workspace = true
+license-file.workspace = true
+name = "nacci"
+readme.workspace = true
+repository.workspace = true
+version = "0.1.0"
diff --git a/s3_dsa/p01/src/lib/nacci/src/lib.rs b/s3_dsa/p01/src/lib/nacci/src/lib.rs
new file mode 100644
index 0000000..3957279
--- /dev/null
+++ b/s3_dsa/p01/src/lib/nacci/src/lib.rs
@@ -0,0 +1,111 @@
+/// Iterator over a higher-order ("n-step") Fibonacci sequence.
+///
+/// @see: https://en.wikipedia.org/wiki/Generalizations_of_Fibonacci_numbers#Fibonacci_numbers_of_higher_order
+pub struct Nacci {
+    // TODO: replace usize with T
+    /// Ring buffer with the next `order` terms of the sequence.
+    cache: Vec<usize>,
+    /// Slot of `cache` holding the term that `next` returns.
+    index: usize,
+    /// Sum of every term in `cache`, i.e. the term that follows them.
+    total: usize,
+}
+
+impl Nacci {
+    /// Creates the sequence of the given `order` (2 yields Fibonacci, 3 Tribonacci, ...).
+    ///
+    /// The sequence starts with `order - 1` zeros followed by a single one.
+    /// Returns `None` when `order < 2`.
+    pub fn new(order: usize) -> Option<Self> {
+        if order < 2 {
+            // TODO: https://en.wikipedia.org/wiki/Negafibonacci_coding
+            return None;
+        }
+        let mut cache = vec![0; order];
+        cache[order - 1] = 1;
+        Some(Self {
+            cache,
+            index: 0,
+            total: 1,
+        })
+    }
+}
+
+impl Iterator for Nacci {
+    type Item = usize;
+
+    /// Returns the next term, or `None` once the look-ahead sum no longer fits in `usize`.
+    fn next(&mut self) -> Option<Self::Item> {
+        let result = self.cache[self.index];
+        // `total` is the sum of the cache, so `total >= result` and the subtraction cannot
+        // underflow; the addition is checked so the sequence ends instead of overflowing.
+        let next_total = self.total.checked_add(self.total - result)?;
+        self.cache[self.index] = self.total;
+        self.index = (self.index + 1) % self.cache.len();
+        self.total = next_total;
+        Some(result)
+    }
+}
+
+/// Computes how many runs each of the `tape_count - 1` input tapes receives so that the
+/// combined count is at least `length`.
+///
+/// Successive terms of the order-`tape_count - 1` sequence are written round-robin into the
+/// slots until their sum reaches `length`.
+///
+/// # Panics
+/// Panics if `tape_count < 3`.
+pub fn get_dist(length: usize, tape_count: usize) -> Vec<usize> {
+    // NOTE(review): with more than two input tapes, textbook polyphase distributions are sums of
+    // consecutive terms rather than the raw terms used here -- confirm this is intended.
+    let slots = tape_count
+        .checked_sub(1)
+        .filter(|&count| count >= 2)
+        .expect("get_dist() requires tape_count >= 3");
+    let fib = Nacci::new(slots).expect("an order of at least 2 is always valid");
+    let mut levels = vec![0_usize; slots];
+    for (i, val) in fib.enumerate() {
+        levels[i % slots] = val;
+        let total: usize = levels.iter().sum();
+        log::debug!("i={i:?}, val={val:?}, levels={levels:?}, total={total:?}", i = i + 1);
+        if total >= length {
+            break;
+        }
+    }
+    levels
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_works() {
+        assert!(Nacci::new(1).is_none());
+        assert_eq!(
+            Nacci::new(2).unwrap().take(16).collect::<Vec<_>>(),
+            vec![0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610]
+        );
+        assert_eq!(
+            Nacci::new(3).unwrap().take(16).collect::<Vec<_>>(),
+            vec![0, 0, 1, 1, 2, 4, 7, 13, 24, 44, 81, 149, 274, 504, 927, 1705]
+        );
+        assert_eq!(
+            Nacci::new(4).unwrap().take(16).collect::<Vec<_>>(),
+            vec![0, 0, 0, 1, 1, 2, 4, 8, 15, 29, 56, 108, 208, 401, 773, 1490]
+        );
+        assert_eq!(Nacci::new(5).unwrap().take(0).collect::<Vec<usize>>(), Vec::<usize>::new());
+    }
+
+    #[test]
+    fn it_terminates_before_overflow() {
+        let terms: Vec<usize> = Nacci::new(2).unwrap().collect();
+        assert!(terms.windows(2).all(|pair| pair[0] <= pair[1]));
+    }
+
+    #[test]
+    fn it_distributes_runs() {
+        assert_eq!(get_dist(5, 3), vec![3, 2]);
+        assert_eq!(get_dist(6, 3), vec![3, 5]);
+    }
+}
diff --git a/s3_dsa/p01/src/lib/polyphase/Cargo.toml b/s3_dsa/p01/src/lib/polyphase/Cargo.toml
new file mode 100644
index 0000000..cc0becb
--- /dev/null
+++ b/s3_dsa/p01/src/lib/polyphase/Cargo.toml
@@ -0,0 +1,24 @@
+#:schema https://raw.githubusercontent.com/SchemaStore/schemastore/master/src/schemas/json/cargo.json
+# @help: https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[build-dependencies]
+
+[dependencies]
+bytemuck = "1.19.0"
+log.workspace = true
+nacci = { path = "../nacci/" }
+
+[dev-dependencies]
+
+[features]
+init_merge = []
+
+[package]
+authors.workspace = true
+description.workspace = true
+edition.workspace = true
+license-file.workspace = true
+name = "polyphase"
+readme.workspace = true
+repository.workspace = true
+version = "0.1.0"
diff --git a/s3_dsa/p01/src/lib/polyphase/src/lib.rs
b/s3_dsa/p01/src/lib/polyphase/src/lib.rs
new file mode 100644
index 0000000..f417ef2
--- /dev/null
+++ b/s3_dsa/p01/src/lib/polyphase/src/lib.rs
@@ -0,0 +1,174 @@
+/// Number of bytes requested from the input file per chunk.
+const CHUNK_SIZE: usize = 4096;
+
+/// Returns `true` when `a` sorts strictly before `b`: `a < b`, or `a > b` if `reverse` is set.
+fn comparator<T>(a: T, b: T, reverse: bool) -> bool
+where
+    T: Ord,
+{
+    if reverse {
+        a > b
+    } else {
+        a < b
+    }
+}
+
+/// Reads `file` to the end and hands its contents to `callback` in chunks of `chunk_size` bytes.
+///
+/// One buffer is reused for all chunks, and every chunk except possibly the last is exactly
+/// `chunk_size` bytes long.
+///
+/// # Panics
+/// Panics if reading from `file` fails.
+fn process_file<F>(file: std::fs::File, chunk_size: usize, mut callback: F)
+where
+    F: FnMut(&[u8]),
+{
+    let mut reader = std::io::BufReader::with_capacity(chunk_size, file);
+    let mut buffer = vec![0; chunk_size].into_boxed_slice();
+    loop {
+        // A single `read` may return fewer bytes than requested, so keep reading until the
+        // buffer is full or the end of the file is reached.
+        let mut length = 0;
+        while length < buffer.len() {
+            match std::io::Read::read(&mut reader, &mut buffer[length..]).unwrap() {
+                0 => break,
+                count => length += count,
+            }
+        }
+        if length == 0 {
+            break;
+        }
+        callback(&buffer[..length]);
+    }
+}
+
+/// Splits `data` into runs and appends them to `runs`.
+///
+/// Without the `init_merge` feature every value becomes its own single-element run and `reverse`
+/// is ignored. With the feature, a value is appended to the last run while `comparator` accepts
+/// the pair (previous, current); otherwise it starts a new run.
+fn process_chunk(runs: &mut Vec<Vec<i32>>, data: &[i32], reverse: bool) {
+    #[cfg(not(feature = "init_merge"))]
+    {
+        // The order is irrelevant for single-element runs; consume the flag to avoid a warning.
+        let _ = reverse;
+        runs.extend(data.iter().map(|&curr| vec![curr]));
+    }
+
+    #[cfg(feature = "init_merge")]
+    for &curr in data {
+        match runs.last_mut() {
+            // An empty run accepts any value; a non-empty one only a value that keeps it ordered.
+            Some(run) if run.last().map_or(true, |&prev| comparator(prev, curr, reverse)) => run.push(curr),
+            _ => runs.push(vec![curr]),
+        }
+    }
+}
+
+/// A tape: a sequence of runs, each run being a sequence of values.
+type Tape<T> = Vec<Vec<T>>;
+
+/// Moves the runs of `series` onto `tapes` as prescribed by `dist`.
+///
+/// Tape `i` receives `dist[i]` runs taken from `series` in order; once `series` is exhausted,
+/// empty dummy runs are appended instead. Every entry of `dist` is reset to zero, and runs of
+/// `series` beyond the sum of `dist` are dropped.
+///
+/// # Panics
+/// Panics if a non-zero entry of `dist` has no matching tape.
+fn write_to_tape<T>(series: Tape<T>, dist: &mut [usize], tapes: &mut [Tape<T>]) {
+    let mut runs = series.into_iter();
+    for (i, fib) in dist.iter_mut().enumerate() {
+        let count = std::mem::take(fib);
+        if count == 0 {
+            continue;
+        }
+        // An exhausted iterator keeps returning `None`, which becomes an empty dummy run.
+        tapes[i].extend((0..count).map(|_| runs.next().unwrap_or_default()));
+    }
+}
+
+/// Prepares the tapes for a polyphase merge sort of the native-endian `i32` values in `input_path`.
+///
+/// The input is split into initial runs, the runs are distributed across the tapes, and each
+/// intermediate result is logged at debug level. The merge phase is not implemented yet, so
+/// nothing is written to `output_path`.
+///
+/// # Panics
+/// Panics if the input file cannot be opened or read, if a chunk cannot be reinterpreted as `i32`
+/// values (for example when the file size is not a multiple of four bytes), or if `tape_count` is
+/// too small for `nacci::get_dist`.
+///
+/// @see: https://en.wikipedia.org/wiki/Polyphase_merge_sort
+pub fn sort(input_path: &std::path::Path, output_path: &std::path::Path, reverse: bool, tape_count: usize) {
+    // TODO: merge the tapes and write the sorted result to `output_path`.
+    let _ = output_path;
+
+    if tape_count >= 8 {
+        log::warn!("balanced_merge_sort() may perform better at 8 or more tapes");
+    }
+
+    let mut series: Tape<i32> = vec![
+        #[cfg(feature = "init_merge")]
+        vec![],
+    ];
+
+    let input_file = std::fs::File::open(input_path).expect("failed to open the input file");
+    process_file(input_file, CHUNK_SIZE, |buffer| {
+        // NOTE(review): the cast also requires `buffer` to be aligned for `i32` -- confirm the byte
+        // buffer allocated by `process_file` always satisfies that.
+        let data: &[i32] = bytemuck::try_cast_slice(buffer).expect("input chunk cannot be viewed as i32 values");
+        process_chunk(&mut series, data, reverse);
+    });
+    log::debug!("series={series:?}");
+
+    let mut dist = nacci::get_dist(series.len(), tape_count);
+    log::debug!("dist={dist:?}");
+
+    let mut tapes: Vec<Tape<i32>> = vec![vec![]; tape_count];
+    write_to_tape(series, &mut dist, &mut tapes);
+    log::debug!("tapes={tapes:?}");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn add_dummy_series() {
+        let mut dist = vec![1, 2, 4];
+        let series: Tape<i32> = vec![vec![6], vec![3, 20], vec![15], vec![13], vec![8, 10, 17], vec![1]];
+        let tape_count: usize = 4;
+
+        let mut tapes: Vec<Tape<i32>> = vec![vec![]; tape_count];
+        write_to_tape(series, &mut dist, &mut tapes);
+        assert_eq!(
+            tapes,
+            vec![
+                vec![vec![6]],
+                vec![vec![3, 20], vec![15]],
+                vec![vec![13], vec![8, 10, 17], vec![1], vec![]],
+                vec![],
+            ]
+        );
+        assert_eq!(dist, vec![0, 0, 0]);
+    }
+
+    #[test]
+    fn get_series() {
+        let mut series: Tape<i32> = vec![
+            #[cfg(feature = "init_merge")]
+            vec![],
+        ];
+
+        let data: &[i32] = &[5, 4, 3, 7, 6, 9];
+        process_chunk(&mut series, data, true);
+
+        #[cfg(feature = "init_merge")]
+        assert_eq!(series, vec![vec![5, 4, 3], vec![7, 6], vec![9]]);
+
+        #[cfg(not(feature = "init_merge"))]
+        assert_eq!(series, vec![vec![5], vec![4], vec![3], vec![7], vec![6], vec![9]]);
+    }
+}