diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 341ffd5a5..a4d185192 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -148,6 +148,7 @@ "MinimumTardinessSequencing": [Minimum Tardiness Sequencing], "MultipleChoiceBranching": [Multiple Choice Branching], "MultipleCopyFileAllocation": [Multiple Copy File Allocation], + "ExpectedRetrievalCost": [Expected Retrieval Cost], "MultiprocessorScheduling": [Multiprocessor Scheduling], "PartitionIntoPathsOfLength2": [Partition into Paths of Length 2], "PartitionIntoTriangles": [Partition Into Triangles], @@ -2460,6 +2461,45 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], ] } +#{ + let x = load-model-example("ExpectedRetrievalCost") + let K = x.instance.bound + [ + #problem-def("ExpectedRetrievalCost")[ + Given a set $R = {r_1, dots, r_n}$ of records, access probabilities $p(r) in [0, 1]$ with $sum_(r in R) p(r) = 1$, a positive integer $m$ of circular storage sectors, and a bound $K$, determine whether there exists a partition $R_1, dots, R_m$ of $R$ such that + $sum_(i=1)^m sum_(j=1)^m p(R_i) p(R_j) d(i, j) <= K,$ + where $p(R_i) = sum_(r in R_i) p(r)$ and + $d(i, j) = j - i - 1$ for $1 <= i < j <= m$, while $d(i, j) = m - i + j - 1$ for $1 <= j <= i <= m$. + ][ + Expected Retrieval Cost is storage-and-retrieval problem SR4 in Garey and Johnson @garey1979. The model abstracts a drum-like storage device with fixed read heads: placing probability mass evenly around the cycle reduces the expected waiting time until the next requested sector rotates under the head. Cody and Coffman introduced the formulation and analyzed exact and heuristic record-allocation algorithms for fixed numbers of sectors @codycoffman1976. Garey and Johnson record that the general decision problem is NP-complete in the strong sense via transformations from Partition and 3-Partition @garey1979. 
The implementation in this repository uses one $m$-ary variable per record, so the registered exact baseline enumerates $m^n$ assignments. For practicality, the code stores the probabilities and bound as floating-point values even though the book states $K$ as an integer. + + *Example.* Take six records with probabilities $(0.2, 0.15, 0.15, 0.2, 0.1, 0.2)$, three sectors, and $K = #K$. Assign + $R_1 = {r_1, r_5}$, $R_2 = {r_2, r_4}$, and $R_3 = {r_3, r_6}$. + Then the sector masses are $(p(R_1), p(R_2), p(R_3)) = (0.3, 0.35, 0.35)$. + For $m = 3$, the non-zero latencies are $d(1, 1) = d(2, 2) = d(3, 3) = 2$, $d(1, 3) = d(2, 1) = d(3, 2) = 1$, and the remaining pairs contribute 0. Hence the expected retrieval cost is $1.0025 <= #K$, so the allocation is satisfying. + + #pred-commands( + "pred create --example ExpectedRetrievalCost -o expected-retrieval-cost.json", + "pred solve expected-retrieval-cost.json --solver brute-force", + "pred evaluate expected-retrieval-cost.json --config " + x.optimal_config.map(str).join(","), + ) + + #figure( + table( + columns: 3, + inset: 6pt, + stroke: 0.5pt + luma(180), + [Sector], [Records], [Mass], + [$S_1$], [$r_1, r_5$], [$0.3$], + [$S_2$], [$r_2, r_4$], [$0.35$], + [$S_3$], [$r_3, r_6$], [$0.35$], + ), + caption: [Expected Retrieval Cost example with cyclic sector order $S_1 -> S_2 -> S_3 -> S_1$. The satisfying allocation yields masses $(0.3, 0.35, 0.35)$ and total cost $1.0025$.], + ) + ] + ] +} + == Set Problems #{ diff --git a/docs/paper/references.bib b/docs/paper/references.bib index d2b4b3c87..62192cb04 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -1116,6 +1116,17 @@ @article{coffman1972 doi = {10.1007/BF00288685} } +@article{codycoffman1976, + author = {R. A. Cody and E. G. 
Coffman, Jr.}, + title = {Record Allocation for Minimizing Expected Retrieval Costs on Drum-Like Storage Devices}, + journal = {Journal of the ACM}, + volume = {23}, + number = {1}, + pages = {103--115}, + year = {1976}, + doi = {10.1145/321921.321933} +} + @inproceedings{cordella2004, author = {Luigi P. Cordella and Pasquale Foggia and Carlo Sansone and Mario Vento}, title = {A (Sub)Graph Isomorphism Algorithm for Matching Large Graphs}, diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index efb9876ae..4c968e141 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -249,6 +249,7 @@ Flags by problem type: CapacityAssignment --capacities, --cost-matrix, --delay-matrix, --cost-budget, --delay-budget SubsetSum --sizes, --target SumOfSquaresPartition --sizes, --num-groups, --bound + ExpectedRetrievalCost --probabilities, --num-sectors, --latency-bound PaintShop --sequence MaximumSetPacking --sets [--weights] MinimumHittingSet --universe, --sets @@ -474,6 +475,9 @@ pub struct CreateArgs { /// Item sizes for BinPacking (comma-separated, e.g., "3,3,2,2") #[arg(long)] pub sizes: Option<String>, + /// Record access probabilities for ExpectedRetrievalCost (comma-separated, e.g., "0.2,0.15,0.15,0.2,0.1,0.2") + #[arg(long)] + pub probabilities: Option<String>, /// Bin capacity for BinPacking #[arg(long)] pub capacity: Option<usize>, @@ -546,6 +550,9 @@ /// Bound parameter (lower bound for LongestCircuit; upper or length bound for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, LongestCommonSubsequence, MultipleCopyFileAllocation, MultipleChoiceBranching, OptimalLinearArrangement, RootedTreeArrangement, RuralPostman, ShortestCommonSupersequence, or StringToStringCorrection) #[arg(long, allow_hyphen_values = true)] pub bound: Option<i64>, + /// Upper bound on expected retrieval latency for ExpectedRetrievalCost + #[arg(long)] + pub latency_bound: Option<f64>, /// Upper bound on total path length #[arg(long)] 
pub length_bound: Option<usize>, @@ -703,6 +710,9 @@ pub struct CreateArgs { /// Number of groups for SumOfSquaresPartition #[arg(long)] pub num_groups: Option<usize>, + /// Number of sectors for ExpectedRetrievalCost + #[arg(long)] + pub num_sectors: Option<usize>, /// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3") #[arg(long)] pub source_string: Option<String>, diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 230b5cb28..cc0cd8c0f 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -21,13 +21,14 @@ use problemreductions::models::graph::{ use problemreductions::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation, - FlowShopScheduling, FrequencyTable, KnownValue, LongestCommonSubsequence, - MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, - QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StringToStringCorrection, SubsetSum, SumOfSquaresPartition, TimetableDesign, + ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, KnownValue, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, 
ShortestCommonSupersequence, StringToStringCorrection, SubsetSum, + SumOfSquaresPartition, TimetableDesign, }; use problemreductions::models::BiconnectivityAugmentation; use problemreductions::prelude::*; @@ -43,6 +44,10 @@ const MULTIPLE_COPY_FILE_ALLOCATION_EXAMPLE_ARGS: &str = "--graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8"; const MULTIPLE_COPY_FILE_ALLOCATION_USAGE: &str = "Usage: pred create MultipleCopyFileAllocation --graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8"; +const EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS: &str = + "--probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01"; +const EXPECTED_RETRIEVAL_COST_USAGE: &str = + "Usage: pred create ExpectedRetrievalCost --probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01"; /// Check if all data flags are None (no problem-specific input provided). fn all_data_flags_empty(args: &CreateArgs) -> bool { @@ -85,6 +90,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.requirement_2.is_none() && args.requirement.is_none() && args.sizes.is_none() + && args.probabilities.is_none() && args.capacity.is_none() && args.sequence.is_none() && args.sets.is_none() @@ -110,6 +116,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.tree.is_none() && args.required_edges.is_none() && args.bound.is_none() + && args.latency_bound.is_none() && args.length_bound.is_none() && args.weight_bound.is_none() && args.cost_bound.is_none() @@ -152,6 +159,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.task_avail.is_none() && args.alphabet_size.is_none() && args.num_groups.is_none() + && args.num_sectors.is_none() && args.dependencies.is_none() && args.num_attributes.is_none() && args.source_string.is_none() @@ -606,6 +614,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { } "MultiprocessorScheduling" => "--lengths 4,5,3,2,6 --num-processors 2 --deadline 10", "MinimumMultiwayCut" => "--graph 
0-1,1-2,2-3 --terminals 0,2 --edge-weights 1,1,1", + "ExpectedRetrievalCost" => EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS, "SequencingWithinIntervals" => "--release-times 0,0,5 --deadlines 11,11,6 --lengths 3,1,1", "StaffScheduling" => { "--schedules \"1,1,1,1,1,0,0;0,1,1,1,1,1,0;0,0,1,1,1,1,1;1,0,0,1,1,1,1;1,1,0,0,1,1,1\" --requirements 2,2,2,3,3,2,1 --num-workers 4 --k 5" @@ -1510,6 +1519,59 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // ExpectedRetrievalCost (probabilities + sectors + latency bound) + "ExpectedRetrievalCost" => { + let probabilities_str = args.probabilities.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "ExpectedRetrievalCost requires --probabilities\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ) + })?; + let probabilities: Vec<f64> = util::parse_comma_list(probabilities_str) + .map_err(|e| anyhow::anyhow!("{e}\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"))?; + anyhow::ensure!( + !probabilities.is_empty(), + "ExpectedRetrievalCost requires at least one probability\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ); + anyhow::ensure!( + probabilities.iter().all(|p| p.is_finite() && (0.0..=1.0).contains(p)), + "ExpectedRetrievalCost probabilities must be finite values in [0, 1]\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ); + let total_probability: f64 = probabilities.iter().sum(); + anyhow::ensure!( + (total_probability - 1.0).abs() <= 1e-9, + "ExpectedRetrievalCost probabilities must sum to 1.0\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ); + + let num_sectors = args.num_sectors.ok_or_else(|| { + anyhow::anyhow!( + "ExpectedRetrievalCost requires --num-sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ) + })?; + anyhow::ensure!( + num_sectors >= 2, + "ExpectedRetrievalCost requires at least two sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ); + + let latency_bound = args.latency_bound.ok_or_else(|| { + anyhow::anyhow!( + "ExpectedRetrievalCost requires --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ) + })?; + anyhow::ensure!( + latency_bound.is_finite() && 
latency_bound >= 0.0, + "ExpectedRetrievalCost requires a finite non-negative --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}" + ); + + ( + ser(ExpectedRetrievalCost::new( + probabilities, + num_sectors, + latency_bound, + ))?, + resolved_variant.clone(), + ) + } + // UndirectedFlowLowerBounds (graph + capacities + lower bounds + terminals + requirement) "UndirectedFlowLowerBounds" => { let usage = "Usage: pred create UndirectedFlowLowerBounds --graph 0-1,0-2,1-3,2-3,1-4,3-5,4-5 --capacities 2,2,2,2,1,3,2 --lower-bounds 1,1,0,0,1,0,1 --source 0 --sink 5 --requirement 3"; @@ -7049,6 +7111,7 @@ mod tests { requirement_1: None, requirement_2: None, sizes: None, + probabilities: None, capacity: None, sequence: None, sets: None, @@ -7073,6 +7136,7 @@ mod tests { tree: None, required_edges: None, bound: None, + latency_bound: None, length_bound: None, weight_bound: None, cost_bound: None, @@ -7111,6 +7175,7 @@ mod tests { craftsman_avail: None, task_avail: None, num_groups: None, + num_sectors: None, domain_size: None, relations: None, conjuncts_spec: None, @@ -7375,6 +7440,61 @@ mod tests { std::fs::remove_file(output_path).ok(); } + #[test] + fn test_create_expected_retrieval_cost_json() { + use crate::dispatch::ProblemJsonOutput; + use problemreductions::models::misc::ExpectedRetrievalCost; + + let mut args = empty_args(); + args.problem = Some("ExpectedRetrievalCost".to_string()); + args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".to_string()); + args.num_sectors = Some(3); + args.latency_bound = Some(1.01); + + let output_path = std::env::temp_dir().join(format!( + "expected-retrieval-cost-{}.json", + std::process::id() + )); + let out = OutputConfig { + output: Some(output_path.clone()), + quiet: true, + json: false, + auto_json: false, + }; + + create(&args, &out).unwrap(); + + let json = std::fs::read_to_string(&output_path).unwrap(); + let created: ProblemJsonOutput = serde_json::from_str(&json).unwrap(); + assert_eq!(created.problem_type, 
"ExpectedRetrievalCost"); + + let problem: ExpectedRetrievalCost = serde_json::from_value(created.data).unwrap(); + assert_eq!(problem.num_records(), 6); + assert_eq!(problem.num_sectors(), 3); + assert!(problem.evaluate(&[0, 1, 2, 1, 0, 2])); + + let _ = std::fs::remove_file(output_path); + } + + #[test] + fn test_create_expected_retrieval_cost_requires_latency_bound() { + let mut args = empty_args(); + args.problem = Some("ExpectedRetrievalCost".to_string()); + args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".to_string()); + args.num_sectors = Some(3); + args.latency_bound = None; + + let out = OutputConfig { + output: None, + quiet: true, + json: false, + auto_json: false, + }; + + let err = create(&args, &out).unwrap_err().to_string(); + assert!(err.contains("ExpectedRetrievalCost requires --latency-bound")); + } + #[test] fn test_create_stacker_crane_json() { let mut args = empty_args(); diff --git a/src/lib.rs b/src/lib.rs index 29db0f87f..a91083181 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,9 +69,9 @@ pub mod prelude { pub use crate::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables, - EnsembleComputation, Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence, - MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, + EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, Knapsack, + LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, + Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, 
diff --git a/src/models/misc/expected_retrieval_cost.rs b/src/models/misc/expected_retrieval_cost.rs new file mode 100644 index 000000000..640885691 --- /dev/null +++ b/src/models/misc/expected_retrieval_cost.rs @@ -0,0 +1,174 @@ +//! Expected Retrieval Cost problem implementation. +//! +//! Given record access probabilities, decide whether records can be assigned to +//! circular storage sectors so the expected rotational latency stays below a +//! prescribed bound. + +use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; + +const FLOAT_TOLERANCE: f64 = 1e-9; + +inventory::submit! { + ProblemSchemaEntry { + name: "ExpectedRetrievalCost", + display_name: "Expected Retrieval Cost", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Assign records to circular storage sectors so the expected retrieval latency stays within a bound", + fields: &[ + FieldInfo { name: "probabilities", type_name: "Vec<f64>", description: "Access probabilities p(r) for each record" }, + FieldInfo { name: "num_sectors", type_name: "usize", description: "Number of sectors on the drum-like device" }, + FieldInfo { name: "bound", type_name: "f64", description: "Upper bound K on the expected retrieval cost" }, + ], + } +} + +inventory::submit! 
{ + ProblemSizeFieldEntry { + name: "ExpectedRetrievalCost", + fields: &["num_records", "num_sectors"], + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExpectedRetrievalCost { + probabilities: Vec<f64>, + num_sectors: usize, + bound: f64, +} + +impl ExpectedRetrievalCost { + pub fn new(probabilities: Vec<f64>, num_sectors: usize, bound: f64) -> Self { + assert!( + !probabilities.is_empty(), + "ExpectedRetrievalCost requires at least one record" + ); + assert!( + num_sectors >= 2, + "ExpectedRetrievalCost requires at least two sectors" + ); + assert!(bound.is_finite(), "bound must be finite"); + assert!(bound >= 0.0, "bound must be non-negative"); + for &probability in &probabilities { + assert!( + probability.is_finite(), + "probabilities must be finite real numbers" + ); + assert!( + (0.0..=1.0).contains(&probability), + "probabilities must lie in [0, 1]" + ); + } + let total_probability: f64 = probabilities.iter().sum(); + assert!( + (total_probability - 1.0).abs() <= FLOAT_TOLERANCE, + "probabilities must sum to 1.0" + ); + Self { + probabilities, + num_sectors, + bound, + } + } + + pub fn probabilities(&self) -> &[f64] { + &self.probabilities + } + + pub fn num_records(&self) -> usize { + self.probabilities.len() + } + + pub fn num_sectors(&self) -> usize { + self.num_sectors + } + + pub fn bound(&self) -> f64 { + self.bound + } + + pub fn sector_masses(&self, config: &[usize]) -> Option<Vec<f64>> { + if config.len() != self.num_records() { + return None; + } + + let mut masses = vec![0.0; self.num_sectors]; + for (record, &sector) in config.iter().enumerate() { + if sector >= self.num_sectors { + return None; + } + masses[sector] += self.probabilities[record]; + } + Some(masses) + } + + pub fn expected_cost(&self, config: &[usize]) -> Option<f64> { + let masses = self.sector_masses(config)?; + let mut total = 0.0; + for source in 0..self.num_sectors { + for target in 0..self.num_sectors { + total += masses[source] + * masses[target] + * 
latency_distance(self.num_sectors, source, target) as f64; + } + } + Some(total) + } + + pub fn is_valid_solution(&self, config: &[usize]) -> bool { + self.expected_cost(config) + .is_some_and(|cost| cost <= self.bound + FLOAT_TOLERANCE) + } +} + +impl Problem for ExpectedRetrievalCost { + const NAME: &'static str = "ExpectedRetrievalCost"; + type Metric = bool; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec<usize> { + vec![self.num_sectors; self.num_records()] + } + + fn evaluate(&self, config: &[usize]) -> bool { + self.is_valid_solution(config) + } +} + +impl SatisfactionProblem for ExpectedRetrievalCost {} + +fn latency_distance(num_sectors: usize, source: usize, target: usize) -> usize { + if source < target { + target - source - 1 + } else { + num_sectors - source + target - 1 + } +} + +crate::declare_variants! { + default sat ExpectedRetrievalCost => "num_sectors ^ num_records", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> { + vec![crate::example_db::specs::ModelExampleSpec { + id: "expected_retrieval_cost", + instance: Box::new(ExpectedRetrievalCost::new( + vec![0.2, 0.15, 0.15, 0.2, 0.1, 0.2], + 3, + 1.01, + )), + optimal_config: vec![0, 1, 2, 1, 0, 2], + optimal_value: serde_json::json!(true), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/expected_retrieval_cost.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 33b0aa36f..38ae5af4f 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -7,6 +7,7 @@ //! - [`ConsistencyOfDatabaseFrequencyTables`]: Pairwise frequency-table consistency //! - [`ConjunctiveBooleanQuery`]: Evaluate a conjunctive Boolean query over relations //! - [`ConjunctiveQueryFoldability`]: Conjunctive Query Foldability +//! - [`ExpectedRetrievalCost`]: Allocate records to circular sectors within a latency bound //! - [`Factoring`]: Integer factorization //! 
- [`FlowShopScheduling`]: Flow Shop Scheduling (meet deadline on m processors) //! - [`Knapsack`]: 0-1 Knapsack (maximize value subject to weight capacity) @@ -40,6 +41,7 @@ pub(crate) mod conjunctive_boolean_query; pub(crate) mod conjunctive_query_foldability; mod consistency_of_database_frequency_tables; mod ensemble_computation; +pub(crate) mod expected_retrieval_cost; pub(crate) mod factoring; mod flow_shop_scheduling; mod knapsack; @@ -76,6 +78,7 @@ pub use consistency_of_database_frequency_tables::{ ConsistencyOfDatabaseFrequencyTables, FrequencyTable, KnownValue, }; pub use ensemble_computation::EnsembleComputation; +pub use expected_retrieval_cost::ExpectedRetrievalCost; pub use factoring::Factoring; pub use flow_shop_scheduling::FlowShopScheduling; pub use knapsack::Knapsack; @@ -111,6 +114,7 @@ pub(crate) fn canonical_model_example_specs() -> Vec ExpectedRetrievalCost { + ExpectedRetrievalCost::new(vec![0.2, 0.15, 0.15, 0.2, 0.1, 0.2], 3, 1.01) +} + +fn no_problem() -> ExpectedRetrievalCost { + ExpectedRetrievalCost::new(vec![0.5, 0.1, 0.1, 0.1, 0.1, 0.1], 3, 0.5) +} + +#[test] +fn test_expected_retrieval_cost_basic_accessors() { + let problem = yes_problem(); + assert_eq!(problem.num_records(), 6); + assert_eq!(problem.num_sectors(), 3); + assert_eq!(problem.probabilities(), &[0.2, 0.15, 0.15, 0.2, 0.1, 0.2]); + assert!((problem.bound() - 1.01).abs() < EPS); + assert_eq!(problem.dims(), vec![3; 6]); + assert_eq!(problem.num_variables(), 6); +} + +#[test] +fn test_expected_retrieval_cost_sector_masses_and_cost() { + let problem = yes_problem(); + let config = [0, 1, 2, 1, 0, 2]; + let masses = problem.sector_masses(&config).unwrap(); + assert_eq!(masses.len(), 3); + assert!((masses[0] - 0.3).abs() < EPS); + assert!((masses[1] - 0.35).abs() < EPS); + assert!((masses[2] - 0.35).abs() < EPS); + + let cost = problem.expected_cost(&config).unwrap(); + assert!((cost - 1.0025).abs() < EPS); +} + +#[test] +fn 
test_expected_retrieval_cost_evaluate_yes_and_no_instances() { + let yes = yes_problem(); + assert!(yes.evaluate(&[0, 1, 2, 1, 0, 2])); + assert!(yes.is_valid_solution(&[0, 1, 2, 1, 0, 2])); + + let no = no_problem(); + assert!(!no.evaluate(&[0, 1, 1, 1, 2, 2])); + assert!(!no.is_valid_solution(&[0, 1, 1, 1, 2, 2])); + let no_cost = no.expected_cost(&[0, 1, 1, 1, 2, 2]).unwrap(); + assert!((no_cost - 1.07).abs() < EPS); +} + +#[test] +fn test_expected_retrieval_cost_rejects_invalid_configs() { + let problem = yes_problem(); + assert_eq!(problem.sector_masses(&[0, 1, 2]), None); + assert_eq!(problem.expected_cost(&[0, 1, 2]), None); + assert!(!problem.evaluate(&[0, 1, 2])); + + assert_eq!(problem.sector_masses(&[0, 1, 2, 1, 0, 3]), None); + assert_eq!(problem.expected_cost(&[0, 1, 2, 1, 0, 3]), None); + assert!(!problem.evaluate(&[0, 1, 2, 1, 0, 3])); +} + +#[test] +fn test_expected_retrieval_cost_solver_finds_satisfying_assignment() { + let problem = yes_problem(); + let solver = BruteForce::new(); + let solution = solver.find_satisfying(&problem).unwrap(); + assert!(problem.evaluate(&solution)); +} + +#[test] +fn test_expected_retrieval_cost_paper_example() { + let problem = yes_problem(); + let config = [0, 1, 2, 1, 0, 2]; + assert!(problem.evaluate(&config)); + + let solver = BruteForce::new(); + let satisfying = solver.find_all_satisfying(&problem); + assert_eq!(satisfying.len(), 54); +} + +#[test] +fn test_expected_retrieval_cost_serialization() { + let problem = yes_problem(); + let json = serde_json::to_value(&problem).unwrap(); + let restored: ExpectedRetrievalCost = serde_json::from_value(json).unwrap(); + assert_eq!(restored.probabilities(), problem.probabilities()); + assert_eq!(restored.num_sectors(), problem.num_sectors()); + assert!((restored.bound() - problem.bound()).abs() < EPS); +}