diff --git a/src/driver/mod.rs b/src/driver/mod.rs index c952b54f..0752c62d 100644 --- a/src/driver/mod.rs +++ b/src/driver/mod.rs @@ -38,7 +38,8 @@ use chumsky::container::Container; use crate::error::{Error, ErrorCollector, RichError, Span}; use crate::parse::{self, ParseFromStrWithErrors}; -use crate::resolution::{CanonPath, DependencyMap, SourceFile}; +use crate::resolution::DependencyMap; +use crate::source::{CanonPath, SourceFile}; pub use crate::driver::resolve_order::{FileScoped, Program, SymbolTable}; @@ -182,12 +183,12 @@ impl DependencyGraph { /// This function will return an `Err(String)` only for critical internal compiler errors /// (e.g., if a provided `SourceFile` is unexpectedly missing its underlying file path). pub fn new( - root_source: SourceFile, + root_source: CanonSourceFile, dependency_map: Arc, root_program: &parse::Program, handler: &mut ErrorCollector, ) -> Result, String> { - let root_canon_source = CanonSourceFile::try_from(root_source)?; + let root_canon_source = root_source; let mut graph = Self { modules: vec![Module { @@ -389,15 +390,11 @@ pub(crate) mod tests { let lib_dir = canon(&ws.create_dir("workspace/libs/lib")); // Set up the dependency map for imports (e.g. 
`use lib::...`) - let mut map = DependencyMap::new(); - map.insert(workspace_dir.clone(), "lib".to_string(), lib_dir.clone()) - .expect("Failed to insert dependency map"); - - // Register the strict crate boundaries so local files are forced to use `crate::` - map.insert(workspace_dir.clone(), CRATE_STR.to_string(), workspace_dir) - .expect("Failed to insert workspace crate boundary"); - map.insert(lib_dir.clone(), CRATE_STR.to_string(), lib_dir) - .expect("Failed to insert library crate boundary"); + let map = crate::resolution::DependencyMapBuilder::new() + .with_entry_root(workspace_dir.clone()) + .add_dependency(workspace_dir.clone(), "lib".to_string(), lib_dir.clone()) + .build() + .expect("Failed to create dependency map"); let map = Arc::new(map); let mut root_file_path = None; @@ -416,7 +413,7 @@ pub(crate) mod tests { let root_p = root_file_path.expect("main.simf must be defined in file list"); let main_canon_source = CanonSourceFile::new(root_p, Arc::from(root_content)); - let main_source = SourceFile::from(main_canon_source); + let main_source = SourceFile::from(main_canon_source.clone()); let main_program_option = parse::Program::parse_from_str_with_errors(main_source.clone(), &mut handler); @@ -426,7 +423,7 @@ pub(crate) mod tests { }; let graph_option = - DependencyGraph::new(main_source, map, &main_program, &mut handler).unwrap(); + DependencyGraph::new(main_canon_source, map, &main_program, &mut handler).unwrap(); let mut file_ids = HashMap::new(); @@ -648,4 +645,19 @@ pub(crate) mod tests { .map_or(true, |deps| deps.is_empty()); assert!(b_has_no_deps, "B depends on nothing"); } + + #[test] + fn test_canon_source_file_rejects_anonymous() { + let anonymous_source = SourceFile::anonymous(Arc::from("fn main() {}")); + let result = CanonSourceFile::try_from(anonymous_source); + + assert!( + result.is_err(), + "CanonSourceFile must explicitly reject anonymous files" + ); + assert_eq!( + result.unwrap_err(), + "Cannot canonicalize the SourceFile because 
it is missing a file name." + ); + } } diff --git a/src/error.rs b/src/error.rs index bbd36836..adc6e161 100644 --- a/src/error.rs +++ b/src/error.rs @@ -15,7 +15,7 @@ use simplicity::elements; use crate::lexer::Token; use crate::parse::MatchPattern; -use crate::resolution::SourceFile; +use crate::source::SourceFile; use crate::str::{AliasName, FunctionName, Identifier, JetName, ModuleName, WitnessName}; use crate::types::{ResolvedType, UIntType}; @@ -474,6 +474,11 @@ impl fmt::Display for ErrorCollector { /// Records _what_ happened but not where. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub enum Error { + DependencyPathNotFound(String), + DependencyNotADirectory(String), + ReservedDependencyKeyword(String), + DuplicateDependencyAlias(String, String), + InvalidDependencyIdentifier(String), Internal(String), UnknownLibrary(String), ArraySizeNonZero(usize), @@ -533,6 +538,11 @@ pub enum Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Error::DependencyPathNotFound(path) => write!(f, "Path not found: {}", path), + Error::DependencyNotADirectory(path) => write!(f, "Path must be a directory: {}", path), + Error::ReservedDependencyKeyword(kw) => write!(f, "The '{}' keyword is reserved and cannot be manually mapped. 
Use the builder's context definitions instead.", kw), + Error::DuplicateDependencyAlias(alias, context) => write!(f, "Duplicate dependency mapping: alias '{}' is defined multiple times for context '{}'", alias, context), + Error::InvalidDependencyIdentifier(alias) => write!(f, "Invalid dependency alias '{}': must be a valid identifier and not a reserved keyword", alias), Error::Internal(err) => write!( f, "INTERNAL ERROR: {err}" diff --git a/src/lexer.rs b/src/lexer.rs index 94f7a8d0..f7505111 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -255,24 +255,14 @@ pub fn lex<'src>(input: &'src str) -> (Option>, Vec bool { - matches!( - s, - "pub" - | "use" - | "as" - | "fn" - | "let" - | "type" - | "mod" - | "const" - | "match" - | CRATE_STR - | "true" - | "false" - ) + KEYWORDS.contains(&s) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 0f0d2e00..275ef744 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,8 @@ pub mod num; pub mod parse; pub mod pattern; pub mod resolution; +pub mod source; + #[cfg(feature = "serde")] mod serde; pub mod str; @@ -36,10 +38,12 @@ pub extern crate simplicity; pub use simplicity::elements; use crate::debug::DebugSymbols; +use crate::driver::CanonSourceFile; use crate::driver::DependencyGraph; use crate::error::{ErrorCollector, WithContent, WithSource as _}; use crate::parse::ParseFromStrWithErrors; -use crate::resolution::{DependencyMap, SourceFile}; +use crate::resolution::DependencyMap; +use crate::source::SourceFile; pub use crate::types::ResolvedType; pub use crate::value::Value; pub use crate::witness::{Arguments, Parameters, WitnessTypes, WitnessValues}; @@ -61,19 +65,20 @@ impl TemplateProgram { /// /// The string is not a valid SimplicityHL program. pub fn new_with_dep( - source: SourceFile, + source: CanonSourceFile, dependency_map: &DependencyMap, ) -> Result { let mut error_handler = ErrorCollector::new(); + let source_file = SourceFile::from(source.clone()); // 1. 
Parse root file let parsed_program = - parse::Program::parse_from_str_with_errors(source.clone(), &mut error_handler) + parse::Program::parse_from_str_with_errors(source_file.clone(), &mut error_handler) .ok_or_else(|| error_handler.to_string())?; // 2. Create the driver program let driver_program: driver::Program = if dependency_map.is_empty() { - driver::Program::from_parse(&parsed_program, source.content(), &mut error_handler) + driver::Program::from_parse(&parsed_program, source_file.content(), &mut error_handler) .ok_or_else(|| error_handler.to_string())? } else { let graph = DependencyGraph::new( @@ -90,7 +95,7 @@ impl TemplateProgram { }; // 3. AST Analysis - let ast_program = ast::Program::analyze(&driver_program).with_source(source.clone())?; + let ast_program = ast::Program::analyze(&driver_program).with_source(source_file)?; Ok(Self { simfony: ast_program, file: source.content(), @@ -188,7 +193,7 @@ impl CompiledProgram { /// - [`TemplateProgram::new_with_dep`] /// - [`TemplateProgram::instantiate`] pub fn new_with_dep( - source: SourceFile, + source: CanonSourceFile, dependency_map: &DependencyMap, arguments: Arguments, include_debug_symbols: bool, @@ -422,7 +427,10 @@ pub(crate) mod tests { pub fn template_deps(prog_path: &Path, dependency_map: &DependencyMap) -> Self { let program_text = std::fs::read_to_string(prog_path).unwrap(); - let source = SourceFile::new(prog_path, Arc::from(program_text)); + let source = CanonSourceFile::new( + crate::source::CanonPath::canonicalize(prog_path).unwrap(), + Arc::from(program_text), + ); let program = match TemplateProgram::new_with_dep(source, dependency_map) { Ok(x) => x, @@ -494,33 +502,22 @@ pub(crate) mod tests { I: IntoIterator, K: Into, { - let mut dependency_map = DependencyMap::new(); + let mut builder = crate::resolution::DependencyMapBuilder::new(); if let Some(parent) = prog_path.as_ref().parent() { let canon_root = crate::resolution::tests::canon(parent); - let _ = dependency_map.insert( - 
canon_root.clone(), - crate::driver::CRATE_STR.to_string(), - canon_root, - ); + builder = builder.with_entry_root(canon_root); } for (context, alias, target) in dependencies { let context = crate::resolution::tests::canon(context.as_ref()); let target = crate::resolution::tests::canon(target.as_ref()); - dependency_map - .insert(context.clone(), alias.into(), target.clone()) - .unwrap(); - - // Treat each mapped dependency as an isolated external package to satisfy strict local-file checks - let _ = dependency_map.insert( - target.clone(), - crate::driver::CRATE_STR.to_string(), - target, - ); + builder = builder.add_dependency(context, alias.into(), target); } + let dependency_map = builder.build().unwrap(); + TestCase::::template_deps(prog_path.as_ref(), &dependency_map) .with_arguments(Arguments::default()) } @@ -725,7 +722,6 @@ pub(crate) mod tests { #[test] fn test_crate_keyword_compilation_success() { - use crate::resolution::{CanonPath, DependencyMap}; use crate::test_utils::TempWorkspace; let ws = TempWorkspace::new("crate_success"); @@ -740,14 +736,10 @@ pub(crate) mod tests { ); let main_path = root.join("main.simf"); - let mut dependency_map = DependencyMap::new(); - let canon_root = CanonPath::canonicalize(&root).unwrap(); - dependency_map - .insert( - canon_root.clone(), - crate::driver::CRATE_STR.to_string(), - canon_root, - ) + let canon_root = crate::source::CanonPath::canonicalize(&root).unwrap(); + let dependency_map = crate::resolution::DependencyMapBuilder::new() + .with_entry_root(canon_root) + .build() .unwrap(); TestCase::::template_deps(&main_path, &dependency_map) @@ -756,6 +748,16 @@ pub(crate) mod tests { .assert_run_success(); } + #[test] + fn test_anonymous_source_compiles_without_dependencies() { + let code = "fn main() { assert!(true); }"; + let program = TemplateProgram::new(code); + assert!( + program.is_ok(), + "TemplateProgram::new should successfully compile anonymous source files without requiring canonical paths" + ); + } + 
#[test] fn cat() { TestCase::program_file("./examples/cat.simf") @@ -1162,23 +1164,23 @@ mod error_tests { use super::*; use crate::resolution::tests::canon; - use crate::resolution::CanonPath; + use crate::source::CanonPath; use crate::test_utils::TempWorkspace; fn dependency_map(root_dir: &Path, drp: &str, lib_dir: &Path) -> DependencyMap { - let mut dependency_map = DependencyMap::new(); - let context = CanonPath::canonicalize(root_dir).unwrap(); let target = CanonPath::canonicalize(lib_dir).unwrap(); - dependency_map.insert(context, drp.into(), target).unwrap(); - - dependency_map + crate::resolution::DependencyMapBuilder::new() + .with_entry_root(context.clone()) + .add_dependency(context, drp.into(), target) + .build() + .unwrap() } - fn source_file(path: &Path) -> SourceFile { + fn source_file(path: &Path) -> CanonSourceFile { let content = std::fs::read_to_string(path).expect("Failed to read test file"); - SourceFile::new(path, Arc::from(content)) + CanonSourceFile::new(canon(path), Arc::from(content)) } #[test] @@ -1211,6 +1213,7 @@ mod error_tests { #[test] fn omitted_context_dependency_applies_inside_dependency_files() { let ws = TempWorkspace::new("omitted_context_dependency"); + let root_dir = ws.create_dir("workspace"); let lib_dir = ws.create_dir("workspace/lib"); let main_path = ws.create_file( "workspace/main.simf", @@ -1222,7 +1225,7 @@ mod error_tests { ); ws.create_file("workspace/lib/base.simf", "pub fn one() -> u32 { 1 }\n"); - let dependencies = dependency_map(&main_path, "lib", &lib_dir); + let dependencies = dependency_map(&root_dir, "lib", &lib_dir); let _err = TemplateProgram::new_with_dep(source_file(&main_path), &dependencies) .expect_err("omitted-context dependencies"); } @@ -1333,7 +1336,7 @@ mod functional_tests { } #[test] - #[should_panic(expected = "not found")] + #[should_panic(expected = "DependencyPathNotFound")] fn file_not_found_error() { run_dependency_test( format!("{}/file-not-found", ERROR_TESTS_DIR).as_str(), @@ -1342,7 
+1345,7 @@ mod functional_tests { } #[test] - #[should_panic(expected = "not found")] + #[should_panic(expected = "DependencyPathNotFound")] fn lib_not_found_error() { run_dependency_test(format!("{}/lib-not-found", ERROR_TESTS_DIR).as_str(), "lib"); } diff --git a/src/main.rs b/src/main.rs index 8d6a15cc..651a1076 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,9 +3,8 @@ use base64::engine::general_purpose::STANDARD; use clap::{Arg, ArgAction, Command}; use simplicityhl::{ - driver::CRATE_STR, - resolution::{CanonPath, DependencyMap, SourceFile}, - AbiMeta, CompiledProgram, + driver::CanonSourceFile, resolution::DependencyMapBuilder, source::CanonPath, AbiMeta, + CompiledProgram, }; use std::path::Path; use std::{env, fmt}; @@ -129,15 +128,14 @@ fn main() -> Result<(), Box> { .get_many::("dependencies") .unwrap_or_default(); - let mut dependencies = DependencyMap::new(); - - // Automatically assign the `crate` root to the project directory let canon_root = main_path .as_path() .parent() .and_then(|p| CanonPath::canonicalize(p).ok()); + + let mut builder = DependencyMapBuilder::new(); if let Some(ref canon) = canon_root { - let _ = dependencies.insert(canon.clone(), CRATE_STR.to_string(), canon.clone()); + builder = builder.with_entry_root(canon.clone()); } for arg in dep_args { @@ -164,20 +162,18 @@ fn main() -> Result<(), Box> { let target_path = CanonPath::canonicalize(Path::new(path_str))?; - if let Err(e) = dependencies.insert(context_path, alias.to_string(), target_path.clone()) { - eprintln!("Error: {e}"); - std::process::exit(1); - } + builder = builder.add_dependency(context_path, alias.to_string(), target_path); + } - // Treat the external package as an isolated boundary, allowing it to use `crate::` internally - if let Err(e) = dependencies.insert(target_path.clone(), CRATE_STR.to_string(), target_path) - { + let dependencies = match builder.build() { + Ok(map) => map, + Err(e) => { eprintln!("Error: {e}"); std::process::exit(1); } - } + }; - let source 
= SourceFile::new(main_path.as_path(), std::sync::Arc::from(main_text)); + let source = CanonSourceFile::new(main_path.clone(), std::sync::Arc::from(main_text)); let compiled = match CompiledProgram::new_with_dep(source, &dependencies, args_opt, include_debug_symbols) { diff --git a/src/parse.rs b/src/parse.rs index cff32f5f..4b36975d 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -24,7 +24,7 @@ use crate::impl_eq_hash; use crate::lexer::Token; use crate::num::NonZeroPow2Usize; use crate::pattern::Pattern; -use crate::resolution::SourceFile; +use crate::source::SourceFile; use crate::str::{ AliasName, Binary, Decimal, FunctionName, Hexadecimal, Identifier, JetName, ModuleName, SymbolName, WitnessName, diff --git a/src/resolution.rs b/src/resolution.rs index 22ec3abb..f5091252 100644 --- a/src/resolution.rs +++ b/src/resolution.rs @@ -1,129 +1,154 @@ -use std::io; -use std::path::Path; -use std::sync::Arc; - -use crate::driver::{CanonSourceFile, CRATE_STR}; +use crate::driver::CRATE_STR; use crate::error::{Error, RichError, WithSpan as _}; use crate::parse::UseDecl; +use crate::source::CanonPath; -/// Powers error reporting by mapping compiler diagnostics to the specific file. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct SourceFile { - /// The path of the source file (e.g., "./src/main.simf"). - name: Option>, - /// The actual text content of the source file. - content: Arc, +/// This defines how a specific dependency root path (e.g. "math") +/// should be resolved to a physical path on the disk, restricted to +/// files executing within the `context_prefix`. +#[derive(Debug, Clone)] +pub(crate) struct Remapping { + /// The base directory that owns this dependency mapping. + pub(crate) context_prefix: CanonPath, + /// The dependency root path name used in the `use` statement (e.g., "math"). + pub(crate) drp_name: String, + /// The physical path this dependency root path points to. 
+ pub(crate) target: CanonPath, } -impl From<(&Path, &str)> for SourceFile { - fn from((name, content): (&Path, &str)) -> Self { - Self::new(name, Arc::from(content)) +fn is_valid_dependency_identifier(s: &str) -> bool { + if s.is_empty() { + return false; + } + let mut chars = s.chars(); + let first = chars.next().unwrap(); + if !(first.is_ascii_alphabetic() || first == '_') { + return false; } + if !chars.all(|c| c.is_ascii_alphanumeric() || c == '_') { + return false; + } + !crate::lexer::is_keyword(s) } -impl From for SourceFile { - fn from(canon_source: CanonSourceFile) -> Self { - Self::new(canon_source.name().as_path(), canon_source.content()) - } +/// A router for resolving dependencies across multi-file workspaces. +/// +/// Mappings are strictly sorted by the longest `context_prefix` match. +/// This mathematical guarantee ensures that if multiple nested directories +/// define the same dependency root path, the most specific (deepest) context wins. +#[derive(Debug, Clone, Default)] +pub struct DependencyMap { + /// External dependency remappings (e.g., `use math::...`) + remappings: Vec, + /// Package roots for resolving `crate::...` (sorted by longest path match) + package_roots: Vec, } -impl SourceFile { - /// Creates a standard `SourceFile` from a file path and its content. 
- pub fn new(name: &Path, content: Arc) -> Self { - Self { - name: Some(Arc::from(name)), - content, - } - } +#[derive(Debug, Clone, Default)] +pub struct DependencyMapBuilder { + entry_root: Option, + deps: Vec, +} - /// Creates an anonymous `SourceFile` without a file path (e.g., for a single-file programs) - pub fn anonymous(content: Arc) -> Self { - Self { - name: None, - content, - } +impl DependencyMapBuilder { + pub fn new() -> Self { + Self::default() } - pub fn name(&self) -> &Option> { - &self.name + pub fn with_entry_root(mut self, root: CanonPath) -> Self { + self.entry_root = Some(root); + self } - pub fn content(&self) -> Arc { - self.content.clone() + pub fn add_dependency(mut self, context: CanonPath, alias: String, target: CanonPath) -> Self { + self.deps.push(Remapping { + context_prefix: context, + drp_name: alias, + target, + }); + self } -} -/// A guaranteed, fully coanonicalized absolute path. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct CanonPath(Arc); - -impl CanonPath { - /// Safely resolves an absolute path via the OS and wraps it in a `CanonPath`. - /// - /// # Errors - /// - /// Returns a `String` containing the OS error if the path does not exist or - /// cannot be accessed. The caller is expected to map this into a more specific - /// compiler diagnostic (e.g., `RichError`). 
- pub fn canonicalize(path: &Path) -> Result { - // We use `map_err` here to intercept the generic OS error and enrich - // it with the specific path that failed - let canon_path = std::fs::canonicalize(path).map_err(|err| { - format!( - "Failed to find library target path '{}' :{}", - path.display(), - err - ) - })?; + pub fn build(self) -> Result { + let mut remappings = Vec::new(); + let mut crate_roots = Vec::new(); - Ok(Self(Arc::from(canon_path.as_path()))) - } + if let Some(root) = self.entry_root { + if !root.as_path().exists() { + return Err(Error::DependencyPathNotFound( + root.as_path().display().to_string(), + )); + } + if !root.as_path().is_dir() { + return Err(Error::DependencyNotADirectory( + root.as_path().display().to_string(), + )); + } + crate_roots.push(root); + } - /// Appends a logical module path to this physical root directory and verifies it. - /// It automatically appends the `.simf` extension to the final path *before* asking - /// the OS to verify its existence. 
- pub fn join(&self, parts: &[&str]) -> Result { - let mut new_path = self.0.to_path_buf(); + for dep in self.deps { + if !dep.context_prefix.as_path().exists() { + return Err(Error::DependencyPathNotFound( + dep.context_prefix.as_path().display().to_string(), + )); + } + if !dep.context_prefix.as_path().is_dir() { + return Err(Error::DependencyNotADirectory( + dep.context_prefix.as_path().display().to_string(), + )); + } + if !dep.target.as_path().exists() { + return Err(Error::DependencyPathNotFound( + dep.target.as_path().display().to_string(), + )); + } + if !dep.target.as_path().is_dir() { + return Err(Error::DependencyNotADirectory( + dep.target.as_path().display().to_string(), + )); + } - for part in parts { - new_path.push(part); - } + if !is_valid_dependency_identifier(&dep.drp_name) { + if dep.drp_name == CRATE_STR { + return Err(Error::ReservedDependencyKeyword(dep.drp_name)); + } + return Err(Error::InvalidDependencyIdentifier(dep.drp_name)); + } - Self::canonicalize(&new_path.with_extension("simf")) - } + // Reject duplicates: same context and same alias + if remappings.iter().any(|r: &Remapping| { + r.context_prefix == dep.context_prefix && r.drp_name == dep.drp_name + }) { + return Err(Error::DuplicateDependencyAlias( + dep.drp_name.clone(), + dep.context_prefix.as_path().display().to_string(), + )); + } - /// Check if the current file is executing inside the context's directory tree. - /// This prevents a file in `/project_a/` from using a dependency meant for `/project_b/` - pub fn starts_with(&self, path: &CanonPath) -> bool { - self.as_path().starts_with(path.as_path()) - } + crate_roots.push(dep.target.clone()); + remappings.push(dep); + } - pub fn as_path(&self) -> &Path { - &self.0 - } -} + // Sort alphabetically to group duplicates together + crate_roots.sort(); + crate_roots.dedup(); -/// This defines how a specific dependency root path (e.g. 
"math") -/// should be resolved to a physical path on the disk, restricted to -/// files executing within the `context_prefix`. -#[derive(Debug, Clone)] -pub struct Remapping { - /// The base directory that owns this dependency mapping. - pub context_prefix: CanonPath, - /// The dependency root path name used in the `use` statement (e.g., "math"). - pub drp_name: String, - /// The physical path this dependency root path points to. - pub target: CanonPath, -} + // Sort package roots by length descending to ensure longest prefix match + crate_roots.sort_by(|a, b| { + b.as_path() + .as_os_str() + .len() + .cmp(&a.as_path().as_os_str().len()) + }); -/// A router for resolving dependencies across multi-file workspaces. -/// -/// Mappings are strictly sorted by the longest `context_prefix` match. -/// This mathematical guarantee ensures that if multiple nested directories -/// define the same dependency root path, the most specific (deepest) context wins. -#[derive(Debug, Clone, Default)] -pub struct DependencyMap { - inner: Vec, + let mut map = DependencyMap { + remappings, + package_roots: crate_roots, + }; + map.sort_mappings(); + Ok(map) + } } impl DependencyMap { @@ -132,45 +157,25 @@ impl DependencyMap { } pub fn is_empty(&self) -> bool { - self.inner.is_empty() + self.remappings.is_empty() && self.package_roots.is_empty() } /// Re-sort the vector in descending order so the longest context paths are always at the front. /// This mathematically guarantees that the first match we find is the most specific. fn sort_mappings(&mut self) { - self.inner.sort_by(|a, b| { + self.remappings.sort_by(|a, b| { let len_a = a.context_prefix.as_path().as_os_str().len(); let len_b = b.context_prefix.as_path().as_os_str().len(); len_b.cmp(&len_a) }); } - /// Add a dependency mapped to a specific calling file's path prefix. - /// Re-sorts the vector internally to guarantee the Longest Prefix Match. 
- /// - /// # Arguments - /// - /// * `context` - The physical root directory where this dependency rule applies - /// (e.g., `/workspace/frontend`). - /// * `drp_name` - The Dependency Root Path Name. This is the logical alias the - /// programmer types in their source code (e.g., the `"math"` in `use math::vector;`). - /// * `target` - The physical directory where the compiler should actually - /// look for the code (e.g., `/libs/frontend_math`). - pub fn insert( - &mut self, - context: CanonPath, - drp_name: String, - target: CanonPath, - ) -> io::Result<()> { - self.inner.push(Remapping { - context_prefix: context, - drp_name, - target, - }); - - self.sort_mappings(); - - Ok(()) + /// Returns the package root for the given file, which corresponds to the + /// target directory of the most specific dependency or the entry root. + pub fn get_package_root(&self, current_file: &CanonPath) -> Option<&CanonPath> { + self.package_roots + .iter() + .find(|root| current_file.starts_with(root)) } /// Resolve `use dependency_root_path_name::...` into a physical file path by finding the @@ -183,9 +188,13 @@ impl DependencyMap { let parts = use_decl.path(); let drp_name = use_decl.drp_name()?; + if drp_name == CRATE_STR { + return self.resolve_crate_path(current_file, use_decl, &parts); + } + // Because the vector is sorted by longest prefix, // the VERY FIRST match we find is guaranteed to be the correct one. 
- for remapping in &self.inner { + for remapping in &self.remappings { if !current_file.starts_with(&remapping.context_prefix) { continue; } @@ -194,61 +203,73 @@ impl DependencyMap { if remapping.drp_name == drp_name { let resolved = Self::build_and_verify_path(&remapping.target, &parts[1..]) .map_err(|failed_path| { - let err = if drp_name == CRATE_STR { - Error::FileNotFound(failed_path) - } else { - Error::ExternalFileNotFound(drp_name.to_string(), failed_path) - }; - RichError::new(err, *use_decl.span()) + RichError::new( + Error::ExternalFileNotFound(drp_name.to_string(), failed_path), + *use_decl.span(), + ) })?; - self.check_local_file_imported_as_external( - drp_name, - current_file, - &resolved, - use_decl, - )?; + if !resolved.starts_with(&remapping.target) { + return Err(RichError::new( + Error::ExternalFileNotFound( + drp_name.to_string(), + resolved.as_path().to_path_buf(), + ), + *use_decl.span(), + )); + } + + self.check_local_file_imported_as_external(current_file, &resolved, use_decl)?; return Ok(resolved); } } - // If the unmapped root path is "crate", it means the compiler driver failed to configure the workspace root. - // "crate" explicitly signals local code and should never be treated as an unknown external library. - if drp_name == CRATE_STR { - return Err(Error::Internal( - "The 'crate' root path was not configured by the compiler.".to_string(), - )) - .with_span(*use_decl.span()); + Err(Error::UnknownLibrary(drp_name.to_string())).with_span(*use_decl.span()) + } + + /// Resolves `crate::...` imports into a physical file path. 
+ fn resolve_crate_path( + &self, + current_file: &CanonPath, + use_decl: &UseDecl, + parts: &[&str], + ) -> Result { + let root = self + .get_package_root(current_file) + .ok_or_else(|| { + Error::Internal( + "The 'crate' root path was not configured by the compiler.".to_string(), + ) + }) + .map_err(|e| RichError::new(e, *use_decl.span()))?; + + let resolved = Self::build_and_verify_path(root, &parts[1..]).map_err(|failed_path| { + RichError::new(Error::FileNotFound(failed_path), *use_decl.span()) + })?; + + if !resolved.starts_with(root) { + return Err(RichError::new( + Error::FileNotFound(resolved.as_path().to_path_buf()), + *use_decl.span(), + )); } - Err(Error::UnknownLibrary(drp_name.to_string())).with_span(*use_decl.span()) + Ok(resolved) } /// Enforces that a local file is imported via `crate::` and not via an external alias. fn check_local_file_imported_as_external( &self, - drp_name: &str, current_file: &CanonPath, resolved: &CanonPath, use_decl: &UseDecl, ) -> Result<(), RichError> { - if drp_name == CRATE_STR { - return Ok(()); - } - - let current_crate = self - .inner - .iter() - .find(|r| current_file.starts_with(&r.context_prefix) && r.drp_name == CRATE_STR); - - let resolved_crate = self - .inner - .iter() - .find(|r| resolved.starts_with(&r.context_prefix) && r.drp_name == CRATE_STR); + let current_crate = self.get_package_root(current_file); + let resolved_crate = self.get_package_root(resolved); if let (Some(curr), Some(res)) = (current_crate, resolved_crate) { - if curr.target == res.target { + if curr == res { return Err(Error::LocalFileImportedAsExternal( resolved.as_path().to_path_buf(), )) @@ -281,6 +302,7 @@ impl DependencyMap { pub(crate) mod tests { use crate::str::Identifier; use crate::test_utils::TempWorkspace; + use std::path::Path; use super::*; @@ -288,12 +310,6 @@ pub(crate) mod tests { CanonPath::canonicalize(p).unwrap_or_else(|_| CanonPath::dummy_for_test(p)) } - impl CanonPath { - pub fn dummy_for_test(path: &Path) -> Self { - 
Self(Arc::from(path)) - } - } - /// Helper to easily construct a `UseDecl` for path resolution tests. fn create_dummy_use_decl(path_segments: &[&str]) -> UseDecl { let path: Vec = path_segments @@ -304,6 +320,26 @@ pub(crate) mod tests { UseDecl::dummy_path(path) } + /// Attempting to manually map the `crate` keyword using `insert()` must result in an error. + #[test] + fn test_insert_crate_fails() { + let ws = TempWorkspace::new("insert_crate_fail"); + let project_dir = canon(&ws.create_dir("workspace")); + + let result = DependencyMapBuilder::new() + .add_dependency( + project_dir.clone(), + CRATE_STR.to_string(), + project_dir.clone(), + ) + .build(); + + assert!(matches!( + result.unwrap_err(), + Error::ReservedDependencyKeyword(_) + )); + } + /// When a user registers the same library dependency root path multiple times /// for different folders, the compiler must always check the longest folder path first. #[test] @@ -318,18 +354,17 @@ pub(crate) mod tests { let target_v3 = canon(&ws.create_dir("lib/math_v3")); let target_v2 = canon(&ws.create_dir("lib/math_v2")); - let mut map = DependencyMap::new(); - map.insert(workspace_dir.clone(), "math".to_string(), target_v1) - .unwrap(); - map.insert(nested_dir.clone(), "math".to_string(), target_v3) - .unwrap(); - map.insert(project_a_dir.clone(), "math".to_string(), target_v2) + let map = DependencyMapBuilder::new() + .add_dependency(workspace_dir.clone(), "math".to_string(), target_v1) + .add_dependency(nested_dir.clone(), "math".to_string(), target_v3) + .add_dependency(project_a_dir.clone(), "math".to_string(), target_v2) + .build() .unwrap(); // The longest prefixes should bubble to the top - assert_eq!(map.inner[0].context_prefix, nested_dir); - assert_eq!(map.inner[1].context_prefix, project_a_dir); - assert_eq!(map.inner[2].context_prefix, workspace_dir); + assert_eq!(map.remappings[0].context_prefix, nested_dir); + assert_eq!(map.remappings[1].context_prefix, project_a_dir); + 
assert_eq!(map.remappings[2].context_prefix, workspace_dir); } /// Projects should not be able to "steal" or accidentally access dependencies @@ -342,8 +377,9 @@ pub(crate) mod tests { let target_utils = canon(&ws.create_dir("libs/utils_a")); let current_file = canon(&ws.create_file("project_b/main.simf", "")); - let mut map = DependencyMap::new(); - map.insert(project_a, "utils".to_string(), target_utils) + let map = DependencyMapBuilder::new() + .add_dependency(project_a, "utils".to_string(), target_utils) + .build() .unwrap(); let use_decl = create_dummy_use_decl(&["utils"]); @@ -362,30 +398,26 @@ pub(crate) mod tests { fn test_resolve_longest_prefix_match() { let ws = TempWorkspace::new("resolve_prefix"); - // 1. Setup Global Context let global_context = canon(&ws.create_dir("workspace")); let global_target = canon(&ws.create_dir("libs/global_math")); let global_expected = canon(&ws.create_file("libs/global_math/vector.simf", "")); - // 2. Setup Frontend Context let frontend_context = canon(&ws.create_dir("workspace/frontend")); let frontend_target = canon(&ws.create_dir("libs/frontend_math")); let frontend_expected = canon(&ws.create_file("libs/frontend_math/vector.simf", "")); - let mut map = DependencyMap::new(); - map.insert(global_context, "math".to_string(), global_target) - .unwrap(); - map.insert(frontend_context, "math".to_string(), frontend_target) + let map = DependencyMapBuilder::new() + .add_dependency(global_context, "math".to_string(), global_target) + .add_dependency(frontend_context, "math".to_string(), frontend_target) + .build() .unwrap(); let use_decl = create_dummy_use_decl(&["math", "vector"]); - // 3. Test Frontend Override let frontend_file = canon(&ws.create_file("workspace/frontend/src/main.simf", "")); let resolved_frontend = map.resolve_path(&frontend_file, &use_decl).unwrap(); assert_eq!(resolved_frontend, frontend_expected); - // 4. 
Test Global Fallback let backend_file = canon(&ws.create_file("workspace/backend/src/main.simf", "")); let resolved_backend = map.resolve_path(&backend_file, &use_decl).unwrap(); assert_eq!(resolved_backend, global_expected); @@ -400,21 +432,15 @@ pub(crate) mod tests { ws.create_file("workspace/utils.simf", ""); let current_file = canon(&ws.create_file("workspace/main.simf", "")); - let mut map = DependencyMap::new(); - // The driver sets up the crate root - map.insert( - project_dir.clone(), - CRATE_STR.to_string(), - project_dir.clone(), - ) - .unwrap(); - // The user tries to alias a folder inside their own project as an external dependency - map.insert( - project_dir.clone(), - "utils_lib".to_string(), - project_dir.clone(), - ) - .unwrap(); + let map = DependencyMapBuilder::new() + .with_entry_root(project_dir.clone()) + .add_dependency( + project_dir.clone(), + "utils_lib".to_string(), + project_dir.clone(), + ) + .build() + .unwrap(); let use_decl = create_dummy_use_decl(&["utils_lib", "utils"]); let result = map.resolve_path(&current_file, &use_decl); @@ -435,13 +461,10 @@ pub(crate) mod tests { let expected = canon(&ws.create_file("workspace/utils.simf", "")); let current_file = canon(&ws.create_file("workspace/main.simf", "")); - let mut map = DependencyMap::new(); - map.insert( - project_dir.clone(), - CRATE_STR.to_string(), - project_dir.clone(), - ) - .unwrap(); + let map = DependencyMapBuilder::new() + .with_entry_root(project_dir.clone()) + .build() + .unwrap(); let use_decl = create_dummy_use_decl(&[CRATE_STR, "utils"]); let result = map.resolve_path(&current_file, &use_decl).unwrap(); @@ -480,12 +503,210 @@ pub(crate) mod tests { let current_file = canon(&ws.create_file("workspace/frontend/src/main.simf", "")); - let mut map = DependencyMap::new(); - map.insert(context, "math".to_string(), target).unwrap(); + let map = DependencyMapBuilder::new() + .add_dependency(context, "math".to_string(), target) + .build() + .unwrap(); let use_decl =
create_dummy_use_decl(&["math", "vector"]); let result = map.resolve_path(&current_file, &use_decl).unwrap(); assert_eq!(result, expected); } + + #[test] + fn test_builder_rejects_file_as_directory() { + let ws = TempWorkspace::new("file_as_dir"); + let file_path = canon(&ws.create_file("workspace/not_a_dir.simf", "")); + let valid_dir = canon(&ws.create_dir("workspace/valid_dir")); + + let res1 = DependencyMapBuilder::new() + .with_entry_root(file_path.clone()) + .build(); + assert!(matches!( + res1.unwrap_err(), + Error::DependencyNotADirectory(_) + )); + + let res2 = DependencyMapBuilder::new() + .add_dependency(file_path.clone(), "alias".to_string(), valid_dir.clone()) + .build(); + assert!(matches!( + res2.unwrap_err(), + Error::DependencyNotADirectory(_) + )); + + let res3 = DependencyMapBuilder::new() + .add_dependency(valid_dir.clone(), "alias".to_string(), file_path) + .build(); + assert!(matches!( + res3.unwrap_err(), + Error::DependencyNotADirectory(_) + )); + } + + #[test] + fn test_builder_rejects_non_existent_paths() { + let ws = TempWorkspace::new("non_existent"); + let valid_dir = canon(&ws.create_dir("workspace/valid_dir")); + let fake_path = CanonPath::dummy_for_test(Path::new("/does/not/exist/in/this/universe")); + + let res = DependencyMapBuilder::new() + .add_dependency(valid_dir.clone(), "alias".to_string(), fake_path) + .build(); + assert!(matches!(res.unwrap_err(), Error::DependencyPathNotFound(_))); + } + + #[test] + fn test_builder_rejects_invalid_identifiers() { + let ws = TempWorkspace::new("invalid_idents"); + let valid_dir = canon(&ws.create_dir("workspace/valid_dir")); + + let bad_aliases = vec!["", "123lib", "my-lib", "lib!", " space "]; + + for bad_alias in bad_aliases { + let res = DependencyMapBuilder::new() + .add_dependency(valid_dir.clone(), bad_alias.to_string(), valid_dir.clone()) + .build(); + assert!( + matches!(res.unwrap_err(), Error::InvalidDependencyIdentifier(_)), + "Builder should reject alias: '{}'", + bad_alias + ); + } +
} + + #[test] + fn test_builder_rejects_reserved_keywords() { + let ws = TempWorkspace::new("reserved_keywords"); + let valid_dir = canon(&ws.create_dir("workspace/valid_dir")); + + let keywords = crate::lexer::KEYWORDS.to_vec(); + + for kw in keywords { + let res = DependencyMapBuilder::new() + .add_dependency(valid_dir.clone(), kw.to_string(), valid_dir.clone()) + .build(); + let err = res.unwrap_err(); + if kw == CRATE_STR { + assert!(matches!(err, Error::ReservedDependencyKeyword(_))); + } else { + assert!(matches!(err, Error::InvalidDependencyIdentifier(_))); + } + } + } + + #[test] + fn test_builder_rejects_duplicates() { + let ws = TempWorkspace::new("duplicates"); + let valid_dir = canon(&ws.create_dir("workspace/valid_dir")); + let target1 = canon(&ws.create_dir("workspace/target1")); + let target2 = canon(&ws.create_dir("workspace/target2")); + + let res = DependencyMapBuilder::new() + .add_dependency(valid_dir.clone(), "alias".to_string(), target1) + .add_dependency(valid_dir.clone(), "alias".to_string(), target2) + .build(); + + assert!(matches!( + res.unwrap_err(), + Error::DuplicateDependencyAlias(..) + )); + } + + #[test] + fn test_resolve_rejects_escaping_package_root() { + let ws = TempWorkspace::new("escaping_root"); + let context = canon(&ws.create_dir("workspace")); + let target = canon(&ws.create_dir("libs/target")); + let current_file = canon(&ws.create_file("workspace/main.simf", "")); + + let _outside_file = canon(&ws.create_file("libs/escaped.simf", "")); + + let map = DependencyMapBuilder::new() + .add_dependency(context, "alias".to_string(), target.clone()) + .build() + .unwrap(); + + let use_decl = create_dummy_use_decl(&["alias", "..", "escaped"]); + let result = map.resolve_path(&current_file, &use_decl); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("not found")); + } + + /// A dependency package should not be able to expose a symlinked source file + /// whose canonical path escapes the dependency root.
+ #[cfg(unix)] + #[test] + fn test_dependency_symlink_escape_rejected() { + let ws = TempWorkspace::new("dependency_symlink_escape"); + + let workspace_dir = canon(&ws.create_dir("workspace")); + let dependency_dir_path = ws.create_dir("deps/package"); + let escaped_file = ws.create_file("outside/foo.simf", ""); + std::os::unix::fs::symlink(&escaped_file, dependency_dir_path.join("foo.simf")).unwrap(); + + let dependency_dir = canon(&dependency_dir_path); + let current_file = canon(&ws.create_file("workspace/main.simf", "")); + + let map = DependencyMapBuilder::new() + .with_entry_root(workspace_dir.clone()) + .add_dependency(workspace_dir, "dep".to_string(), dependency_dir) + .build() + .unwrap(); + + let use_decl = create_dummy_use_decl(&["dep", "foo"]); + map.resolve_path(&current_file, &use_decl) + .expect_err("dependency symlink escape was accepted"); + } + + /// It proves that the builder correctly deduplicates `package_roots` + /// even if multiple roots have the exact same string length. + #[test] + fn test_package_roots_deduplication() { + let ws = TempWorkspace::new("dedup_roots"); + + let workspace_dir = canon(&ws.create_dir("workspace")); + let lib_a = canon(&ws.create_dir("workspace/libs/A")); + let lib_b = canon(&ws.create_dir("workspace/libs/B")); + + let map = DependencyMapBuilder::new() + .with_entry_root(workspace_dir.clone()) + .add_dependency(workspace_dir.clone(), "lib_a".to_string(), lib_a.clone()) + .add_dependency(workspace_dir.clone(), "lib_b".to_string(), lib_b.clone()) + .add_dependency(lib_b.clone(), "lib_a".to_string(), lib_a.clone()) + .build() + .unwrap(); + + // The package roots should only contain workspace_dir, lib_a, and lib_b (exactly 3 unique roots).
+ assert_eq!( + map.package_roots.len(), + 3, + "Package roots were not correctly deduplicated" + ); + } + + /// It proves that if a dependency is nested physically inside the entry root, + /// files inside the dependency correctly resolve `crate::` to their own sandbox boundary, + /// and NOT the parent workspace boundary. + #[test] + fn test_crate_resolves_to_closest_package_root() { + let ws = TempWorkspace::new("closest_root"); + let workspace_dir = canon(&ws.create_dir("workspace")); + let lib_dir = canon(&ws.create_dir("workspace/libs/math")); + + let map = DependencyMapBuilder::new() + .with_entry_root(workspace_dir.clone()) + .add_dependency(workspace_dir.clone(), "math".to_string(), lib_dir.clone()) + .build() + .unwrap(); + + let lib_file = canon(&ws.create_file("workspace/libs/math/vector.simf", "")); + let lib_crate = map.get_package_root(&lib_file).unwrap(); + assert_eq!( + lib_crate, &lib_dir, + "Nested dependency did not securely shadow the parent workspace root" + ); + } } diff --git a/src/source.rs b/src/source.rs new file mode 100644 index 00000000..44a6c235 --- /dev/null +++ b/src/source.rs @@ -0,0 +1,106 @@ +use std::path::Path; +use std::sync::Arc; + +use crate::driver::CanonSourceFile; + +/// Powers error reporting by mapping compiler diagnostics to the specific file. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct SourceFile { + /// The path of the source file (e.g., "./src/main.simf"). + name: Option>, + /// The actual text content of the source file. + content: Arc, +} + +impl From<(&Path, &str)> for SourceFile { + fn from((name, content): (&Path, &str)) -> Self { + Self::new(name, Arc::from(content)) + } +} + +impl From for SourceFile { + fn from(canon_source: CanonSourceFile) -> Self { + Self::new(canon_source.name().as_path(), canon_source.content()) + } +} + +impl SourceFile { + /// Creates a standard `SourceFile` from a file path and its content. 
+ pub fn new(name: &Path, content: Arc) -> Self { + Self { + name: Some(Arc::from(name)), + content, + } + } + + /// Creates an anonymous `SourceFile` without a file path (e.g., for single-file programs) + pub fn anonymous(content: Arc) -> Self { + Self { + name: None, + content, + } + } + + pub fn name(&self) -> &Option> { + &self.name + } + + pub fn content(&self) -> Arc { + self.content.clone() + } +} + +/// A guaranteed, fully canonicalized absolute path. +#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] +pub struct CanonPath(Arc); + +impl CanonPath { + /// Safely resolves an absolute path via the OS and wraps it in a `CanonPath`. + /// + /// # Errors + /// + /// Returns a `String` containing the OS error if the path does not exist or + /// cannot be accessed. The caller is expected to map this into a more specific + /// compiler diagnostic (e.g., `RichError`). + pub fn canonicalize(path: &Path) -> Result { + // We use `map_err` here to intercept the generic OS error and enrich + // it with the specific path that failed + let canon_path = std::fs::canonicalize(path).map_err(|err| { + format!( + "Failed to find library target path '{}' :{}", + path.display(), + err + ) + })?; + + Ok(Self(Arc::from(canon_path.as_path()))) + } + + /// Appends a logical module path to this physical root directory and verifies it. + /// It automatically appends the `.simf` extension to the final path *before* asking + /// the OS to verify its existence. + pub fn join(&self, parts: &[&str]) -> Result { + let mut new_path = self.0.to_path_buf(); + + for part in parts { + new_path.push(part); + } + + Self::canonicalize(&new_path.with_extension("simf")) + } + + /// Check if the current file is executing inside the context's directory tree.
+ /// This prevents a file in `/project_a/` from using a dependency meant for `/project_b/` + pub fn starts_with(&self, path: &CanonPath) -> bool { + self.as_path().starts_with(path.as_path()) + } + + pub fn as_path(&self) -> &Path { + &self.0 + } + + #[cfg(test)] + pub fn dummy_for_test(path: &Path) -> Self { + Self(Arc::from(path)) + } +} diff --git a/tests/cli.rs b/tests/cli.rs index 2244e377..6ede40b4 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -27,3 +27,32 @@ fn cli_dependency_can_use_crate_root() { String::from_utf8_lossy(&output.stderr), ); } + +#[test] +fn cli_reserved_crate_mapping_fails() { + let root = repo_path("functional-tests/valid-test-cases/external-library-uses-crate"); + let main = root.join("main.simf"); + let ext_lib = root.join("ext_lib"); + + // Attempt to maliciously override the `crate` keyword + let dep_arg = format!("crate={}", ext_lib.display()); + + let output = Command::new(env!("CARGO_BIN_EXE_simc")) + .arg(main) + .arg("--dep") + .arg(dep_arg) + .output() + .expect("failed to run simc"); + + assert!( + !output.status.success(), + "simc unexpectedly succeeded when overriding the 'crate' dependency" + ); + + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("keyword is reserved"), + "Expected 'keyword is reserved' error, got:\n{}", + stderr + ); +}