From 2111b161a95b493528e05a2fcc90c99d6488f6b8 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:32:51 -0700 Subject: [PATCH 01/13] feat: trait method calls on primitives, span-based errors, break/continue warnings Enable dot-syntax for compiler-provided trait methods on primitive types (e.g., `a.unsafe_add(b)` instead of `UnsafeAdd::unsafe_add(a, b)`). Supports chained calls and complex receiver expressions. Convert all `IrError::Lowering(String)` to span-based `IrError::Diagnostic` for better error messages with source locations. Remove the now-unused `Lowering` variant. Emit compiler warnings for unimplemented `break`/`continue`. Co-Authored-By: Claude Opus 4.6 --- crates/ir/src/lib.rs | 3 - crates/ir/src/to_egglog/calls.rs | 151 +++++++++++++++---- crates/ir/src/to_egglog/composite.rs | 61 ++++++-- crates/ir/src/to_egglog/control_flow.rs | 18 ++- crates/ir/src/to_egglog/expr.rs | 57 ++++--- crates/ir/src/to_egglog/pattern.rs | 6 +- crates/ir/src/to_egglog/storage.rs | 9 +- examples/tests/test_full_math.edge | 30 ++-- examples/tests/test_inlined_halt.edge | 6 +- examples/tests/test_method_on_primitive.edge | 26 ++++ examples/tests/test_signed_widths.edge | 6 +- examples/tests/test_unsafe_arith.edge | 12 +- 12 files changed, 289 insertions(+), 96 deletions(-) create mode 100644 examples/tests/test_method_on_primitive.edge diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 040854f..ae7a202 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -45,9 +45,6 @@ pub use schema::{EvmContract, EvmExpr, EvmProgram, RcExpr}; /// Errors that can occur during IR lowering or optimization. #[derive(Debug, thiserror::Error)] pub enum IrError { - /// Error during AST lowering - #[error("lowering error: {0}")] - Lowering(String), /// Error during AST lowering with source span for diagnostics #[error("{message}")] LoweringSpanned { diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index cfe579d..d232137 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -23,20 +23,20 @@ impl AstToEgglog { let type_name = &components[0].name; let variant_name = &components[1].name; if self.union_types.contains_key(type_name) { - return self.lower_union_instantiation_expr(type_name, variant_name, args); + return self.lower_union_instantiation_expr(type_name, variant_name, args, Some(span)); } // Check for generic union types (e.g., Result::Ok(42) where Result was monomorphized) if self.generic_type_templates.contains_key(type_name) { // First try to find an already-monomorphized version if let Some(mangled) = self.resolve_generic_type_name(type_name) { - return self.lower_union_instantiation_expr(&mangled, variant_name, args); + return self.lower_union_instantiation_expr(&mangled, variant_name, args, Some(span)); } // No monomorphized version yet — try to infer type params from // the constructor argument and monomorphize on the fly. if let Some(mangled) = self.try_monomorphize_union_from_constructor(type_name, variant_name, args)? { - return self.lower_union_instantiation_expr(&mangled, variant_name, args); + return self.lower_union_instantiation_expr(&mangled, variant_name, args, Some(span)); } return Err(IrError::Diagnostic( edge_diagnostics::Diagnostic::error(format!( @@ -199,6 +199,23 @@ impl AstToEgglog { .collect(); return self.inline_function_call(¶ms, &body, &all_args); } + + // Check compiler-provided trait methods for primitive types + if Self::is_primitive_type(type_name) { + if let Some(op) = self.compiler_provided_method(method_name) { + if args.len() != 1 { + return Err(IrError::Diagnostic( + edge_diagnostics::Diagnostic::error(format!( + "`.{method_name}()` expects exactly 1 argument", + )) + .with_label(span.clone(), "expected 1 argument"), + )); + } + let lhs = self.lower_expr(receiver)?; + let rhs = self.lower_expr(&args[0])?; + return Ok(ast_helpers::bop(op, lhs, rhs)); + } + } } // If receiver type is known but no method found, give a clear error @@ -223,6 +240,25 @@ impl AstToEgglog { return Err(IrError::Diagnostic(diag)); } + // When receiver type is unknown, try compiler-provided trait methods + // (handles chained calls like `a.unsafe_add(b).unsafe_sub(c)`, + // paren expressions, and other cases where type inference fails) + if receiver_type.is_none() { + if let Some(op) = self.compiler_provided_method(method_name) { + if args.len() != 1 { + return Err(IrError::Diagnostic( + edge_diagnostics::Diagnostic::error(format!( + "`.{method_name}()` expects exactly 1 argument", + )) + .with_label(span.clone(), "expected 1 argument"), + )); + } + let lhs = self.lower_expr(receiver)?; + let rhs = self.lower_expr(&args[0])?; + return Ok(ast_helpers::bop(op, lhs, rhs)); + } + } + // Fallback: treat as FunctionCall(FieldAccess(...), args) — lower normally let _field_access = self.lower_field_access(receiver, method_name)?; let args_ir: Vec = args @@ -282,31 +318,27 @@ impl AstToEgglog { )); } - // Built-in UnsafeAdd/UnsafeSub/UnsafeMul for primitives - let unsafe_op = match (trait_name, method_name) { - ("UnsafeAdd", "unsafe_add") => Some(EvmBinaryOp::Add), - ("UnsafeSub", "unsafe_sub") => Some(EvmBinaryOp::Sub), - ("UnsafeMul", "unsafe_mul") => Some(EvmBinaryOp::Mul), - _ => None, - }; - if let Some(op) = unsafe_op { - // Check if receiver is a primitive (not a user-defined type) + // Compiler-provided trait methods for primitive types + { let receiver_type = self.infer_receiver_type(&args[0]); - if receiver_type.is_none() { - // Primitive type — emit unchecked op directly - if args.len() != 2 { - return Err(IrError::Diagnostic( - edge_diagnostics::Diagnostic::error(format!( - "`{trait_name}::{method_name}` expects exactly 2 arguments", - )) - .with_label(span.clone(), "expected 2 arguments"), - )); + let is_primitive = receiver_type + .as_ref() + .map_or(true, |t| Self::is_primitive_type(t)); + if is_primitive { + if let Some(op) = self.compiler_provided_method(method_name) { + if args.len() != 2 { + return Err(IrError::Diagnostic( + edge_diagnostics::Diagnostic::error(format!( + "`{trait_name}::{method_name}` expects exactly 2 arguments", + )) + .with_label(span.clone(), "expected 2 arguments"), + )); + } + let lhs = self.lower_expr(&args[0])?; + let rhs = self.lower_expr(&args[1])?; + return Ok(ast_helpers::bop(op, lhs, rhs)); } - let lhs = self.lower_expr(&args[0])?; - let rhs = self.lower_expr(&args[1])?; - return Ok(ast_helpers::bop(op, lhs, rhs)); } - // User-defined type — fall through to trait impl lookup } // Try to infer receiver type @@ -457,17 +489,84 @@ impl AstToEgglog { pub(crate) fn infer_receiver_type(&self, expr: &edge_ast::Expr) -> Option { match expr { edge_ast::Expr::Ident(ident) => { - // Check scope for composite type info for scope in self.scopes.iter().rev() { if let Some(binding) = scope.bindings.get(&ident.name) { + // Composite type (struct/union/array) takes priority if let Some(ref ct) = binding.composite_type { return Some(ct.clone()); } + // Fall back to primitive type name from EvmType + return Self::evm_type_to_name(&binding._ty); } } None } edge_ast::Expr::StructInstantiation(_, type_name, _, _) => Some(type_name.name.clone()), + edge_ast::Expr::Literal(lit) => match lit.as_ref() { + edge_ast::Lit::Bool(_, _) => Some("bool".to_string()), + edge_ast::Lit::Int(_, Some(pt), _) => { + Some(Self::primitive_type_to_name(pt)) + } + edge_ast::Lit::Int(_, None, _) => Some("u256".to_string()), + _ => None, + }, + _ => None, + } + } + + /// Convert an EvmType to a type name string (for primitives). + fn evm_type_to_name(ty: &EvmType) -> Option { + match ty { + EvmType::Base(base) => match base { + EvmBaseType::UIntT(256) => Some("u256".to_string()), + EvmBaseType::UIntT(w) => Some(format!("u{w}")), + EvmBaseType::IntT(256) => Some("i256".to_string()), + EvmBaseType::IntT(w) => Some(format!("i{w}")), + EvmBaseType::BoolT => Some("bool".to_string()), + EvmBaseType::AddrT => Some("address".to_string()), + EvmBaseType::BytesT(n) => Some(format!("bytes{n}")), + EvmBaseType::UnitT | EvmBaseType::StateT => None, + }, + _ => None, + } + } + + /// Convert a PrimitiveType to a type name string. + fn primitive_type_to_name(pt: &edge_ast::ty::PrimitiveType) -> String { + use edge_ast::ty::PrimitiveType; + match pt { + PrimitiveType::UInt(256) => "u256".to_string(), + PrimitiveType::UInt(w) => format!("u{w}"), + PrimitiveType::Int(256) => "i256".to_string(), + PrimitiveType::Int(w) => format!("i{w}"), + PrimitiveType::Bool => "bool".to_string(), + PrimitiveType::Address => "address".to_string(), + PrimitiveType::FixedBytes(n) => format!("bytes{n}"), + PrimitiveType::Bit => "bit".to_string(), + } + } + + /// Check if a type name refers to a primitive type (not a user-defined composite). + pub(crate) fn is_primitive_type(type_name: &str) -> bool { + type_name == "u256" + || type_name == "i256" + || type_name == "bool" + || type_name == "address" + || type_name.starts_with("u") + && type_name[1..].parse::().is_ok() + || type_name.starts_with("i") + && type_name[1..].parse::().is_ok() + || type_name.starts_with("bytes") + && type_name[5..].parse::().is_ok() + } + + /// Look up a compiler-provided trait method for a primitive type. + /// Returns the binary op if the method matches an imported std::ops trait. + fn compiler_provided_method(&self, method_name: &str) -> Option { + match method_name { + "unsafe_add" if self.std_ops_traits.contains("UnsafeAdd") => Some(EvmBinaryOp::Add), + "unsafe_sub" if self.std_ops_traits.contains("UnsafeSub") => Some(EvmBinaryOp::Sub), + "unsafe_mul" if self.std_ops_traits.contains("UnsafeMul") => Some(EvmBinaryOp::Mul), _ => None, } } diff --git a/crates/ir/src/to_egglog/composite.rs b/crates/ir/src/to_egglog/composite.rs index 8c98791..d690043 100644 --- a/crates/ir/src/to_egglog/composite.rs +++ b/crates/ir/src/to_egglog/composite.rs @@ -8,6 +8,7 @@ use crate::{ schema::{EvmBaseType, EvmBinaryOp, EvmType, RcExpr}, IrError, }; +use edge_diagnostics; impl AstToEgglog { /// Look up the variant index for a union type. @@ -16,28 +17,44 @@ impl AstToEgglog { &self, type_name: &str, variant_name: &str, + span: Option<&edge_types::span::Span>, ) -> Result { // Try direct lookup first let variants = if let Some(v) = self.union_types.get(type_name) { v } else if let Some(mangled) = self.resolve_generic_type_name(type_name) { - self.union_types - .get(&mangled) - .ok_or_else(|| IrError::Lowering(format!("unknown union type: {type_name}")))? + self.union_types.get(&mangled).ok_or_else(|| { + let diag = + edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`")); + IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "not found") + } else { + diag + }) + })? } else { - return Err(IrError::Lowering(format!( - "unknown union type: {type_name}" - ))); + let diag = + edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`")); + return Err(IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "not found") + } else { + diag + })); }; variants .iter() .position(|(name, _)| name == variant_name) .ok_or_else(|| { let available: Vec<&str> = variants.iter().map(|(n, _)| n.as_str()).collect(); - IrError::Lowering(format!( - "no variant named `{variant_name}` in union `{type_name}`; available variants: {}", - available.join(", "), + let diag = edge_diagnostics::Diagnostic::error(format!( + "no variant named `{variant_name}` in union `{type_name}`", )) + .with_note(format!("available variants: {}", available.join(", "))); + IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "variant not found") + } else { + diag + }) }) } @@ -49,19 +66,37 @@ impl AstToEgglog { type_name: &str, variant_name: &str, args: &[edge_ast::Expr], + span: Option<&edge_types::span::Span>, ) -> Result { - let idx = self.variant_index(type_name, variant_name)?; + let idx = self.variant_index(type_name, variant_name, span)?; // Resolve generic type names to monomorphized versions let resolved_name = if self.union_types.contains_key(type_name) { type_name.to_string() } else { - self.resolve_generic_type_name(type_name) - .ok_or_else(|| IrError::Lowering(format!("unknown union type: {type_name}")))? + self.resolve_generic_type_name(type_name).ok_or_else(|| { + let diag = edge_diagnostics::Diagnostic::error(format!( + "unknown union type: `{type_name}`", + )); + IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "not found") + } else { + diag + }) + })? }; let variants = self .union_types .get(&resolved_name) - .ok_or_else(|| IrError::Lowering(format!("unknown union type: {type_name}")))?; + .ok_or_else(|| { + let diag = edge_diagnostics::Diagnostic::error(format!( + "unknown union type: `{type_name}`", + )); + IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "not found") + } else { + diag + }) + })?; let has_data = variants.get(idx).map(|(_, d)| *d).unwrap_or(false); if !has_data || args.is_empty() { diff --git a/crates/ir/src/to_egglog/control_flow.rs b/crates/ir/src/to_egglog/control_flow.rs index e75bd43..50ba542 100644 --- a/crates/ir/src/to_egglog/control_flow.rs +++ b/crates/ir/src/to_egglog/control_flow.rs @@ -293,8 +293,22 @@ impl AstToEgglog { let item_ir = match item { edge_ast::LoopItem::Stmt(stmt) => self.lower_stmt(stmt)?, edge_ast::LoopItem::Expr(expr) => self.lower_expr(expr)?, - edge_ast::LoopItem::Break(_) | edge_ast::LoopItem::Continue(_) => { - // TODO: handle break/continue with control flow markers + edge_ast::LoopItem::Break(span) => { + self.warnings.push( + edge_diagnostics::Diagnostic::warning( + "`break` is not yet implemented and will be ignored", + ) + .with_label(span.clone(), "has no effect"), + ); + continue; + } + edge_ast::LoopItem::Continue(span) => { + self.warnings.push( + edge_diagnostics::Diagnostic::warning( + "`continue` is not yet implemented and will be ignored", + ) + .with_label(span.clone(), "has no effect"), + ); continue; } }; diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index 8d5011a..9c308d6 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -283,9 +283,25 @@ impl AstToEgglog { edge_ast::Stmt::Expr(expr) => self.lower_expr(expr), - edge_ast::Stmt::Break(_) | edge_ast::Stmt::Continue(_) => { - // Break/continue need special handling within loop context - // For now, return empty + edge_ast::Stmt::Break(span) => { + self.warnings.push( + edge_diagnostics::Diagnostic::warning( + "`break` is not yet implemented and will be ignored", + ) + .with_label(span.clone(), "has no effect"), + ); + Ok(ast_helpers::empty( + EvmType::Base(EvmBaseType::UnitT), + self.current_ctx.clone(), + )) + } + edge_ast::Stmt::Continue(span) => { + self.warnings.push( + edge_diagnostics::Diagnostic::warning( + "`continue` is not yet implemented and will be ignored", + ) + .with_label(span.clone(), "has no effect"), + ); Ok(ast_helpers::empty( EvmType::Base(EvmBaseType::UnitT), self.current_ctx.clone(), @@ -494,13 +510,13 @@ impl AstToEgglog { self.lower_field_access(obj, &field.name) } - edge_ast::Expr::Path(components, _span) => { + edge_ast::Expr::Path(components, span) => { // Check if this is a union variant path like Direction::North if components.len() == 2 { let type_name = &components[0].name; let variant_name = &components[1].name; if self.union_types.contains_key(type_name) { - return self.lower_union_instantiation_expr(type_name, variant_name, &[]); + return self.lower_union_instantiation_expr(type_name, variant_name, &[], Some(span)); } // Check for generic union types (e.g., Option::None where Option was monomorphized) if self.generic_type_templates.contains_key(type_name) { @@ -509,6 +525,7 @@ impl AstToEgglog { &mangled, variant_name, &[], + Some(span), ); } } @@ -571,8 +588,8 @@ impl AstToEgglog { self.lower_array_instantiation(elements) } - edge_ast::Expr::UnionInstantiation(type_name, variant_name, args, _span) => { - self.lower_union_instantiation_expr(&type_name.name, &variant_name.name, args) + edge_ast::Expr::UnionInstantiation(type_name, variant_name, args, span) => { + self.lower_union_instantiation_expr(&type_name.name, &variant_name.name, args, Some(span)) } edge_ast::Expr::PatternMatch(expr, pattern, _span) => { @@ -691,17 +708,16 @@ impl AstToEgglog { }; } } - span.map_or_else( - || Err(IrError::Lowering(format!("undefined variable: {name}"))), - |span| { - Err(IrError::Diagnostic( - edge_diagnostics::Diagnostic::error(format!( - "cannot find value `{name}` in this scope", - )) - .with_label(span.clone(), "not found in this scope"), - )) - }, - ) + // Always emit a Diagnostic error — use span when available + let diag = edge_diagnostics::Diagnostic::error(format!( + "cannot find value `{name}` in this scope", + )); + let diag = if let Some(span) = span { + diag.with_label(span.clone(), "not found in this scope") + } else { + diag + }; + Err(IrError::Diagnostic(diag)) } /// Lower an assignment expression. @@ -1163,9 +1179,12 @@ impl AstToEgglog { None => return Ok(None), }; - // Check if the LHS is a user-defined type + // Check if the LHS is a user-defined type (skip primitives — they use built-in ops) let lhs_type = self.infer_receiver_type(lhs); if let Some(ref type_name) = lhs_type { + if Self::is_primitive_type(type_name) { + return Ok(None); + } // Only dispatch to operator traits from std::ops. // User-defined traits named "Add" etc. do NOT get operator overloading. if !self.std_ops_traits.contains(trait_name) { diff --git a/crates/ir/src/to_egglog/pattern.rs b/crates/ir/src/to_egglog/pattern.rs index 836e916..9b5b4fb 100644 --- a/crates/ir/src/to_egglog/pattern.rs +++ b/crates/ir/src/to_egglog/pattern.rs @@ -17,7 +17,7 @@ impl AstToEgglog { pattern: &edge_ast::pattern::UnionPattern, ) -> Result { let disc_ir = self.lower_expr(expr)?; - let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name)?; + let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name, Some(&pattern.span))?; let idx_ir = ast_helpers::const_int(idx as i64, self.current_ctx.clone()); Ok(ast_helpers::eq(disc_ir, idx_ir)) } @@ -73,7 +73,7 @@ impl AstToEgglog { for arm in arms { match &arm.pattern { edge_ast::pattern::MatchPattern::Union(up) => { - let idx = self.variant_index(&up.union_name.name, &up.member_name.name)?; + let idx = self.variant_index(&up.union_name.name, &up.member_name.name, Some(&up.span))?; let bindings: Vec = up.bindings.iter().map(|b| b.name.clone()).collect(); variant_arms.push((idx, &arm.body, bindings)); @@ -176,7 +176,7 @@ impl AstToEgglog { Rc::clone(&disc_ir) }; - let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name)?; + let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name, Some(&pattern.span))?; let idx_ir = ast_helpers::const_int(idx as i64, self.current_ctx.clone()); let cond = ast_helpers::eq(disc_val, idx_ir); let inputs = diff --git a/crates/ir/src/to_egglog/storage.rs b/crates/ir/src/to_egglog/storage.rs index 9ea79e0..9fa9164 100644 --- a/crates/ir/src/to_egglog/storage.rs +++ b/crates/ir/src/to_egglog/storage.rs @@ -8,6 +8,7 @@ use crate::{ schema::{DataLocation, EvmExpr, RcExpr}, IrError, }; +use edge_diagnostics; impl AstToEgglog { /// Lower an emit statement. @@ -333,8 +334,10 @@ impl AstToEgglog { } } } - Err(IrError::Lowering(format!( - "cannot find storage field `{name}` in the current contract" - ))) + Err(IrError::Diagnostic( + edge_diagnostics::Diagnostic::error(format!( + "cannot find storage field `{name}` in the current contract", + )), + )) } } diff --git a/examples/tests/test_full_math.edge b/examples/tests/test_full_math.edge index f448c55..5232fec 100644 --- a/examples/tests/test_full_math.edge +++ b/examples/tests/test_full_math.edge @@ -15,34 +15,34 @@ fn _div_512(a: u256, b: u256, denominator: u256, prod0: u256, prod1: u256) -> (u if remainder > prod0 { borrow2 = 1; } - let p1: u256 = UnsafeSub::unsafe_sub(prod1, borrow2); - let p0: u256 = UnsafeSub::unsafe_sub(prod0, remainder); - let neg_denom: u256 = UnsafeSub::unsafe_sub(0, denominator); + let p1: u256 = prod1.unsafe_sub(borrow2); + let p0: u256 = prod0.unsafe_sub(remainder); + let neg_denom: u256 = 0.unsafe_sub(denominator); let twos: u256 = neg_denom & denominator; let d: u256 = denominator / twos; p0 = p0 / twos; - let twos_inv: u256 = UnsafeAdd::unsafe_add(UnsafeSub::unsafe_sub(0, twos) / twos, 1); - p0 = p0 | UnsafeMul::unsafe_mul(p1, twos_inv); - let inv: u256 = UnsafeMul::unsafe_mul(3, d) ^ 2; - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - inv = UnsafeMul::unsafe_mul(inv, UnsafeSub::unsafe_sub(2, UnsafeMul::unsafe_mul(d, inv))); - let result: u256 = UnsafeMul::unsafe_mul(p0, inv); + let twos_inv: u256 = (0.unsafe_sub(twos) / twos).unsafe_add(1); + p0 = p0 | p1.unsafe_mul(twos_inv); + let inv: u256 = 3.unsafe_mul(d) ^ 2; + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + inv = inv.unsafe_mul(2.unsafe_sub(d.unsafe_mul(inv))); + let result: u256 = p0.unsafe_mul(inv); return result; } fn _mul_div(a: u256, b: u256, denominator: u256) -> (u256) { let max_u256: u256 = ~0; let mm: u256 = _mulmod(a, b, max_u256); - let prod0: u256 = UnsafeMul::unsafe_mul(a, b); + let prod0: u256 = a.unsafe_mul(b); let borrow: u256 = 0; if mm < prod0 { borrow = 1; } - let prod1: u256 = UnsafeSub::unsafe_sub(UnsafeSub::unsafe_sub(mm, prod0), borrow); + let prod1: u256 = mm.unsafe_sub(prod0).unsafe_sub(borrow); let result: u256 = 0; if prod1 == 0 { if denominator == 0 { diff --git a/examples/tests/test_inlined_halt.edge b/examples/tests/test_inlined_halt.edge index a716df9..29cfd3a 100644 --- a/examples/tests/test_inlined_halt.edge +++ b/examples/tests/test_inlined_halt.edge @@ -10,14 +10,14 @@ fn _mulmod(a: u256, b: u256, n: u256) -> (u256) { fn _helper(a: u256, b: u256, c: u256, d: u256) -> (u256) { let remainder: u256 = _mulmod(a, b, c); - let result: u256 = UnsafeMul::unsafe_mul(d, remainder); + let result: u256 = d.unsafe_mul(remainder); return result; } fn _compute(a: u256, b: u256, denominator: u256) -> (u256) { let mm: u256 = _mulmod(a, b, ~0); - let prod0: u256 = UnsafeMul::unsafe_mul(a, b); - let prod1: u256 = UnsafeSub::unsafe_sub(mm, prod0); + let prod0: u256 = a.unsafe_mul(b); + let prod1: u256 = mm.unsafe_sub(prod0); let result: u256 = 0; if prod1 == 0 { if denominator == 0 { diff --git a/examples/tests/test_method_on_primitive.edge b/examples/tests/test_method_on_primitive.edge new file mode 100644 index 0000000..a0fd215 --- /dev/null +++ b/examples/tests/test_method_on_primitive.edge @@ -0,0 +1,26 @@ +// test_method_on_primitive.edge — Test trait method calls on primitive types +// +// Tests that trait methods can be called using dot syntax on primitives: +// a.unsafe_add(b) instead of UnsafeAdd::unsafe_add(a, b) + +use std::ops::UnsafeAdd; +use std::ops::UnsafeSub; +use std::ops::UnsafeMul; + +contract MethodOnPrimitive { + pub fn test_unsafe_add(a: u256, b: u256) -> (u256) { + return a.unsafe_add(b); + } + + pub fn test_unsafe_sub(a: u256, b: u256) -> (u256) { + return a.unsafe_sub(b); + } + + pub fn test_unsafe_mul(a: u256, b: u256) -> (u256) { + return a.unsafe_mul(b); + } + + pub fn test_qualified_still_works(a: u256, b: u256) -> (u256) { + return a.unsafe_add(b); + } +} diff --git a/examples/tests/test_signed_widths.edge b/examples/tests/test_signed_widths.edge index 3ef58b5..2a44867 100644 --- a/examples/tests/test_signed_widths.edge +++ b/examples/tests/test_signed_widths.edge @@ -77,17 +77,17 @@ contract TestSignedWidths { // Unsafe add: wraps without reverting pub fn i8_unsafe_add(a: i8, b: i8) -> (i8) { - return UnsafeAdd::unsafe_add(a, b); + return a.unsafe_add(b); } // Unsafe sub: wraps without reverting pub fn i8_unsafe_sub(a: i8, b: i8) -> (i8) { - return UnsafeSub::unsafe_sub(a, b); + return a.unsafe_sub(b); } // Unsafe mul: wraps without reverting pub fn i8_unsafe_mul(a: i8, b: i8) -> (i8) { - return UnsafeMul::unsafe_mul(a, b); + return a.unsafe_mul(b); } // ── Cast signed ↔ unsigned ── diff --git a/examples/tests/test_unsafe_arith.edge b/examples/tests/test_unsafe_arith.edge index ee426d5..9eea86c 100644 --- a/examples/tests/test_unsafe_arith.edge +++ b/examples/tests/test_unsafe_arith.edge @@ -10,33 +10,33 @@ use std::ops::UnsafeMul; contract TestUnsafeArith { // Basic unchecked add pub fn test_unsafe_add() -> (u256) { - return UnsafeAdd::unsafe_add(10, 32); + return 10.unsafe_add(32); } // Basic unchecked sub pub fn test_unsafe_sub() -> (u256) { - return UnsafeSub::unsafe_sub(50, 8); + return 50.unsafe_sub(8); } // Basic unchecked mul pub fn test_unsafe_mul() -> (u256) { - return UnsafeMul::unsafe_mul(6, 7); + return 6.unsafe_mul(7); } // Overflow wraps: MAX_U256 + 1 = 0 pub fn test_add_overflow() -> (u256) { let max: u256 = 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff; - return UnsafeAdd::unsafe_add(max, 1); + return max.unsafe_add(1); } // Underflow wraps: 0 - 1 = MAX_U256 pub fn test_sub_underflow() -> (u256) { - return UnsafeSub::unsafe_sub(0, 1); + return 0.unsafe_sub(1); } // Mul overflow wraps pub fn test_mul_overflow() -> (u256) { let max: u256 = 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff; - return UnsafeMul::unsafe_mul(max, 2); + return max.unsafe_mul(2); } } From ef964885c6d216421e54a3ec862cc3a9114702b1 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Wed, 11 Mar 2026 11:23:25 -0700 Subject: [PATCH 02/13] feat: generic Map trait dispatch, default struct derive_slot, auto-import globals - Fix monomorphization cache collision: Map and Map no longer share cache entries (use mangled type names instead of EvmType for cache keys) - Fix composite_base propagation: struct params inferred from type sigs during inlining now get composite_base set to their value (enables field access in trait method bodies) - Add struct param memory allocation for calldata-passed struct params in contract functions (CALLDATACOPY to allocated memory region) - Add default keccak-chained derive_slot for struct types without explicit UniqueSlot impl (Solidity nested mapping convention) - Auto-import std/globals (ops, map, option, result) without explicit use - Move std/ops.edge to std/globals/ops.edge, add globals/map.edge - Add build_type_param_subst with base name fallback for mangled generics - Add tracing at trace level for method dispatch and inline param binding - Egglog tracing now requires verbosity 5 (-vvvvv) instead of 4 - Add Map and Map e2e tests (21 map_std tests total) Co-Authored-By: Claude Opus 4.6 --- bin/edgec/src/main.rs | 4 +- crates/driver/src/compiler.rs | 193 ++++++++- crates/e2e/.gas-snapshot | 12 +- crates/e2e/tests/main.rs | 3 + crates/e2e/tests/suites/map_std_exec.rs | 518 ++++++++++++++++++++++++ crates/evm-tests/src/lib.rs | 7 +- crates/ir/src/lib.rs | 23 +- crates/ir/src/to_egglog/calls.rs | 493 +++++++++++++++++++++- crates/ir/src/to_egglog/control_flow.rs | 2 + crates/ir/src/to_egglog/expr.rs | 82 +++- crates/ir/src/to_egglog/function.rs | 129 +++++- crates/ir/src/to_egglog/mod.rs | 109 ++++- crates/ir/src/to_egglog/pattern.rs | 2 + crates/ir/src/to_egglog/storage.rs | 211 +--------- crates/ir/src/to_egglog/types.rs | 263 +++++++++++- crates/parser/src/parser.rs | 116 +++++- examples/erc20.edge | 4 +- examples/tests/stress_loops.edge | 2 +- examples/tests/stress_storage.edge | 4 +- examples/tests/test_erc20.edge | 4 +- examples/tests/test_map_std.edge | 120 ++++++ examples/tests/test_mappings.edge | 6 +- examples/tests/test_storage_heavy.edge | 4 +- examples/tokens/erc20.edge | 4 +- examples/tokens/erc721.edge | 2 +- std/access/roles.edge | 6 +- std/finance/amm.edge | 4 +- std/finance/multisig.edge | 14 +- std/finance/staking.edge | 8 +- std/globals/map.edge | 87 ++++ std/{ => globals}/ops.edge | 21 + std/globals/option.edge | 1 + std/globals/result.edge | 1 + std/patterns/factory.edge | 4 +- std/patterns/timelock.edge | 6 +- std/tokens/erc1155.edge | 4 +- std/tokens/erc20.edge | 4 +- std/tokens/weth.edge | 6 +- 38 files changed, 2168 insertions(+), 315 deletions(-) create mode 100644 crates/e2e/tests/suites/map_std_exec.rs create mode 100644 examples/tests/test_map_std.edge create mode 100644 std/globals/map.edge rename std/{ => globals}/ops.edge (67%) create mode 100644 std/globals/option.edge create mode 100644 std/globals/result.edge diff --git a/bin/edgec/src/main.rs b/bin/edgec/src/main.rs index 68f129f..86c6899 100644 --- a/bin/edgec/src/main.rs +++ b/bin/edgec/src/main.rs @@ -19,8 +19,8 @@ fn main() -> Result<()> { if let Some(level) = level { use tracing_subscriber::EnvFilter; - // Egglog is extremely noisy, suppress it unless TRACE level - let egglog_level = if level >= Level::TRACE { + // Egglog is extremely noisy — only enable at verbosity 5+ (-vvvvv) + let egglog_level = if cli.verbose >= 5 { "trace" } else { "warn" diff --git a/crates/driver/src/compiler.rs b/crates/driver/src/compiler.rs index bf9f5cd..332ec86 100644 --- a/crates/driver/src/compiler.rs +++ b/crates/driver/src/compiler.rs @@ -110,6 +110,15 @@ impl Compiler { .render_to_string(&path, &self.session.source) } + /// Parse and resolve imports, returning the preprocessed AST. + /// Useful for tests that need to control IR/codegen optimization levels separately. + pub fn parse_and_resolve(&mut self) -> Result { + let _tokens = self.lex()?; + let mut ast = self.parse()?; + self.resolve_imports(&mut ast)?; + Ok(ast) + } + /// Run the compilation pipeline pub fn compile(&mut self) -> Result { tracing::info!("Compiling {:?}", self.session.config.input_file); @@ -357,7 +366,7 @@ impl Compiler { fn parse(&mut self) -> Result { let mut parser = Parser::new(&self.session.source).map_err(|e| { self.session - .emit_error(Diagnostic::error(format!("parse error: {e}"))); + .emit_error(Self::parse_error_to_diagnostic(&e)); CompileError::ParseErrors })?; @@ -365,13 +374,44 @@ impl Compiler { Ok(program) => Ok(program), Err(e) => { self.session - .emit_error(Diagnostic::error(format!("parse error: {e}"))); + .emit_error(Self::parse_error_to_diagnostic(&e)); self.session.report_diagnostics(); Err(CompileError::ParseErrors) } } } + /// Convert a `ParseError` into a `Diagnostic` with proper span labels. + fn parse_error_to_diagnostic(e: &edge_parser::ParseError) -> Diagnostic { + use edge_parser::ParseError; + match e { + ParseError::UnexpectedToken { + found, + expected, + span, + } => Diagnostic::error(format!("expected {expected}, found {found}")) + .with_label(span.clone(), format!("expected {expected}")), + ParseError::InvalidTypeSig { message, span } => { + Diagnostic::error(format!("invalid type: {message}")) + .with_label(span.clone(), message.clone()) + } + ParseError::InvalidExpr { message, span } => { + Diagnostic::error(format!("invalid expression: {message}")) + .with_label(span.clone(), message.clone()) + } + ParseError::InvalidStmt { message, span } => { + Diagnostic::error(format!("invalid statement: {message}")) + .with_label(span.clone(), message.clone()) + } + ParseError::InvalidPattern { message, span } => { + Diagnostic::error(format!("invalid pattern: {message}")) + .with_label(span.clone(), message.clone()) + } + ParseError::UnexpectedEof => Diagnostic::error("unexpected end of file"), + ParseError::LexerError(msg) => Diagnostic::error(format!("lexer error: {msg}")), + } + } + /// Resolve `use std::...` imports by locating source for each imported module — /// first from an explicit filesystem override, then from the stdlib embedded in /// the binary — and merging their top-level items into the program AST. @@ -382,6 +422,10 @@ impl Compiler { /// 2. Embedded sources baked into the binary at compile time via `build.rs` /// (works on any machine with no extra setup). fn resolve_imports(&mut self, ast: &mut Program) -> Result<(), CompileError> { + // Auto-import globals: ops, map, option, result. + // These are always available without explicit `use` statements. + self.auto_import_globals(ast)?; + // Collect std imports from the AST. // Build a full path-segments list by combining intermediate `segments` with the final // `path` identifier. For example: @@ -516,6 +560,104 @@ impl Compiler { Ok(()) } + /// Auto-import all `std/globals/*.edge` files — these are always available + /// without explicit `use` statements. Prepends their declarations (types, + /// traits, impls, functions) to the AST. + fn auto_import_globals(&mut self, ast: &mut Program) -> Result<(), CompileError> { + // Order matters: ops first (trait defs), then map (uses ops traits). + let global_keys = ["globals/ops", "globals/option", "globals/result", "globals/map"]; + let mut new_stmts: Vec = Vec::new(); + + // Canonicalize the explicit override path once (if provided). + let explicit_std_path: Option = + self.session.config.std_path.as_ref().and_then(|p| { + let canon = std::fs::canonicalize(p).unwrap_or_else(|_| p.clone()); + if canon.is_dir() { + Some(canon) + } else { + None + } + }); + + for key in &global_keys { + let segments: Vec = key.split('/').map(String::from).collect(); + let source = if let Some(ref std_path) = explicit_std_path { + Self::try_read_from_fs(std_path, &segments) + .or_else(|| Self::try_read_from_embedded(&segments).map(String::from)) + } else { + Self::try_read_from_embedded(&segments).map(String::from) + }; + + let Some(source) = source else { + // Globals not available (e.g., downstream consumer without std/). + continue; + }; + + let mut parser = Parser::new(&source).map_err(|e| { + self.session.emit_error(Diagnostic::error(format!( + "parse error in globals `{key}`: {e}" + ))); + CompileError::ParseErrors + })?; + + let program = parser.parse().map_err(|e| { + self.session.emit_error(Diagnostic::error(format!( + "parse error in globals `{key}`: {e}" + ))); + self.session.report_diagnostics(); + CompileError::ParseErrors + })?; + + // Include everything except ModuleImport/ModuleDecl (internal imports). + for stmt in program.stmts { + if !matches!( + stmt, + edge_ast::Stmt::ModuleImport(_) | edge_ast::Stmt::ModuleDecl(_) + ) { + new_stmts.push(stmt); + } + } + } + + if !new_stmts.is_empty() { + // Collect names defined in the user's file so globals don't shadow them. + // Like Rust's prelude: local definitions take priority over auto-imports. + let user_defined: std::collections::HashSet = ast + .stmts + .iter() + .filter_map(|stmt| match stmt { + edge_ast::Stmt::TypeAssign(td, _, _) => Some(td.name.name.clone()), + edge_ast::Stmt::TraitDecl(tr, _) => Some(tr.name.name.clone()), + edge_ast::Stmt::FnAssign(fd, _) | edge_ast::Stmt::ComptimeFn(fd, _) => { + Some(fd.name.name.clone()) + } + _ => None, + }) + .collect(); + + // Filter out global statements whose name collides with user definitions. + new_stmts.retain(|stmt| { + let name = match stmt { + edge_ast::Stmt::TypeAssign(td, _, _) => Some(&td.name.name), + edge_ast::Stmt::TraitDecl(tr, _) => Some(&tr.name.name), + edge_ast::Stmt::FnAssign(fd, _) | edge_ast::Stmt::ComptimeFn(fd, _) => { + Some(&fd.name.name) + } + _ => None, + }; + if let Some(n) = name { + !user_defined.contains(n) + } else { + true + } + }); + + new_stmts.append(&mut ast.stmts); + ast.stmts = new_stmts; + } + Ok(()) + } + /// Resolve a set of import path segments to a `(module_key, source)` pair. /// /// Tries, in order: @@ -588,6 +730,53 @@ impl Compiler { } } + // Before giving up, check if this import points to a globals module + // (e.g., `use std::ops::Add` → file "ops" not found, but "globals/ops" exists). + // If so, the content is already auto-imported — just return it. + // + // Try two forms: + // 1. Full path: ["globals"] + segments (e.g., "globals/ops/Add") + // 2. Symbol-level: ["globals"] + segments[..n-1], symbol = segments[n-1] + // (e.g., "globals/ops" with symbol "Add") + { + // Form 1: full path with globals prefix + let fallback_segments: Vec = std::iter::once("globals".to_string()) + .chain(segments.iter().cloned()) + .collect(); + + if let Some(ref std_path) = explicit_std_path { + if let Some(source) = Self::try_read_from_fs(std_path, &fallback_segments) { + let key = fallback_segments.join("/"); + return Ok((key, source, None)); + } + } + + if let Some(source) = Self::try_read_from_embedded(&fallback_segments) { + let key = fallback_segments.join("/"); + return Ok((key, source.to_string(), None)); + } + + // Form 2: strip last segment as symbol name within globals file + if segments.len() > 1 { + let symbol = segments.last().unwrap().clone(); + let file_fallback: Vec = std::iter::once("globals".to_string()) + .chain(segments[..segments.len() - 1].iter().cloned()) + .collect(); + + if let Some(ref std_path) = explicit_std_path { + if let Some(source) = Self::try_read_from_fs(std_path, &file_fallback) { + let key = file_fallback.join("/"); + return Ok((key, source, Some(symbol))); + } + } + + if let Some(source) = Self::try_read_from_embedded(&file_fallback) { + let key = file_fallback.join("/"); + return Ok((key, source.to_string(), Some(symbol))); + } + } + } + // Nothing found — emit a helpful error. let module_path = segments.join("::"); self.session.emit_error(Diagnostic::error(format!( diff --git a/crates/e2e/.gas-snapshot b/crates/e2e/.gas-snapshot index 1ebf971..6010a33 100644 --- a/crates/e2e/.gas-snapshot +++ b/crates/e2e/.gas-snapshot @@ -68,14 +68,14 @@ test_loop_storage::get_total(), 2255, 2270, 2270, 2270 test_loop_storage::read_write_loop(uint256), 44981, 44996, 44996, 44996 test_loop_storage::reset(), 4757, 4775, 4775, 4775 test_mappings::counter_get(address), 2287, 2302, 2302, 2302 -test_mappings::counter_inc(address), 22446, 22453, 22453, 22453 -test_mappings::map_add(address,uint256), 5346, 5350, 5350, 5350 +test_mappings::counter_inc(address), 22449, 22456, 22456, 22456 +test_mappings::map_add(address,uint256), 5349, 5353, 5353, 5353 test_mappings::map_get(address), 2235, 2250, 2250, 2250 -test_mappings::map_set(address,uint256), 22332, 22345, 22345, 22345 +test_mappings::map_set(address,uint256), 22335, 22348, 22348, 22348 test_mappings::nested_get(address,address), 2449, 2455, 2455, 2455 -test_mappings::nested_set(address,address,uint256), 22444, 22448, 22448, 22448 -test_mappings::nested_two_spenders(address,address,address,uint256,uint256), 44693, 44679, 44679, 44679 -test_mappings::two_keys(address,address,uint256,uint256), 44473, 44474, 44474, 44474 +test_mappings::nested_set(address,address,uint256), 22447, 22451, 22451, 22451 +test_mappings::nested_two_spenders(address,address,address,uint256,uint256), 44699, 44685, 44685, 44685 +test_mappings::two_keys(address,address,uint256,uint256), 44479, 44480, 44480, 44480 test_merkle::hash_two(bytes32,bytes32), 516, 516, 516, 516 test_merkle::verify(bytes32,bytes32,bytes32[4],uint256), 1530, 1530, 1530, 1530 test_packed_storage::store_and_read_b(), 22327, 22289, 22273, 22273 diff --git a/crates/e2e/tests/main.rs b/crates/e2e/tests/main.rs index aae1e77..d440441 100644 --- a/crates/e2e/tests/main.rs +++ b/crates/e2e/tests/main.rs @@ -54,6 +54,9 @@ mod utils_exec; #[path = "suites/warnings.rs"] mod warnings; +#[path = "suites/map_std_exec.rs"] +mod map_std_exec; + #[path = "suites/int_widths_exec.rs"] mod int_widths_exec; #[path = "suites/large_int_literals.rs"] diff --git a/crates/e2e/tests/suites/map_std_exec.rs b/crates/e2e/tests/suites/map_std_exec.rs new file mode 100644 index 0000000..06ea7b4 --- /dev/null +++ b/crates/e2e/tests/suites/map_std_exec.rs @@ -0,0 +1,518 @@ +#![allow(missing_docs)] + +//! Execution-level tests for the std Map type. +//! +//! Tests compile test_map_std.edge, deploy on in-memory revm, and verify +//! basic Map get/set, index operators, direct custom storage, and +//! Map with user-defined Sload/Sstore impls. + +use crate::helpers::*; + +const CONTRACT: &str = "examples/tests/test_map_std.edge"; + +/// Pack two u128 values into a 32-byte big-endian representation: (a << 128) | b +fn pack_u128_pair(a: u128, b: u128) -> [u8; 32] { + let mut out = [0u8; 32]; + out[0..16].copy_from_slice(&a.to_be_bytes()); + out[16..32].copy_from_slice(&b.to_be_bytes()); + out +} + +// ============================================================================= +// Direct custom storage: set_custom(u128,u128) / get_custom() +// ============================================================================= + +#[test] +fn test_custom_storage_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + // get_custom() returns CustomSStore — struct with 3 fields, so 3 words + // But actually the contract returns `custom` which is a storage field. + // CustomSStore has 3 fields: ignored(u256), packed_a(u128), packed_b(u128) + // When returned, it should be the packed u256 from storage. + let r = evm.call(calldata(selector("get_custom()"), &[])); + assert!(r.success, "get_custom() reverted"); + assert_eq!(decode_u256(&r.output), 0, "custom should start at 0"); +} + +#[test] +fn test_custom_storage_set_then_get() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // set_custom(a=5, b=10) — packs as (5 << 128) | 10 + let r = evm.call(calldata( + selector("set_custom(uint128,uint128)"), + &[encode_u256(5), encode_u256(10)], + )); + assert!(r.success, "set_custom(5, 10) reverted"); + + let r = evm.call(calldata(selector("get_custom()"), &[])); + assert!(r.success, "get_custom() reverted"); + // The stored value is the packed combo: (packed_a << 128) | packed_b + // But the return type is CustomSStore, which goes through Sload. + // CustomSStore::sload reads packed_combo, then returns struct fields. + // The return will be the raw storage value or unpacked fields depending + // on how the compiler handles struct returns. + // For now just check it doesn't revert and returns non-zero. + assert!(r.output.len() >= 32, "should return at least 32 bytes"); +} + +// ============================================================================= +// Basic Map — get/set +// ============================================================================= + +#[test] +fn test_basic_map_get_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(42)], + )); + assert!(r.success, "get_basic(42) reverted"); + assert_eq!(decode_u256(&r.output), 0, "unset key should return 0"); +} + +#[test] +fn test_basic_map_set_then_get() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(1), encode_u256(999)], + )); + assert!(r.success, "set_basic(1, 999) reverted"); + + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(1)], + )); + assert!(r.success, "get_basic(1) reverted"); + assert_eq!(decode_u256(&r.output), 999, "get_basic(1) should be 999"); +} + +#[test] +fn test_basic_map_different_keys_independent() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(10), encode_u256(100)], + )); + assert!(r.success, "set_basic(10, 100) reverted"); + + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(20), encode_u256(200)], + )); + assert!(r.success, "set_basic(20, 200) reverted"); + + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(10)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 100); + + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(20)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 200); +} + +#[test] +fn test_basic_map_overwrite() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(5), encode_u256(111)], + )); + assert!(r.success); + + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(5), encode_u256(222)], + )); + assert!(r.success); + + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(5)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 222, "overwritten value should be 222"); +} + +// ============================================================================= +// Index operator — get_basic_by_indexable / set_basic_by_indexable +// ============================================================================= + +#[test] +fn test_basic_map_index_get() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set via .set() + let r = evm.call(calldata( + selector("set_basic(uint256,uint256)"), + &[encode_u256(7), encode_u256(777)], + )); + assert!(r.success, "set_basic reverted"); + + // Read via index operator + let r = evm.call(calldata( + selector("get_basic_by_indexable(uint256)"), + &[encode_u256(7)], + )); + assert!(r.success, "get_basic_by_indexable reverted"); + assert_eq!(decode_u256(&r.output), 777); +} + +#[test] +fn test_basic_map_index_set() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set via index operator + let r = evm.call(calldata( + selector("set_basic_by_indexable(uint256,uint256)"), + &[encode_u256(3), encode_u256(333)], + )); + assert!(r.success, "set_basic_by_indexable reverted"); + + // Read via .get() + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(3)], + )); + assert!(r.success, "get_basic reverted"); + assert_eq!(decode_u256(&r.output), 333); +} + +#[test] +fn test_basic_map_index_interop() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set via index, read via index + let r = evm.call(calldata( + selector("set_basic_by_indexable(uint256,uint256)"), + &[encode_u256(99), encode_u256(9999)], + )); + assert!(r.success); + + let r = evm.call(calldata( + selector("get_basic_by_indexable(uint256)"), + &[encode_u256(99)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 9999); + + // Also readable via .get() + let r = evm.call(calldata( + selector("get_basic(uint256)"), + &[encode_u256(99)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 9999, ".get and index should read same slot"); +} + +// ============================================================================= +// Custom Sload/Sstore Map — Map +// get_custom(uint256), get_custom_by_indexable(uint256), +// set_custom(uint256, CustomSStore), set_custom_by_indexable(uint256, CustomSStore) +// ============================================================================= + +// Note: CustomSStore.sstore packs (packed_a << 128) | packed_b into a single u256. +// CustomSStore.sload reads that u256 and unpacks it back. +// The get_custom(uint256) return type is (u256), so it returns the raw packed value. + +#[test] +fn test_custom_map_get_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + let r = evm.call(calldata( + selector("get_custom(uint256)"), + &[encode_u256(1)], + )); + assert!(r.success, "get_custom(1) reverted"); + assert_eq!(decode_u256(&r.output), 0, "unset custom map key should be 0"); +} + +#[test] +fn test_custom_map_get_by_indexable_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + let r = evm.call(calldata( + selector("get_custom_by_indexable(uint256)"), + &[encode_u256(1)], + )); + assert!(r.success, "get_custom_by_indexable(1) reverted"); + assert_eq!(decode_u256(&r.output), 0); +} + +// Note: set_custom(uint256, CustomSStore) takes a struct with 3 fields as the +// second arg. ABI-encoding: 3 words (ignored, packed_a, packed_b) = 4 words total +// with the key. But the ABI signature for selector hashing depends on how Edge +// encodes struct params. We'll try the natural encoding. +// CustomSStore = { ignored: u256, packed_a: u128, packed_b: u128 } +// ABI sig might be: set_custom(uint256,uint256,uint128,uint128) or +// set_custom(uint256,(uint256,uint128,uint128)) + +// For now, test the functions that take simple u256 args (get_custom, get_custom_by_indexable) +// and verify they work after setting values via the basic u256 map functions. + +// ============================================================================= +// Double custom: Map +// CustomHash key uses user-defined UniqueSlot::derive_slot +// CustomSStore value uses user-defined Sload/Sstore +// ============================================================================= + +#[test] +fn test_double_custom_get_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + // get_double_custom(a=1, b=2) — key is CustomHash{a:1, b:2} + let r = evm.call(calldata( + selector("get_double_custom(uint128,uint128)"), + &[encode_u256(1), encode_u256(2)], + )); + assert!(r.success, "get_double_custom(1,2) reverted"); + assert_eq!(decode_u256(&r.output), 0, "unset double custom key should return 0"); +} + +#[test] +fn test_double_custom_set_then_get() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // set_double_custom(a=1, b=2, val_a=100, val_b=200) + let r = evm.call(calldata( + selector("set_double_custom(uint128,uint128,uint128,uint128)"), + &[encode_u256(1), encode_u256(2), encode_u256(100), encode_u256(200)], + )); + assert!(r.success, "set_double_custom reverted"); + + // get_double_custom(a=1, b=2) — should return packed (100 << 128) | 200 + let r = evm.call(calldata( + selector("get_double_custom(uint128,uint128)"), + &[encode_u256(1), encode_u256(2)], + )); + assert!(r.success, "get_double_custom reverted"); + assert!(r.output.len() >= 32); + // Packed as (val_a << 128) | val_b in a u256 + // val_a=100 in bytes 0..16, val_b=200 in bytes 16..32 + let packed = &r.output[0..32]; + assert!(packed.iter().any(|&b| b != 0), "stored value should be non-zero"); +} + +#[test] +fn test_double_custom_different_keys_independent() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set key (1, 2) → val (10, 20) + let r = evm.call(calldata( + selector("set_double_custom(uint128,uint128,uint128,uint128)"), + &[encode_u256(1), encode_u256(2), encode_u256(10), encode_u256(20)], + )); + assert!(r.success); + + // Set key (3, 4) → val (30, 40) + let r = evm.call(calldata( + selector("set_double_custom(uint128,uint128,uint128,uint128)"), + &[encode_u256(3), encode_u256(4), encode_u256(30), encode_u256(40)], + )); + assert!(r.success); + + // Read key (1, 2) — should get val (10, 20) packed + let r = evm.call(calldata( + selector("get_double_custom(uint128,uint128)"), + &[encode_u256(1), encode_u256(2)], + )); + assert!(r.success); + // Expected packed value: (10 << 128) | 20 + // In big-endian 32 bytes: bytes[0..16] = 10, bytes[16..32] = 20 + let expected_1_2 = pack_u128_pair(10, 20); + assert_eq!(&r.output[0..32], &expected_1_2[..], "key (1,2) should have val (10,20)"); + + // Read key (3, 4) — should get val (30, 40) packed + let r = evm.call(calldata( + selector("get_double_custom(uint128,uint128)"), + &[encode_u256(3), encode_u256(4)], + )); + assert!(r.success); + let expected_3_4 = pack_u128_pair(30, 40); + assert_eq!(&r.output[0..32], &expected_3_4[..], "key (3,4) should have val (30,40)"); +} + +#[test] +fn test_double_custom_overwrite() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set key (5, 6) → val (50, 60) + let r = evm.call(calldata( + selector("set_double_custom(uint128,uint128,uint128,uint128)"), + &[encode_u256(5), encode_u256(6), encode_u256(50), encode_u256(60)], + )); + assert!(r.success); + + // Overwrite key (5, 6) → val (55, 66) + let r = evm.call(calldata( + selector("set_double_custom(uint128,uint128,uint128,uint128)"), + &[encode_u256(5), encode_u256(6), encode_u256(55), encode_u256(66)], + )); + assert!(r.success); + + // Read key (5, 6) + let r = evm.call(calldata( + selector("get_double_custom(uint128,uint128)"), + &[encode_u256(5), encode_u256(6)], + )); + assert!(r.success); + let expected = pack_u128_pair(55, 66); + assert_eq!(&r.output[0..32], &expected[..], "overwritten value should be (55,66)"); +} + +// ============================================================================= +// Default derive_slot: Map +// DefaultKey has no UniqueSlot impl — compiler provides keccak-chained default. +// Slot = keccak256(y . keccak256(x . base_slot)) +// ============================================================================= + +#[test] +fn test_default_key_get_initially_zero() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(1), encode_u256(2)], + )); + assert!(r.success, "get_default_key(1,2) reverted"); + assert_eq!(decode_u256(&r.output), 0, "unset key should return 0"); +} + +#[test] +fn test_default_key_set_then_get() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(10), encode_u256(20), encode_u256(999)], + )); + assert!(r.success, "set_default_key reverted"); + + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(10), encode_u256(20)], + )); + assert!(r.success, "get_default_key reverted"); + assert_eq!(decode_u256(&r.output), 999, "should read back 999"); +} + +#[test] +fn test_default_key_different_keys_independent() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set (1, 2) → 100 + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(1), encode_u256(2), encode_u256(100)], + )); + assert!(r.success); + + // Set (3, 4) → 200 + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(3), encode_u256(4), encode_u256(200)], + )); + assert!(r.success); + + // Read (1, 2) — should be 100 + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(1), encode_u256(2)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 100); + + // Read (3, 4) — should be 200 + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(3), encode_u256(4)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 200); +} + +#[test] +fn test_default_key_field_order_matters() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + // Set (1, 2) → 111 + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(1), encode_u256(2), encode_u256(111)], + )); + assert!(r.success); + + // Read (2, 1) — should be 0, NOT 111 (field order matters in keccak chain) + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(2), encode_u256(1)], + )); + assert!(r.success); + assert_eq!( + decode_u256(&r.output), + 0, + "swapped fields should map to different slot" + ); +} + +#[test] +fn test_default_key_overwrite() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(5), encode_u256(6), encode_u256(50)], + )); + assert!(r.success); + + let r = evm.call(calldata( + selector("set_default_key(uint256,uint256,uint256)"), + &[encode_u256(5), encode_u256(6), encode_u256(60)], + )); + assert!(r.success); + + let r = evm.call(calldata( + selector("get_default_key(uint256,uint256)"), + &[encode_u256(5), encode_u256(6)], + )); + assert!(r.success); + assert_eq!(decode_u256(&r.output), 60, "overwritten value should be 60"); +} + +// ============================================================================= +// Unknown selector +// ============================================================================= + +#[test] +fn test_map_std_unknown_selector_reverts() { + let bc = compile_contract(CONTRACT); + let mut evm = EvmHandle::new(bc); + let r = evm.call(vec![0xde, 0xad, 0xbe, 0xef]); + assert!(!r.success, "unknown selector should revert"); +} diff --git a/crates/evm-tests/src/lib.rs b/crates/evm-tests/src/lib.rs index 508a311..28436c1 100644 --- a/crates/evm-tests/src/lib.rs +++ b/crates/evm-tests/src/lib.rs @@ -289,9 +289,10 @@ pub fn compile_edge_split( bytecode_opt_level: u8, optimize_for: edge_ir::OptimizeFor, ) -> Vec { - let source = std::fs::read_to_string(path).expect("failed to read source"); - let mut parser = edge_parser::Parser::new(&source).expect("failed to create parser"); - let ast = parser.parse().expect("parse failed"); + let mut config = CompilerConfig::new(PathBuf::from(path)); + config.emit = EmitKind::Bytecode; + let mut compiler = Compiler::new(config).expect("failed to create compiler"); + let ast = compiler.parse_and_resolve().expect("parse failed"); let ir_program = edge_ir::lower_and_optimize(&ast, ir_opt_level, optimize_for) .expect("IR optimization failed"); edge_codegen::compile(&ir_program, bytecode_opt_level, optimize_for).expect("codegen failed") diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index ae7a202..195e78f 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -551,7 +551,28 @@ mod tests { fn test_egglog_roundtrip_erc20() { let source = std::fs::read_to_string("../../examples/erc20.edge").unwrap(); let mut parser = edge_parser::Parser::new(&source).unwrap(); - let ast = parser.parse().unwrap(); + let mut ast = parser.parse().unwrap(); + + // Import globals (ops, map, etc.) the same way the driver does + let global_files = [ + "globals/ops", + "globals/option", + "globals/result", + "globals/map", + ]; + for key in &global_files { + let path = format!("../../std/{}.edge", key); + if let Ok(src) = std::fs::read_to_string(&path) { + if let Ok(mut p) = edge_parser::Parser::new(&src) { + if let Ok(globals_ast) = p.parse() { + // Prepend globals statements + let mut new_stmts = globals_ast.stmts; + new_stmts.append(&mut ast.stmts); + ast.stmts = new_stmts; + } + } + } + } let mut lowering = to_egglog::AstToEgglog::new(); let ir_program = lowering.lower_program(&ast).unwrap(); diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index d232137..16cf81d 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -1,5 +1,8 @@ //! Function call lowering: call resolution, inlining, builtin calls. +use std::collections::HashMap; +use std::rc::Rc; + use super::{AstToEgglog, FreeFnInfo, Scope, VarBinding}; use crate::{ ast_helpers, @@ -60,13 +63,18 @@ impl AstToEgglog { // Check for qualified trait/type call: Path(["Type", "method"]) if let edge_ast::Expr::Path(components, _) = callee { if components.len() == 2 { - let type_or_trait = &components[0].name; + // Resolve type parameter substitutions (e.g., V → u256 inside Map methods) + let resolved_type = self.type_param_subst + .get(&components[0].name) + .cloned() + .unwrap_or_else(|| components[0].name.clone()); + let type_or_trait = &resolved_type; let method_name = &components[1].name; let method_span = &components[1].span; // Check inherent methods: Type::method(receiver, args...) - if self.inherent_methods.contains_key(type_or_trait) { + if self.find_inherent_method(type_or_trait, method_name).is_some() { return self.lower_qualified_method_call( type_or_trait, method_name, @@ -86,6 +94,39 @@ impl AstToEgglog { method_span, ); } + + // Check primitive type qualified calls: u256::sload(slot), etc. + // This handles resolved type parameters like V::sload() where V = u256. + if Self::is_primitive_type(type_or_trait) { + return self.lower_qualified_trait_call( + type_or_trait, + method_name, + args, + method_span, + ); + } + + // Check trait impls for non-primitive types: Map::sload(slot), etc. + // Directly look up and inline the method from the type's trait impls. + if let Some((fn_decl, body)) = self.find_trait_method_for_type(type_or_trait, method_name) { + let params: Vec<(String, edge_ast::ty::TypeSig)> = fn_decl + .params + .iter() + .map(|(id, ty)| (id.name.clone(), ty.clone())) + .collect(); + return self.inline_function_call(¶ms, &body, args); + } + // Also check inherent methods on the type + if let Some(method) = self.find_inherent_method(type_or_trait, method_name) { + let fn_decl = method.fn_decl.clone(); + let body = method.body; + let params: Vec<(String, edge_ast::ty::TypeSig)> = fn_decl + .params + .iter() + .map(|(id, ty)| (id.name.clone(), ty.clone())) + .collect(); + return self.inline_function_call(¶ms, &body, args); + } } } @@ -171,8 +212,17 @@ impl AstToEgglog { ) -> Result { // Determine receiver type from scope bindings let receiver_type = self.infer_receiver_type(receiver); + let receiver_type_args = self.infer_receiver_type_args(receiver); + tracing::trace!( + "lower_method_call: .{}(), receiver_type={:?}", + method_name, + receiver_type + ); if let Some(ref type_name) = receiver_type { + // Build type param substitution map for generic types + let type_param_subst = self.build_type_param_subst(type_name, &receiver_type_args); + // Check inherent methods first if let Some(method) = self.find_inherent_method(type_name, method_name) { let fn_decl = method.fn_decl.clone(); @@ -185,7 +235,11 @@ impl AstToEgglog { .iter() .map(|(id, ty)| (id.name.clone(), ty.clone())) .collect(); - return self.inline_function_call(¶ms, &body, &all_args); + // Set type param substitutions for generic method bodies + let old_subst = std::mem::replace(&mut self.type_param_subst, type_param_subst.clone()); + let result = self.inline_function_call(¶ms, &body, &all_args); + self.type_param_subst = old_subst; + return result; } // Check trait impls @@ -197,7 +251,10 @@ impl AstToEgglog { .iter() .map(|(id, ty)| (id.name.clone(), ty.clone())) .collect(); - return self.inline_function_call(¶ms, &body, &all_args); + let old_subst = std::mem::replace(&mut self.type_param_subst, type_param_subst); + let result = self.inline_function_call(¶ms, &body, &all_args); + self.type_param_subst = old_subst; + return result; } // Check compiler-provided trait methods for primitive types @@ -215,6 +272,41 @@ impl AstToEgglog { let rhs = self.lower_expr(&args[0])?; return Ok(ast_helpers::bop(op, lhs, rhs)); } + + // Check compiler-provided stateful methods (derive_slot, sload, sstore) + { + let recv_ir = self.lower_expr(receiver)?; + let args_ir: Vec = args + .iter() + .map(|a| self.lower_expr(a)) + .collect::>()?; + if let Some(result) = + self.compiler_provided_stateful_method(method_name, Some(recv_ir), &args_ir) + { + return Ok(result); + } + } + } + + // Default derive_slot for struct types without explicit UniqueSlot impl. + // Chains keccak256 over each field like Solidity nested mappings: + // slot = keccak256(field_0 . base_slot) + // slot = keccak256(field_1 . slot) + // ... + if method_name == "derive_slot" + && self.std_ops_traits.contains("UniqueSlot") + && args.len() == 1 + { + if let Some(struct_info) = self.struct_types.get(type_name).cloned() { + let recv_ir = self.lower_expr(receiver)?; + let base_slot = self.lower_expr(&args[0])?; + let result = self.default_struct_derive_slot( + &recv_ir, + &base_slot, + &struct_info.fields, + ); + return Ok(result); + } } } @@ -257,6 +349,20 @@ impl AstToEgglog { let rhs = self.lower_expr(&args[0])?; return Ok(ast_helpers::bop(op, lhs, rhs)); } + + // Also check stateful methods for unknown receiver + { + let recv_ir = self.lower_expr(receiver)?; + let args_ir: Vec = args + .iter() + .map(|a| self.lower_expr(a)) + .collect::>()?; + if let Some(result) = + self.compiler_provided_stateful_method(method_name, Some(recv_ir), &args_ir) + { + return Ok(result); + } + } } // Fallback: treat as FunctionCall(FieldAccess(...), args) — lower normally @@ -338,6 +444,33 @@ impl AstToEgglog { let rhs = self.lower_expr(&args[1])?; return Ok(ast_helpers::bop(op, lhs, rhs)); } + + // Compiler-provided stateful methods (sload, sstore, derive_slot) + // For qualified calls: Sload::sload(slot) has no receiver (first arg is slot) + // Sstore::sstore(value, slot) has receiver as first arg + { + let args_ir: Vec = args + .iter() + .map(|a| self.lower_expr(a)) + .collect::>()?; + // For static methods like sload: no receiver, all args + if let Some(result) = + self.compiler_provided_stateful_method(method_name, None, &args_ir) + { + return Ok(result); + } + // For instance methods like sstore/derive_slot: first arg is receiver + if args_ir.len() >= 2 { + let recv = args_ir[0].clone(); + if let Some(result) = self.compiler_provided_stateful_method( + method_name, + Some(recv), + &args_ir[1..], + ) { + return Ok(result); + } + } + } } } @@ -496,7 +629,8 @@ impl AstToEgglog { return Some(ct.clone()); } // Fall back to primitive type name from EvmType - return Self::evm_type_to_name(&binding._ty); + let result = Self::evm_type_to_name(&binding._ty); + return result; } } None @@ -510,10 +644,108 @@ impl AstToEgglog { edge_ast::Lit::Int(_, None, _) => Some("u256".to_string()), _ => None, }, + // FieldAccess on self: `self.field` — look up the field binding + edge_ast::Expr::FieldAccess(obj, field, _) => { + if let edge_ast::Expr::Ident(ident) = obj.as_ref() { + if ident.name == "self" { + // Look up the field in scope + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(&field.name) { + if let Some(ref ct) = binding.composite_type { + return Some(ct.clone()); + } + return Self::evm_type_to_name(&binding._ty); + } + } + } + } + None + } + // ArrayIndex: base[index] — if base is a Map, the result type is the value type (V) + edge_ast::Expr::ArrayIndex(base, _, _, _) => { + let base_type = self.infer_receiver_type(base); + let base_args = self.infer_receiver_type_args(base); + if let Some(ref bt) = base_type { + if bt.starts_with("Map") && base_args.len() == 2 { + // V is the second type arg — use mangled name + return Some(Self::type_sig_mangle(&base_args[1])); + } + } + None + } _ => None, } } + /// Get the concrete type arguments for a receiver's generic composite type. + pub(crate) fn infer_receiver_type_args(&self, expr: &edge_ast::Expr) -> Vec { + match expr { + edge_ast::Expr::Ident(ident) => { + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(&ident.name) { + return binding.composite_type_args.clone(); + } + } + Vec::new() + } + edge_ast::Expr::FieldAccess(obj, field, _) => { + if let edge_ast::Expr::Ident(ident) = obj.as_ref() { + if ident.name == "self" { + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(&field.name) { + return binding.composite_type_args.clone(); + } + } + } + } + Vec::new() + } + // ArrayIndex: base[index] — if base is a Map, the result's type args come from V + edge_ast::Expr::ArrayIndex(base, _, _, _) => { + let base_args = self.infer_receiver_type_args(base); + if base_args.len() == 2 { + if let edge_ast::ty::TypeSig::Named(_, inner_args) = &base_args[1] { + return inner_args.clone(); + } + } + Vec::new() + } + _ => Vec::new(), + } + } + + /// Build a type parameter substitution map from a generic type's type params and concrete args. + /// E.g., for Map with args [addr, u256], returns {"K": "addr", "V": "u256"}. + /// For nested generics like Map>, V maps to "Map" (base name only). + fn build_type_param_subst( + &self, + type_name: &str, + type_args: &[edge_ast::ty::TypeSig], + ) -> HashMap { + if type_args.is_empty() { + return HashMap::new(); + } + // Try exact match first, then strip mangled suffix to find base template. + // E.g., "Map__CustomHash_CustomSStore" → try "Map" if exact lookup fails. + let template = self.generic_type_templates.get(type_name).or_else(|| { + let base = type_name.split("__").next().unwrap_or(type_name); + self.generic_type_templates.get(base) + }); + if let Some(template) = template { + template + .type_params + .iter() + .zip(type_args.iter()) + .map(|(param, arg)| { + let name = Self::type_sig_mangle(arg); + (param.name.name.clone(), name) + }) + .collect() + } else { + HashMap::new() + } + } + /// Convert an EvmType to a type name string (for primitives). fn evm_type_to_name(ty: &EvmType) -> Option { match ty { @@ -552,6 +784,7 @@ impl AstToEgglog { || type_name == "i256" || type_name == "bool" || type_name == "address" + || type_name == "b32" || type_name.starts_with("u") && type_name[1..].parse::().is_ok() || type_name.starts_with("i") @@ -571,6 +804,147 @@ impl AstToEgglog { } } + /// Compiler-provided complex trait methods for primitive types. + /// Unlike `compiler_provided_method` (simple binary ops), these produce + /// full IR expression trees with state threading. + /// + /// Returns `Some(ir_expr)` if the method was handled, `None` otherwise. + fn compiler_provided_stateful_method( + &mut self, + method_name: &str, + receiver_ir: Option, + args_ir: &[RcExpr], + ) -> Option { + use std::rc::Rc; + + match method_name { + // UniqueSlot::derive_slot(self, base_slot) → keccak256(key . base_slot) + "derive_slot" if self.std_ops_traits.contains("UniqueSlot") => { + let key = receiver_ir?; + let base_slot = args_ir.first()?; + let scratch = self.alloc_region(2); + // MSTORE(scratch, key) + let mstore_key = ast_helpers::mstore( + Rc::clone(&scratch), + key, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&mstore_key); + // MSTORE(scratch+32, base_slot) + let slot_offset = ast_helpers::add( + Rc::clone(&scratch), + ast_helpers::const_int(32, self.current_ctx.clone()), + ); + let mstore_slot = ast_helpers::mstore( + slot_offset, + Rc::clone(base_slot), + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&mstore_slot); + // KECCAK256(scratch, 64, state) + let computed_slot = ast_helpers::keccak256( + scratch, + ast_helpers::const_int(64, self.current_ctx.clone()), + Rc::clone(&self.current_state), + ); + let side_effects = ast_helpers::concat(mstore_key, mstore_slot); + Some(ast_helpers::concat(side_effects, computed_slot)) + } + + // Sload::sload(slot) → SLOAD(slot, state) — static method (no receiver) + "sload" if self.std_ops_traits.contains("Sload") => { + let slot = if let Some(recv) = receiver_ir { + // Called as receiver.sload() — receiver is the slot + recv + } else { + // Called as Type::sload(slot) — first arg is the slot + args_ir.first()?.clone() + }; + Some(ast_helpers::sload(slot, Rc::clone(&self.current_state))) + } + + // Sstore::sstore(self, slot) → SSTORE(slot, value, state) + "sstore" if self.std_ops_traits.contains("Sstore") => { + let value = receiver_ir?; + let slot = args_ir.first()?; + let store = ast_helpers::sstore( + Rc::clone(slot), + value, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&store); + Some(store) + } + + _ => None, + } + } + + /// Default `derive_slot` for struct types without an explicit `UniqueSlot` impl. + /// + /// Follows Solidity's nested mapping convention — each field is chained + /// through keccak256 as if it were a separate mapping level: + /// + /// ```text + /// slot = keccak256(field_0 . base_slot) + /// slot = keccak256(field_1 . slot) + /// slot = keccak256(field_2 . slot) + /// ... + /// ``` + fn default_struct_derive_slot( + &mut self, + receiver_ir: &RcExpr, + base_slot: &RcExpr, + fields: &[(String, EvmType)], + ) -> RcExpr { + let scratch = self.alloc_region(2); + let mut current_slot = Rc::clone(base_slot); + let mut side_effects = ast_helpers::empty( + EvmType::Base(EvmBaseType::UnitT), + self.current_ctx.clone(), + ); + + for (i, (_name, _ty)) in fields.iter().enumerate() { + // Load field value: MLOAD(receiver + i*32) + let field_offset = ast_helpers::add( + Rc::clone(receiver_ir), + ast_helpers::const_int((i * 32) as i64, self.current_ctx.clone()), + ); + let field_val = ast_helpers::mload(field_offset, Rc::clone(&self.current_state)); + + // MSTORE(scratch, field_value) + let mstore_field = ast_helpers::mstore( + Rc::clone(&scratch), + field_val, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&mstore_field); + side_effects = ast_helpers::concat(side_effects, mstore_field); + + // MSTORE(scratch+32, current_slot) + let slot_offset = ast_helpers::add( + Rc::clone(&scratch), + ast_helpers::const_int(32, self.current_ctx.clone()), + ); + let mstore_slot = ast_helpers::mstore( + slot_offset, + current_slot, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&mstore_slot); + side_effects = ast_helpers::concat(side_effects, mstore_slot); + + // slot = keccak256(scratch, 64) + current_slot = ast_helpers::keccak256( + Rc::clone(&scratch), + ast_helpers::const_int(64, self.current_ctx.clone()), + Rc::clone(&self.current_state), + ); + } + + ast_helpers::concat(side_effects, current_slot) + } + /// Infer the `EvmType` of an expression (best-effort, defaults to u256). pub(crate) fn infer_expr_type(&self, expr: &edge_ast::Expr) -> EvmType { match expr { @@ -656,12 +1030,55 @@ impl AstToEgglog { .collect::>()?; // Before pushing a new scope, look up composite info for args that are identifiers - // (needed for method calls where `self` refers to a struct variable) - let mut arg_composite: Vec)>> = Vec::new(); + // (needed for method calls where `self` refers to a struct variable or generic type) + tracing::trace!( + "inline_function_call: params={:?}, args={}", + params.iter().map(|(n, _)| n.as_str()).collect::>(), + args.len() + ); + let mut arg_composite: Vec, Vec)>> = Vec::new(); for arg in args { if let edge_ast::Expr::Ident(ident) = arg { let info = self.lookup_composite_info(&ident.name); - arg_composite.push(info.map(|(ct, cb)| (ct, Some(cb)))); + if let Some((ct, cb)) = info { + arg_composite.push(Some((ct, Some(cb), Vec::new()))); + } else { + // Check for composite_type without composite_base (e.g., Map type aliases) + let mut found = false; + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(&ident.name) { + if let Some(ref ct) = binding.composite_type { + arg_composite.push(Some((ct.clone(), None, binding.composite_type_args.clone()))); + found = true; + } + break; + } + } + if !found { + arg_composite.push(None); + } + } + } else if let edge_ast::Expr::ArrayIndex(base, _, _, _) = arg { + // For ArrayIndex args (e.g., map[key] as self parameter), + // infer the value type from the base Map's type args. + let base_type = self.infer_receiver_type(base); + let base_args = self.infer_receiver_type_args(base); + if let Some(ref bt) = base_type { + if bt.starts_with("Map") && base_args.len() == 2 { + let value_mangled = Self::type_sig_mangle(&base_args[1]); + // Extract inner type args if V is a generic type + let inner_args = if let edge_ast::ty::TypeSig::Named(_, inner) = &base_args[1] { + inner.clone() + } else { + Vec::new() + }; + arg_composite.push(Some((value_mangled, None, inner_args))); + } else { + arg_composite.push(None); + } + } else { + arg_composite.push(None); + } } else { arg_composite.push(None); } @@ -674,24 +1091,69 @@ impl AstToEgglog { .get(i) .cloned() .unwrap_or_else(|| ast_helpers::const_int(0, self.current_ctx.clone())); - let (mut composite_type, composite_base) = arg_composite + let (mut composite_type, mut composite_base, composite_type_args) = arg_composite .get(i) .and_then(|c| c.as_ref()) - .map(|(ct, cb)| (Some(ct.clone()), cb.clone())) - .unwrap_or((None, None)); + .map(|(ct, cb, ta)| (Some(ct.clone()), cb.clone(), ta.clone())) + .unwrap_or((None, None, Vec::new())); + + // If the parameter has a primitive type annotation, don't inherit + // composite_type from the argument — prevents Map type leaking through + // when Map.get passes `self` (Map) to derive_slot(base_slot: u256). + if matches!(param_ty, edge_ast::ty::TypeSig::Primitive(_)) && composite_type.is_some() { + // Only clear if the composite type doesn't match a known struct/union + // (the argument may be a struct disguised as u256 in the EVM) + if let Some(ref ct) = composite_type { + if !self.struct_types.contains_key(ct) && !self.union_types.contains_key(ct) { + composite_type = None; + } + } + } // If composite_type is still None, check if the param type sig names // a known struct/union type — this enables trait method dispatch on // generic parameters after monomorphization substitutes concrete types. + // Also resolve generic type parameters (K, V, etc.) through type_param_subst. if composite_type.is_none() { - if let edge_ast::ty::TypeSig::Named(ref name, _) = param_ty { - if self.struct_types.contains_key(&name.name) - || self.union_types.contains_key(&name.name) + if let edge_ast::ty::TypeSig::Named(ref name, ref type_args) = param_ty { + let resolved_name = self + .type_param_subst + .get(&name.name) + .cloned() + .unwrap_or_else(|| name.name.clone()); + if self.struct_types.contains_key(&resolved_name) + || self.union_types.contains_key(&resolved_name) { - composite_type = Some(name.name.clone()); + composite_type = Some(resolved_name); + } else if type_args.is_empty() { + // Check if resolved name is a generic type that was + // monomorphized (e.g., Result__u256) + let mangled = Self::type_sig_mangle(param_ty); + if self.struct_types.contains_key(&mangled) + || self.union_types.contains_key(&mangled) + { + composite_type = Some(mangled); + } } } } + + // If we inferred composite_type from the type sig but have no + // composite_base, set it to the param value — for struct types + // the value IS the memory base address. + if composite_type.is_some() && composite_base.is_none() { + if let Some(ref ct) = composite_type { + if self.struct_types.contains_key(ct) { + composite_base = Some(Rc::clone(&val)); + } + } + } + tracing::trace!( + " param={}, composite_type={:?}, has_base={}", + param_name, + composite_type, + composite_base.is_some() + ); let binding = VarBinding { value: val, location: DataLocation::Stack, @@ -700,6 +1162,7 @@ impl AstToEgglog { let_bind_name: None, composite_type, composite_base, + composite_type_args, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/control_flow.rs b/crates/ir/src/to_egglog/control_flow.rs index 50ba542..dc861ef 100644 --- a/crates/ir/src/to_egglog/control_flow.rs +++ b/crates/ir/src/to_egglog/control_flow.rs @@ -66,6 +66,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }, ); } @@ -176,6 +177,7 @@ impl AstToEgglog { let_bind_name: Some(var_name.clone()), composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index 9c308d6..a2bdbd7 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -132,6 +132,7 @@ impl AstToEgglog { let_bind_name: Some(var_name.clone()), composite_type, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -194,6 +195,25 @@ impl AstToEgglog { } } self.last_composite_alloc = None; + + // Intercept ArrayIndex write for Index/Map dispatch: base[index] = val → base.set(index, val) + if let edge_ast::Expr::ArrayIndex(arr_base, arr_index, _, arr_span) = lhs { + if let Some(result) = self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? { + return Ok(result); + } + if let Some(result) = self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? { + return Ok(result); + } + if self.std_ops_traits.contains("Index") { + return self.lower_method_call( + arr_base, + "set", + &[arr_index.as_ref().clone(), rhs.clone()], + arr_span, + ); + } + } + self.lower_assignment_with_composite(lhs, rhs_ir, rhs_composite.as_ref()) } @@ -212,6 +232,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -441,6 +462,25 @@ impl AstToEgglog { } } self.last_composite_alloc = None; + + // Intercept ArrayIndex write for Index/Map dispatch: base[index] = val → base.set(index, val) + if let edge_ast::Expr::ArrayIndex(arr_base, arr_index, _, arr_span) = lhs.as_ref() { + if let Some(result) = self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? { + return Ok(result); + } + if let Some(result) = self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? { + return Ok(result); + } + if self.std_ops_traits.contains("Index") { + return self.lower_method_call( + arr_base, + "set", + &[arr_index.as_ref().clone(), rhs.as_ref().clone()], + arr_span, + ); + } + } + self.lower_assignment_with_composite(lhs, rhs_ir, rhs_composite.as_ref()) } @@ -500,8 +540,23 @@ impl AstToEgglog { } // Check if base is a memory-backed array/struct variable - self.try_lower_array_element_read(base, index)? - .map_or_else(|| self.lower_mapping_read(base, index), Ok) + if let Some(result) = self.try_lower_array_element_read(base, index)? { + return Ok(result); + } + + // Try Index trait dispatch: base[index] → base.index(index) + if self.std_ops_traits.contains("Index") { + return self.lower_method_call( + base, + "index", + &[index.as_ref().clone()], + _span, + ); + } + + Err(IrError::Unsupported( + "array index on non-array type; use Map.get(key) for mappings".to_owned(), + )) } edge_ast::Expr::Paren(inner, _span) => self.lower_expr(inner), @@ -680,6 +735,19 @@ impl AstToEgglog { // Search scopes from innermost to outermost for scope in self.scopes.iter().rev() { if let Some(binding) = scope.bindings.get(name) { + // Unit-typed storage fields return the slot constant directly. + // They have no data to SLOAD — their value IS the slot number. + // This enables types like Map = () to work as slot references. + // Note: () can lower as either Base(UnitT) or TupleT([]). + let is_unit = matches!(binding._ty, EvmType::Base(EvmBaseType::UnitT)) + || matches!(&binding._ty, EvmType::TupleT(v) if v.is_empty()); + if binding.storage_slot.is_some() && is_unit + { + return Ok(ast_helpers::const_int( + binding.storage_slot.unwrap_or(0) as i64, + self.current_ctx.clone(), + )); + } return match binding.location { DataLocation::Storage => { // Persistent storage variable: emit SLOAD @@ -821,8 +889,13 @@ impl AstToEgglog { if let Some(result) = self.try_lower_storage_array_write(base, index, &rhs_ir)? { return Ok(result); } - self.try_lower_array_element_write(base, index, &rhs_ir)? - .map_or_else(|| self.lower_mapping_write(base, index, rhs_ir), Ok) + if let Some(result) = self.try_lower_array_element_write(base, index, &rhs_ir)? { + return Ok(result); + } + // Index write dispatch is handled in the Assign branch above + Err(IrError::Unsupported( + "array index write on non-array type; use Map.set(key, val) for mappings".to_owned(), + )) } edge_ast::Expr::FieldAccess(obj, field, _span) => { // Storage-backed packed struct sub-field write: self.color.r = 5 @@ -1632,6 +1705,7 @@ impl AstToEgglog { let_bind_name: Some(var_name.clone()), composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; // Get the original name (without prefix) for scope lookup let orig_name = outputs diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index 48fd62a..edbc925 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -67,6 +67,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: Some(format!("__array__{n}")), composite_base: Some(base_ir), + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -74,6 +75,34 @@ impl AstToEgglog { .bindings .insert(ident.name.clone(), binding); calldata_offset += n * 32; + } else if let Some(struct_info) = self.resolve_struct_param_type(type_sig) { + // Struct parameter: allocate memory and copy fields from calldata + let n_fields = struct_info.fields.len(); + let base_ir = self.alloc_region(n_fields); + + // Copy each field from calldata to memory + let cd_off = + ast_helpers::const_int(calldata_offset as i64, self.current_ctx.clone()); + let size = ast_helpers::const_int((n_fields * 32) as i64, self.current_ctx.clone()); + let copy = ast_helpers::calldatacopy(Rc::clone(&base_ir), cd_off, size); + array_param_prefix = ast_helpers::concat(array_param_prefix, copy); + + let binding = VarBinding { + value: Rc::clone(&base_ir), + location: DataLocation::Stack, + storage_slot: None, + _ty: ty, + let_bind_name: None, + composite_type: Some(struct_info.name), + composite_base: Some(base_ir), + composite_type_args: Vec::new(), + }; + self.scopes + .last_mut() + .expect("scope stack empty") + .bindings + .insert(ident.name.clone(), binding); + calldata_offset += n_fields * 32; } else { // Scalar parameter: single 32-byte calldataload let raw_val = Rc::new(EvmExpr::Bop( @@ -105,6 +134,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -123,14 +153,38 @@ impl AstToEgglog { // Prepend array parameter loading before body let full_body = ast_helpers::concat(array_param_prefix, body_ir); - // Append a STOP (RETURN with 0 size) after the body. - // If the body already ends with RETURN, this is unreachable dead code. - let stop = ast_helpers::return_op( - ast_helpers::const_int(0, self.current_ctx.clone()), - ast_helpers::const_int(0, self.current_ctx.clone()), - Rc::clone(&self.current_state), - ); - Ok(ast_helpers::concat(full_body, stop)) + // If the function declares a return type and the body ends with a bare + // expression statement (trailing expression, Rust-style implicit return), + // wrap it with MSTORE + RETURN so the value is ABI-encoded in the output. + // Other endings (Return, Match, IfElse, etc.) handle their own returns. + let is_trailing_expr = body.stmts.last().is_some_and(|item| { + matches!(item, edge_ast::BlockItem::Stmt(s) if matches!(s.as_ref(), edge_ast::Stmt::Expr(..))) + || matches!(item, edge_ast::BlockItem::Expr(..)) + }); + if !fn_decl.returns.is_empty() && is_trailing_expr { + // Implicit return: body_ir's trailing value is the return value. + // Emit MSTORE(buf, value) + RETURN(buf, 32). + let ret_buf = self.alloc_region(1); + let size = ast_helpers::const_int(32, self.current_ctx.clone()); + let mstore_expr = ast_helpers::mstore( + Rc::clone(&ret_buf), + full_body, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&mstore_expr); + let ret = + ast_helpers::return_op(ret_buf, size, Rc::clone(&self.current_state)); + Ok(ast_helpers::concat(mstore_expr, ret)) + } else { + // No return type, or body already has explicit return. + // Append RETURN(0, 0) as a fallthrough stop. + let stop = ast_helpers::return_op( + ast_helpers::const_int(0, self.current_ctx.clone()), + ast_helpers::const_int(0, self.current_ctx.clone()), + Rc::clone(&self.current_state), + ); + Ok(ast_helpers::concat(full_body, stop)) + } } /// Lower a standalone function. @@ -184,6 +238,7 @@ impl AstToEgglog { None }, composite_base: None, // dynamic base — resolved at element access + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -323,19 +378,21 @@ impl AstToEgglog { // Lower all statements let mut stmts: Vec = Vec::new(); - for item in &block.stmts { + let last_idx = block.stmts.len().saturating_sub(1); + for (idx, item) in block.stmts.iter().enumerate() { let ir = match item { edge_ast::BlockItem::Stmt(stmt) => { - // Check for expression-statements with unused return values - if let edge_ast::Stmt::Expr(expr) = stmt.as_ref() { - self.check_unused_return_value(expr); + // Check for expression-statements with unused return values. + // Skip the last statement — it's the tail expression (block's + // return value) and its value IS consumed by the caller. + if idx != last_idx { + if let edge_ast::Stmt::Expr(expr) = stmt.as_ref() { + self.check_unused_return_value(expr); + } } self.lower_stmt(stmt)? } - edge_ast::BlockItem::Expr(expr) => { - self.check_unused_return_value(expr); - self.lower_expr(expr)? - } + edge_ast::BlockItem::Expr(expr) => self.lower_expr(expr)?, }; stmts.push(ir); } @@ -514,6 +571,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -548,4 +606,43 @@ impl AstToEgglog { self.lowered_functions.push(func_node); Ok(()) } + + /// Check if a parameter type sig resolves to a known struct type. + /// Returns (struct_name, field_count) if so. + pub(crate) fn resolve_struct_param_type( + &self, + type_sig: &edge_ast::ty::TypeSig, + ) -> Option { + let resolved = self.resolve_type_alias(type_sig); + let name = match resolved { + edge_ast::ty::TypeSig::Named(ident, _) => &ident.name, + _ => return None, + }; + + // Direct lookup + if let Some(info) = self.struct_types.get(name.as_str()) { + return Some(StructParamInfo { + name: name.clone(), + fields: info.fields.clone(), + }); + } + + // Try resolving through type_param_subst (for generic params like V → CustomSStore) + if let Some(resolved_name) = self.type_param_subst.get(name.as_str()) { + if let Some(info) = self.struct_types.get(resolved_name.as_str()) { + return Some(StructParamInfo { + name: resolved_name.clone(), + fields: info.fields.clone(), + }); + } + } + + None + } +} + +/// Info about a struct-typed function parameter. +pub(crate) struct StructParamInfo { + pub name: String, + pub fields: Vec<(String, EvmType)>, } diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index e25cb52..d547afd 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -14,7 +14,7 @@ mod pattern; mod storage; mod types; -use std::{collections::HashSet, rc::Rc}; +use std::{collections::{HashMap, HashSet}, rc::Rc}; use indexmap::IndexMap; @@ -93,6 +93,8 @@ pub(crate) struct VarBinding { pub composite_type: Option, /// For struct/array-typed variables: the memory base offset pub composite_base: Option, + /// For generic composite types: the concrete type arguments (e.g., [addr, u256] for Map) + pub composite_type_args: Vec, } /// Scope for variable resolution during lowering. @@ -136,6 +138,14 @@ pub(crate) struct GenericTypeTemplate { pub type_sig: edge_ast::ty::TypeSig, } +/// Stored impl block for a generic type, used during monomorphization. +#[derive(Debug, Clone)] +pub(crate) struct GenericImplBlock { + pub type_params: Vec, + pub trait_impl: Option, // trait name, or None for inherent impl + pub items: Vec, +} + /// Packed layout for a single field within a packed struct. #[derive(Debug, Clone)] pub(crate) struct PackedFieldLayout { @@ -276,6 +286,8 @@ pub struct AstToEgglog { pub(crate) inline_counter: usize, /// Prefix for variable names when inlining (empty at top level) pub(crate) inline_prefix: String, + /// Active type parameter substitutions (e.g., {"K": "addr", "V": "u256"} when inlining Map methods) + pub(crate) type_param_subst: HashMap, /// Union/enum type declarations: `type_name` -> `[(variant_name, has_data)]` /// Variant index is its position in the vector. pub(crate) union_types: IndexMap>, @@ -298,8 +310,10 @@ pub struct AstToEgglog { // ---- Generics & Traits ---- /// Generic type templates: name -> template info (type params + original `TypeSig`) pub(crate) generic_type_templates: IndexMap, + /// Generic impl blocks: base_type_name -> list of impl blocks (for monomorphization) + pub(crate) generic_impl_blocks: IndexMap>, /// Cache of monomorphized types: (`generic_name`, `concrete_types`) -> `mangled_name` - pub(crate) monomorphized_types: IndexMap<(String, Vec), String>, + pub(crate) monomorphized_types: IndexMap<(String, Vec), String>, /// Generic function templates: name -> `FreeFnInfo` (with `type_params`) pub(crate) generic_fn_templates: IndexMap, /// Cache of monomorphized function bodies: `mangled_name` -> `FreeFnInfo` @@ -349,6 +363,7 @@ impl AstToEgglog { inline_depth: 0, inline_counter: 0, inline_prefix: String::new(), + type_param_subst: HashMap::new(), union_types: IndexMap::new(), struct_types: IndexMap::new(), type_aliases: IndexMap::new(), @@ -357,6 +372,7 @@ impl AstToEgglog { last_composite_alloc: None, module_prefixes: HashSet::new(), generic_type_templates: IndexMap::new(), + generic_impl_blocks: IndexMap::new(), monomorphized_types: IndexMap::new(), generic_fn_templates: IndexMap::new(), monomorphized_fns: IndexMap::new(), @@ -379,6 +395,16 @@ impl AstToEgglog { crate::ast_helpers::mem_region(id, size_words as i64) } + /// Extract the type name and type args from a Named type sig, unwrapping Pointer wrappers. + /// Returns (base_name, type_args), e.g., ("Map", [addr, u256]) from `&s Map`. + fn extract_named_type(type_sig: &edge_ast::ty::TypeSig) -> Option<(String, Vec)> { + match type_sig { + edge_ast::ty::TypeSig::Named(name, args) => Some((name.name.clone(), args.clone())), + edge_ast::ty::TypeSig::Pointer(_, inner) => Self::extract_named_type(inner), + _ => None, + } + } + /// Lower an entire program. pub fn lower_program(&mut self, program: &edge_ast::Program) -> Result { let mut contracts = Vec::new(); @@ -420,7 +446,16 @@ impl AstToEgglog { "UnsafeAdd", "UnsafeSub", "UnsafeMul", + "UniqueSlot", + "Sload", + "Sstore", + "Index", ]; + // Storage/hashing traits are fundamental (auto-imported from globals). + // Always enable them so compiler-provided impls work without explicit `use`. + for name in ["UniqueSlot", "Sload", "Sstore", "Index"] { + self.std_ops_traits.insert(name.to_string()); + } for stmt in &program.stmts { if let edge_ast::Stmt::ModuleImport(import) = stmt { if import.root.name == "std" { @@ -456,6 +491,36 @@ impl AstToEgglog { } } + // Register compiler-provided trait impls for primitive types so that + // trait bound validation in monomorphize_type() passes for types like + // `Map` which requires `addr: UniqueSlot` and `u256: Sload & Sstore`. + { + let primitive_types = [ + "u256", "u248", "u240", "u232", "u224", "u216", "u208", "u200", + "u192", "u184", "u176", "u168", "u160", "u152", "u144", "u136", + "u128", "u120", "u112", "u104", "u96", "u88", "u80", "u72", + "u64", "u56", "u48", "u40", "u32", "u24", "u16", "u8", + "i256", "i248", "i240", "i232", "i224", "i216", "i208", "i200", + "i192", "i184", "i176", "i168", "i160", "i152", "i144", "i136", + "i128", "i120", "i112", "i104", "i96", "i88", "i80", "i72", + "i64", "i56", "i48", "i40", "i32", "i24", "i16", "i8", + "address", "bool", "b32", + ]; + let primitive_traits = ["UniqueSlot", "Sload", "Sstore"]; + for prim in &primitive_types { + for trait_name in &primitive_traits { + // Empty methods — compiler-provided dispatch handles actual codegen + self.trait_impls.insert( + (prim.to_string(), trait_name.to_string()), + TraitImplInfo { + methods: IndexMap::new(), + span: edge_types::span::Span::EOF, + }, + ); + } + } + } + // First pass: collect event declarations and free/comptime function bodies. // Free/comptime functions must be collected before const evaluation // because constants may call them (e.g. `const BASE_FEE = base_fee()`). @@ -528,6 +593,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() @@ -617,6 +683,22 @@ impl AstToEgglog { } edge_ast::Stmt::ImplBlock(impl_block) => { let type_name = impl_block.ty_name.name.clone(); + + // Store generic impl blocks for monomorphization + if !impl_block.type_params.is_empty() + || self.generic_type_templates.contains_key(&type_name) + { + let trait_name = impl_block.trait_impl.as_ref().map(|(n, _)| n.name.clone()); + self.generic_impl_blocks + .entry(type_name.clone()) + .or_default() + .push(GenericImplBlock { + type_params: impl_block.type_params.clone(), + trait_impl: trait_name, + items: impl_block.items.clone(), + }); + } + if let Some((ref trait_name, _)) = impl_block.trait_impl { // Trait impl — collect methods and validate against trait definition let mut methods = IndexMap::new(); @@ -854,7 +936,26 @@ impl AstToEgglog { self.storage_fields.push(field_ir); // Check if the field type resolves to a packed struct - let composite_type = self.resolve_storage_packed_struct_type(type_sig); + let mut composite_type = self.resolve_storage_packed_struct_type(type_sig); + + // For generic named types (e.g., Map), set composite_type to + // the monomorphized name so method dispatch finds concrete methods. + let mut composite_type_args = Vec::new(); + if composite_type.is_none() { + if let Some((name, args)) = Self::extract_named_type(type_sig) { + if !args.is_empty() { + // Use monomorphized name (e.g., "Map__address_u256") + if let Ok(mangled) = self.try_monomorphize_named_type(&name, &args, None) { + composite_type = mangled; + } else { + composite_type = Some(name); + } + } else { + composite_type = Some(name); + } + composite_type_args = args; + } + } // Register in scope with the correct location let binding = VarBinding { @@ -868,6 +969,7 @@ impl AstToEgglog { let_bind_name: None, composite_type, composite_base: None, + composite_type_args, }; self.scopes .last_mut() @@ -892,6 +994,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/pattern.rs b/crates/ir/src/to_egglog/pattern.rs index 9b5b4fb..9b14cac 100644 --- a/crates/ir/src/to_egglog/pattern.rs +++ b/crates/ir/src/to_egglog/pattern.rs @@ -126,6 +126,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }, ); } @@ -204,6 +205,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, + composite_type_args: Vec::new(), }, ); } diff --git a/crates/ir/src/to_egglog/storage.rs b/crates/ir/src/to_egglog/storage.rs index 9fa9164..c350fcf 100644 --- a/crates/ir/src/to_egglog/storage.rs +++ b/crates/ir/src/to_egglog/storage.rs @@ -1,4 +1,4 @@ -//! Storage and mapping lowering: emit, mapping slots, storage reads/writes. +//! Storage lowering: emit statements, storage field lookup. use std::rc::Rc; @@ -116,215 +116,6 @@ impl AstToEgglog { } } - /// Compute the storage slot for a mapping access. - /// - /// For `mapping[key]` at base slot `s`, Solidity uses: - /// `keccak256(abi.encode(key, s))` where key is left-padded to 32 bytes - /// at memory[0..32] and s is at memory[32..64]. - /// - /// Returns `(side_effects_expr, computed_slot_expr)` where `side_effects_expr` - /// is a Concat of MSTOREs that must be emitted before the slot is used. - pub(crate) fn compute_mapping_slot(&mut self, key: RcExpr, base_slot: i64) -> (RcExpr, RcExpr) { - let ctx = self.current_ctx.clone(); - // Allocate a 2-word scratch region for keccak input - let scratch = self.alloc_region(2); - // MSTORE(scratch, key) - let mstore_key = - ast_helpers::mstore(Rc::clone(&scratch), key, Rc::clone(&self.current_state)); - self.current_state = Rc::clone(&mstore_key); - // MSTORE(scratch+32, base_slot) - let slot_offset = - ast_helpers::add(Rc::clone(&scratch), ast_helpers::const_int(32, ctx.clone())); - let mstore_slot = ast_helpers::mstore( - slot_offset, - ast_helpers::const_int(base_slot, ctx.clone()), - Rc::clone(&self.current_state), - ); - self.current_state = Rc::clone(&mstore_slot); - // KECCAK256(scratch, 64, state) — state captures the memory contents - let computed_slot = ast_helpers::keccak256( - scratch, - ast_helpers::const_int(64, ctx), - Rc::clone(&self.current_state), - ); - let side_effects = ast_helpers::concat(mstore_key, mstore_slot); - (side_effects, computed_slot) - } - - /// Compute the storage slot for a nested mapping access. - /// - /// For `mapping[key1][key2]`, uses `keccak256(key2 . keccak256(key1 . base_slot))`. - /// - /// Uses memory[0..64] for the first level and memory[64..128] for the second - /// to avoid the second level's MSTORE overwriting the first level's data before - /// KECCAK256 reads it. - pub(crate) fn compute_nested_mapping_slot( - &mut self, - outer_key: RcExpr, - inner_key: RcExpr, - base_slot: i64, - ) -> (RcExpr, RcExpr) { - let ctx = self.current_ctx.clone(); - // Allocate two separate 2-word scratch regions so the second level's - // MSTORE doesn't overwrite the first level's data before KECCAK256. - let scratch1 = self.alloc_region(2); - let scratch2 = self.alloc_region(2); - // First level: keccak256(key1 . base_slot) at scratch1 - let mstore_key1 = ast_helpers::mstore( - Rc::clone(&scratch1), - outer_key, - Rc::clone(&self.current_state), - ); - self.current_state = Rc::clone(&mstore_key1); - let mstore_slot1 = ast_helpers::mstore( - ast_helpers::add( - Rc::clone(&scratch1), - ast_helpers::const_int(32, ctx.clone()), - ), - ast_helpers::const_int(base_slot, ctx.clone()), - Rc::clone(&self.current_state), - ); - self.current_state = Rc::clone(&mstore_slot1); - // inner_slot — KECCAK256(scratch1, 64, state) - let inner_slot = ast_helpers::keccak256( - scratch1, - ast_helpers::const_int(64, ctx.clone()), - Rc::clone(&self.current_state), - ); - // Second level: keccak256(key2 . inner_slot) at scratch2 - let mstore_key2 = ast_helpers::mstore( - Rc::clone(&scratch2), - inner_key, - Rc::clone(&self.current_state), - ); - self.current_state = Rc::clone(&mstore_key2); - let mstore_slot2 = ast_helpers::mstore( - ast_helpers::add( - Rc::clone(&scratch2), - ast_helpers::const_int(32, ctx.clone()), - ), - inner_slot, - Rc::clone(&self.current_state), - ); - self.current_state = Rc::clone(&mstore_slot2); - let computed_slot = ast_helpers::keccak256( - scratch2, - ast_helpers::const_int(64, ctx), - Rc::clone(&self.current_state), - ); - let side_effects = ast_helpers::concat( - ast_helpers::concat(mstore_key1, mstore_slot1), - ast_helpers::concat(mstore_key2, mstore_slot2), - ); - (side_effects, computed_slot) - } - - /// Lower a mapping read: `field[key]` or `field[key1][key2]`. - pub(crate) fn lower_mapping_read( - &mut self, - base: &edge_ast::Expr, - index: &edge_ast::Expr, - ) -> Result { - // Check for nested mapping: base is itself an ArrayIndex - if let edge_ast::Expr::ArrayIndex(outer_base, outer_index, _, _) = base { - // nested: outer_base[outer_index][index] - let field_name = match &**outer_base { - edge_ast::Expr::Ident(id) => &id.name, - _ => { - return Err(IrError::Unsupported( - "nested mapping on non-identifier".to_owned(), - )); - } - }; - let (base_slot, location) = self.find_storage_slot(field_name)?; - let outer_key = self.lower_expr(outer_index)?; - let inner_key = self.lower_expr(index)?; - let (side_effects, computed_slot) = - self.compute_nested_mapping_slot(outer_key, inner_key, base_slot as i64); - let load = match location { - DataLocation::Transient => { - ast_helpers::tload(computed_slot, Rc::clone(&self.current_state)) - } - _ => ast_helpers::sload(computed_slot, Rc::clone(&self.current_state)), - }; - return Ok(ast_helpers::concat(side_effects, load)); - } - - // Simple mapping: field[key] - let field_name = match base { - edge_ast::Expr::Ident(id) => &id.name, - _ => { - return Err(IrError::Unsupported( - "mapping on non-identifier base".to_owned(), - )); - } - }; - let (base_slot, location) = self.find_storage_slot(field_name)?; - let key = self.lower_expr(index)?; - let (side_effects, computed_slot) = self.compute_mapping_slot(key, base_slot as i64); - let load = match location { - DataLocation::Transient => { - ast_helpers::tload(computed_slot, Rc::clone(&self.current_state)) - } - _ => ast_helpers::sload(computed_slot, Rc::clone(&self.current_state)), - }; - Ok(ast_helpers::concat(side_effects, load)) - } - - /// Lower a mapping write: `field[key] = value` or `field[key1][key2] = value`. - pub(crate) fn lower_mapping_write( - &mut self, - base: &edge_ast::Expr, - index: &edge_ast::Expr, - value: RcExpr, - ) -> Result { - // Check for nested mapping - if let edge_ast::Expr::ArrayIndex(outer_base, outer_index, _, _) = base { - let field_name = match &**outer_base { - edge_ast::Expr::Ident(id) => &id.name, - _ => { - return Err(IrError::Unsupported( - "nested mapping on non-identifier".to_owned(), - )); - } - }; - let (base_slot, location) = self.find_storage_slot(field_name)?; - let outer_key = self.lower_expr(outer_index)?; - let inner_key = self.lower_expr(index)?; - let (side_effects, computed_slot) = - self.compute_nested_mapping_slot(outer_key, inner_key, base_slot as i64); - let store = match location { - DataLocation::Transient => { - ast_helpers::tstore(computed_slot, value, Rc::clone(&self.current_state)) - } - _ => ast_helpers::sstore(computed_slot, value, Rc::clone(&self.current_state)), - }; - self.current_state = Rc::clone(&store); - return Ok(ast_helpers::concat(side_effects, store)); - } - - // Simple mapping write - let field_name = match base { - edge_ast::Expr::Ident(id) => &id.name, - _ => { - return Err(IrError::Unsupported( - "mapping on non-identifier base".to_owned(), - )); - } - }; - let (base_slot, location) = self.find_storage_slot(field_name)?; - let key = self.lower_expr(index)?; - let (side_effects, computed_slot) = self.compute_mapping_slot(key, base_slot as i64); - let store = match location { - DataLocation::Transient => { - ast_helpers::tstore(computed_slot, value, Rc::clone(&self.current_state)) - } - _ => ast_helpers::sstore(computed_slot, value, Rc::clone(&self.current_state)), - }; - self.current_state = Rc::clone(&store); - Ok(ast_helpers::concat(side_effects, store)) - } - /// Find the storage slot index and data location for a named field. pub(crate) fn find_storage_slot(&self, name: &str) -> Result<(usize, DataLocation), IrError> { for scope in self.scopes.iter().rev() { diff --git a/crates/ir/src/to_egglog/types.rs b/crates/ir/src/to_egglog/types.rs index 9b847fd..c883988 100644 --- a/crates/ir/src/to_egglog/types.rs +++ b/crates/ir/src/to_egglog/types.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; +use indexmap::IndexMap; use super::{AstToEgglog, StructTypeInfo}; use crate::{ schema::{DataLocation, EvmBaseType, EvmType}, @@ -285,6 +286,119 @@ impl AstToEgglog { } } + /// Substitute type parameters in a code block (AST-level). + /// Replaces type param names in Path expressions (e.g., V::sload → u256::sload). + /// For generic types like Map, uses the mangled name (Map__address_u256) + /// so that qualified calls resolve to monomorphized trait impls. + fn substitute_code_block( + block: &edge_ast::CodeBlock, + subst: &HashMap, + ) -> edge_ast::CodeBlock { + // Build a string→string map for path substitution using mangled names + let name_subst: HashMap<&str, String> = subst.iter().map(|(k, v)| { + (k.as_str(), Self::type_sig_mangle(v)) + }).collect(); + + edge_ast::CodeBlock { + stmts: block.stmts.iter().map(|item| { + Self::substitute_block_item(item, &name_subst) + }).collect(), + span: block.span.clone(), + } + } + + fn substitute_block_item( + item: &edge_ast::stmt::BlockItem, + subst: &HashMap<&str, String>, + ) -> edge_ast::stmt::BlockItem { + match item { + edge_ast::stmt::BlockItem::Stmt(stmt) => { + edge_ast::stmt::BlockItem::Stmt(Box::new(Self::substitute_stmt(stmt, subst))) + } + edge_ast::stmt::BlockItem::Expr(expr) => { + edge_ast::stmt::BlockItem::Expr(Self::substitute_expr(expr, subst)) + } + } + } + + fn substitute_stmt( + stmt: &edge_ast::Stmt, + subst: &HashMap<&str, String>, + ) -> edge_ast::Stmt { + match stmt { + edge_ast::Stmt::VarDecl(ident, ty, init, span) => { + edge_ast::Stmt::VarDecl( + ident.clone(), + ty.clone(), + init.as_ref().map(|e| Box::new(Self::substitute_expr(e, subst))), + span.clone(), + ) + } + edge_ast::Stmt::VarAssign(lhs, rhs, span) => { + edge_ast::Stmt::VarAssign( + Self::substitute_expr(lhs, subst), + Self::substitute_expr(rhs, subst), + span.clone(), + ) + } + edge_ast::Stmt::Return(Some(expr), span) => { + edge_ast::Stmt::Return(Some(Self::substitute_expr(expr, subst)), span.clone()) + } + edge_ast::Stmt::Expr(expr) => { + edge_ast::Stmt::Expr(Self::substitute_expr(expr, subst)) + } + other => other.clone(), + } + } + + fn substitute_expr( + expr: &edge_ast::Expr, + subst: &HashMap<&str, String>, + ) -> edge_ast::Expr { + match expr { + edge_ast::Expr::Path(components, span) => { + let new_components: Vec = components.iter().map(|c| { + if let Some(replacement) = subst.get(c.name.as_str()) { + edge_ast::Ident { name: replacement.clone(), span: c.span.clone() } + } else { + c.clone() + } + }).collect(); + edge_ast::Expr::Path(new_components, span.clone()) + } + edge_ast::Expr::FunctionCall(callee, args, turbofish, span) => { + edge_ast::Expr::FunctionCall( + Box::new(Self::substitute_expr(callee, subst)), + args.iter().map(|a| Self::substitute_expr(a, subst)).collect(), + turbofish.clone(), + span.clone(), + ) + } + edge_ast::Expr::FieldAccess(obj, field, span) => { + edge_ast::Expr::FieldAccess( + Box::new(Self::substitute_expr(obj, subst)), + field.clone(), + span.clone(), + ) + } + edge_ast::Expr::Binary(lhs, op, rhs, span) => { + edge_ast::Expr::Binary( + Box::new(Self::substitute_expr(lhs, subst)), + op.clone(), + Box::new(Self::substitute_expr(rhs, subst)), + span.clone(), + ) + } + edge_ast::Expr::Paren(inner, span) => { + edge_ast::Expr::Paren( + Box::new(Self::substitute_expr(inner, subst)), + span.clone(), + ) + } + _ => expr.clone(), + } + } + /// Try to monomorphize a generic union from a variant constructor call. /// /// Given `Result::Ok(42)` where `Result = Ok(T) | Err(u256)`: @@ -354,12 +468,13 @@ impl AstToEgglog { type_args: &[edge_ast::ty::TypeSig], span: Option<&edge_types::span::Span>, ) -> Result { - // Lower type args to EvmType for caching - let concrete_types: Vec = - type_args.iter().map(|t| self.lower_type_sig(t)).collect(); + // Use mangled type names for caching — EvmType loses source-level + // distinctions (e.g., CustomHash and u256 both lower to UIntT(256)). + let cache_key_types: Vec = + type_args.iter().map(Self::type_sig_mangle).collect(); // Check cache - let cache_key = (generic_name.to_string(), concrete_types); + let cache_key = (generic_name.to_string(), cache_key_types); if let Some(mangled) = self.monomorphized_types.get(&cache_key) { return Ok(mangled.clone()); } @@ -408,9 +523,27 @@ impl AstToEgglog { for (tp, arg) in template.type_params.iter().zip(type_args.iter()) { if !tp.constraints.is_empty() { let concrete_name = Self::type_sig_display(arg); + // For generic type args (e.g., Map), also try the mangled name + // since monomorphized impls are registered under the mangled name. + let mangled_name = if let edge_ast::ty::TypeSig::Named(name, inner_args) = arg { + if !inner_args.is_empty() { + // Ensure the inner type is monomorphized first + match self.try_monomorphize_named_type(&name.name, inner_args, span) { + Ok(Some(m)) => Some(m), + _ => None, + } + } else { + None + } + } else { + None + }; for constraint in &tp.constraints { let key = (concrete_name.clone(), constraint.name.clone()); - if !self.trait_impls.contains_key(&key) { + let mangled_key = mangled_name.as_ref().map(|m| (m.clone(), constraint.name.clone())); + let satisfied = self.trait_impls.contains_key(&key) + || mangled_key.as_ref().map_or(false, |k| self.trait_impls.contains_key(k)); + if !satisfied { let mut diag = edge_diagnostics::Diagnostic::error(format!( "the trait bound `{}: {}` is not satisfied", concrete_name, constraint.name, @@ -442,9 +575,8 @@ impl AstToEgglog { .map(|(param, arg)| (param.name.name.clone(), arg.clone())) .collect(); - // Use source-level type names for mangling to distinguish struct types - // that lower to the same EVM representation. - let type_name_strs: Vec = type_args.iter().map(Self::type_sig_display).collect(); + // Use mangled type names for identifier-safe names (no angle brackets). + let type_name_strs: Vec = type_args.iter().map(Self::type_sig_mangle).collect(); let mangled = format!("{generic_name}__{}", type_name_strs.join("_")); // Substitute and register @@ -480,6 +612,80 @@ impl AstToEgglog { } } + // Monomorphize impl blocks for this generic type + if let Some(impl_blocks) = self.generic_impl_blocks.get(generic_name).cloned() { + for gib in &impl_blocks { + // Build substitution from the generic impl's type params to concrete args + let impl_subst: HashMap = if gib.type_params.is_empty() { + // Use the type template's params (e.g., `impl Map` where K,V from the type) + subst.clone() + } else { + gib.type_params.iter() + .zip(type_args.iter()) + .map(|(param, arg)| (param.name.name.clone(), arg.clone())) + .collect() + }; + + // Substitute type params in method bodies and register under mangled name + let concrete_methods: Vec = gib.items.iter().map(|item| { + match item { + edge_ast::item::ImplItem::FnAssign(fn_decl, body) => { + let new_params: Vec<(edge_ast::Ident, edge_ast::ty::TypeSig)> = fn_decl.params.iter().map(|(id, ty)| { + (id.clone(), Self::substitute_type_params(ty, &impl_subst)) + }).collect(); + let new_returns: Vec = fn_decl.returns.iter().map(|ty| { + Self::substitute_type_params(ty, &impl_subst) + }).collect(); + let new_fn_decl = edge_ast::item::FnDecl { + name: fn_decl.name.clone(), + params: new_params, + returns: new_returns, + type_params: Vec::new(), // concrete, no type params + is_pub: fn_decl.is_pub, + is_ext: fn_decl.is_ext, + is_mut: fn_decl.is_mut, + span: fn_decl.span.clone(), + }; + // Substitute type params in body expressions + let new_body = Self::substitute_code_block(body, &impl_subst); + edge_ast::item::ImplItem::FnAssign(new_fn_decl, new_body) + } + other => other.clone(), + } + }).collect(); + + if let Some(ref trait_name) = gib.trait_impl { + // Trait impl: register under mangled type name + let mut methods = IndexMap::new(); + for item in &concrete_methods { + if let edge_ast::item::ImplItem::FnAssign(fn_decl, body) = item { + methods.insert(fn_decl.name.name.clone(), (fn_decl.clone(), body.clone())); + } + } + self.trait_impls.insert( + (mangled.clone(), trait_name.clone()), + super::TraitImplInfo { + methods, + span: edge_types::span::Span::EOF, + }, + ); + } else { + // Inherent impl: register methods under mangled type name + let methods: Vec = concrete_methods.iter().filter_map(|item| { + if let edge_ast::item::ImplItem::FnAssign(fn_decl, body) = item { + Some(super::InherentMethod { + fn_decl: fn_decl.clone(), + body: body.clone(), + }) + } else { + None + } + }).collect(); + self.inherent_methods.entry(mangled.clone()).or_default().extend(methods); + } + } + } + self.monomorphized_types.insert(cache_key, mangled.clone()); Ok(mangled) } @@ -574,10 +780,49 @@ impl AstToEgglog { Ok(()) } + /// Mangle a `TypeSig` into an identifier-safe name for use as mangled type names. + /// E.g., `Map` → `Map__address_u256`, nested types recursively mangled. + pub(crate) fn type_sig_mangle(ty: &edge_ast::ty::TypeSig) -> String { + match ty { + edge_ast::ty::TypeSig::Primitive(p) => { + use edge_ast::ty::PrimitiveType; + match p { + PrimitiveType::UInt(n) => format!("u{n}"), + PrimitiveType::Int(n) => format!("i{n}"), + PrimitiveType::FixedBytes(n) => format!("b{n}"), + PrimitiveType::Address => "address".to_string(), + PrimitiveType::Bool => "bool".to_string(), + PrimitiveType::Bit => "bit".to_string(), + } + } + edge_ast::ty::TypeSig::Named(ident, args) => { + if args.is_empty() { + ident.name.clone() + } else { + let arg_strs: Vec = args.iter() + .map(Self::type_sig_mangle) + .collect(); + format!("{}__{}", ident.name, arg_strs.join("_")) + } + } + _ => "unknown".to_string(), + } + } + /// Simple display for a `TypeSig` (for error messages). pub(crate) fn type_sig_display(ty: &edge_ast::ty::TypeSig) -> String { match ty { - edge_ast::ty::TypeSig::Primitive(p) => format!("{p:?}").to_lowercase(), + edge_ast::ty::TypeSig::Primitive(p) => { + use edge_ast::ty::PrimitiveType; + match p { + PrimitiveType::UInt(n) => format!("u{n}"), + PrimitiveType::Int(n) => format!("i{n}"), + PrimitiveType::FixedBytes(n) => format!("b{n}"), + PrimitiveType::Address => "address".to_string(), + PrimitiveType::Bool => "bool".to_string(), + PrimitiveType::Bit => "bit".to_string(), + } + } edge_ast::ty::TypeSig::Named(ident, args) => { if args.is_empty() { ident.name.clone() diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 175e86f..78248f4 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -238,7 +238,39 @@ impl Parser { if token.kind == kind { Ok(self.advance()) } else { - Err(ParseError::unexpected(&token.kind, &kind, token.span)) + // For missing delimiters (;, ), ], }), point at the end of the + // previous token — that's where the delimiter was expected. + let span = if matches!( + kind, + TokenKind::Semicolon + | TokenKind::CloseParen + | TokenKind::CloseBracket + | TokenKind::CloseBrace + ) { + self.prev_token_end_span().unwrap_or(token.span) + } else { + token.span + }; + Err(ParseError::unexpected(&token.kind, &kind, span)) + } + } + + /// Check if the token after the current one is `::` (without advancing). + fn lookahead_double_colon(&self) -> bool { + self.cursor + 1 < self.tokens.len() && self.tokens[self.cursor + 1].kind == TokenKind::DoubleColon + } + + /// Get a zero-width span at the end of the previous token. + fn prev_token_end_span(&self) -> Option { + if self.cursor > 0 { + let prev = &self.tokens[self.cursor - 1]; + Some(Span { + start: prev.span.end, + end: prev.span.end, + file: prev.span.file.clone(), + }) + } else { + None } } @@ -1718,6 +1750,88 @@ impl Parser { let lit = Lit::Str(s, token.span); Ok(Expr::Literal(Box::new(lit))) } + // Primitive type used as a path root: u256::sload(...), address::default(), etc. + TokenKind::DataType(ref dt) if self.lookahead_double_colon() => { + let name = match dt { + edge_types::tokens::DataType::Primitive(pt) => { + let ast_pt = self.convert_primitive_type(pt.clone()); + ast_pt.to_string() + } + edge_types::tokens::DataType::Unknown => { + return Err(ParseError::InvalidExpr { + message: "Unknown data type".to_string(), + span: self.peek().span.clone(), + }); + } + }; + let token = self.advance(); + let ident = Ident { + name, + span: token.span.clone(), + }; + + // Parse :: path (same as Ident path handling below) + let mut path_segments = vec![ident]; + let mut turbofish_type_args: Vec = vec![]; + while self.check(&TokenKind::DoubleColon) { + self.advance(); + if self.check(&TokenKind::Operator(Operator::Comparison( + ComparisonOperator::LessThan, + ))) { + turbofish_type_args = self.parse_turbofish_type_args()?; + break; + } + if let TokenKind::Ident(next_name) = self.peek().kind.clone() { + let next_token = self.advance(); + path_segments.push(Ident { + name: next_name, + span: next_token.span, + }); + } else { + return Err(ParseError::InvalidExpr { + message: "Expected identifier after ::".to_string(), + span: self.peek().span.clone(), + }); + } + } + + self.skip_whitespace_and_comments(); + if self.check(&TokenKind::OpenParen) { + self.advance(); + let mut args = Vec::new(); + while !self.check(&TokenKind::CloseParen) && !self.is_at_end() { + self.skip_whitespace_and_comments(); + if self.check(&TokenKind::CloseParen) { + break; + } + args.push(self.parse_expr()?); + self.skip_whitespace_and_comments(); + if !self.check(&TokenKind::CloseParen) { + self.expect(TokenKind::Comma)?; + } + } + let end = self.expect(TokenKind::CloseParen)?; + let span = Span { + start: token.span.start, + end: end.span.end, + file: token.span.file, + }; + Ok(Expr::FunctionCall( + Box::new(Expr::Path(path_segments, span.clone())), + args, + turbofish_type_args, + span, + )) + } else { + let end_span = path_segments.last().unwrap().span.clone(); + let span = Span { + start: token.span.start, + end: end_span.end, + file: token.span.file, + }; + Ok(Expr::Path(path_segments, span)) + } + } TokenKind::Ident(name) => { let token = self.advance(); let ident = Ident { diff --git a/examples/erc20.edge b/examples/erc20.edge index fead975..3016c97 100644 --- a/examples/erc20.edge +++ b/examples/erc20.edge @@ -37,10 +37,10 @@ contract ERC20 { let total_supply: &s u256; // Balances mapping: account -> amount - let balances: &s map; + let balances: &s Map; // Allowances mapping: owner -> spender -> amount - let allowances: &s map>; + let allowances: &s Map>; // Metadata functions pub fn totalSupply() -> (u256) { diff --git a/examples/tests/stress_loops.edge b/examples/tests/stress_loops.edge index 55a3815..9e65673 100644 --- a/examples/tests/stress_loops.edge +++ b/examples/tests/stress_loops.edge @@ -11,7 +11,7 @@ abi IStressLoops { } contract StressLoops { - let values: &s u256; + let values: &s Map; let count: &s u256; // Simple accumulator loop: sum 1..n. diff --git a/examples/tests/stress_storage.edge b/examples/tests/stress_storage.edge index a2b92da..8c2027e 100644 --- a/examples/tests/stress_storage.edge +++ b/examples/tests/stress_storage.edge @@ -19,8 +19,8 @@ abi IStressStorage { contract StressStorage { let total: &s u256; - let balances: &s u256; - let nonces: &s u256; + let balances: &s Map; + let nonces: &s Map; // Reads @caller, reads balance, writes balance, writes total, emits event. // Tests: interleaved SLOAD, SSTORE, MSTORE for keccak, and LetBind variables. diff --git a/examples/tests/test_erc20.edge b/examples/tests/test_erc20.edge index 7ee4c64..cf0e6e3 100644 --- a/examples/tests/test_erc20.edge +++ b/examples/tests/test_erc20.edge @@ -17,8 +17,8 @@ abi IERC20Test { contract ERC20Test { let total_supply: &s u256; - let balances: &s u256; - let allowances: &s u256; + let balances: &s Map; + let allowances: &s Map>; pub fn totalSupply() -> (u256) { return total_supply; diff --git a/examples/tests/test_map_std.edge b/examples/tests/test_map_std.edge new file mode 100644 index 0000000..ce660e0 --- /dev/null +++ b/examples/tests/test_map_std.edge @@ -0,0 +1,120 @@ + +use std::ops::UniqueSlot; +use std::ops::Sstore; +use std::ops::Sload; + +type CustomSStore = { + ignored: u256, + packed_a: u128, + packed_b: u128 +}; + +type CustomHash = { + a: u128, + b: u128 +}; + +// Struct key with NO UniqueSlot impl — uses default keccak-chained derive_slot +type DefaultKey = { + x: u256, + y: u256 +}; + +impl CustomHash: UniqueSlot { + fn derive_slot(self, base_slot: u256) -> u256 { + let packed_combo: u256 = (self.a << 128) | self.b; + base_slot + packed_combo + } +} + +impl CustomSStore: Sstore { + fn sstore(self, base_slot: u256) { + let packed_combo: u256 = (self.packed_a << 128) | self.packed_b; + packed_combo.sstore(base_slot); + } +} + +impl CustomSStore: Sload { + fn sload(base_slot: u256) -> Self { + let packed_combo = u256::sload(base_slot); + let a = packed_combo >> 128; + let b = packed_combo & ((1 << 128) - 1); + CustomSStore { ignored: 0, packed_a: a, packed_b: b } + } +} + +contract TestMappings { + let basic_map: &s Map; + let custom: &s CustomSStore; + let custom_sstore_map: &s Map; + let double_custom_sstore_map: &s Map; + let default_key_map: &s Map; + + // Getters + pub fn get_custom() -> CustomSStore { + custom + } + + pub fn get_basic(key: u256) -> u256 { + basic_map.get(key) + } + + pub fn get_basic_by_indexable(key: u256) -> u256 { + basic_map[key] + } + + pub fn get_custom(key: u256) -> u256 { + let val: CustomSStore = custom_sstore_map.get(key); + (val.packed_a << 128) | val.packed_b + } + + pub fn get_custom_by_indexable(key: u256) -> u256 { + let val: CustomSStore = custom_sstore_map[key]; + (val.packed_a << 128) | val.packed_b + } + + // Setters + pub mut fn set_custom(a: u128, b: u128) { + custom = CustomSStore { ignored: 10000, packed_a: a, packed_b: b } + } + + pub mut fn set_basic(key: u256, val: u256) -> u256 { + basic_map.set(key, val); + } + + pub mut fn set_basic_by_indexable(key: u256, val: u256) -> u256 { + basic_map[key] = val; + } + + pub mut fn set_custom(key: u256, val: CustomSStore) { + custom_sstore_map.set(key, val); + } + + pub mut fn set_custom_by_indexable(key: u256, val: CustomSStore) { + custom_sstore_map[key] = val; + } + + // Double custom: CustomHash key + CustomSStore value + pub fn get_double_custom(a: u128, b: u128) -> u256 { + let key = CustomHash { a: a, b: b }; + let val: CustomSStore = double_custom_sstore_map.get(key); + (val.packed_a << 128) | val.packed_b + } + + pub mut fn set_double_custom(a: u128, b: u128, val_a: u128, val_b: u128) { + let key = CustomHash { a: a, b: b }; + let val = CustomSStore { ignored: 0, packed_a: val_a, packed_b: val_b }; + double_custom_sstore_map.set(key, val); + } + + // Default derive_slot (no UniqueSlot impl on DefaultKey) + pub fn get_default_key(x: u256, y: u256) -> u256 { + let key = DefaultKey { x: x, y: y }; + default_key_map.get(key) + } + + pub mut fn set_default_key(x: u256, y: u256, val: u256) { + let key = DefaultKey { x: x, y: y }; + default_key_map.set(key, val); + } +} diff --git a/examples/tests/test_mappings.edge b/examples/tests/test_mappings.edge index c6da0fc..5738a92 100644 --- a/examples/tests/test_mappings.edge +++ b/examples/tests/test_mappings.edge @@ -1,9 +1,9 @@ // test_mappings.edge — Execution tests for simple and nested mappings contract TestMappings { - let balances: &s u256; - let allowances: &s u256; - let counters: &s u256; + let balances: &s Map; + let allowances: &s Map>; + let counters: &s Map; // Simple mapping: set and get pub fn map_set(key: addr, value: u256) { diff --git a/examples/tests/test_storage_heavy.edge b/examples/tests/test_storage_heavy.edge index 886e63a..0cf0a28 100644 --- a/examples/tests/test_storage_heavy.edge +++ b/examples/tests/test_storage_heavy.edge @@ -21,8 +21,8 @@ contract TestStorageHeavy { let field_c: &s u256; let field_d: &s u256; let field_e: &s u256; - let balances: &s u256; - let allowances: &s u256; + let balances: &s Map; + let allowances: &s Map>; pub fn set_all(a: u256, b: u256, c: u256, d: u256, e: u256) { field_a = a; diff --git a/examples/tokens/erc20.edge b/examples/tokens/erc20.edge index 3f43301..dd3f807 100644 --- a/examples/tokens/erc20.edge +++ b/examples/tokens/erc20.edge @@ -48,10 +48,10 @@ contract Airdrop { // ── State ───────────────────────────────────────────────────────────────── // Amount of tokens each address is entitled to claim. - let allocations: &s map; + let allocations: &s Map; // Tracks whether an address has already claimed their allocation. - let claimed: &s map; + let claimed: &s Map; // Total tokens distributed so far. let total_distributed: &s u256; diff --git a/examples/tokens/erc721.edge b/examples/tokens/erc721.edge index aef0d5a..92d6059 100644 --- a/examples/tokens/erc721.edge +++ b/examples/tokens/erc721.edge @@ -57,7 +57,7 @@ contract ArtCollection { let mint_open: &s bool; // Per-token URIs for metadata (tokenId -> URI). - let token_uris: &s map; + let token_uris: &s Map; // Base URI prepended to all token metadata paths. let base_uri: &s b32; diff --git a/std/access/roles.edge b/std/access/roles.edge index ccccbcf..1b178f5 100644 --- a/std/access/roles.edge +++ b/std/access/roles.edge @@ -1,7 +1,7 @@ // roles.edge — Multi-role authority with role-based access control // // What this demonstrates: -// - Nested maps: map> +// - Nested maps: Map> // - const role identifiers (b32 type) // - hasRole/grantRole/revokeRole pattern // - Events with indexed fields @@ -43,10 +43,10 @@ abi IAccessControl { contract AccessControl { // Nested mapping: role -> account -> has role. - let roles: &s map>; + let roles: &s Map>; // Mapping from role to its admin role. - let role_admin: &s map; + let role_admin: &s Map; // Check if an account has a specific role. pub fn hasRole(role: b32, account: addr) -> (bool) { diff --git a/std/finance/amm.edge b/std/finance/amm.edge index e16eb14..38caeff 100644 --- a/std/finance/amm.edge +++ b/std/finance/amm.edge @@ -6,7 +6,7 @@ // - Events: Swap, AddLiquidity, RemoveLiquidity // - abi interface definition (IAMM) // - @caller() builtin -// - map storage mapping +// - Map storage mapping // - Complex arithmetic expressions // - Multiple pub fn functions // @@ -50,7 +50,7 @@ contract AMM { // LP token state. let total_supply: &s u256; - let lp_balances: &s map; + let lp_balances: &s Map; // Token addresses. let token0: &s addr; diff --git a/std/finance/multisig.edge b/std/finance/multisig.edge index da22d39..bf8248c 100644 --- a/std/finance/multisig.edge +++ b/std/finance/multisig.edge @@ -45,28 +45,28 @@ contract Multisig { let owner_count: &s u256; // Whether an address is an owner. - let is_owner: &s map; + let is_owner: &s Map; // Proposal counter (next proposal ID). let proposal_count: &s u256; // Proposal targets. - let proposal_targets: &s map; + let proposal_targets: &s Map; // Proposal values. - let proposal_values: &s map; + let proposal_values: &s Map; // Proposal data. - let proposal_data: &s map; + let proposal_data: &s Map; // Proposal states: 0=Pending, 1=Approved, 2=Executed, 3=Cancelled. - let proposal_states: &s map; + let proposal_states: &s Map; // Number of confirmations per proposal. - let confirmation_count: &s map; + let confirmation_count: &s Map; // Whether an owner has confirmed a proposal. - let confirmations: &s map>; + let confirmations: &s Map>; // Create a new proposal. Only owners can propose. pub fn propose(target: addr, value: u256, data: b32) -> (u256) { diff --git a/std/finance/staking.edge b/std/finance/staking.edge index 94564fb..127aad8 100644 --- a/std/finance/staking.edge +++ b/std/finance/staking.edge @@ -4,7 +4,7 @@ // - Complex &s state: multiple maps, u256 arithmetic // - Staked/Withdrawn/RewardPaid events // - while loops for time-based reward calculations -// - map storage mappings +// - Map storage mappings // - @caller() builtin // - Internal helper functions for reward accumulation // - Arithmetic: multiplication, division, addition, subtraction @@ -47,13 +47,13 @@ contract Staking { let total_staked: &s u256; // Per-user staked balances. - let staked_balances: &s map; + let staked_balances: &s Map; // Per-user accumulated rewards. - let rewards: &s map; + let rewards: &s Map; // Per-user reward per token paid (for pro-rata calculation). - let user_reward_per_token_paid: &s map; + let user_reward_per_token_paid: &s Map; // Global reward per token stored. let reward_per_token_stored: &s u256; diff --git a/std/globals/map.edge b/std/globals/map.edge new file mode 100644 index 0000000..6a0e847 --- /dev/null +++ b/std/globals/map.edge @@ -0,0 +1,87 @@ +// map.edge — Generic storage mapping type +// +// Map is a zero-storage type — at runtime it's just a u256 (the base slot). +// K and V are phantom type parameters that guide dispatch via monomorphization. +// +// Key insight for nested maps (Map>): +// Map implements Sload as identity (no actual SLOAD — returns the slot as-is). +// So `outer.get(k1)` derives a slot and "loads" the inner Map, which is just +// that derived slot. Then `.get(k2)` derives again and SLOADs the leaf value. +// +// Requirements: +// - Keys must implement Hash (derive_slot: key + base_slot → keccak256 → new slot) +// - Leaf values must implement Sload + Sstore (actual SLOAD/SSTORE opcodes) +// - Map values implement Sload as identity (no SLOAD, slot passthrough) +// +// Usage: +// use std::map::Map; +// +// contract MyContract { +// let balances: &s Map; +// let allowances: &s Map>; +// +// fn get_balance(key: u256) -> u256 { +// self.balances.get(key) +// } +// +// fn get_allowance(owner: address, spender: address) -> u256 { +// self.allowances.get(owner).get(spender) +// } +// +// fn set_balance(key: u256, val: u256) { +// self.balances.set(key, val); +// } +// } + +use std::ops::UniqueSlot; +use std::ops::Sload; +use std::ops::Sstore; + +// Map is just a u256 at runtime — the base slot number. +// Contract fields declared as `let m: &s Map` evaluate to +// the slot constant (no SLOAD). Map is a zero-storage type. +type Map = (); + +impl Map { + // Derive the storage slot for `key` and load the value. + // For leaf V (u256, etc): V::sload does an actual SLOAD. + // For nested V (Map): V::sload returns the slot as-is (identity). + fn get(self, key: K) -> (V) { + let slot: u256 = key.derive_slot(self); + V::sload(slot) + } + + // Derive the storage slot for `key` and store the value. + fn set(self, key: K, val: V) { + let slot: u256 = key.derive_slot(self); + val.sstore(slot); + } +} + +// Map implements Sload as identity — "loading" a Map from a slot +// just returns the slot itself. No actual SLOAD is emitted. +// This is what makes nested maps (Map>) work: +// the outer get() derives a slot and passes it through as the +// inner Map's base slot, without touching storage. +impl Map: Sload { + fn sload(slot: u256) -> Self { + slot + } +} + +// Map implements Sstore as a no-op — storing a Map to a slot does nothing. +// This exists to satisfy the V: Sstore bound on nested Map> +// declarations. In practice, individual leaf values are stored directly. +impl Map: Sstore { + fn sstore(self, slot: u256) { + // No-op: Map values don't get stored to individual slots. + // Nested maps derive slots and store leaf values directly. + } +} + +// Allow for map[key] +impl Map: Index { + fn index(self, index: K) -> (V) { + self.get(index) + } +} diff --git a/std/ops.edge b/std/globals/ops.edge similarity index 67% rename from std/ops.edge rename to std/globals/ops.edge index b06e800..247586f 100644 --- a/std/ops.edge +++ b/std/globals/ops.edge @@ -56,3 +56,24 @@ trait UnsafeSub { trait UnsafeMul { fn unsafe_mul(self, rhs: Self) -> (Self); } + +// Index trait allows for operator overloading of indexing, i.e. my_map[a][b] or my_array[0] +// using a custom index and output. +trait Index { + fn index(self, index: Idx) -> (Output); +} + +trait UniqueSlot { + // Derive a storage slot from this key and a base slot. + // EVM convention: keccak256(abi.encode(key, base_slot)) + // Compiler provides implementations for primitive types. + fn derive_slot(self, base_slot: u256) -> u256; +} + +trait Sstore { + fn sstore(self, base_slot: u256); +} + +trait Sload { + fn sload(base_slot: u256) -> Self; +} diff --git a/std/globals/option.edge b/std/globals/option.edge new file mode 100644 index 0000000..b2695fc --- /dev/null +++ b/std/globals/option.edge @@ -0,0 +1 @@ +type Option = None | Some(T); diff --git a/std/globals/result.edge b/std/globals/result.edge new file mode 100644 index 0000000..a1dd1f7 --- /dev/null +++ b/std/globals/result.edge @@ -0,0 +1 @@ +type Result = Ok(T) | Err(E); diff --git a/std/patterns/factory.edge b/std/patterns/factory.edge index 5484fd3..e1b7f06 100644 --- a/std/patterns/factory.edge +++ b/std/patterns/factory.edge @@ -5,7 +5,7 @@ // - b32 type for salt/bytecode hashes // - @caller() builtin // - Events (Deployed) with indexed fields -// - map storage mapping for tracking deployments +// - Map storage mapping for tracking deployments // - Bitwise operators for address derivation // - Multiple return patterns // @@ -29,7 +29,7 @@ abi IFactory { contract Factory { // Mapping from salt to deployed contract address. - let deployments: &s map; + let deployments: &s Map; // Number of contracts deployed. let deploy_count: &s u256; diff --git a/std/patterns/timelock.edge b/std/patterns/timelock.edge index 867a479..eb2ca42 100644 --- a/std/patterns/timelock.edge +++ b/std/patterns/timelock.edge @@ -49,13 +49,13 @@ contract Timelock { let admin: &s addr; // Operation execution timestamps (0 = not scheduled, >0 = ready-at time). - let timestamps: &s map; + let timestamps: &s Map; // Whether an operation has been executed. - let executed: &s map; + let executed: &s Map; // Whether an operation has been cancelled. - let cancelled: &s map; + let cancelled: &s Map; // Schedule a new timelocked operation. pub fn schedule(id: b32, target: addr, value: u256, delay: u256) { diff --git a/std/tokens/erc1155.edge b/std/tokens/erc1155.edge index fc427b4..d44aaff 100644 --- a/std/tokens/erc1155.edge +++ b/std/tokens/erc1155.edge @@ -62,10 +62,10 @@ contract ERC1155 { // ── State ───────────────────────────────────────────────────────────────── // Balances: owner -> token ID -> amount. - let balances: &s map>; + let balances: &s Map>; // Operator approvals: owner -> operator -> approved. - let approval_for_all: &s map>; + let approval_for_all: &s Map>; // ── Public Read Functions ───────────────────────────────────────────────── diff --git a/std/tokens/erc20.edge b/std/tokens/erc20.edge index 581eca0..23ef72c 100644 --- a/std/tokens/erc20.edge +++ b/std/tokens/erc20.edge @@ -61,10 +61,10 @@ contract ERC20 { let total_supply: &s u256; // Balance of each account (address -> amount). - let balances: &s map; + let balances: &s Map; // Allowances: owner -> spender -> amount. - let allowances: &s map>; + let allowances: &s Map>; // ── Public Read Functions ───────────────────────────────────────────────── diff --git a/std/tokens/weth.edge b/std/tokens/weth.edge index 1d2431e..74e3c99 100644 --- a/std/tokens/weth.edge +++ b/std/tokens/weth.edge @@ -6,7 +6,7 @@ // - @caller() and @callvalue() builtins // - pub/fn function visibility // - emit keyword for logging events -// - map storage mapping +// - Map storage mapping // // Usage: // use std::tokens::weth::IWETH; @@ -38,10 +38,10 @@ contract WETH { let total_supply: &s u256; // Balance of each account (address -> amount). - let balances: &s map; + let balances: &s Map; // Allowances: owner -> spender -> amount. - let allowances: &s map>; + let allowances: &s Map>; // Deposit ETH and mint equivalent WETH to the caller. pub fn deposit() { From decb3210e32ec3f05282ac3007b954f3f232a185 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Wed, 11 Mar 2026 11:29:34 -0700 Subject: [PATCH 03/13] =?UTF-8?q?refactor:=20simplify=20calls.rs=20?= =?UTF-8?q?=E2=80=94=20extract=20helpers,=20remove=20StructParamInfo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract `lookup_binding_for_expr()` to deduplicate scope traversal in `infer_receiver_type` and `infer_receiver_type_args` - Extract `try_compiler_stateful_dispatch()` to deduplicate the lower-receiver + lower-args + compiler_provided_stateful_method pattern - Replace `StructParamInfo` struct with simple `(String, usize)` tuple — callsite only needs name and field count, no need to clone fields Co-Authored-By: Claude Opus 4.6 --- crates/ir/src/to_egglog/calls.rs | 130 +++++++++++++--------------- crates/ir/src/to_egglog/function.rs | 25 ++---- 2 files changed, 65 insertions(+), 90 deletions(-) diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index 16cf81d..ccac6cc 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -274,17 +274,10 @@ impl AstToEgglog { } // Check compiler-provided stateful methods (derive_slot, sload, sstore) + if let Some(result) = + self.try_compiler_stateful_dispatch(receiver, method_name, args)? { - let recv_ir = self.lower_expr(receiver)?; - let args_ir: Vec = args - .iter() - .map(|a| self.lower_expr(a)) - .collect::>()?; - if let Some(result) = - self.compiler_provided_stateful_method(method_name, Some(recv_ir), &args_ir) - { - return Ok(result); - } + return Ok(result); } } @@ -351,17 +344,10 @@ impl AstToEgglog { } // Also check stateful methods for unknown receiver + if let Some(result) = + self.try_compiler_stateful_dispatch(receiver, method_name, args)? { - let recv_ir = self.lower_expr(receiver)?; - let args_ir: Vec = args - .iter() - .map(|a| self.lower_expr(a)) - .collect::>()?; - if let Some(result) = - self.compiler_provided_stateful_method(method_name, Some(recv_ir), &args_ir) - { - return Ok(result); - } + return Ok(result); } } @@ -619,22 +605,42 @@ impl AstToEgglog { } /// Infer the type of a receiver expression (best-effort). - pub(crate) fn infer_receiver_type(&self, expr: &edge_ast::Expr) -> Option { - match expr { - edge_ast::Expr::Ident(ident) => { - for scope in self.scopes.iter().rev() { - if let Some(binding) = scope.bindings.get(&ident.name) { - // Composite type (struct/union/array) takes priority - if let Some(ref ct) = binding.composite_type { - return Some(ct.clone()); - } - // Fall back to primitive type name from EvmType - let result = Self::evm_type_to_name(&binding._ty); - return result; + /// Look up the scope binding for an expression (Ident or self.field). + /// Returns the variable name and binding reference if found. + fn lookup_binding_for_expr<'a>(&'a self, expr: &edge_ast::Expr) -> Option<&'a super::VarBinding> { + let var_name = match expr { + edge_ast::Expr::Ident(ident) => &ident.name, + edge_ast::Expr::FieldAccess(obj, field, _) => { + if let edge_ast::Expr::Ident(ident) = obj.as_ref() { + if ident.name == "self" { + &field.name + } else { + return None; } + } else { + return None; } - None } + _ => return None, + }; + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(var_name) { + return Some(binding); + } + } + None + } + + pub(crate) fn infer_receiver_type(&self, expr: &edge_ast::Expr) -> Option { + // Try direct binding lookup first + if let Some(binding) = self.lookup_binding_for_expr(expr) { + if let Some(ref ct) = binding.composite_type { + return Some(ct.clone()); + } + return Self::evm_type_to_name(&binding._ty); + } + + match expr { edge_ast::Expr::StructInstantiation(_, type_name, _, _) => Some(type_name.name.clone()), edge_ast::Expr::Literal(lit) => match lit.as_ref() { edge_ast::Lit::Bool(_, _) => Some("bool".to_string()), @@ -644,30 +650,12 @@ impl AstToEgglog { edge_ast::Lit::Int(_, None, _) => Some("u256".to_string()), _ => None, }, - // FieldAccess on self: `self.field` — look up the field binding - edge_ast::Expr::FieldAccess(obj, field, _) => { - if let edge_ast::Expr::Ident(ident) = obj.as_ref() { - if ident.name == "self" { - // Look up the field in scope - for scope in self.scopes.iter().rev() { - if let Some(binding) = scope.bindings.get(&field.name) { - if let Some(ref ct) = binding.composite_type { - return Some(ct.clone()); - } - return Self::evm_type_to_name(&binding._ty); - } - } - } - } - None - } // ArrayIndex: base[index] — if base is a Map, the result type is the value type (V) edge_ast::Expr::ArrayIndex(base, _, _, _) => { let base_type = self.infer_receiver_type(base); let base_args = self.infer_receiver_type_args(base); if let Some(ref bt) = base_type { if bt.starts_with("Map") && base_args.len() == 2 { - // V is the second type arg — use mangled name return Some(Self::type_sig_mangle(&base_args[1])); } } @@ -679,27 +667,11 @@ impl AstToEgglog { /// Get the concrete type arguments for a receiver's generic composite type. pub(crate) fn infer_receiver_type_args(&self, expr: &edge_ast::Expr) -> Vec { + if let Some(binding) = self.lookup_binding_for_expr(expr) { + return binding.composite_type_args.clone(); + } + match expr { - edge_ast::Expr::Ident(ident) => { - for scope in self.scopes.iter().rev() { - if let Some(binding) = scope.bindings.get(&ident.name) { - return binding.composite_type_args.clone(); - } - } - Vec::new() - } - edge_ast::Expr::FieldAccess(obj, field, _) => { - if let edge_ast::Expr::Ident(ident) = obj.as_ref() { - if ident.name == "self" { - for scope in self.scopes.iter().rev() { - if let Some(binding) = scope.bindings.get(&field.name) { - return binding.composite_type_args.clone(); - } - } - } - } - Vec::new() - } // ArrayIndex: base[index] — if base is a Map, the result's type args come from V edge_ast::Expr::ArrayIndex(base, _, _, _) => { let base_args = self.infer_receiver_type_args(base); @@ -806,6 +778,22 @@ impl AstToEgglog { /// Compiler-provided complex trait methods for primitive types. /// Unlike `compiler_provided_method` (simple binary ops), these produce + /// Lower receiver + args and try compiler-provided stateful method dispatch. + /// Used for `.derive_slot()`, `.sload()`, `.sstore()` on primitives. + fn try_compiler_stateful_dispatch( + &mut self, + receiver: &edge_ast::Expr, + method_name: &str, + args: &[edge_ast::Expr], + ) -> Result, IrError> { + let recv_ir = self.lower_expr(receiver)?; + let args_ir: Vec = args + .iter() + .map(|a| self.lower_expr(a)) + .collect::>()?; + Ok(self.compiler_provided_stateful_method(method_name, Some(recv_ir), &args_ir)) + } + /// full IR expression trees with state threading. /// /// Returns `Some(ir_expr)` if the method was handled, `None` otherwise. diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index edbc925..a12ff25 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -75,9 +75,8 @@ impl AstToEgglog { .bindings .insert(ident.name.clone(), binding); calldata_offset += n * 32; - } else if let Some(struct_info) = self.resolve_struct_param_type(type_sig) { + } else if let Some((struct_name, n_fields)) = self.resolve_struct_param_type(type_sig) { // Struct parameter: allocate memory and copy fields from calldata - let n_fields = struct_info.fields.len(); let base_ir = self.alloc_region(n_fields); // Copy each field from calldata to memory @@ -93,7 +92,7 @@ impl AstToEgglog { storage_slot: None, _ty: ty, let_bind_name: None, - composite_type: Some(struct_info.name), + composite_type: Some(struct_name), composite_base: Some(base_ir), composite_type_args: Vec::new(), }; @@ -608,11 +607,11 @@ impl AstToEgglog { } /// Check if a parameter type sig resolves to a known struct type. - /// Returns (struct_name, field_count) if so. + /// Returns `(struct_name, field_count)` if so. pub(crate) fn resolve_struct_param_type( &self, type_sig: &edge_ast::ty::TypeSig, - ) -> Option { + ) -> Option<(String, usize)> { let resolved = self.resolve_type_alias(type_sig); let name = match resolved { edge_ast::ty::TypeSig::Named(ident, _) => &ident.name, @@ -621,28 +620,16 @@ impl AstToEgglog { // Direct lookup if let Some(info) = self.struct_types.get(name.as_str()) { - return Some(StructParamInfo { - name: name.clone(), - fields: info.fields.clone(), - }); + return Some((name.clone(), info.fields.len())); } // Try resolving through type_param_subst (for generic params like V → CustomSStore) if let Some(resolved_name) = self.type_param_subst.get(name.as_str()) { if let Some(info) = self.struct_types.get(resolved_name.as_str()) { - return Some(StructParamInfo { - name: resolved_name.clone(), - fields: info.fields.clone(), - }); + return Some((resolved_name.clone(), info.fields.len())); } } None } } - -/// Info about a struct-typed function parameter. -pub(crate) struct StructParamInfo { - pub name: String, - pub fields: Vec<(String, EvmType)>, -} From 0552552e2c519964e8f1fa6d2350996e3bd3a260 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:12:41 -0700 Subject: [PATCH 04/13] refactor: replace hardcoded Map checks with Index trait lookup, fix generics soundness - Replace starts_with("Map") checks in infer_receiver_type, infer_receiver_type_args, and inline_function_call with Index trait impl Output type lookup - Add trait_type_args to TraitImplInfo and GenericImplBlock for type-system-based dispatch - Fix is_primitive_type to validate width ranges (8..=256 step 8) matching lexer rules - Fix resolve_generic_type_name to return None on ambiguous multiple monomorphizations - Add resolve_generic_type_name_with_args for precise resolution with type context - Add type_sig_hint threading from VarDecl to struct instantiation for disambiguation - Fix composite_type_args propagation in inline_function_call (was dropping to Vec::new) - Improve error messages: "ambiguous generic type" instead of "unknown" for multi-monomorph Co-Authored-By: Claude Opus 4.6 --- crates/ir/src/to_egglog/calls.rs | 93 +++++++++++++++++++--------- crates/ir/src/to_egglog/composite.rs | 50 ++++++++++++--- crates/ir/src/to_egglog/expr.rs | 3 + crates/ir/src/to_egglog/mod.rs | 23 +++++++ crates/ir/src/to_egglog/types.rs | 58 +++++++++++++++-- 5 files changed, 187 insertions(+), 40 deletions(-) diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index ccac6cc..6bb548a 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -650,13 +650,12 @@ impl AstToEgglog { edge_ast::Lit::Int(_, None, _) => Some("u256".to_string()), _ => None, }, - // ArrayIndex: base[index] — if base is a Map, the result type is the value type (V) + // ArrayIndex: base[index] — if base implements Index, the result type is Index::Output edge_ast::Expr::ArrayIndex(base, _, _, _) => { let base_type = self.infer_receiver_type(base); - let base_args = self.infer_receiver_type_args(base); if let Some(ref bt) = base_type { - if bt.starts_with("Map") && base_args.len() == 2 { - return Some(Self::type_sig_mangle(&base_args[1])); + if let Some(output) = self.index_output_type(bt) { + return Some(output); } } None @@ -665,6 +664,28 @@ impl AstToEgglog { } } + /// Look up the Index trait impl's Output type for a given type name. + /// Returns the mangled name of the Output type if the type implements Index. + pub(crate) fn index_output_type(&self, type_name: &str) -> Option { + if let Some(impl_info) = self.trait_impls.get(&(type_name.to_string(), "Index".to_string())) { + // Index — Output is the second type arg + if impl_info.trait_type_args.len() >= 2 { + return Some(Self::type_sig_mangle(&impl_info.trait_type_args[1])); + } + } + None + } + + /// Look up the Index trait impl's Output TypeSig for a given type name. + pub(crate) fn index_output_type_sig(&self, type_name: &str) -> Option { + if let Some(impl_info) = self.trait_impls.get(&(type_name.to_string(), "Index".to_string())) { + if impl_info.trait_type_args.len() >= 2 { + return Some(impl_info.trait_type_args[1].clone()); + } + } + None + } + /// Get the concrete type arguments for a receiver's generic composite type. pub(crate) fn infer_receiver_type_args(&self, expr: &edge_ast::Expr) -> Vec { if let Some(binding) = self.lookup_binding_for_expr(expr) { @@ -672,12 +693,14 @@ impl AstToEgglog { } match expr { - // ArrayIndex: base[index] — if base is a Map, the result's type args come from V + // ArrayIndex: base[index] — result's type args come from Index::Output edge_ast::Expr::ArrayIndex(base, _, _, _) => { - let base_args = self.infer_receiver_type_args(base); - if base_args.len() == 2 { - if let edge_ast::ty::TypeSig::Named(_, inner_args) = &base_args[1] { - return inner_args.clone(); + let base_type = self.infer_receiver_type(base); + if let Some(ref bt) = base_type { + if let Some(output_sig) = self.index_output_type_sig(bt) { + if let edge_ast::ty::TypeSig::Named(_, inner_args) = &output_sig { + return inner_args.clone(); + } } } Vec::new() @@ -752,17 +775,13 @@ impl AstToEgglog { /// Check if a type name refers to a primitive type (not a user-defined composite). pub(crate) fn is_primitive_type(type_name: &str) -> bool { - type_name == "u256" - || type_name == "i256" - || type_name == "bool" - || type_name == "address" - || type_name == "b32" - || type_name.starts_with("u") - && type_name[1..].parse::().is_ok() - || type_name.starts_with("i") - && type_name[1..].parse::().is_ok() - || type_name.starts_with("bytes") - && type_name[5..].parse::().is_ok() + matches!(type_name, "bool" | "address" | "b32" | "bit") + || type_name.strip_prefix('u').and_then(|s| s.parse::().ok()) + .is_some_and(|w| (8..=256).contains(&w) && w % 8 == 0) + || type_name.strip_prefix('i').and_then(|s| s.parse::().ok()) + .is_some_and(|w| (8..=256).contains(&w) && w % 8 == 0) + || type_name.strip_prefix("bytes").and_then(|s| s.parse::().ok()) + .is_some_and(|n| (1..=32).contains(&n)) } /// Look up a compiler-provided trait method for a primitive type. @@ -1029,7 +1048,11 @@ impl AstToEgglog { if let edge_ast::Expr::Ident(ident) = arg { let info = self.lookup_composite_info(&ident.name); if let Some((ct, cb)) = info { - arg_composite.push(Some((ct, Some(cb), Vec::new()))); + // Also grab composite_type_args from the binding + let type_args = self.lookup_binding_for_expr(arg) + .map(|b| b.composite_type_args.clone()) + .unwrap_or_default(); + arg_composite.push(Some((ct, Some(cb), type_args))); } else { // Check for composite_type without composite_base (e.g., Map type aliases) let mut found = false; @@ -1048,14 +1071,12 @@ impl AstToEgglog { } } else if let edge_ast::Expr::ArrayIndex(base, _, _, _) = arg { // For ArrayIndex args (e.g., map[key] as self parameter), - // infer the value type from the base Map's type args. + // infer the value type from the base type's Index impl Output. let base_type = self.infer_receiver_type(base); - let base_args = self.infer_receiver_type_args(base); if let Some(ref bt) = base_type { - if bt.starts_with("Map") && base_args.len() == 2 { - let value_mangled = Self::type_sig_mangle(&base_args[1]); - // Extract inner type args if V is a generic type - let inner_args = if let edge_ast::ty::TypeSig::Named(_, inner) = &base_args[1] { + if let Some(output_sig) = self.index_output_type_sig(bt) { + let value_mangled = Self::type_sig_mangle(&output_sig); + let inner_args = if let edge_ast::ty::TypeSig::Named(_, inner) = &output_sig { inner.clone() } else { Vec::new() @@ -1079,7 +1100,7 @@ impl AstToEgglog { .get(i) .cloned() .unwrap_or_else(|| ast_helpers::const_int(0, self.current_ctx.clone())); - let (mut composite_type, mut composite_base, composite_type_args) = arg_composite + let (mut composite_type, mut composite_base, mut composite_type_args) = arg_composite .get(i) .and_then(|c| c.as_ref()) .map(|(ct, cb, ta)| (Some(ct.clone()), cb.clone(), ta.clone())) @@ -1113,6 +1134,10 @@ impl AstToEgglog { || self.union_types.contains_key(&resolved_name) { composite_type = Some(resolved_name); + // Propagate type args from the param type sig + if composite_type_args.is_empty() && !type_args.is_empty() { + composite_type_args = type_args.clone(); + } } else if type_args.is_empty() { // Check if resolved name is a generic type that was // monomorphized (e.g., Result__u256) @@ -1122,6 +1147,18 @@ impl AstToEgglog { { composite_type = Some(mangled); } + } else { + // Named type with type args — try monomorphizing + if let Ok(Some(mangled)) = self.try_monomorphize_named_type( + &resolved_name, + type_args, + None, + ) { + composite_type = Some(mangled); + if composite_type_args.is_empty() { + composite_type_args = type_args.clone(); + } + } } } } diff --git a/crates/ir/src/to_egglog/composite.rs b/crates/ir/src/to_egglog/composite.rs index d690043..c7c4654 100644 --- a/crates/ir/src/to_egglog/composite.rs +++ b/crates/ir/src/to_egglog/composite.rs @@ -33,8 +33,17 @@ impl AstToEgglog { }) })? } else { - let diag = - edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`")); + // Check if resolution failed due to ambiguity (multiple monomorphizations) + let candidate_count = self.monomorphized_types.iter() + .filter(|((base, _), _)| base == type_name) + .count(); + let diag = if candidate_count > 1 { + edge_diagnostics::Diagnostic::error(format!( + "ambiguous generic type `{type_name}`: {candidate_count} monomorphizations exist", + )).with_note("provide explicit type arguments to disambiguate") + } else { + edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`")) + }; return Err(IrError::Diagnostic(if let Some(s) = span { diag.with_label(s.clone(), "not found") } else { @@ -74,9 +83,18 @@ impl AstToEgglog { type_name.to_string() } else { self.resolve_generic_type_name(type_name).ok_or_else(|| { - let diag = edge_diagnostics::Diagnostic::error(format!( - "unknown union type: `{type_name}`", - )); + let candidate_count = self.monomorphized_types.iter() + .filter(|((base, _), _)| base == type_name) + .count(); + let diag = if candidate_count > 1 { + edge_diagnostics::Diagnostic::error(format!( + "ambiguous generic type `{type_name}`: {candidate_count} monomorphizations exist", + )).with_note("provide explicit type arguments to disambiguate") + } else { + edge_diagnostics::Diagnostic::error(format!( + "unknown union type: `{type_name}`", + )) + }; IrError::Diagnostic(if let Some(s) = span { diag.with_label(s.clone(), "not found") } else { @@ -139,12 +157,28 @@ impl AstToEgglog { type_name: &str, fields: &[(edge_ast::Ident, edge_ast::Expr)], ) -> Result { - // Resolve generic struct names to monomorphized versions + // Resolve generic struct names to monomorphized versions. + // Use type_sig_hint from VarDecl annotation when available for precise resolution. let resolved_name = if self.struct_types.contains_key(type_name) { type_name.to_string() } else { - self.resolve_generic_type_name(type_name) - .unwrap_or_else(|| type_name.to_string()) + // Try precise resolution via type_sig_hint first + let from_hint = if let Some(edge_ast::ty::TypeSig::Named(ref hint_name, ref hint_args)) = self.type_sig_hint { + if (hint_name.name == type_name || hint_name.name.starts_with(type_name)) && !hint_args.is_empty() { + self.resolve_generic_type_name_with_args(type_name, hint_args) + } else { + None + } + } else { + None + }; + if let Some(resolved) = from_hint { + resolved + } else { + // Fall back to unambiguous resolution + self.resolve_generic_type_name(type_name) + .unwrap_or_else(|| type_name.to_string()) + } }; let struct_info = self.struct_types.get(&resolved_name).cloned(); diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index a2bdbd7..5087983 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -143,7 +143,10 @@ impl AstToEgglog { // If there's an initializer, emit VarStore for the assignment if let Some(init) = init_expr { self.last_composite_alloc = None; + // Set type sig hint so struct instantiation can disambiguate generics + self.type_sig_hint = type_sig.as_ref().cloned(); let rhs_ir = self.lower_expr(init)?; + self.type_sig_hint = None; // Track composite type from RHS if applicable if let Some((comp_type, comp_base)) = self.last_composite_alloc.take() { if let Some(scope) = self.scopes.last_mut() { diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index d547afd..92c97ee 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -143,6 +143,8 @@ pub(crate) struct GenericTypeTemplate { pub(crate) struct GenericImplBlock { pub type_params: Vec, pub trait_impl: Option, // trait name, or None for inherent impl + /// The trait's type arguments (e.g., `[K, V]` for `impl Foo: Index`) + pub trait_type_params: Vec, pub items: Vec, } @@ -246,6 +248,8 @@ pub(crate) struct TraitInfo { #[derive(Debug, Clone)] pub(crate) struct TraitImplInfo { pub methods: IndexMap, + /// Trait type arguments from the impl declaration (e.g., `[K, V]` for `impl Foo: Index`). + pub trait_type_args: Vec, pub span: edge_types::span::Span, } @@ -332,6 +336,9 @@ pub struct AstToEgglog { /// Type hint from assignment target, used for generic return-type inference. /// Set before lowering the RHS of a typed variable assignment, cleared after. pub(crate) type_hint: Option, + /// TypeSig hint from assignment target, used to disambiguate generic struct instantiation. + /// Set before lowering the RHS of a typed variable declaration, cleared after. + pub(crate) type_sig_hint: Option, /// Compiler warnings collected during lowering pub(crate) warnings: Vec, } @@ -382,6 +389,7 @@ impl AstToEgglog { _self_type: None, std_ops_traits: HashSet::new(), type_hint: None, + type_sig_hint: None, warnings: Vec::new(), } } @@ -514,6 +522,7 @@ impl AstToEgglog { (prim.to_string(), trait_name.to_string()), TraitImplInfo { methods: IndexMap::new(), + trait_type_args: Vec::new(), span: edge_types::span::Span::EOF, }, ); @@ -689,12 +698,16 @@ impl AstToEgglog { || self.generic_type_templates.contains_key(&type_name) { let trait_name = impl_block.trait_impl.as_ref().map(|(n, _)| n.name.clone()); + let trait_type_params = impl_block.trait_impl.as_ref() + .map(|(_, params)| params.clone()) + .unwrap_or_default(); self.generic_impl_blocks .entry(type_name.clone()) .or_default() .push(GenericImplBlock { type_params: impl_block.type_params.clone(), trait_impl: trait_name, + trait_type_params, items: impl_block.items.clone(), }); } @@ -753,10 +766,20 @@ impl AstToEgglog { } } + // Extract trait type args from the impl declaration + let trait_type_args: Vec = impl_block.trait_impl + .as_ref() + .map(|(_, params)| { + params.iter() + .map(|p| edge_ast::ty::TypeSig::Named(p.name.clone(), Vec::new())) + .collect() + }) + .unwrap_or_default(); self.trait_impls.insert( (type_name, trait_name.name.clone()), TraitImplInfo { methods, + trait_type_args, span: impl_block.span.clone(), }, ); diff --git a/crates/ir/src/to_egglog/types.rs b/crates/ir/src/to_egglog/types.rs index c883988..3ec7ee2 100644 --- a/crates/ir/src/to_egglog/types.rs +++ b/crates/ir/src/to_egglog/types.rs @@ -41,20 +41,62 @@ impl AstToEgglog { } /// Resolve a generic type name (e.g., "Result") to its monomorphized name (e.g., "`Result__u256`"). - /// Searches `union_types` and `struct_types` for any key starting with `"{name}__"`. - /// Returns the first match found. + /// Returns `Some` only when there's exactly one monomorphization (unambiguous). + /// When there are multiple, returns `None` — caller should use + /// `resolve_generic_type_name_with_args` for precise resolution. pub(crate) fn resolve_generic_type_name(&self, name: &str) -> Option { + // Check monomorphized_types cache for entries with this base name + let candidates: Vec<&String> = self.monomorphized_types.iter() + .filter(|((base, _), _)| base == name) + .map(|(_, mangled)| mangled) + .collect(); + if candidates.len() == 1 { + return Some(candidates[0].clone()); + } + if candidates.len() > 1 { + // Multiple monomorphizations — ambiguous, return None + return None; + } + + // Fallback: scan union_types and struct_types for "{name}__" prefix, + // but only return if unambiguous. + let mut fallback_candidates = Vec::new(); let prefix = format!("{name}__"); for key in self.union_types.keys() { if key.starts_with(&prefix) { - return Some(key.clone()); + fallback_candidates.push(key.clone()); } } for key in self.struct_types.keys() { if key.starts_with(&prefix) { - return Some(key.clone()); + fallback_candidates.push(key.clone()); } } + if fallback_candidates.len() == 1 { + return Some(fallback_candidates.into_iter().next().unwrap()); + } + None + } + + /// Resolve a generic type name with specific type args to its monomorphized name. + /// More precise than `resolve_generic_type_name` when multiple monomorphizations exist. + pub(crate) fn resolve_generic_type_name_with_args( + &self, + name: &str, + type_args: &[edge_ast::ty::TypeSig], + ) -> Option { + let mangled_args: Vec = type_args.iter() + .map(|a| Self::type_sig_mangle(a)) + .collect(); + let cache_key = (name.to_string(), mangled_args); + if let Some(mangled) = self.monomorphized_types.get(&cache_key) { + return Some(mangled.clone()); + } + // Fallback: construct the expected mangled name and check if it exists + let expected = format!("{}_{}", name, cache_key.1.join("_")); + if self.union_types.contains_key(&expected) || self.struct_types.contains_key(&expected) { + return Some(expected); + } None } @@ -662,10 +704,18 @@ impl AstToEgglog { methods.insert(fn_decl.name.name.clone(), (fn_decl.clone(), body.clone())); } } + // Substitute type params in trait type args to get concrete types + let trait_type_args: Vec = gib.trait_type_params.iter() + .map(|p| { + let sig = edge_ast::ty::TypeSig::Named(p.name.clone(), Vec::new()); + Self::substitute_type_params(&sig, &impl_subst) + }) + .collect(); self.trait_impls.insert( (mangled.clone(), trait_name.clone()), super::TraitImplInfo { methods, + trait_type_args, span: edge_types::span::Span::EOF, }, ); From c5e12384781b5ab979f5502d36e765a6b84b3953 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:19:32 -0700 Subject: [PATCH 05/13] chore: fix clippy warnings and format Co-Authored-By: Claude Opus 4.6 --- bin/edgec/src/main.rs | 6 +- crates/driver/src/compiler.rs | 32 +-- crates/e2e/tests/suites/map_std_exec.rs | 126 +++++++----- crates/ir/src/lib.rs | 2 +- crates/ir/src/to_egglog/calls.rs | 161 +++++++++------ crates/ir/src/to_egglog/composite.rs | 60 +++--- crates/ir/src/to_egglog/control_flow.rs | 4 +- crates/ir/src/to_egglog/expr.rs | 46 +++-- crates/ir/src/to_egglog/function.rs | 5 +- crates/ir/src/to_egglog/mod.rs | 48 +++-- crates/ir/src/to_egglog/pattern.rs | 22 +- crates/ir/src/to_egglog/storage.rs | 12 +- crates/ir/src/to_egglog/types.rs | 257 +++++++++++++----------- crates/parser/src/parser.rs | 3 +- 14 files changed, 447 insertions(+), 337 deletions(-) diff --git a/bin/edgec/src/main.rs b/bin/edgec/src/main.rs index 86c6899..ffb3a59 100644 --- a/bin/edgec/src/main.rs +++ b/bin/edgec/src/main.rs @@ -20,11 +20,7 @@ fn main() -> Result<()> { if let Some(level) = level { use tracing_subscriber::EnvFilter; // Egglog is extremely noisy — only enable at verbosity 5+ (-vvvvv) - let egglog_level = if cli.verbose >= 5 { - "trace" - } else { - "warn" - }; + let egglog_level = if cli.verbose >= 5 { "trace" } else { "warn" }; let filter = format!("edge={level},egglog={egglog_level},{level}"); tracing_subscriber::fmt() .with_env_filter(EnvFilter::new(filter)) diff --git a/crates/driver/src/compiler.rs b/crates/driver/src/compiler.rs index 332ec86..d30a051 100644 --- a/crates/driver/src/compiler.rs +++ b/crates/driver/src/compiler.rs @@ -365,16 +365,14 @@ impl Compiler { /// Run the parser and produce an AST fn parse(&mut self) -> Result { let mut parser = Parser::new(&self.session.source).map_err(|e| { - self.session - .emit_error(Self::parse_error_to_diagnostic(&e)); + self.session.emit_error(Self::parse_error_to_diagnostic(&e)); CompileError::ParseErrors })?; match parser.parse() { Ok(program) => Ok(program), Err(e) => { - self.session - .emit_error(Self::parse_error_to_diagnostic(&e)); + self.session.emit_error(Self::parse_error_to_diagnostic(&e)); self.session.report_diagnostics(); Err(CompileError::ParseErrors) } @@ -565,7 +563,12 @@ impl Compiler { /// traits, impls, functions) to the AST. fn auto_import_globals(&mut self, ast: &mut Program) -> Result<(), CompileError> { // Order matters: ops first (trait defs), then map (uses ops traits). - let global_keys = ["globals/ops", "globals/option", "globals/result", "globals/map"]; + let global_keys = [ + "globals/ops", + "globals/option", + "globals/result", + "globals/map", + ]; let mut new_stmts: Vec = Vec::new(); // Canonicalize the explicit override path once (if provided). @@ -581,12 +584,13 @@ impl Compiler { for key in &global_keys { let segments: Vec = key.split('/').map(String::from).collect(); - let source = if let Some(ref std_path) = explicit_std_path { - Self::try_read_from_fs(std_path, &segments) - .or_else(|| Self::try_read_from_embedded(&segments).map(String::from)) - } else { - Self::try_read_from_embedded(&segments).map(String::from) - }; + let source = explicit_std_path.as_ref().map_or_else( + || Self::try_read_from_embedded(&segments).map(String::from), + |std_path| { + Self::try_read_from_fs(std_path, &segments) + .or_else(|| Self::try_read_from_embedded(&segments).map(String::from)) + }, + ); let Some(source) = source else { // Globals not available (e.g., downstream consumer without std/). @@ -645,11 +649,7 @@ impl Compiler { } _ => None, }; - if let Some(n) = name { - !user_defined.contains(n) - } else { - true - } + name.is_none_or(|n| !user_defined.contains(n)) }); new_stmts.append(&mut ast.stmts); diff --git a/crates/e2e/tests/suites/map_std_exec.rs b/crates/e2e/tests/suites/map_std_exec.rs index 06ea7b4..5d6e957 100644 --- a/crates/e2e/tests/suites/map_std_exec.rs +++ b/crates/e2e/tests/suites/map_std_exec.rs @@ -2,9 +2,9 @@ //! Execution-level tests for the std Map type. //! -//! Tests compile test_map_std.edge, deploy on in-memory revm, and verify +//! Tests compile `test_map_std.edge`, deploy on in-memory revm, and verify //! basic Map get/set, index operators, direct custom storage, and -//! Map with user-defined Sload/Sstore impls. +//! `Map` with user-defined Sload/Sstore impls. use crate::helpers::*; @@ -66,10 +66,7 @@ fn test_custom_storage_set_then_get() { fn test_basic_map_get_initially_zero() { let bc = compile_contract(CONTRACT); let mut evm = EvmHandle::new(bc); - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(42)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(42)])); assert!(r.success, "get_basic(42) reverted"); assert_eq!(decode_u256(&r.output), 0, "unset key should return 0"); } @@ -85,10 +82,7 @@ fn test_basic_map_set_then_get() { )); assert!(r.success, "set_basic(1, 999) reverted"); - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(1)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(1)])); assert!(r.success, "get_basic(1) reverted"); assert_eq!(decode_u256(&r.output), 999, "get_basic(1) should be 999"); } @@ -110,17 +104,11 @@ fn test_basic_map_different_keys_independent() { )); assert!(r.success, "set_basic(20, 200) reverted"); - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(10)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(10)])); assert!(r.success); assert_eq!(decode_u256(&r.output), 100); - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(20)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(20)])); assert!(r.success); assert_eq!(decode_u256(&r.output), 200); } @@ -142,12 +130,13 @@ fn test_basic_map_overwrite() { )); assert!(r.success); - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(5)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(5)])); assert!(r.success); - assert_eq!(decode_u256(&r.output), 222, "overwritten value should be 222"); + assert_eq!( + decode_u256(&r.output), + 222, + "overwritten value should be 222" + ); } // ============================================================================= @@ -188,10 +177,7 @@ fn test_basic_map_index_set() { assert!(r.success, "set_basic_by_indexable reverted"); // Read via .get() - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(3)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(3)])); assert!(r.success, "get_basic reverted"); assert_eq!(decode_u256(&r.output), 333); } @@ -216,12 +202,13 @@ fn test_basic_map_index_interop() { assert_eq!(decode_u256(&r.output), 9999); // Also readable via .get() - let r = evm.call(calldata( - selector("get_basic(uint256)"), - &[encode_u256(99)], - )); + let r = evm.call(calldata(selector("get_basic(uint256)"), &[encode_u256(99)])); assert!(r.success); - assert_eq!(decode_u256(&r.output), 9999, ".get and index should read same slot"); + assert_eq!( + decode_u256(&r.output), + 9999, + ".get and index should read same slot" + ); } // ============================================================================= @@ -238,12 +225,13 @@ fn test_basic_map_index_interop() { fn test_custom_map_get_initially_zero() { let bc = compile_contract(CONTRACT); let mut evm = EvmHandle::new(bc); - let r = evm.call(calldata( - selector("get_custom(uint256)"), - &[encode_u256(1)], - )); + let r = evm.call(calldata(selector("get_custom(uint256)"), &[encode_u256(1)])); assert!(r.success, "get_custom(1) reverted"); - assert_eq!(decode_u256(&r.output), 0, "unset custom map key should be 0"); + assert_eq!( + decode_u256(&r.output), + 0, + "unset custom map key should be 0" + ); } #[test] @@ -285,7 +273,11 @@ fn test_double_custom_get_initially_zero() { &[encode_u256(1), encode_u256(2)], )); assert!(r.success, "get_double_custom(1,2) reverted"); - assert_eq!(decode_u256(&r.output), 0, "unset double custom key should return 0"); + assert_eq!( + decode_u256(&r.output), + 0, + "unset double custom key should return 0" + ); } #[test] @@ -296,7 +288,12 @@ fn test_double_custom_set_then_get() { // set_double_custom(a=1, b=2, val_a=100, val_b=200) let r = evm.call(calldata( selector("set_double_custom(uint128,uint128,uint128,uint128)"), - &[encode_u256(1), encode_u256(2), encode_u256(100), encode_u256(200)], + &[ + encode_u256(1), + encode_u256(2), + encode_u256(100), + encode_u256(200), + ], )); assert!(r.success, "set_double_custom reverted"); @@ -310,7 +307,10 @@ fn test_double_custom_set_then_get() { // Packed as (val_a << 128) | val_b in a u256 // val_a=100 in bytes 0..16, val_b=200 in bytes 16..32 let packed = &r.output[0..32]; - assert!(packed.iter().any(|&b| b != 0), "stored value should be non-zero"); + assert!( + packed.iter().any(|&b| b != 0), + "stored value should be non-zero" + ); } #[test] @@ -321,14 +321,24 @@ fn test_double_custom_different_keys_independent() { // Set key (1, 2) → val (10, 20) let r = evm.call(calldata( selector("set_double_custom(uint128,uint128,uint128,uint128)"), - &[encode_u256(1), encode_u256(2), encode_u256(10), encode_u256(20)], + &[ + encode_u256(1), + encode_u256(2), + encode_u256(10), + encode_u256(20), + ], )); assert!(r.success); // Set key (3, 4) → val (30, 40) let r = evm.call(calldata( selector("set_double_custom(uint128,uint128,uint128,uint128)"), - &[encode_u256(3), encode_u256(4), encode_u256(30), encode_u256(40)], + &[ + encode_u256(3), + encode_u256(4), + encode_u256(30), + encode_u256(40), + ], )); assert!(r.success); @@ -341,7 +351,11 @@ fn test_double_custom_different_keys_independent() { // Expected packed value: (10 << 128) | 20 // In big-endian 32 bytes: bytes[0..16] = 10, bytes[16..32] = 20 let expected_1_2 = pack_u128_pair(10, 20); - assert_eq!(&r.output[0..32], &expected_1_2[..], "key (1,2) should have val (10,20)"); + assert_eq!( + &r.output[0..32], + &expected_1_2[..], + "key (1,2) should have val (10,20)" + ); // Read key (3, 4) — should get val (30, 40) packed let r = evm.call(calldata( @@ -350,7 +364,11 @@ fn test_double_custom_different_keys_independent() { )); assert!(r.success); let expected_3_4 = pack_u128_pair(30, 40); - assert_eq!(&r.output[0..32], &expected_3_4[..], "key (3,4) should have val (30,40)"); + assert_eq!( + &r.output[0..32], + &expected_3_4[..], + "key (3,4) should have val (30,40)" + ); } #[test] @@ -361,14 +379,24 @@ fn test_double_custom_overwrite() { // Set key (5, 6) → val (50, 60) let r = evm.call(calldata( selector("set_double_custom(uint128,uint128,uint128,uint128)"), - &[encode_u256(5), encode_u256(6), encode_u256(50), encode_u256(60)], + &[ + encode_u256(5), + encode_u256(6), + encode_u256(50), + encode_u256(60), + ], )); assert!(r.success); // Overwrite key (5, 6) → val (55, 66) let r = evm.call(calldata( selector("set_double_custom(uint128,uint128,uint128,uint128)"), - &[encode_u256(5), encode_u256(6), encode_u256(55), encode_u256(66)], + &[ + encode_u256(5), + encode_u256(6), + encode_u256(55), + encode_u256(66), + ], )); assert!(r.success); @@ -379,7 +407,11 @@ fn test_double_custom_overwrite() { )); assert!(r.success); let expected = pack_u128_pair(55, 66); - assert_eq!(&r.output[0..32], &expected[..], "overwritten value should be (55,66)"); + assert_eq!( + &r.output[0..32], + &expected[..], + "overwritten value should be (55,66)" + ); } // ============================================================================= diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 195e78f..1ef96bb 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -561,7 +561,7 @@ mod tests { "globals/map", ]; for key in &global_files { - let path = format!("../../std/{}.edge", key); + let path = format!("../../std/{key}.edge"); if let Ok(src) = std::fs::read_to_string(&path) { if let Ok(mut p) = edge_parser::Parser::new(&src) { if let Ok(globals_ast) = p.parse() { diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index 6bb548a..5bb6588 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -1,7 +1,6 @@ //! Function call lowering: call resolution, inlining, builtin calls. -use std::collections::HashMap; -use std::rc::Rc; +use std::{collections::HashMap, rc::Rc}; use super::{AstToEgglog, FreeFnInfo, Scope, VarBinding}; use crate::{ @@ -26,20 +25,35 @@ impl AstToEgglog { let type_name = &components[0].name; let variant_name = &components[1].name; if self.union_types.contains_key(type_name) { - return self.lower_union_instantiation_expr(type_name, variant_name, args, Some(span)); + return self.lower_union_instantiation_expr( + type_name, + variant_name, + args, + Some(span), + ); } // Check for generic union types (e.g., Result::Ok(42) where Result was monomorphized) if self.generic_type_templates.contains_key(type_name) { // First try to find an already-monomorphized version if let Some(mangled) = self.resolve_generic_type_name(type_name) { - return self.lower_union_instantiation_expr(&mangled, variant_name, args, Some(span)); + return self.lower_union_instantiation_expr( + &mangled, + variant_name, + args, + Some(span), + ); } // No monomorphized version yet — try to infer type params from // the constructor argument and monomorphize on the fly. if let Some(mangled) = self.try_monomorphize_union_from_constructor(type_name, variant_name, args)? { - return self.lower_union_instantiation_expr(&mangled, variant_name, args, Some(span)); + return self.lower_union_instantiation_expr( + &mangled, + variant_name, + args, + Some(span), + ); } return Err(IrError::Diagnostic( edge_diagnostics::Diagnostic::error(format!( @@ -64,7 +78,8 @@ impl AstToEgglog { if let edge_ast::Expr::Path(components, _) = callee { if components.len() == 2 { // Resolve type parameter substitutions (e.g., V → u256 inside Map methods) - let resolved_type = self.type_param_subst + let resolved_type = self + .type_param_subst .get(&components[0].name) .cloned() .unwrap_or_else(|| components[0].name.clone()); @@ -74,7 +89,10 @@ impl AstToEgglog { let method_span = &components[1].span; // Check inherent methods: Type::method(receiver, args...) - if self.find_inherent_method(type_or_trait, method_name).is_some() { + if self + .find_inherent_method(type_or_trait, method_name) + .is_some() + { return self.lower_qualified_method_call( type_or_trait, method_name, @@ -108,7 +126,9 @@ impl AstToEgglog { // Check trait impls for non-primitive types: Map::sload(slot), etc. // Directly look up and inline the method from the type's trait impls. - if let Some((fn_decl, body)) = self.find_trait_method_for_type(type_or_trait, method_name) { + if let Some((fn_decl, body)) = + self.find_trait_method_for_type(type_or_trait, method_name) + { let params: Vec<(String, edge_ast::ty::TypeSig)> = fn_decl .params .iter() @@ -236,7 +256,7 @@ impl AstToEgglog { .map(|(id, ty)| (id.name.clone(), ty.clone())) .collect(); // Set type param substitutions for generic method bodies - let old_subst = std::mem::replace(&mut self.type_param_subst, type_param_subst.clone()); + let old_subst = std::mem::replace(&mut self.type_param_subst, type_param_subst); let result = self.inline_function_call(¶ms, &body, &all_args); self.type_param_subst = old_subst; return result; @@ -293,11 +313,8 @@ impl AstToEgglog { if let Some(struct_info) = self.struct_types.get(type_name).cloned() { let recv_ir = self.lower_expr(receiver)?; let base_slot = self.lower_expr(&args[0])?; - let result = self.default_struct_derive_slot( - &recv_ir, - &base_slot, - &struct_info.fields, - ); + let result = + self.default_struct_derive_slot(&recv_ir, &base_slot, &struct_info.fields); return Ok(result); } } @@ -415,7 +432,7 @@ impl AstToEgglog { let receiver_type = self.infer_receiver_type(&args[0]); let is_primitive = receiver_type .as_ref() - .map_or(true, |t| Self::is_primitive_type(t)); + .is_none_or(|t| Self::is_primitive_type(t)); if is_primitive { if let Some(op) = self.compiler_provided_method(method_name) { if args.len() != 2 { @@ -447,7 +464,7 @@ impl AstToEgglog { } // For instance methods like sstore/derive_slot: first arg is receiver if args_ir.len() >= 2 { - let recv = args_ir[0].clone(); + let recv = Rc::clone(&args_ir[0]); if let Some(result) = self.compiler_provided_stateful_method( method_name, Some(recv), @@ -607,7 +624,10 @@ impl AstToEgglog { /// Infer the type of a receiver expression (best-effort). /// Look up the scope binding for an expression (Ident or self.field). /// Returns the variable name and binding reference if found. - fn lookup_binding_for_expr<'a>(&'a self, expr: &edge_ast::Expr) -> Option<&'a super::VarBinding> { + fn lookup_binding_for_expr<'a>( + &'a self, + expr: &edge_ast::Expr, + ) -> Option<&'a super::VarBinding> { let var_name = match expr { edge_ast::Expr::Ident(ident) => &ident.name, edge_ast::Expr::FieldAccess(obj, field, _) => { @@ -644,9 +664,7 @@ impl AstToEgglog { edge_ast::Expr::StructInstantiation(_, type_name, _, _) => Some(type_name.name.clone()), edge_ast::Expr::Literal(lit) => match lit.as_ref() { edge_ast::Lit::Bool(_, _) => Some("bool".to_string()), - edge_ast::Lit::Int(_, Some(pt), _) => { - Some(Self::primitive_type_to_name(pt)) - } + edge_ast::Lit::Int(_, Some(pt), _) => Some(Self::primitive_type_to_name(pt)), edge_ast::Lit::Int(_, None, _) => Some("u256".to_string()), _ => None, }, @@ -667,7 +685,10 @@ impl AstToEgglog { /// Look up the Index trait impl's Output type for a given type name. /// Returns the mangled name of the Output type if the type implements Index. pub(crate) fn index_output_type(&self, type_name: &str) -> Option { - if let Some(impl_info) = self.trait_impls.get(&(type_name.to_string(), "Index".to_string())) { + if let Some(impl_info) = self + .trait_impls + .get(&(type_name.to_string(), "Index".to_string())) + { // Index — Output is the second type arg if impl_info.trait_type_args.len() >= 2 { return Some(Self::type_sig_mangle(&impl_info.trait_type_args[1])); @@ -676,9 +697,12 @@ impl AstToEgglog { None } - /// Look up the Index trait impl's Output TypeSig for a given type name. + /// Look up the Index trait impl's Output `TypeSig` for a given type name. pub(crate) fn index_output_type_sig(&self, type_name: &str) -> Option { - if let Some(impl_info) = self.trait_impls.get(&(type_name.to_string(), "Index".to_string())) { + if let Some(impl_info) = self + .trait_impls + .get(&(type_name.to_string(), "Index".to_string())) + { if impl_info.trait_type_args.len() >= 2 { return Some(impl_info.trait_type_args[1].clone()); } @@ -687,7 +711,10 @@ impl AstToEgglog { } /// Get the concrete type arguments for a receiver's generic composite type. - pub(crate) fn infer_receiver_type_args(&self, expr: &edge_ast::Expr) -> Vec { + pub(crate) fn infer_receiver_type_args( + &self, + expr: &edge_ast::Expr, + ) -> Vec { if let Some(binding) = self.lookup_binding_for_expr(expr) { return binding.composite_type_args.clone(); } @@ -697,10 +724,10 @@ impl AstToEgglog { edge_ast::Expr::ArrayIndex(base, _, _, _) => { let base_type = self.infer_receiver_type(base); if let Some(ref bt) = base_type { - if let Some(output_sig) = self.index_output_type_sig(bt) { - if let edge_ast::ty::TypeSig::Named(_, inner_args) = &output_sig { - return inner_args.clone(); - } + if let Some(edge_ast::ty::TypeSig::Named(_, inner_args)) = + self.index_output_type_sig(bt).as_ref() + { + return inner_args.clone(); } } Vec::new() @@ -726,7 +753,7 @@ impl AstToEgglog { let base = type_name.split("__").next().unwrap_or(type_name); self.generic_type_templates.get(base) }); - if let Some(template) = template { + template.map_or_else(HashMap::new, |template| { template .type_params .iter() @@ -736,12 +763,10 @@ impl AstToEgglog { (param.name.name.clone(), name) }) .collect() - } else { - HashMap::new() - } + }) } - /// Convert an EvmType to a type name string (for primitives). + /// Convert an `EvmType` to a type name string (for primitives). fn evm_type_to_name(ty: &EvmType) -> Option { match ty { EvmType::Base(base) => match base { @@ -758,7 +783,7 @@ impl AstToEgglog { } } - /// Convert a PrimitiveType to a type name string. + /// Convert a `PrimitiveType` to a type name string. fn primitive_type_to_name(pt: &edge_ast::ty::PrimitiveType) -> String { use edge_ast::ty::PrimitiveType; match pt { @@ -776,16 +801,22 @@ impl AstToEgglog { /// Check if a type name refers to a primitive type (not a user-defined composite). pub(crate) fn is_primitive_type(type_name: &str) -> bool { matches!(type_name, "bool" | "address" | "b32" | "bit") - || type_name.strip_prefix('u').and_then(|s| s.parse::().ok()) + || type_name + .strip_prefix('u') + .and_then(|s| s.parse::().ok()) .is_some_and(|w| (8..=256).contains(&w) && w % 8 == 0) - || type_name.strip_prefix('i').and_then(|s| s.parse::().ok()) + || type_name + .strip_prefix('i') + .and_then(|s| s.parse::().ok()) .is_some_and(|w| (8..=256).contains(&w) && w % 8 == 0) - || type_name.strip_prefix("bytes").and_then(|s| s.parse::().ok()) + || type_name + .strip_prefix("bytes") + .and_then(|s| s.parse::().ok()) .is_some_and(|n| (1..=32).contains(&n)) } /// Look up a compiler-provided trait method for a primitive type. - /// Returns the binary op if the method matches an imported std::ops trait. + /// Returns the binary op if the method matches an imported `std::ops` trait. fn compiler_provided_method(&self, method_name: &str) -> Option { match method_name { "unsafe_add" if self.std_ops_traits.contains("UnsafeAdd") => Some(EvmBinaryOp::Add), @@ -831,11 +862,8 @@ impl AstToEgglog { let base_slot = args_ir.first()?; let scratch = self.alloc_region(2); // MSTORE(scratch, key) - let mstore_key = ast_helpers::mstore( - Rc::clone(&scratch), - key, - Rc::clone(&self.current_state), - ); + let mstore_key = + ast_helpers::mstore(Rc::clone(&scratch), key, Rc::clone(&self.current_state)); self.current_state = Rc::clone(&mstore_key); // MSTORE(scratch+32, base_slot) let slot_offset = ast_helpers::add( @@ -865,7 +893,7 @@ impl AstToEgglog { recv } else { // Called as Type::sload(slot) — first arg is the slot - args_ir.first()?.clone() + Rc::clone(args_ir.first()?) }; Some(ast_helpers::sload(slot, Rc::clone(&self.current_state))) } @@ -874,11 +902,8 @@ impl AstToEgglog { "sstore" if self.std_ops_traits.contains("Sstore") => { let value = receiver_ir?; let slot = args_ir.first()?; - let store = ast_helpers::sstore( - Rc::clone(slot), - value, - Rc::clone(&self.current_state), - ); + let store = + ast_helpers::sstore(Rc::clone(slot), value, Rc::clone(&self.current_state)); self.current_state = Rc::clone(&store); Some(store) } @@ -906,10 +931,8 @@ impl AstToEgglog { ) -> RcExpr { let scratch = self.alloc_region(2); let mut current_slot = Rc::clone(base_slot); - let mut side_effects = ast_helpers::empty( - EvmType::Base(EvmBaseType::UnitT), - self.current_ctx.clone(), - ); + let mut side_effects = + ast_helpers::empty(EvmType::Base(EvmBaseType::UnitT), self.current_ctx.clone()); for (i, (_name, _ty)) in fields.iter().enumerate() { // Load field value: MLOAD(receiver + i*32) @@ -933,11 +956,8 @@ impl AstToEgglog { Rc::clone(&scratch), ast_helpers::const_int(32, self.current_ctx.clone()), ); - let mstore_slot = ast_helpers::mstore( - slot_offset, - current_slot, - Rc::clone(&self.current_state), - ); + let mstore_slot = + ast_helpers::mstore(slot_offset, current_slot, Rc::clone(&self.current_state)); self.current_state = Rc::clone(&mstore_slot); side_effects = ast_helpers::concat(side_effects, mstore_slot); @@ -1043,13 +1063,17 @@ impl AstToEgglog { params.iter().map(|(n, _)| n.as_str()).collect::>(), args.len() ); - let mut arg_composite: Vec, Vec)>> = Vec::new(); + #[allow(clippy::type_complexity)] + let mut arg_composite: Vec< + Option<(String, Option, Vec)>, + > = Vec::new(); for arg in args { if let edge_ast::Expr::Ident(ident) = arg { let info = self.lookup_composite_info(&ident.name); if let Some((ct, cb)) = info { // Also grab composite_type_args from the binding - let type_args = self.lookup_binding_for_expr(arg) + let type_args = self + .lookup_binding_for_expr(arg) .map(|b| b.composite_type_args.clone()) .unwrap_or_default(); arg_composite.push(Some((ct, Some(cb), type_args))); @@ -1059,7 +1083,11 @@ impl AstToEgglog { for scope in self.scopes.iter().rev() { if let Some(binding) = scope.bindings.get(&ident.name) { if let Some(ref ct) = binding.composite_type { - arg_composite.push(Some((ct.clone(), None, binding.composite_type_args.clone()))); + arg_composite.push(Some(( + ct.clone(), + None, + binding.composite_type_args.clone(), + ))); found = true; } break; @@ -1076,7 +1104,8 @@ impl AstToEgglog { if let Some(ref bt) = base_type { if let Some(output_sig) = self.index_output_type_sig(bt) { let value_mangled = Self::type_sig_mangle(&output_sig); - let inner_args = if let edge_ast::ty::TypeSig::Named(_, inner) = &output_sig { + let inner_args = if let edge_ast::ty::TypeSig::Named(_, inner) = &output_sig + { inner.clone() } else { Vec::new() @@ -1149,11 +1178,9 @@ impl AstToEgglog { } } else { // Named type with type args — try monomorphizing - if let Ok(Some(mangled)) = self.try_monomorphize_named_type( - &resolved_name, - type_args, - None, - ) { + if let Ok(Some(mangled)) = + self.try_monomorphize_named_type(&resolved_name, type_args, None) + { composite_type = Some(mangled); if composite_type_args.is_empty() { composite_type_args = type_args.clone(); diff --git a/crates/ir/src/to_egglog/composite.rs b/crates/ir/src/to_egglog/composite.rs index c7c4654..a56be4c 100644 --- a/crates/ir/src/to_egglog/composite.rs +++ b/crates/ir/src/to_egglog/composite.rs @@ -2,13 +2,14 @@ use std::rc::Rc; +use edge_diagnostics; + use super::AstToEgglog; use crate::{ ast_helpers, schema::{EvmBaseType, EvmBinaryOp, EvmType, RcExpr}, IrError, }; -use edge_diagnostics; impl AstToEgglog { /// Look up the variant index for a union type. @@ -24,8 +25,9 @@ impl AstToEgglog { v } else if let Some(mangled) = self.resolve_generic_type_name(type_name) { self.union_types.get(&mangled).ok_or_else(|| { - let diag = - edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`")); + let diag = edge_diagnostics::Diagnostic::error(format!( + "unknown union type: `{type_name}`" + )); IrError::Diagnostic(if let Some(s) = span { diag.with_label(s.clone(), "not found") } else { @@ -34,7 +36,9 @@ impl AstToEgglog { })? } else { // Check if resolution failed due to ambiguity (multiple monomorphizations) - let candidate_count = self.monomorphized_types.iter() + let candidate_count = self + .monomorphized_types + .iter() .filter(|((base, _), _)| base == type_name) .count(); let diag = if candidate_count > 1 { @@ -102,19 +106,15 @@ impl AstToEgglog { }) })? }; - let variants = self - .union_types - .get(&resolved_name) - .ok_or_else(|| { - let diag = edge_diagnostics::Diagnostic::error(format!( - "unknown union type: `{type_name}`", - )); - IrError::Diagnostic(if let Some(s) = span { - diag.with_label(s.clone(), "not found") - } else { - diag - }) - })?; + let variants = self.union_types.get(&resolved_name).ok_or_else(|| { + let diag = + edge_diagnostics::Diagnostic::error(format!("unknown union type: `{type_name}`",)); + IrError::Diagnostic(if let Some(s) = span { + diag.with_label(s.clone(), "not found") + } else { + diag + }) + })?; let has_data = variants.get(idx).map(|(_, d)| *d).unwrap_or(false); if !has_data || args.is_empty() { @@ -163,22 +163,24 @@ impl AstToEgglog { type_name.to_string() } else { // Try precise resolution via type_sig_hint first - let from_hint = if let Some(edge_ast::ty::TypeSig::Named(ref hint_name, ref hint_args)) = self.type_sig_hint { - if (hint_name.name == type_name || hint_name.name.starts_with(type_name)) && !hint_args.is_empty() { - self.resolve_generic_type_name_with_args(type_name, hint_args) + let from_hint = + if let Some(edge_ast::ty::TypeSig::Named(ref hint_name, ref hint_args)) = + self.type_sig_hint + { + if (hint_name.name == type_name || hint_name.name.starts_with(type_name)) + && !hint_args.is_empty() + { + self.resolve_generic_type_name_with_args(type_name, hint_args) + } else { + None + } } else { None - } - } else { - None - }; - if let Some(resolved) = from_hint { - resolved - } else { - // Fall back to unambiguous resolution + }; + from_hint.unwrap_or_else(|| { self.resolve_generic_type_name(type_name) .unwrap_or_else(|| type_name.to_string()) - } + }) }; let struct_info = self.struct_types.get(&resolved_name).cloned(); diff --git a/crates/ir/src/to_egglog/control_flow.rs b/crates/ir/src/to_egglog/control_flow.rs index dc861ef..af8832a 100644 --- a/crates/ir/src/to_egglog/control_flow.rs +++ b/crates/ir/src/to_egglog/control_flow.rs @@ -66,7 +66,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }, ); } @@ -177,7 +177,7 @@ impl AstToEgglog { let_bind_name: Some(var_name.clone()), composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index 5087983..d7a7d00 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -201,10 +201,14 @@ impl AstToEgglog { // Intercept ArrayIndex write for Index/Map dispatch: base[index] = val → base.set(index, val) if let edge_ast::Expr::ArrayIndex(arr_base, arr_index, _, arr_span) = lhs { - if let Some(result) = self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? { + if let Some(result) = + self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? + { return Ok(result); } - if let Some(result) = self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? { + if let Some(result) = + self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? + { return Ok(result); } if self.std_ops_traits.contains("Index") { @@ -468,10 +472,14 @@ impl AstToEgglog { // Intercept ArrayIndex write for Index/Map dispatch: base[index] = val → base.set(index, val) if let edge_ast::Expr::ArrayIndex(arr_base, arr_index, _, arr_span) = lhs.as_ref() { - if let Some(result) = self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? { + if let Some(result) = + self.try_lower_storage_array_write(arr_base, arr_index, &rhs_ir)? + { return Ok(result); } - if let Some(result) = self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? { + if let Some(result) = + self.try_lower_array_element_write(arr_base, arr_index, &rhs_ir)? + { return Ok(result); } if self.std_ops_traits.contains("Index") { @@ -549,12 +557,7 @@ impl AstToEgglog { // Try Index trait dispatch: base[index] → base.index(index) if self.std_ops_traits.contains("Index") { - return self.lower_method_call( - base, - "index", - &[index.as_ref().clone()], - _span, - ); + return self.lower_method_call(base, "index", &[index.as_ref().clone()], _span); } Err(IrError::Unsupported( @@ -574,7 +577,12 @@ impl AstToEgglog { let type_name = &components[0].name; let variant_name = &components[1].name; if self.union_types.contains_key(type_name) { - return self.lower_union_instantiation_expr(type_name, variant_name, &[], Some(span)); + return self.lower_union_instantiation_expr( + type_name, + variant_name, + &[], + Some(span), + ); } // Check for generic union types (e.g., Option::None where Option was monomorphized) if self.generic_type_templates.contains_key(type_name) { @@ -646,9 +654,13 @@ impl AstToEgglog { self.lower_array_instantiation(elements) } - edge_ast::Expr::UnionInstantiation(type_name, variant_name, args, span) => { - self.lower_union_instantiation_expr(&type_name.name, &variant_name.name, args, Some(span)) - } + edge_ast::Expr::UnionInstantiation(type_name, variant_name, args, span) => self + .lower_union_instantiation_expr( + &type_name.name, + &variant_name.name, + args, + Some(span), + ), edge_ast::Expr::PatternMatch(expr, pattern, _span) => { self.lower_pattern_match(expr, pattern) @@ -744,8 +756,7 @@ impl AstToEgglog { // Note: () can lower as either Base(UnitT) or TupleT([]). let is_unit = matches!(binding._ty, EvmType::Base(EvmBaseType::UnitT)) || matches!(&binding._ty, EvmType::TupleT(v) if v.is_empty()); - if binding.storage_slot.is_some() && is_unit - { + if binding.storage_slot.is_some() && is_unit { return Ok(ast_helpers::const_int( binding.storage_slot.unwrap_or(0) as i64, self.current_ctx.clone(), @@ -897,7 +908,8 @@ impl AstToEgglog { } // Index write dispatch is handled in the Assign branch above Err(IrError::Unsupported( - "array index write on non-array type; use Map.set(key, val) for mappings".to_owned(), + "array index write on non-array type; use Map.set(key, val) for mappings" + .to_owned(), )) } edge_ast::Expr::FieldAccess(obj, field, _span) => { diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index a12ff25..3dbc509 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -171,8 +171,7 @@ impl AstToEgglog { Rc::clone(&self.current_state), ); self.current_state = Rc::clone(&mstore_expr); - let ret = - ast_helpers::return_op(ret_buf, size, Rc::clone(&self.current_state)); + let ret = ast_helpers::return_op(ret_buf, size, Rc::clone(&self.current_state)); Ok(ast_helpers::concat(mstore_expr, ret)) } else { // No return type, or body already has explicit return. @@ -570,7 +569,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index 92c97ee..291f1e5 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -14,7 +14,10 @@ mod pattern; mod storage; mod types; -use std::{collections::{HashMap, HashSet}, rc::Rc}; +use std::{ + collections::{HashMap, HashSet}, + rc::Rc, +}; use indexmap::IndexMap; @@ -314,7 +317,7 @@ pub struct AstToEgglog { // ---- Generics & Traits ---- /// Generic type templates: name -> template info (type params + original `TypeSig`) pub(crate) generic_type_templates: IndexMap, - /// Generic impl blocks: base_type_name -> list of impl blocks (for monomorphization) + /// Generic impl blocks: `base_type_name` -> list of impl blocks (for monomorphization) pub(crate) generic_impl_blocks: IndexMap>, /// Cache of monomorphized types: (`generic_name`, `concrete_types`) -> `mangled_name` pub(crate) monomorphized_types: IndexMap<(String, Vec), String>, @@ -336,7 +339,7 @@ pub struct AstToEgglog { /// Type hint from assignment target, used for generic return-type inference. /// Set before lowering the RHS of a typed variable assignment, cleared after. pub(crate) type_hint: Option, - /// TypeSig hint from assignment target, used to disambiguate generic struct instantiation. + /// `TypeSig` hint from assignment target, used to disambiguate generic struct instantiation. /// Set before lowering the RHS of a typed variable declaration, cleared after. pub(crate) type_sig_hint: Option, /// Compiler warnings collected during lowering @@ -404,8 +407,10 @@ impl AstToEgglog { } /// Extract the type name and type args from a Named type sig, unwrapping Pointer wrappers. - /// Returns (base_name, type_args), e.g., ("Map", [addr, u256]) from `&s Map`. - fn extract_named_type(type_sig: &edge_ast::ty::TypeSig) -> Option<(String, Vec)> { + /// Returns (`base_name`, `type_args`), e.g., ("Map", [addr, u256]) from `&s Map`. + fn extract_named_type( + type_sig: &edge_ast::ty::TypeSig, + ) -> Option<(String, Vec)> { match type_sig { edge_ast::ty::TypeSig::Named(name, args) => Some((name.name.clone(), args.clone())), edge_ast::ty::TypeSig::Pointer(_, inner) => Self::extract_named_type(inner), @@ -504,14 +509,12 @@ impl AstToEgglog { // `Map` which requires `addr: UniqueSlot` and `u256: Sload & Sstore`. { let primitive_types = [ - "u256", "u248", "u240", "u232", "u224", "u216", "u208", "u200", - "u192", "u184", "u176", "u168", "u160", "u152", "u144", "u136", - "u128", "u120", "u112", "u104", "u96", "u88", "u80", "u72", - "u64", "u56", "u48", "u40", "u32", "u24", "u16", "u8", - "i256", "i248", "i240", "i232", "i224", "i216", "i208", "i200", - "i192", "i184", "i176", "i168", "i160", "i152", "i144", "i136", - "i128", "i120", "i112", "i104", "i96", "i88", "i80", "i72", - "i64", "i56", "i48", "i40", "i32", "i24", "i16", "i8", + "u256", "u248", "u240", "u232", "u224", "u216", "u208", "u200", "u192", "u184", + "u176", "u168", "u160", "u152", "u144", "u136", "u128", "u120", "u112", "u104", + "u96", "u88", "u80", "u72", "u64", "u56", "u48", "u40", "u32", "u24", "u16", "u8", + "i256", "i248", "i240", "i232", "i224", "i216", "i208", "i200", "i192", "i184", + "i176", "i168", "i160", "i152", "i144", "i136", "i128", "i120", "i112", "i104", + "i96", "i88", "i80", "i72", "i64", "i56", "i48", "i40", "i32", "i24", "i16", "i8", "address", "bool", "b32", ]; let primitive_traits = ["UniqueSlot", "Sload", "Sstore"]; @@ -697,8 +700,11 @@ impl AstToEgglog { if !impl_block.type_params.is_empty() || self.generic_type_templates.contains_key(&type_name) { - let trait_name = impl_block.trait_impl.as_ref().map(|(n, _)| n.name.clone()); - let trait_type_params = impl_block.trait_impl.as_ref() + let trait_name = + impl_block.trait_impl.as_ref().map(|(n, _)| n.name.clone()); + let trait_type_params = impl_block + .trait_impl + .as_ref() .map(|(_, params)| params.clone()) .unwrap_or_default(); self.generic_impl_blocks @@ -767,11 +773,15 @@ impl AstToEgglog { } // Extract trait type args from the impl declaration - let trait_type_args: Vec = impl_block.trait_impl + let trait_type_args: Vec = impl_block + .trait_impl .as_ref() .map(|(_, params)| { - params.iter() - .map(|p| edge_ast::ty::TypeSig::Named(p.name.clone(), Vec::new())) + params + .iter() + .map(|p| { + edge_ast::ty::TypeSig::Named(p.name.clone(), Vec::new()) + }) .collect() }) .unwrap_or_default(); @@ -1017,7 +1027,7 @@ impl AstToEgglog { let_bind_name: None, composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/pattern.rs b/crates/ir/src/to_egglog/pattern.rs index 9b14cac..b7c2546 100644 --- a/crates/ir/src/to_egglog/pattern.rs +++ b/crates/ir/src/to_egglog/pattern.rs @@ -17,7 +17,11 @@ impl AstToEgglog { pattern: &edge_ast::pattern::UnionPattern, ) -> Result { let disc_ir = self.lower_expr(expr)?; - let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name, Some(&pattern.span))?; + let idx = self.variant_index( + &pattern.union_name.name, + &pattern.member_name.name, + Some(&pattern.span), + )?; let idx_ir = ast_helpers::const_int(idx as i64, self.current_ctx.clone()); Ok(ast_helpers::eq(disc_ir, idx_ir)) } @@ -73,7 +77,11 @@ impl AstToEgglog { for arm in arms { match &arm.pattern { edge_ast::pattern::MatchPattern::Union(up) => { - let idx = self.variant_index(&up.union_name.name, &up.member_name.name, Some(&up.span))?; + let idx = self.variant_index( + &up.union_name.name, + &up.member_name.name, + Some(&up.span), + )?; let bindings: Vec = up.bindings.iter().map(|b| b.name.clone()).collect(); variant_arms.push((idx, &arm.body, bindings)); @@ -126,7 +134,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }, ); } @@ -177,7 +185,11 @@ impl AstToEgglog { Rc::clone(&disc_ir) }; - let idx = self.variant_index(&pattern.union_name.name, &pattern.member_name.name, Some(&pattern.span))?; + let idx = self.variant_index( + &pattern.union_name.name, + &pattern.member_name.name, + Some(&pattern.span), + )?; let idx_ir = ast_helpers::const_int(idx as i64, self.current_ctx.clone()); let cond = ast_helpers::eq(disc_val, idx_ir); let inputs = @@ -205,7 +217,7 @@ impl AstToEgglog { let_bind_name: Some(var_name), composite_type: None, composite_base: None, - composite_type_args: Vec::new(), + composite_type_args: Vec::new(), }, ); } diff --git a/crates/ir/src/to_egglog/storage.rs b/crates/ir/src/to_egglog/storage.rs index c350fcf..a4d2304 100644 --- a/crates/ir/src/to_egglog/storage.rs +++ b/crates/ir/src/to_egglog/storage.rs @@ -2,13 +2,14 @@ use std::rc::Rc; +use edge_diagnostics; + use super::AstToEgglog; use crate::{ ast_helpers, schema::{DataLocation, EvmExpr, RcExpr}, IrError, }; -use edge_diagnostics; impl AstToEgglog { /// Lower an emit statement. @@ -117,6 +118,7 @@ impl AstToEgglog { } /// Find the storage slot index and data location for a named field. + #[allow(dead_code)] pub(crate) fn find_storage_slot(&self, name: &str) -> Result<(usize, DataLocation), IrError> { for scope in self.scopes.iter().rev() { if let Some(binding) = scope.bindings.get(name) { @@ -125,10 +127,8 @@ impl AstToEgglog { } } } - Err(IrError::Diagnostic( - edge_diagnostics::Diagnostic::error(format!( - "cannot find storage field `{name}` in the current contract", - )), - )) + Err(IrError::Diagnostic(edge_diagnostics::Diagnostic::error( + format!("cannot find storage field `{name}` in the current contract",), + ))) } } diff --git a/crates/ir/src/to_egglog/types.rs b/crates/ir/src/to_egglog/types.rs index 3ec7ee2..0f2d8ea 100644 --- a/crates/ir/src/to_egglog/types.rs +++ b/crates/ir/src/to_egglog/types.rs @@ -3,6 +3,7 @@ use std::collections::{HashMap, HashSet}; use indexmap::IndexMap; + use super::{AstToEgglog, StructTypeInfo}; use crate::{ schema::{DataLocation, EvmBaseType, EvmType}, @@ -46,7 +47,9 @@ impl AstToEgglog { /// `resolve_generic_type_name_with_args` for precise resolution. pub(crate) fn resolve_generic_type_name(&self, name: &str) -> Option { // Check monomorphized_types cache for entries with this base name - let candidates: Vec<&String> = self.monomorphized_types.iter() + let candidates: Vec<&String> = self + .monomorphized_types + .iter() .filter(|((base, _), _)| base == name) .map(|(_, mangled)| mangled) .collect(); @@ -85,9 +88,7 @@ impl AstToEgglog { name: &str, type_args: &[edge_ast::ty::TypeSig], ) -> Option { - let mangled_args: Vec = type_args.iter() - .map(|a| Self::type_sig_mangle(a)) - .collect(); + let mangled_args: Vec = type_args.iter().map(Self::type_sig_mangle).collect(); let cache_key = (name.to_string(), mangled_args); if let Some(mangled) = self.monomorphized_types.get(&cache_key) { return Some(mangled.clone()); @@ -329,22 +330,25 @@ impl AstToEgglog { } /// Substitute type parameters in a code block (AST-level). - /// Replaces type param names in Path expressions (e.g., V::sload → u256::sload). - /// For generic types like Map, uses the mangled name (Map__address_u256) + /// Replaces type param names in Path expressions (e.g., `V::sload` → `u256::sload`). + /// For generic types like Map, uses the mangled name (`Map__address_u256`) /// so that qualified calls resolve to monomorphized trait impls. fn substitute_code_block( block: &edge_ast::CodeBlock, subst: &HashMap, ) -> edge_ast::CodeBlock { // Build a string→string map for path substitution using mangled names - let name_subst: HashMap<&str, String> = subst.iter().map(|(k, v)| { - (k.as_str(), Self::type_sig_mangle(v)) - }).collect(); + let name_subst: HashMap<&str, String> = subst + .iter() + .map(|(k, v)| (k.as_str(), Self::type_sig_mangle(v))) + .collect(); edge_ast::CodeBlock { - stmts: block.stmts.iter().map(|item| { - Self::substitute_block_item(item, &name_subst) - }).collect(), + stmts: block + .stmts + .iter() + .map(|item| Self::substitute_block_item(item, &name_subst)) + .collect(), span: block.span.clone(), } } @@ -363,79 +367,68 @@ impl AstToEgglog { } } - fn substitute_stmt( - stmt: &edge_ast::Stmt, - subst: &HashMap<&str, String>, - ) -> edge_ast::Stmt { + fn substitute_stmt(stmt: &edge_ast::Stmt, subst: &HashMap<&str, String>) -> edge_ast::Stmt { match stmt { - edge_ast::Stmt::VarDecl(ident, ty, init, span) => { - edge_ast::Stmt::VarDecl( - ident.clone(), - ty.clone(), - init.as_ref().map(|e| Box::new(Self::substitute_expr(e, subst))), - span.clone(), - ) - } - edge_ast::Stmt::VarAssign(lhs, rhs, span) => { - edge_ast::Stmt::VarAssign( - Self::substitute_expr(lhs, subst), - Self::substitute_expr(rhs, subst), - span.clone(), - ) - } + edge_ast::Stmt::VarDecl(ident, ty, init, span) => edge_ast::Stmt::VarDecl( + ident.clone(), + ty.clone(), + init.as_ref() + .map(|e| Box::new(Self::substitute_expr(e, subst))), + span.clone(), + ), + edge_ast::Stmt::VarAssign(lhs, rhs, span) => edge_ast::Stmt::VarAssign( + Self::substitute_expr(lhs, subst), + Self::substitute_expr(rhs, subst), + span.clone(), + ), edge_ast::Stmt::Return(Some(expr), span) => { edge_ast::Stmt::Return(Some(Self::substitute_expr(expr, subst)), span.clone()) } - edge_ast::Stmt::Expr(expr) => { - edge_ast::Stmt::Expr(Self::substitute_expr(expr, subst)) - } + edge_ast::Stmt::Expr(expr) => edge_ast::Stmt::Expr(Self::substitute_expr(expr, subst)), other => other.clone(), } } - fn substitute_expr( - expr: &edge_ast::Expr, - subst: &HashMap<&str, String>, - ) -> edge_ast::Expr { + fn substitute_expr(expr: &edge_ast::Expr, subst: &HashMap<&str, String>) -> edge_ast::Expr { match expr { edge_ast::Expr::Path(components, span) => { - let new_components: Vec = components.iter().map(|c| { - if let Some(replacement) = subst.get(c.name.as_str()) { - edge_ast::Ident { name: replacement.clone(), span: c.span.clone() } - } else { - c.clone() - } - }).collect(); + let new_components: Vec = components + .iter() + .map(|c| { + subst.get(c.name.as_str()).map_or_else( + || c.clone(), + |replacement| edge_ast::Ident { + name: replacement.clone(), + span: c.span.clone(), + }, + ) + }) + .collect(); edge_ast::Expr::Path(new_components, span.clone()) } edge_ast::Expr::FunctionCall(callee, args, turbofish, span) => { edge_ast::Expr::FunctionCall( Box::new(Self::substitute_expr(callee, subst)), - args.iter().map(|a| Self::substitute_expr(a, subst)).collect(), + args.iter() + .map(|a| Self::substitute_expr(a, subst)) + .collect(), turbofish.clone(), span.clone(), ) } - edge_ast::Expr::FieldAccess(obj, field, span) => { - edge_ast::Expr::FieldAccess( - Box::new(Self::substitute_expr(obj, subst)), - field.clone(), - span.clone(), - ) - } - edge_ast::Expr::Binary(lhs, op, rhs, span) => { - edge_ast::Expr::Binary( - Box::new(Self::substitute_expr(lhs, subst)), - op.clone(), - Box::new(Self::substitute_expr(rhs, subst)), - span.clone(), - ) - } + edge_ast::Expr::FieldAccess(obj, field, span) => edge_ast::Expr::FieldAccess( + Box::new(Self::substitute_expr(obj, subst)), + field.clone(), + span.clone(), + ), + edge_ast::Expr::Binary(lhs, op, rhs, span) => edge_ast::Expr::Binary( + Box::new(Self::substitute_expr(lhs, subst)), + *op, + Box::new(Self::substitute_expr(rhs, subst)), + span.clone(), + ), edge_ast::Expr::Paren(inner, span) => { - edge_ast::Expr::Paren( - Box::new(Self::substitute_expr(inner, subst)), - span.clone(), - ) + edge_ast::Expr::Paren(Box::new(Self::substitute_expr(inner, subst)), span.clone()) } _ => expr.clone(), } @@ -512,8 +505,7 @@ impl AstToEgglog { ) -> Result { // Use mangled type names for caching — EvmType loses source-level // distinctions (e.g., CustomHash and u256 both lower to UIntT(256)). - let cache_key_types: Vec = - type_args.iter().map(Self::type_sig_mangle).collect(); + let cache_key_types: Vec = type_args.iter().map(Self::type_sig_mangle).collect(); // Check cache let cache_key = (generic_name.to_string(), cache_key_types); @@ -582,9 +574,13 @@ impl AstToEgglog { }; for constraint in &tp.constraints { let key = (concrete_name.clone(), constraint.name.clone()); - let mangled_key = mangled_name.as_ref().map(|m| (m.clone(), constraint.name.clone())); + let mangled_key = mangled_name + .as_ref() + .map(|m| (m.clone(), constraint.name.clone())); let satisfied = self.trait_impls.contains_key(&key) - || mangled_key.as_ref().map_or(false, |k| self.trait_impls.contains_key(k)); + || mangled_key + .as_ref() + .is_some_and(|k| self.trait_impls.contains_key(k)); if !satisfied { let mut diag = edge_diagnostics::Diagnostic::error(format!( "the trait bound `{}: {}` is not satisfied", @@ -658,54 +654,73 @@ impl AstToEgglog { if let Some(impl_blocks) = self.generic_impl_blocks.get(generic_name).cloned() { for gib in &impl_blocks { // Build substitution from the generic impl's type params to concrete args - let impl_subst: HashMap = if gib.type_params.is_empty() { - // Use the type template's params (e.g., `impl Map` where K,V from the type) - subst.clone() - } else { - gib.type_params.iter() - .zip(type_args.iter()) - .map(|(param, arg)| (param.name.name.clone(), arg.clone())) - .collect() - }; + let impl_subst: HashMap = + if gib.type_params.is_empty() { + // Use the type template's params (e.g., `impl Map` where K,V from the type) + subst.clone() + } else { + gib.type_params + .iter() + .zip(type_args.iter()) + .map(|(param, arg)| (param.name.name.clone(), arg.clone())) + .collect() + }; // Substitute type params in method bodies and register under mangled name - let concrete_methods: Vec = gib.items.iter().map(|item| { - match item { - edge_ast::item::ImplItem::FnAssign(fn_decl, body) => { - let new_params: Vec<(edge_ast::Ident, edge_ast::ty::TypeSig)> = fn_decl.params.iter().map(|(id, ty)| { - (id.clone(), Self::substitute_type_params(ty, &impl_subst)) - }).collect(); - let new_returns: Vec = fn_decl.returns.iter().map(|ty| { - Self::substitute_type_params(ty, &impl_subst) - }).collect(); - let new_fn_decl = edge_ast::item::FnDecl { - name: fn_decl.name.clone(), - params: new_params, - returns: new_returns, - type_params: Vec::new(), // concrete, no type params - is_pub: fn_decl.is_pub, - is_ext: fn_decl.is_ext, - is_mut: fn_decl.is_mut, - span: fn_decl.span.clone(), - }; - // Substitute type params in body expressions - let new_body = Self::substitute_code_block(body, &impl_subst); - edge_ast::item::ImplItem::FnAssign(new_fn_decl, new_body) + let concrete_methods: Vec = gib + .items + .iter() + .map(|item| { + match item { + edge_ast::item::ImplItem::FnAssign(fn_decl, body) => { + let new_params: Vec<(edge_ast::Ident, edge_ast::ty::TypeSig)> = + fn_decl + .params + .iter() + .map(|(id, ty)| { + ( + id.clone(), + Self::substitute_type_params(ty, &impl_subst), + ) + }) + .collect(); + let new_returns: Vec = fn_decl + .returns + .iter() + .map(|ty| Self::substitute_type_params(ty, &impl_subst)) + .collect(); + let new_fn_decl = edge_ast::item::FnDecl { + name: fn_decl.name.clone(), + params: new_params, + returns: new_returns, + type_params: Vec::new(), // concrete, no type params + is_pub: fn_decl.is_pub, + is_ext: fn_decl.is_ext, + is_mut: fn_decl.is_mut, + span: fn_decl.span.clone(), + }; + // Substitute type params in body expressions + let new_body = Self::substitute_code_block(body, &impl_subst); + edge_ast::item::ImplItem::FnAssign(new_fn_decl, new_body) + } + other => other.clone(), } - other => other.clone(), - } - }).collect(); + }) + .collect(); if let Some(ref trait_name) = gib.trait_impl { // Trait impl: register under mangled type name let mut methods = IndexMap::new(); for item in &concrete_methods { if let edge_ast::item::ImplItem::FnAssign(fn_decl, body) = item { - methods.insert(fn_decl.name.name.clone(), (fn_decl.clone(), body.clone())); + methods + .insert(fn_decl.name.name.clone(), (fn_decl.clone(), body.clone())); } } // Substitute type params in trait type args to get concrete types - let trait_type_args: Vec = gib.trait_type_params.iter() + let trait_type_args: Vec = gib + .trait_type_params + .iter() .map(|p| { let sig = edge_ast::ty::TypeSig::Named(p.name.clone(), Vec::new()); Self::substitute_type_params(&sig, &impl_subst) @@ -721,17 +736,23 @@ impl AstToEgglog { ); } else { // Inherent impl: register methods under mangled type name - let methods: Vec = concrete_methods.iter().filter_map(|item| { - if let edge_ast::item::ImplItem::FnAssign(fn_decl, body) = item { - Some(super::InherentMethod { - fn_decl: fn_decl.clone(), - body: body.clone(), - }) - } else { - None - } - }).collect(); - self.inherent_methods.entry(mangled.clone()).or_default().extend(methods); + let methods: Vec = concrete_methods + .iter() + .filter_map(|item| { + if let edge_ast::item::ImplItem::FnAssign(fn_decl, body) = item { + Some(super::InherentMethod { + fn_decl: fn_decl.clone(), + body: body.clone(), + }) + } else { + None + } + }) + .collect(); + self.inherent_methods + .entry(mangled.clone()) + .or_default() + .extend(methods); } } } @@ -849,9 +870,7 @@ impl AstToEgglog { if args.is_empty() { ident.name.clone() } else { - let arg_strs: Vec = args.iter() - .map(Self::type_sig_mangle) - .collect(); + let arg_strs: Vec = args.iter().map(Self::type_sig_mangle).collect(); format!("{}__{}", ident.name, arg_strs.join("_")) } } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 78248f4..e1ba434 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -257,7 +257,8 @@ impl Parser { /// Check if the token after the current one is `::` (without advancing). fn lookahead_double_colon(&self) -> bool { - self.cursor + 1 < self.tokens.len() && self.tokens[self.cursor + 1].kind == TokenKind::DoubleColon + self.cursor + 1 < self.tokens.len() + && self.tokens[self.cursor + 1].kind == TokenKind::DoubleColon } /// Get a zero-width span at the end of the previous token. From 1603af103a1582a65dda7ab0e18839dbdfb7f6a8 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Thu, 12 Mar 2026 08:35:16 -0700 Subject: [PATCH 06/13] feat: Vec dynamic memory allocation + DAG-aware IR optimizations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements Vec with runtime memory allocation (DynAlloc, @alloc, @size_of, &dm pointers, compound assignment, Mstore/Mload/Mcopy traits, memory field writes, pass-by-reference inlining). Adds DAG-aware s-expression serialization to keep egglog input proportional to DAG size instead of tree-expanded size. Fixes exponential IR blowup in var_opt by adding memoization to optimize_expr, substitute_var, and Rc::ptr_eq short-circuits to substitute_args and rename_locals_rec — reducing Vec test IR from 43K to 1.5K DAG nodes. Co-Authored-By: Claude Opus 4.6 --- crates/ast/src/expr.rs | 6 +- crates/ast/src/ty.rs | 3 + crates/codegen/src/dispatcher.rs | 76 +- crates/codegen/src/expr_compiler.rs | 306 ++++++- crates/driver/src/compiler.rs | 1 + crates/e2e/.gas-snapshot | 11 +- crates/e2e/tests/main.rs | 3 + crates/e2e/tests/suites/vec_exec.rs | 76 ++ crates/ir/src/ast_helpers.rs | 6 + crates/ir/src/cleanup.rs | 8 + crates/ir/src/lib.rs | 1031 ++++++++++++++++++++++- crates/ir/src/mem_region.rs | 14 +- crates/ir/src/optimizations/memory.egg | 21 +- crates/ir/src/pretty.rs | 22 +- crates/ir/src/schema.egg | 4 + crates/ir/src/schema.rs | 5 + crates/ir/src/sexp.rs | 333 +++++++- crates/ir/src/storage_hoist.rs | 8 + crates/ir/src/to_egglog/calls.rs | 193 ++++- crates/ir/src/to_egglog/composite.rs | 9 + crates/ir/src/to_egglog/control_flow.rs | 2 + crates/ir/src/to_egglog/expr.rs | 233 ++++- crates/ir/src/to_egglog/function.rs | 20 + crates/ir/src/to_egglog/mod.rs | 38 +- crates/ir/src/to_egglog/pattern.rs | 2 + crates/ir/src/to_egglog/types.rs | 132 ++- crates/ir/src/var_opt.rs | 528 +++++++++--- crates/lexer/src/lexer.rs | 12 + crates/parser/src/parser.rs | 70 +- crates/types/src/tokens/locations.rs | 6 + examples/tests/test_vec.edge | 66 ++ std/globals/ops.edge | 12 + std/globals/vec.edge | 71 ++ 33 files changed, 3105 insertions(+), 223 deletions(-) create mode 100644 crates/e2e/tests/suites/vec_exec.rs create mode 100644 examples/tests/test_vec.edge create mode 100644 std/globals/vec.edge diff --git a/crates/ast/src/expr.rs b/crates/ast/src/expr.rs index 074da7b..7dda2a4 100644 --- a/crates/ast/src/expr.rs +++ b/crates/ast/src/expr.rs @@ -81,8 +81,8 @@ pub enum Expr { /// Path expression: `a::b::c` Path(Vec, Span), - /// Builtin call: @builtin(args...) - At(Ident, Vec, Span), + /// Builtin call: `@builtin(args...)` or `@builtin::(args...)` + At(Ident, Vec, Vec, Span), /// Assignment: lhs = rhs Assign(Box, Box, Span), @@ -118,7 +118,7 @@ impl Expr { Self::Paren(_, span) => span.clone(), Self::Comptime(_, span) => span.clone(), Self::Path(_, span) => span.clone(), - Self::At(_, _, span) => span.clone(), + Self::At(_, _, _, span) => span.clone(), Self::Assign(_, _, span) => span.clone(), Self::InlineAsm(_, _, _, span) => span.clone(), Self::Cast(_, _, span) => span.clone(), diff --git a/crates/ast/src/ty.rs b/crates/ast/src/ty.rs index de98fc1..2119faa 100644 --- a/crates/ast/src/ty.rs +++ b/crates/ast/src/ty.rs @@ -21,6 +21,8 @@ pub enum Location { ImmutableCode, /// External code (&ec) ExternalCode, + /// Dynamic memory (&dm) + DynamicMemory, } impl std::fmt::Display for Location { @@ -33,6 +35,7 @@ impl std::fmt::Display for Location { Self::Returndata => write!(f, "&rd"), Self::ImmutableCode => write!(f, "&ic"), Self::ExternalCode => write!(f, "&ec"), + Self::DynamicMemory => write!(f, "&dm"), } } } diff --git a/crates/codegen/src/dispatcher.rs b/crates/codegen/src/dispatcher.rs index f932636..369b280 100644 --- a/crates/codegen/src/dispatcher.rs +++ b/crates/codegen/src/dispatcher.rs @@ -7,6 +7,55 @@ use edge_ir::{schema::EvmContract, var_opt}; use crate::{assembler::Assembler, expr_compiler::ExprCompiler}; +/// Recursively check if an IR tree contains any `DynAlloc` nodes. +fn contains_dyn_alloc(expr: &edge_ir::schema::RcExpr) -> bool { + use edge_ir::schema::EvmExpr; + match expr.as_ref() { + EvmExpr::DynAlloc(_) => true, + EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { + contains_dyn_alloc(a) || contains_dyn_alloc(b) + } + EvmExpr::Uop(_, a) => contains_dyn_alloc(a), + EvmExpr::Top(_, a, b, c) + | EvmExpr::If(_, a, b, c) + | EvmExpr::Revert(a, b, c) + | EvmExpr::ReturnOp(a, b, c) => { + contains_dyn_alloc(a) || contains_dyn_alloc(b) || contains_dyn_alloc(c) + } + EvmExpr::LetBind(_, init, body) => contains_dyn_alloc(init) || contains_dyn_alloc(body), + EvmExpr::VarStore(_, val) => contains_dyn_alloc(val), + EvmExpr::EnvRead(_, state) => contains_dyn_alloc(state), + EvmExpr::EnvRead1(_, arg, state) => contains_dyn_alloc(arg) || contains_dyn_alloc(state), + EvmExpr::Log(_, topics, offset, size, state) => { + topics.iter().any(contains_dyn_alloc) + || contains_dyn_alloc(offset) + || contains_dyn_alloc(size) + || contains_dyn_alloc(state) + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + contains_dyn_alloc(a) + || contains_dyn_alloc(b) + || contains_dyn_alloc(c) + || contains_dyn_alloc(d) + || contains_dyn_alloc(e) + || contains_dyn_alloc(f) + || contains_dyn_alloc(g) + } + EvmExpr::Function(_, _, _, body) => contains_dyn_alloc(body), + EvmExpr::Call(_, args) => args.iter().any(contains_dyn_alloc), + EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(contains_dyn_alloc), + EvmExpr::Get(inner, _) => contains_dyn_alloc(inner), + EvmExpr::Const(..) + | EvmExpr::Var(_) + | EvmExpr::Drop(_) + | EvmExpr::Arg(_, _) + | EvmExpr::Empty(_, _) + | EvmExpr::StorageField(_, _, _) + | EvmExpr::MemRegion(_, _) + | EvmExpr::Selector(_) => false, + } +} + /// Generate the function dispatcher for a contract. /// /// The dispatcher compiles the runtime IR which contains the full @@ -32,9 +81,32 @@ pub fn generate_dispatcher(asm: &mut Assembler, contract: &EvmContract) { .or_insert(alloc); } } + // Compute the DynAlloc floor: the minimum address DynAlloc may return. + // Without this, DynAlloc (which uses MSIZE) could return pointers that + // overlap with LetBind slots whose MSTORE hasn't happened yet. + // + // We simulate the codegen's LetBind allocation to find the peak offset. + // This mirrors compile_if (doesn't restore next_let_offset across branches) + // and Function (does restore), giving the exact peak rather than a loose bound. + let has_dyn_alloc = contains_dyn_alloc(&contract.runtime) + || contract.internal_functions.iter().any(contains_dyn_alloc); + let dyn_alloc_floor = if has_dyn_alloc { + let mut all_exprs: Vec<&edge_ir::schema::RcExpr> = vec![&contract.runtime]; + for func in &contract.internal_functions { + all_exprs.push(func); + } + ExprCompiler::compute_peak_let_offset(&allocations, contract.memory_high_water, &all_exprs) + } else { + 0 + }; + // Start LetBind slots after IR-allocated memory regions (arrays, structs) - let mut compiler = - ExprCompiler::with_allocations_and_base(asm, allocations, contract.memory_high_water); + let mut compiler = ExprCompiler::with_allocations_base_and_floor( + asm, + allocations, + contract.memory_high_water, + dyn_alloc_floor, + ); // Collect fn_info from both runtime and internal functions compiler.collect_fn_info(&contract.runtime); for func in &contract.internal_functions { diff --git a/crates/codegen/src/expr_compiler.rs b/crates/codegen/src/expr_compiler.rs index 13bd48b..bdd3376 100644 --- a/crates/codegen/src/expr_compiler.rs +++ b/crates/codegen/src/expr_compiler.rs @@ -54,6 +54,10 @@ pub struct ExprCompiler<'a> { /// Inner function metadata: name -> (`param_count`, `return_count`) /// Populated by a pre-pass over the IR tree before compilation. fn_info: HashMap, + /// Minimum address that `DynAlloc` may return. + /// Ensures `DynAlloc` pointers don't overlap with `LetBind` memory slots. + /// Set to `memory_high_water + num_memory_vars * 32` when `DynAlloc` is used. + dyn_alloc_floor: usize, } impl<'a> ExprCompiler<'a> { @@ -79,6 +83,17 @@ impl<'a> ExprCompiler<'a> { asm: &'a mut Assembler, allocation_modes: HashMap, memory_base: usize, + ) -> Self { + Self::with_allocations_base_and_floor(asm, allocation_modes, memory_base, 0) + } + + /// Create an expression compiler with allocation modes, a custom base offset, + /// and a `DynAlloc` floor (minimum address for dynamic memory allocation). + pub fn with_allocations_base_and_floor( + asm: &'a mut Assembler, + allocation_modes: HashMap, + memory_base: usize, + dyn_alloc_floor: usize, ) -> Self { Self { asm, @@ -94,6 +109,7 @@ impl<'a> ExprCompiler<'a> { overflow_revert_label: None, revert_trampoline_label: None, fn_info: HashMap::new(), + dyn_alloc_floor, } } @@ -170,6 +186,104 @@ impl<'a> ExprCompiler<'a> { ); } + EvmExpr::DynAlloc(size) => { + // Dynamic memory allocation using max(MSIZE, floor). + // + // The floor ensures DynAlloc pointers don't overlap with + // LetBind memory slots whose MSTORE hasn't happened yet. + // If MSIZE is already past the floor (common case), MSIZE wins. + // + // When floor > 0, we emit: + // MSIZE → [ms] + // DUP1 → [ms, ms] + // PUSH floor → [ms, ms, floor] + // GT → [ms, ms>floor] + // PUSH skip → [ms, ms>floor, skip] + // JUMPI → [ms] -- if ms>floor, keep ms + // POP → [] + // PUSH floor → [floor] + // skip: JUMPDEST → [base] = max(ms, floor) + // + // Then allocate: + // DUP1 → [base, base] + // compile(size) → [base, base, size] + // ADD → [base, base+size] + // PUSH0 → [base, base+size, 0] + // SWAP1 → [base, 0, base+size] + // MSTORE → [base] (expands memory) + // + // Net stack effect: +1 (the base pointer) + if self.dyn_alloc_floor > 0 { + // Emit max(MSIZE, floor) + // + // Stack sequence: + // MSIZE → [ms] + // DUP1 → [ms, ms] + // PUSH floor → [floor, ms, ms] + // GT (EVM: a > b where a=TOS=floor, b=ms) + // → [floor>ms, ms] + // ... + // + // We want: if ms >= floor, keep ms (skip). + // if ms < floor, replace ms with floor. + // + // EVM GT(floor, ms) = floor > ms. + // When floor > ms (need floor): GT=1, JUMPI takes jump. + // But we want to REPLACE ms, not skip! + // So we must NOT jump when floor > ms. + // + // Fix: use ISZERO to invert, or just use LT instead. + // With GT: floor > ms means we need floor. So jump should + // go to the replacement path, not the skip path. + // + // Simplest fix: use GT but jump means "ms is big enough, skip". + // GT(floor, ms) = floor > ms → ms is NOT big enough. + // So: ISZERO + JUMPI to skip when ms IS big enough. + let skip_label = self.asm.fresh_label("dyn_alloc_skip"); + self.asm.emit_op(Opcode::MSize); + self.stack_depth += 1; + self.asm.emit_op(Opcode::Dup1); + self.stack_depth += 1; + self.asm.emit_push_usize(self.dyn_alloc_floor); + self.stack_depth += 1; + // GT: floor > ms? + self.asm.emit_op(Opcode::Gt); + self.stack_depth -= 1; + // ISZERO: !(floor > ms) = ms >= floor + self.asm.emit_op(Opcode::IsZero); + // stack: [ms, ms >= floor] + self.asm.emit(AsmInstruction::JumpITo(skip_label.clone())); + self.stack_depth -= 1; // JumpITo: PUSH(+1) JUMPI(-2) = net -1 + // Fall-through: floor > ms, use floor instead + self.asm.emit_op(Opcode::Pop); + self.stack_depth -= 1; + self.asm.emit_push_usize(self.dyn_alloc_floor); + self.stack_depth += 1; + self.asm.emit(AsmInstruction::Label(skip_label)); + // stack: [base] where base = max(MSIZE, floor) + } else { + // No floor needed — MSIZE is sufficient + self.asm.emit_op(Opcode::MSize); + self.stack_depth += 1; + } + + // Expand memory: MSTORE(base + size, 0) + self.asm.emit_op(Opcode::Dup1); + self.stack_depth += 1; + self.compile_expr(size); + // stack: [base, base, size] + self.asm.emit_op(Opcode::Add); + self.stack_depth -= 1; + // stack: [base, base+size] + self.asm.emit_op(Opcode::Push0); + self.stack_depth += 1; + self.asm.emit_op(Opcode::Swap1); + // stack: [base, 0, base+size] + self.asm.emit_op(Opcode::MStore); + self.stack_depth -= 2; + // stack: [base] — the returned pointer + } + EvmExpr::Empty(_, _) | EvmExpr::StorageField(_, _, _) => { // Empty: unit — no value on stack. // StorageField: declarations don't emit code. @@ -473,6 +587,36 @@ impl<'a> ExprCompiler<'a> { } } + /// Compute the peak `next_let_offset` by simulating `LetBind` allocation. + /// + /// Walks the IR tree in the same order as `compile_expr`, tracking + /// memory-mode `LetBind` slot allocation/deallocation. Returns the + /// highest `next_let_offset` reached during the traversal. + /// + /// This mirrors the actual codegen behavior: + /// - `If` saves/restores `let_bindings` and `free_slots` but NOT + /// `next_let_offset` (branches get non-overlapping slots) + /// - `Function` saves/restores everything including `next_let_offset` + /// - `Drop` reclaims slots to the free list for reuse + pub fn compute_peak_let_offset( + allocation_modes: &HashMap, + memory_base: usize, + exprs: &[&RcExpr], + ) -> usize { + let mut state = LetOffsetSim { + let_bindings: HashMap::new(), + free_slots: Vec::new(), + next_let_offset: memory_base, + peak: memory_base, + allocation_modes, + stack_var_count: 0, + }; + for expr in exprs { + state.walk(expr); + } + state.peak + } + /// Compile a `LetBind`: allocate variable, compile body, clean up. fn compile_let_bind(&mut self, name: &str, value: &RcExpr, body: &RcExpr) { // Decide allocation mode with stack depth safety check: @@ -1318,10 +1462,7 @@ impl<'a> ExprCompiler<'a> { EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => Self::count_var_reads(name, a), EvmExpr::Top(op, a, b, c) => { use EvmTernaryOp::*; - let c_is_state = matches!( - op, - SStore | TStore | MStore | MStore8 | Keccak256 | CalldataCopy | Mcopy - ); + let c_is_state = matches!(op, SStore | TStore | MStore | MStore8 | Keccak256); Self::count_var_reads(name, a) + Self::count_var_reads(name, b) + if c_is_state { @@ -1499,6 +1640,163 @@ impl<'a> ExprCompiler<'a> { } } +/// Simulates `LetBind` memory slot allocation to compute peak `next_let_offset`. +/// +/// Mirrors the allocation behavior in `ExprCompiler::compile_let_bind` and +/// the save/restore behavior in `compile_if` (does NOT restore `next_let_offset` +/// across if branches) and `compile_expr` for `Function` (DOES restore). +struct LetOffsetSim<'a> { + let_bindings: HashMap, + free_slots: Vec, + next_let_offset: usize, + peak: usize, + allocation_modes: &'a HashMap, + stack_var_count: usize, +} + +impl<'a> LetOffsetSim<'a> { + fn alloc_mode(&self, name: &str) -> AllocationMode { + self.allocation_modes + .get(name) + .map(|a| a.mode) + .unwrap_or(AllocationMode::Memory) + } + + fn is_memory_mode(&self, name: &str) -> bool { + self.alloc_mode(name) == AllocationMode::Memory || self.stack_var_count >= 14 + } + + fn walk(&mut self, expr: &RcExpr) { + match expr.as_ref() { + EvmExpr::LetBind(name, init, body) => { + self.walk(init); + if self.is_memory_mode(name) { + // Allocate a memory slot + let offset = if let Some(reused) = self.free_slots.pop() { + tracing::trace!( + "LetOffsetSim: {name} → reused slot {reused}, next={}, peak={}", + self.next_let_offset, + self.peak + ); + reused + } else { + let off = self.next_let_offset; + self.next_let_offset += 32; + if self.next_let_offset > self.peak { + self.peak = self.next_let_offset; + } + tracing::trace!( + "LetOffsetSim: {name} → new slot {off}, next={}, peak={}", + self.next_let_offset, + self.peak + ); + off + }; + self.let_bindings.insert(name.clone(), offset); + self.walk(body); + // Free slot if not already freed by Drop + if self.let_bindings.get(name) == Some(&offset) { + self.free_slots.push(offset); + } + self.let_bindings.remove(name); + } else { + self.stack_var_count += 1; + self.walk(body); + self.stack_var_count = self.stack_var_count.saturating_sub(1); + } + } + EvmExpr::Drop(name) => { + if let Some(offset) = self.let_bindings.remove(name) { + self.free_slots.push(offset); + } else { + // Stack mode drop — decrement count + self.stack_var_count = self.stack_var_count.saturating_sub(1); + } + } + EvmExpr::If(_, cond, then_body, else_body) => { + self.walk(cond); + // Save state for branches (matching compile_if behavior) + let saved_bindings = self.let_bindings.clone(); + let saved_free = self.free_slots.clone(); + let saved_stack_count = self.stack_var_count; + self.walk(then_body); + // Restore for else branch (but NOT next_let_offset!) + self.let_bindings = saved_bindings; + self.free_slots = saved_free; + self.stack_var_count = saved_stack_count; + self.walk(else_body); + } + EvmExpr::Function(_, _, _, body) => { + // Functions save/restore everything including next_let_offset + let saved_bindings = self.let_bindings.clone(); + let saved_free = self.free_slots.clone(); + let saved_offset = self.next_let_offset; + let saved_stack_count = self.stack_var_count; + self.let_bindings.clear(); + self.stack_var_count = 0; + self.walk(body); + self.let_bindings = saved_bindings; + self.free_slots = saved_free; + self.next_let_offset = saved_offset; + self.stack_var_count = saved_stack_count; + } + // Recurse into children for everything else + EvmExpr::Concat(a, b) + | EvmExpr::Bop(_, a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::EnvRead1(_, a, b) => { + self.walk(a); + self.walk(b); + } + EvmExpr::VarStore(_, val) => self.walk(val), + EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { + self.walk(a); + self.walk(b); + self.walk(c); + } + EvmExpr::Uop(_, a) + | EvmExpr::DynAlloc(a) + | EvmExpr::Get(a, _) + | EvmExpr::EnvRead(_, a) => self.walk(a), + EvmExpr::Log(_, topics, offset, size, state) => { + for t in topics { + self.walk(t); + } + self.walk(offset); + self.walk(size); + self.walk(state); + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + self.walk(a); + self.walk(b); + self.walk(c); + self.walk(d); + self.walk(e); + self.walk(f); + self.walk(g); + } + EvmExpr::Call(_, args) => { + for a in args { + self.walk(a); + } + } + EvmExpr::InlineAsm(inputs, _, _) => { + for i in inputs { + self.walk(i); + } + } + // Leaf nodes — nothing to recurse into + EvmExpr::Const(..) + | EvmExpr::Var(_) + | EvmExpr::Arg(_, _) + | EvmExpr::Empty(_, _) + | EvmExpr::StorageField(_, _, _) + | EvmExpr::MemRegion(_, _) + | EvmExpr::Selector(_) => {} + } + } +} + /// Convert a u64 to minimal big-endian bytes. fn minimal_be_bytes_u64(val: u64) -> Vec { if val == 0 { diff --git a/crates/driver/src/compiler.rs b/crates/driver/src/compiler.rs index d30a051..f7d3cb0 100644 --- a/crates/driver/src/compiler.rs +++ b/crates/driver/src/compiler.rs @@ -568,6 +568,7 @@ impl Compiler { "globals/option", "globals/result", "globals/map", + "globals/vec", ]; let mut new_stmts: Vec = Vec::new(); diff --git a/crates/e2e/.gas-snapshot b/crates/e2e/.gas-snapshot index 6010a33..abf4eda 100644 --- a/crates/e2e/.gas-snapshot +++ b/crates/e2e/.gas-snapshot @@ -119,9 +119,9 @@ test_trait_bounds::test_multiple_bounds_other(), 228, 221, 221, 221 test_trait_bounds::test_multiple_bounds(), 123, 116, 116, 116 test_trait_bounds::test_scale_other(), 361, 349, 193, 193 test_trait_bounds::test_scale_wrapper(), 260, 248, 169, 169 -test_trait_bounds::test_type_bound_other(), 233, 211, 218, 218 -test_trait_bounds::test_type_bound(), 206, 184, 191, 191 -test_traits::test_add_overload(), 218, 180, 207, 207 +test_trait_bounds::test_type_bound_other(), 233, 211, 211, 211 +test_trait_bounds::test_type_bound(), 206, 184, 184, 184 +test_traits::test_add_overload(), 218, 180, 177, 177 test_traits::test_double_method(), 207, 195, 116, 116 test_traits::test_double_then_triple(), 339, 327, 168, 168 test_traits::test_double(), 259, 247, 168, 168 @@ -141,3 +141,8 @@ test_unsafe_arith::test_sub_underflow(), 108, 108, 108, 108 test_unsafe_arith::test_unsafe_add(), 135, 129, 129, 129 test_unsafe_arith::test_unsafe_mul(), 112, 104, 104, 104 test_unsafe_arith::test_unsafe_sub(), 161, 155, 155, 155 +test_vec::test_get(), 1329, 1072, 1072, 1072 +test_vec::test_grow(), 3115, 2491, 2491, 2491 +test_vec::test_index(), 1034, 798, 798, 798 +test_vec::test_new_and_push(), 1082, 903, 903, 903 +test_vec::test_set(), 1514, 1174, 1106, 1106 diff --git a/crates/e2e/tests/main.rs b/crates/e2e/tests/main.rs index d440441..bcb9924 100644 --- a/crates/e2e/tests/main.rs +++ b/crates/e2e/tests/main.rs @@ -57,6 +57,9 @@ mod warnings; #[path = "suites/map_std_exec.rs"] mod map_std_exec; +#[path = "suites/vec_exec.rs"] +mod vec_exec; + #[path = "suites/int_widths_exec.rs"] mod int_widths_exec; #[path = "suites/large_int_literals.rs"] diff --git a/crates/e2e/tests/suites/vec_exec.rs b/crates/e2e/tests/suites/vec_exec.rs new file mode 100644 index 0000000..68071b5 --- /dev/null +++ b/crates/e2e/tests/suites/vec_exec.rs @@ -0,0 +1,76 @@ +#![allow(missing_docs)] + +//! Execution-level tests for Vec dynamic memory allocation. + +use crate::helpers::*; + +const CONTRACT: &str = "examples/tests/test_vec.edge"; + +#[test] +fn test_vec_new_and_push() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_new_and_push()"), &[])); + assert!(r.success, "test_new_and_push() reverted at O{opt}"); + assert_eq!( + decode_u256(&r.output), + 3, + "len should be 3 after 3 pushes (O{opt})" + ); + }); +} + +#[test] +fn test_vec_get() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_get()"), &[])); + assert!(r.success, "test_get() reverted at O{opt}"); + assert_eq!( + decode_u256(&r.output), + 200, + "get(1) should return second element (200) (O{opt})" + ); + }); +} + +#[test] +fn test_vec_set() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_set()"), &[])); + assert!( + r.success, + "test_set() reverted at O{opt}; gas_used={}", + r.gas_used + ); + assert_eq!( + decode_u256(&r.output), + 999, + "set(1, 999) then get(1) should return 999 (O{opt})" + ); + }); +} + +#[test] +fn test_vec_grow() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_grow()"), &[])); + assert!(r.success, "test_grow() reverted at O{opt}"); + assert_eq!( + decode_u256(&r.output), + 15, + "sum of elements 1..5 should be 15 after growth (O{opt})" + ); + }); +} + +#[test] +fn test_vec_index() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_index()"), &[])); + assert!( + r.success, + "test_index() reverted at O{opt}; gas_used={}", + r.gas_used + ); + assert_eq!(decode_u256(&r.output), 84, "v[1] should return 84 (O{opt})"); + }); +} diff --git a/crates/ir/src/ast_helpers.rs b/crates/ir/src/ast_helpers.rs index 51d91ac..33b2b56 100644 --- a/crates/ir/src/ast_helpers.rs +++ b/crates/ir/src/ast_helpers.rs @@ -283,6 +283,12 @@ pub fn mem_region(region_id: i64, size_words: i64) -> RcExpr { Rc::new(EvmExpr::MemRegion(region_id, size_words)) } +/// Dynamic memory allocation: allocate `size` bytes at runtime using MSIZE. +/// Returns the base address of the allocated region. +pub fn dyn_alloc(size: RcExpr) -> RcExpr { + Rc::new(EvmExpr::DynAlloc(size)) +} + // ---- Integer width helpers ---- /// Create a mask constant for the given bit width: `(1 << bit_width) - 1`. diff --git a/crates/ir/src/cleanup.rs b/crates/ir/src/cleanup.rs index 57a324e..3ef91d1 100644 --- a/crates/ir/src/cleanup.rs +++ b/crates/ir/src/cleanup.rs @@ -157,6 +157,14 @@ fn cleanup_expr(expr: &RcExpr) -> RcExpr { *num_outputs, )), + EvmExpr::DynAlloc(size) => { + let ns = cleanup_expr(size); + if Rc::ptr_eq(&ns, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(ns)) + } + // Leaf nodes — no children to clean EvmExpr::Arg(..) | EvmExpr::Const(..) diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 1ef96bb..631a599 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -36,12 +36,972 @@ pub mod to_egglog; pub mod u256_sort; pub mod var_opt; -use std::rc::Rc; +use std::{ + collections::HashMap, + hash::{Hash, Hasher}, + rc::Rc, +}; pub use costs::OptimizeFor; use schema::{EvmBaseType, EvmConstant, EvmType}; pub use schema::{EvmContract, EvmExpr, EvmProgram, RcExpr}; +// ============================================================ +// Hash-consing: re-establish Rc sharing after tree-rebuilding passes +// ============================================================ +// +// var_opt reconstructs IR nodes, breaking Rc sharing. This pass walks +// bottom-up and deduplicates structurally identical subtrees into +// shared Rc pointers, restoring the compact DAG representation. +// After hash-consing, DAG-aware s-expression serialization can emit +// compact egglog programs. + +/// Hash-cons an IR expression tree: deduplicate structurally identical +/// subtrees into shared `Rc` pointers. +pub fn hash_cons(expr: &RcExpr) -> RcExpr { + let mut cache: HashMap = HashMap::new(); + hash_cons_rec(expr, &mut cache) +} + +/// Hash-cons all expressions in a program. +pub fn hash_cons_program(program: &mut EvmProgram) { + let mut cache: HashMap = HashMap::new(); + for contract in &mut program.contracts { + contract.runtime = hash_cons_rec(&contract.runtime, &mut cache); + for func in &mut contract.internal_functions { + *func = hash_cons_rec(func, &mut cache); + } + contract.constructor = hash_cons_rec(&contract.constructor, &mut cache); + } + for func in &mut program.free_functions { + *func = hash_cons_rec(func, &mut cache); + } +} + +/// Hash-cons a single expression tree, restoring Rc sharing for structurally identical subtrees. +pub fn hash_cons_expr(expr: &RcExpr) -> RcExpr { + let mut cache: HashMap = HashMap::new(); + hash_cons_rec(expr, &mut cache) +} + +/// A hash key that captures node identity by type + inline data + child Rc pointers. +/// Since children are hash-consed first, pointer equality <=> structural equality. +#[derive(Clone, PartialEq, Eq)] +pub(crate) struct HashConsKey { + /// Compact byte representation of the node + bytes: Vec, +} + +impl Hash for HashConsKey { + fn hash(&self, state: &mut H) { + self.bytes.hash(state); + } +} + +impl HashConsKey { + pub(crate) fn new() -> Self { + Self { + bytes: Vec::with_capacity(64), + } + } + + pub(crate) fn tag(&mut self, tag: u8) { + self.bytes.push(tag); + } + + pub(crate) fn ptr(&mut self, rc: &RcExpr) { + let p = Rc::as_ptr(rc) as usize; + self.bytes.extend_from_slice(&p.to_le_bytes()); + } + + pub(crate) fn str(&mut self, s: &str) { + self.bytes + .extend_from_slice(&(s.len() as u32).to_le_bytes()); + self.bytes.extend_from_slice(s.as_bytes()); + } + + pub(crate) fn usize(&mut self, v: usize) { + self.bytes.extend_from_slice(&v.to_le_bytes()); + } + + pub(crate) fn i64(&mut self, v: i64) { + self.bytes.extend_from_slice(&v.to_le_bytes()); + } + + pub(crate) fn i32(&mut self, v: i32) { + self.bytes.extend_from_slice(&v.to_le_bytes()); + } + + pub(crate) fn bool(&mut self, v: bool) { + self.bytes.push(v as u8); + } + + pub(crate) fn u8(&mut self, v: u8) { + self.bytes.push(v); + } + + pub(crate) fn u16(&mut self, v: u16) { + self.bytes.extend_from_slice(&v.to_le_bytes()); + } +} + +pub(crate) fn key_for_type(k: &mut HashConsKey, ty: &EvmType) { + match ty { + EvmType::Base(b) => { + k.tag(0); + key_for_basetype(k, b); + } + EvmType::TupleT(types) => { + k.tag(1); + k.usize(types.len()); + for t in types { + key_for_basetype(k, t); + } + } + EvmType::ArrayT(elem, len) => { + k.tag(2); + key_for_basetype(k, elem); + k.usize(*len); + } + } +} + +pub(crate) fn key_for_basetype(k: &mut HashConsKey, bt: &EvmBaseType) { + match bt { + EvmBaseType::UIntT(n) => { + k.tag(0); + k.u16(*n); + } + EvmBaseType::IntT(n) => { + k.tag(1); + k.u16(*n); + } + EvmBaseType::BytesT(n) => { + k.tag(2); + k.u8(*n); + } + EvmBaseType::AddrT => k.tag(3), + EvmBaseType::BoolT => k.tag(4), + EvmBaseType::UnitT => k.tag(5), + EvmBaseType::StateT => k.tag(6), + } +} + +pub(crate) fn key_for_const(k: &mut HashConsKey, c: &EvmConstant) { + match c { + EvmConstant::SmallInt(i) => { + k.tag(0); + k.i64(*i); + } + EvmConstant::LargeInt(s) => { + k.tag(1); + k.str(s); + } + EvmConstant::Bool(b) => { + k.tag(2); + k.bool(*b); + } + EvmConstant::Addr(s) => { + k.tag(3); + k.str(s); + } + } +} + +pub(crate) fn key_for_ctx(k: &mut HashConsKey, ctx: &schema::EvmContext) { + match ctx { + schema::EvmContext::InFunction(name) => { + k.tag(0); + k.str(name); + } + schema::EvmContext::InBranch(b, pred, input) => { + k.tag(1); + k.bool(*b); + k.ptr(pred); + k.ptr(input); + } + schema::EvmContext::InLoop(input, pred) => { + k.tag(2); + k.ptr(input); + k.ptr(pred); + } + } +} + +fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { + // Build key and hash-cons children first + let mut k = HashConsKey::new(); + + macro_rules! child { + ($e:expr) => { + hash_cons_rec($e, cache) + }; + } + + let result: RcExpr = match expr.as_ref() { + EvmExpr::Arg(ty, ctx) => { + k.tag(0); + key_for_type(&mut k, ty); + key_for_ctx(&mut k, ctx); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::Const(c, ty, ctx) => { + k.tag(1); + key_for_const(&mut k, c); + key_for_type(&mut k, ty); + key_for_ctx(&mut k, ctx); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::Empty(ty, ctx) => { + k.tag(2); + key_for_type(&mut k, ty); + key_for_ctx(&mut k, ctx); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::Bop(op, l, r) => { + let nl = child!(l); + let nr = child!(r); + k.tag(3); + k.u8(*op as u8); + k.ptr(&nl); + k.ptr(&nr); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nl, l) && Rc::ptr_eq(&nr, r) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Bop(*op, nl, nr)) + } + } + EvmExpr::Uop(op, a) => { + let na = child!(a); + k.tag(4); + k.u8(*op as u8); + k.ptr(&na); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Uop(*op, na)) + } + } + EvmExpr::Top(op, a, b, c) => { + let na = child!(a); + let nb = child!(b); + let nc = child!(c); + k.tag(5); + k.u8(*op as u8); + k.ptr(&na); + k.ptr(&nb); + k.ptr(&nc); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Top(*op, na, nb, nc)) + } + } + EvmExpr::Get(a, idx) => { + let na = child!(a); + k.tag(6); + k.ptr(&na); + k.usize(*idx); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Get(na, *idx)) + } + } + EvmExpr::Concat(a, b) => { + let na = child!(a); + let nb = child!(b); + k.tag(7); + k.ptr(&na); + k.ptr(&nb); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Concat(na, nb)) + } + } + EvmExpr::If(cond, inputs, t, e) => { + let nc = child!(cond); + let ni = child!(inputs); + let nt = child!(t); + let ne = child!(e); + k.tag(8); + k.ptr(&nc); + k.ptr(&ni); + k.ptr(&nt); + k.ptr(&ne); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nc, cond) + && Rc::ptr_eq(&ni, inputs) + && Rc::ptr_eq(&nt, t) + && Rc::ptr_eq(&ne, e) + { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::If(nc, ni, nt, ne)) + } + } + EvmExpr::DoWhile(a, b) => { + let na = child!(a); + let nb = child!(b); + k.tag(9); + k.ptr(&na); + k.ptr(&nb); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::DoWhile(na, nb)) + } + } + EvmExpr::EnvRead(op, st) => { + let ns = child!(st); + k.tag(10); + k.u8(*op as u8); + k.ptr(&ns); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&ns, st) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::EnvRead(*op, ns)) + } + } + EvmExpr::EnvRead1(op, arg, st) => { + let na = child!(arg); + let ns = child!(st); + k.tag(11); + k.u8(*op as u8); + k.ptr(&na); + k.ptr(&ns); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, arg) && Rc::ptr_eq(&ns, st) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::EnvRead1(*op, na, ns)) + } + } + EvmExpr::Log(n, topics, doff, dsz, st) => { + let new_topics: Vec<_> = topics.iter().map(|t| child!(t)).collect(); + let nd = child!(doff); + let ns = child!(dsz); + let nst = child!(st); + k.tag(12); + k.usize(*n); + for t in &new_topics { + k.ptr(t); + } + k.ptr(&nd); + k.ptr(&ns); + k.ptr(&nst); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::new(EvmExpr::Log(*n, new_topics, nd, ns, nst)) + } + EvmExpr::Revert(a, b, c) => { + let na = child!(a); + let nb = child!(b); + let nc = child!(c); + k.tag(13); + k.ptr(&na); + k.ptr(&nb); + k.ptr(&nc); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Revert(na, nb, nc)) + } + } + EvmExpr::ReturnOp(a, b, c) => { + let na = child!(a); + let nb = child!(b); + let nc = child!(c); + k.tag(14); + k.ptr(&na); + k.ptr(&nb); + k.ptr(&nc); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::ReturnOp(na, nb, nc)) + } + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + let na = child!(a); + let nb = child!(b); + let nc = child!(c); + let nd = child!(d); + let ne = child!(e); + let nf = child!(f); + let ng = child!(g); + k.tag(15); + k.ptr(&na); + k.ptr(&nb); + k.ptr(&nc); + k.ptr(&nd); + k.ptr(&ne); + k.ptr(&nf); + k.ptr(&ng); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, ng)) + } + EvmExpr::Call(name, args) => { + let new_args: Vec<_> = args.iter().map(|a| child!(a)).collect(); + k.tag(16); + k.str(name); + for a in &new_args { + k.ptr(a); + } + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::new(EvmExpr::Call(name.clone(), new_args)) + } + EvmExpr::Selector(s) => { + k.tag(17); + k.str(s); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::LetBind(name, value, body) => { + let nv = child!(value); + let nb = child!(body); + k.tag(18); + k.str(name); + k.ptr(&nv); + k.ptr(&nb); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nv, value) && Rc::ptr_eq(&nb, body) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::LetBind(name.clone(), nv, nb)) + } + } + EvmExpr::Var(name) => { + k.tag(19); + k.str(name); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::VarStore(name, val) => { + let nv = child!(val); + k.tag(20); + k.str(name); + k.ptr(&nv); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nv, val) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::VarStore(name.clone(), nv)) + } + } + EvmExpr::Drop(name) => { + k.tag(21); + k.str(name); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::Function(name, in_ty, out_ty, body) => { + let nb = child!(body); + k.tag(22); + k.str(name); + key_for_type(&mut k, in_ty); + key_for_type(&mut k, out_ty); + k.ptr(&nb); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nb, body) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::Function( + name.clone(), + in_ty.clone(), + out_ty.clone(), + nb, + )) + } + } + EvmExpr::StorageField(name, slot, ty) => { + k.tag(23); + k.str(name); + k.usize(*slot); + key_for_type(&mut k, ty); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::InlineAsm(inputs, hex, num_outputs) => { + let new_inputs: Vec<_> = inputs.iter().map(|a| child!(a)).collect(); + k.tag(24); + k.str(hex); + k.i32(*num_outputs); + for a in &new_inputs { + k.ptr(a); + } + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) + } + EvmExpr::MemRegion(id, size) => { + k.tag(25); + k.i64(*id); + k.i64(*size); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + Rc::clone(expr) + } + EvmExpr::DynAlloc(size) => { + let ns = child!(size); + k.tag(26); + k.ptr(&ns); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&ns, size) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::DynAlloc(ns)) + } + } + }; + + cache.insert(k, Rc::clone(&result)); + result +} + +/// Count unique IR DAG nodes by variant name. +pub fn ir_stats(expr: &RcExpr) -> IrStats { + let mut stats = IrStats::default(); + let mut visited = std::collections::HashSet::new(); + ir_stats_dag(expr, &mut stats, 0, &mut visited); + // Top-level Concat chain breakdown (not DAG-deduped — shows structural layout) + collect_top_concat_sizes_dag(expr, &mut stats.top_concat_child_sizes, 0); + stats +} + +/// Count unique DAG nodes (Rc pointer identity). +pub fn dag_node_count(expr: &RcExpr) -> usize { + let mut visited = std::collections::HashSet::new(); + dag_count_rec(expr, &mut visited) +} + +fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) -> usize { + let ptr = Rc::as_ptr(expr) as usize; + if !visited.insert(ptr) { + return 0; + } + let mut count = 1usize; + macro_rules! add { + ($e:expr) => { + count += dag_count_rec($e, visited); + }; + } + match expr.as_ref() { + EvmExpr::Arg(..) + | EvmExpr::Const(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(..) + | EvmExpr::Drop(..) + | EvmExpr::StorageField(..) + | EvmExpr::MemRegion(..) + | EvmExpr::Selector(..) => {} + EvmExpr::Uop(_, a) + | EvmExpr::VarStore(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::EnvRead(_, a) + | EvmExpr::DynAlloc(a) => { + add!(a); + } + EvmExpr::Bop(_, a, b) + | EvmExpr::Concat(a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::EnvRead1(_, a, b) => { + add!(a); + add!(b); + } + EvmExpr::LetBind(_, a, b) => { + add!(a); + add!(b); + } + EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { + add!(a); + add!(b); + add!(c); + } + EvmExpr::If(a, b, c, d) => { + add!(a); + add!(b); + add!(c); + add!(d); + } + EvmExpr::Function(_, _, _, a) => { + add!(a); + } + EvmExpr::Call(_, args) => { + for a in args { + add!(a); + } + } + EvmExpr::Log(_, topics, a, b, c) => { + for t in topics { + add!(t); + } + add!(a); + add!(b); + add!(c); + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + add!(a); + add!(b); + add!(c); + add!(d); + add!(e); + add!(f); + add!(g); + } + EvmExpr::InlineAsm(inputs, _, _) => { + for a in inputs { + add!(a); + } + } + } + count +} + +/// Walk the top-level Concat spine and record each child's DAG size + label. +/// Recurses into LetBind bodies and If branches to break down the dispatcher. +fn collect_top_concat_sizes_dag(expr: &RcExpr, out: &mut Vec<(String, usize)>, depth: usize) { + if depth > 6 { + out.push(( + format!("{}...(depth limit)", " ".repeat(depth)), + dag_node_count(expr), + )); + return; + } + let indent = " ".repeat(depth); + match expr.as_ref() { + EvmExpr::Concat(a, b) => { + collect_top_concat_sizes_dag(a, out, depth); + collect_top_concat_sizes_dag(b, out, depth); + } + EvmExpr::LetBind(name, init, body) => { + out.push(( + format!("{indent}LetBind({name}) init"), + dag_node_count(init), + )); + collect_top_concat_sizes_dag(body, out, depth + 1); + } + EvmExpr::If(pred, _inputs, then_body, else_body) => { + out.push((format!("{indent}If pred"), dag_node_count(pred))); + let then_count = dag_node_count(then_body); + let else_count = dag_node_count(else_body); + if then_count > 100 { + out.push((format!("{indent} then:"), then_count)); + collect_top_concat_sizes_dag(then_body, out, depth + 2); + } else { + out.push((format!("{indent} then"), then_count)); + } + if else_count > 100 { + out.push((format!("{indent} else:"), else_count)); + collect_top_concat_sizes_dag(else_body, out, depth + 2); + } else { + out.push((format!("{indent} else"), else_count)); + } + } + _ => { + let label = match expr.as_ref() { + EvmExpr::ReturnOp(..) => format!("{indent}ReturnOp"), + EvmExpr::Revert(..) => format!("{indent}Revert"), + EvmExpr::VarStore(name, _) => format!("{indent}VarStore({name})"), + EvmExpr::Drop(name) => format!("{indent}Drop({name})"), + EvmExpr::Empty(..) => format!("{indent}Empty"), + other => format!("{indent}{:?}", std::mem::discriminant(other)), + }; + out.push((label, dag_node_count(expr))); + } + } +} + +/// Accumulated IR statistics. +#[derive(Debug, Default)] +pub struct IrStats { + /// Count of nodes per variant name + pub node_counts: HashMap<&'static str, usize>, + /// Total node count + pub total_nodes: usize, + /// Maximum tree depth + pub max_depth: usize, + /// Count of LetBind nodes (proxy for variable allocations) + pub let_binds: usize, + /// Count of Function nodes + pub functions: usize, + /// Count of Call nodes + pub calls: usize, + /// Count of Concat nodes (chaining) + pub concats: usize, + /// Count of If nodes + pub ifs: usize, + /// Count of VarStore nodes + pub var_stores: usize, + /// Count of Var nodes (reads) + pub var_reads: usize, + /// Count of DynAlloc nodes + pub dyn_allocs: usize, + /// Per-variable Var read counts + pub var_read_names: HashMap, + /// Per-variable LetBind counts + pub let_bind_names: HashMap, + /// Per-variable VarStore counts + pub var_store_names: HashMap, + /// Subtree sizes for top-level Concat children (to identify where bulk lives) + pub top_concat_child_sizes: Vec<(String, usize)>, +} + +impl std::fmt::Display for IrStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, " total_nodes: {}", self.total_nodes)?; + writeln!(f, " max_depth: {}", self.max_depth)?; + writeln!(f, " let_binds: {}", self.let_binds)?; + writeln!(f, " functions: {}", self.functions)?; + writeln!(f, " calls: {}", self.calls)?; + writeln!(f, " concats: {}", self.concats)?; + writeln!(f, " ifs: {}", self.ifs)?; + writeln!(f, " var_stores: {}", self.var_stores)?; + writeln!(f, " var_reads: {}", self.var_reads)?; + writeln!(f, " dyn_allocs: {}", self.dyn_allocs)?; + // Top node types by count + let mut sorted: Vec<_> = self.node_counts.iter().collect(); + sorted.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, " top node types:")?; + for (name, count) in sorted.iter().take(15) { + writeln!(f, " {name:20} {count}")?; + } + // Top Var reads by name + let mut var_sorted: Vec<_> = self.var_read_names.iter().collect(); + var_sorted.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, " top Var reads by name:")?; + for (name, count) in var_sorted.iter().take(20) { + writeln!(f, " {name:40} {count}")?; + } + // LetBind names + let mut lb_sorted: Vec<_> = self.let_bind_names.iter().collect(); + lb_sorted.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, " LetBind names:")?; + for (name, count) in lb_sorted.iter().take(20) { + writeln!(f, " {name:40} {count}")?; + } + // VarStore names + let mut vs_sorted: Vec<_> = self.var_store_names.iter().collect(); + vs_sorted.sort_by(|a, b| b.1.cmp(a.1)); + if !vs_sorted.is_empty() { + writeln!(f, " VarStore names:")?; + for (name, count) in vs_sorted.iter().take(20) { + writeln!(f, " {name:40} {count}")?; + } + } + // Top concat child sizes + if !self.top_concat_child_sizes.is_empty() { + writeln!(f, " top-level Concat children (label, nodes):")?; + for (label, size) in &self.top_concat_child_sizes { + writeln!(f, " {label:40} {size}")?; + } + } + Ok(()) + } +} + +fn ir_stats_dag( + expr: &RcExpr, + stats: &mut IrStats, + depth: usize, + visited: &mut std::collections::HashSet, +) { + let ptr = Rc::as_ptr(expr) as usize; + if !visited.insert(ptr) { + return; + } + stats.total_nodes += 1; + if depth > stats.max_depth { + stats.max_depth = depth; + } + let variant_name = match expr.as_ref() { + EvmExpr::Arg(..) => "Arg", + EvmExpr::Const(..) => "Const", + EvmExpr::Empty(..) => "Empty", + EvmExpr::Bop(op, ..) => { + let name = match op { + schema::EvmBinaryOp::Add => "Bop::Add", + schema::EvmBinaryOp::Sub => "Bop::Sub", + schema::EvmBinaryOp::Mul => "Bop::Mul", + schema::EvmBinaryOp::CheckedAdd => "Bop::CheckedAdd", + schema::EvmBinaryOp::CheckedSub => "Bop::CheckedSub", + schema::EvmBinaryOp::CheckedMul => "Bop::CheckedMul", + schema::EvmBinaryOp::SLoad => "Bop::SLoad", + schema::EvmBinaryOp::MLoad => "Bop::MLoad", + schema::EvmBinaryOp::Lt => "Bop::Lt", + schema::EvmBinaryOp::Gt => "Bop::Gt", + schema::EvmBinaryOp::Eq => "Bop::Eq", + _ => "Bop::Other", + }; + *stats.node_counts.entry(name).or_default() += 1; + "Bop" + } + EvmExpr::Uop(..) => "Uop", + EvmExpr::Top(op, ..) => { + let name = match op { + schema::EvmTernaryOp::MStore => "Top::MStore", + schema::EvmTernaryOp::SStore => "Top::SStore", + schema::EvmTernaryOp::Keccak256 => "Top::Keccak256", + schema::EvmTernaryOp::Mcopy => "Top::Mcopy", + _ => "Top::Other", + }; + *stats.node_counts.entry(name).or_default() += 1; + "Top" + } + EvmExpr::Get(..) => "Get", + EvmExpr::Concat(..) => { + stats.concats += 1; + "Concat" + } + EvmExpr::If(..) => { + stats.ifs += 1; + "If" + } + EvmExpr::DoWhile(..) => "DoWhile", + EvmExpr::EnvRead(..) => "EnvRead", + EvmExpr::EnvRead1(..) => "EnvRead1", + EvmExpr::Log(..) => "Log", + EvmExpr::Revert(..) => "Revert", + EvmExpr::ReturnOp(..) => "ReturnOp", + EvmExpr::ExtCall(..) => "ExtCall", + EvmExpr::Call(..) => { + stats.calls += 1; + "Call" + } + EvmExpr::Selector(..) => "Selector", + EvmExpr::LetBind(name, ..) => { + stats.let_binds += 1; + *stats.let_bind_names.entry(name.clone()).or_default() += 1; + "LetBind" + } + EvmExpr::Var(name) => { + stats.var_reads += 1; + *stats.var_read_names.entry(name.clone()).or_default() += 1; + "Var" + } + EvmExpr::VarStore(name, ..) => { + stats.var_stores += 1; + *stats.var_store_names.entry(name.clone()).or_default() += 1; + "VarStore" + } + EvmExpr::Drop(..) => "Drop", + EvmExpr::Function(..) => { + stats.functions += 1; + "Function" + } + EvmExpr::StorageField(..) => "StorageField", + EvmExpr::InlineAsm(..) => "InlineAsm", + EvmExpr::MemRegion(..) => "MemRegion", + EvmExpr::DynAlloc(..) => { + stats.dyn_allocs += 1; + "DynAlloc" + } + }; + *stats.node_counts.entry(variant_name).or_default() += 1; + + // Recurse into children + let d = depth + 1; + macro_rules! go { ($e:expr) => { ir_stats_dag($e, stats, d, visited) }; } + match expr.as_ref() { + EvmExpr::Arg(..) + | EvmExpr::Const(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(..) + | EvmExpr::Drop(..) + | EvmExpr::StorageField(..) + | EvmExpr::MemRegion(..) + | EvmExpr::Selector(..) => {} + EvmExpr::Uop(_, a) + | EvmExpr::VarStore(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::EnvRead(_, a) + | EvmExpr::DynAlloc(a) => go!(a), + EvmExpr::Bop(_, a, b) + | EvmExpr::Concat(a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::EnvRead1(_, a, b) => { go!(a); go!(b); } + EvmExpr::LetBind(_, a, b) => { go!(a); go!(b); } + EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { + go!(a); go!(b); go!(c); + } + EvmExpr::If(a, b, c, e) => { go!(a); go!(b); go!(c); go!(e); } + EvmExpr::Function(_, _, _, body) => go!(body), + EvmExpr::Call(_, args) => { for a in args { go!(a); } } + EvmExpr::Log(_, topics, doff, dsz, state) => { + for t in topics { go!(t); } + go!(doff); go!(dsz); go!(state); + } + EvmExpr::ExtCall(a, b, c, e, f, g, h) => { + go!(a); go!(b); go!(c); go!(e); go!(f); go!(g); go!(h); + } + EvmExpr::InlineAsm(inputs, _, _) => { for inp in inputs { go!(inp); } } + } +} + /// Errors that can occur during IR lowering or optimization. #[derive(Debug, thiserror::Error)] pub enum IrError { @@ -125,6 +1085,14 @@ pub fn lower_and_optimize( let mut lowering = to_egglog::AstToEgglog::new(); let mut ir_program = lowering.lower_program(program)?; tracing::debug!(" lowering: {:?}", t.elapsed()); + for c in &ir_program.contracts { + let dag = dag_node_count(&c.runtime); + tracing::debug!(" [{}] after lowering: {} DAG nodes, {} fns", c.name, dag, c.internal_functions.len()); + if tracing::enabled!(tracing::Level::TRACE) { + let stats = ir_stats(&c.runtime); + tracing::trace!(" [{}] IR stats after lowering:\n{stats}", c.name); + } + } // 2. Variable optimizations (store-forwarding, dead elim, inlining, const prop) // Runs at ALL optimization levels since these are cheap deterministic transforms. @@ -132,6 +1100,14 @@ pub fn lower_and_optimize( let t = std::time::Instant::now(); var_opt::optimize_program(&mut ir_program, optimization_level); tracing::debug!(" var_opt: {:?}", t.elapsed()); + for c in &ir_program.contracts { + let dag = dag_node_count(&c.runtime); + tracing::debug!(" [{}] after var_opt: {} DAG nodes, {} fns", c.name, dag, c.internal_functions.len()); + if tracing::enabled!(tracing::Level::TRACE) { + let stats = ir_stats(&c.runtime); + tracing::trace!(" [{}] IR stats after var_opt:\n{stats}", c.name); + } + } // 3. Storage optimizations: // a) Hoist storage ops out of loops (LICM) — egglog can't model iteration @@ -170,9 +1146,21 @@ pub fn lower_and_optimize( let schedule = schedule::make_schedule(optimization_level); let mut optimized_contracts = Vec::new(); + // Re-establish Rc sharing broken by var_opt/storage_hoist/mem_region. + // Must run right before serialization, after all IR transform passes. + let t = std::time::Instant::now(); + hash_cons_program(&mut ir_program); + tracing::debug!(" hash_cons: {:?}", t.elapsed()); + for c in &ir_program.contracts { + let dag = dag_node_count(&c.runtime); + tracing::debug!(" [{}] after hash_cons: {} DAG nodes", c.name, dag); + } + for contract in &ir_program.contracts { let t_contract = std::time::Instant::now(); - let runtime_sexp = sexp::expr_to_sexp(&contract.runtime); + + // DAG-aware serialization: emit shared sub-expressions as egglog let-bindings + let (shared_lets, runtime_sexp, mut next_id) = sexp::expr_to_sexp_dag(&contract.runtime, 0); // Collect immutable variable names for bound propagation in egglog let immutable_vars = var_opt::collect_immutable_vars(&contract.runtime); @@ -185,18 +1173,36 @@ pub fn lower_and_optimize( // the inline rule (Call + Function → body) can fire. let mut func_lets = String::new(); for (i, func) in contract.internal_functions.iter().enumerate() { - let func_sexp = sexp::expr_to_sexp(func); + let (func_shared, func_sexp, new_next_id) = sexp::expr_to_sexp_dag(func, next_id); + next_id = new_next_id; + if !func_shared.is_empty() { + func_lets.push_str(&func_shared); + func_lets.push('\n'); + } func_lets.push_str(&format!("(let __fn_{i} {func_sexp})\n")); } let egglog_program = format!( - "{}\n\n(let __runtime {})\n{}\n{}\n{}\n\n(extract __runtime)\n", + "{}\n\n{}\n(let __runtime {})\n{}\n{}\n{}\n\n(extract __runtime)\n", prologue(optimize_for), + shared_lets, runtime_sexp, func_lets, immutable_facts, schedule ); + let prologue_len = prologue(optimize_for).len(); + tracing::debug!( + " [{}] egglog input: {} bytes (prologue: {}, shared_lets: {}, runtime_sexp: {}, func_lets: {}, immutable: {}, schedule: {})", + contract.name, + egglog_program.len(), + prologue_len, + shared_lets.len(), + runtime_sexp.len(), + func_lets.len(), + immutable_facts.len(), + schedule.len(), + ); let t_egg = std::time::Instant::now(); let mut egraph = create_egraph(); @@ -216,12 +1222,15 @@ pub fn lower_and_optimize( optimization_level ); + let t_phase = std::time::Instant::now(); let mut optimized_runtime = sexp::sexp_to_expr(extracted_sexp)?; + tracing::debug!(" sexp_to_expr: {:?}", t_phase.elapsed()); // Check for compile-time-detectable constant overflows in narrow types. // This catches overflow revealed by egglog const-folding (e.g. through // inlined constants). The lowering-time check catches literal cases with // source spans; this is the fallback for optimization-revealed cases. + let t_phase = std::time::Instant::now(); let overflow_errors = check_const_overflow(&optimized_runtime); if !overflow_errors.is_empty() { let mut diag = edge_diagnostics::Diagnostic::error( @@ -232,13 +1241,21 @@ pub fn lower_and_optimize( } return Err(IrError::Diagnostic(diag)); } + tracing::debug!(" check_const_overflow: {:?}", t_phase.elapsed()); // Post-egglog cleanup: simplify state params and remove dead code + let t_phase = std::time::Instant::now(); optimized_runtime = cleanup::cleanup_expr_pub(&optimized_runtime); + tracing::debug!(" cleanup: {:?}", t_phase.elapsed()); + + let t_phase = std::time::Instant::now(); + optimized_runtime = hash_cons_expr(&optimized_runtime); + tracing::debug!(" post-egglog hash_cons: {:?} (dag={})", t_phase.elapsed(), dag_node_count(&optimized_runtime)); // Only keep internal functions still referenced (directly or transitively) // by Call nodes in the optimized runtime. Monomorphized functions that // were inlined by egglog are no longer needed. + let t_phase = std::time::Instant::now(); let mut referenced = collect_call_names(&optimized_runtime); // Transitively collect: if a kept function calls another, keep that too loop { @@ -268,10 +1285,11 @@ pub fn lower_and_optimize( if !referenced.contains(name.as_str()) { continue; } - let func_sexp = sexp::expr_to_sexp(func); + let (func_shared, func_sexp, _) = sexp::expr_to_sexp_dag(func, 0); let func_program = format!( - "{}\n\n(let __func {})\n\n{}\n\n(extract __func)\n", + "{}\n\n{}\n(let __func {})\n\n{}\n\n(extract __func)\n", prologue(optimize_for), + func_shared, func_sexp, schedule ); @@ -286,6 +1304,7 @@ pub fn lower_and_optimize( let optimized_func = cleanup::cleanup_expr_pub(&optimized_func); optimized_functions.push(optimized_func); } + tracing::debug!(" collect+optimize fns: {:?} ({} kept)", t_phase.elapsed(), optimized_functions.len()); tracing::debug!( " contract {} total: {:?}", diff --git a/crates/ir/src/mem_region.rs b/crates/ir/src/mem_region.rs index 0a20bdc..cd767e0 100644 --- a/crates/ir/src/mem_region.rs +++ b/crates/ir/src/mem_region.rs @@ -133,9 +133,10 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { } // Unary children - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::VarStore(_, a) => { - collect_region_scopes(a) - } + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::VarStore(_, a) + | EvmExpr::DynAlloc(a) => collect_region_scopes(a), // Multi-child nodes EvmExpr::Log(_, topics, d, s, st) => { @@ -375,6 +376,13 @@ fn replace_regions(expr: &RcExpr, assignments: &BTreeMap) -> RcExpr .collect(); Rc::new(EvmExpr::InlineAsm(ni, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let ns = replace_regions(size, assignments); + if Rc::ptr_eq(&ns, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(ns)) + } // Leaf nodes — no MemRegion possible EvmExpr::Const(..) | EvmExpr::Arg(..) diff --git a/crates/ir/src/optimizations/memory.egg b/crates/ir/src/optimizations/memory.egg index c49f2c9..a5e74d5 100644 --- a/crates/ir/src/optimizations/memory.egg +++ b/crates/ir/src/optimizations/memory.egg @@ -5,9 +5,28 @@ ;; ---- Redundant load after store ---- ;; MLoad(offset, MStore(offset, val, state)) -> val -(rewrite (Bop (OpMLoad) offset (Top (OpMStore) offset val state)) +;; SAFETY NOTE: This forwarding is semantically correct in the state-threaded IR, +;; but can produce wrong codegen when `val` contains MLOAD(offset, s_old) because +;; codegen ignores state parameters — both MLOAD(offset, s_old) and +;; MLOAD(offset, s_new) compile to the same MLOAD(offset), reading final memory. +;; This is only a problem when `val` references the SAME offset being stored, +;; creating a read-modify-write cycle (e.g., `self.len += 1`). +;; We restrict to MemRegion-based offsets (compile-time known, typically write-once) +;; and keep the generic rule only for values that are constants. + +;; Safe for MemRegion-based offsets (static allocations) +(rewrite (Bop (OpMLoad) (MemRegion r s) (Top (OpMStore) (MemRegion r s) val state)) + val :subsume :ruleset memory-opt) +(rewrite (Bop (OpMLoad) (Bop (OpAdd) (MemRegion r s) off) + (Top (OpMStore) (Bop (OpAdd) (MemRegion r s) off) val state)) val :subsume :ruleset memory-opt) +;; Safe for any offset when val is a constant (no MLOAD dependency) +(rule ((= e (Bop (OpMLoad) offset (Top (OpMStore) offset val state))) + (= val (Const c ty ctx))) + ((union e val)) + :ruleset memory-opt) + ;; ---- Redundant store of loaded value ---- ;; MStore(offset, MLoad(offset, state), state) -> state (rewrite (Top (OpMStore) offset (Bop (OpMLoad) offset state) state) diff --git a/crates/ir/src/pretty.rs b/crates/ir/src/pretty.rs index ce5e8eb..0133d2b 100644 --- a/crates/ir/src/pretty.rs +++ b/crates/ir/src/pretty.rs @@ -279,7 +279,8 @@ fn inline_width(expr: &RcExpr) -> Option { | EvmExpr::ReturnOp(..) | EvmExpr::Call(..) | EvmExpr::VarStore(..) - | EvmExpr::InlineAsm(..) => None, + | EvmExpr::InlineAsm(..) + | EvmExpr::DynAlloc(_) => None, // never inline — has sub-expression EvmExpr::MemRegion(id, sz) => Some(format!("region({id}, {sz})").len()), } } @@ -642,6 +643,17 @@ fn pp(expr: &RcExpr, depth: usize, buf: &mut String) { indent(depth, buf); buf.push_str(&format!("region({id}, {sz})")); } + EvmExpr::DynAlloc(size) => { + indent(depth, buf); + if fits_inline(size, budget(depth).saturating_sub(8)) { + buf.push_str("@alloc("); + pp_inline(size, buf); + } else { + buf.push_str("@alloc(\n"); + pp(size, depth + 1, buf); + } + buf.push(')'); + } } } @@ -766,10 +778,16 @@ fn pp_oneline(expr: &RcExpr, buf: &mut String) { buf.push_str(&format!("asm({num_outputs}){{ {disasm} }}")); } EvmExpr::MemRegion(id, sz) => buf.push_str(&format!("region({id}, {sz})")), + EvmExpr::DynAlloc(size) => { + buf.push_str("@alloc("); + pp_oneline(size, buf); + buf.push(')'); + } } } -/// Produce a compact one-line IR summary for **statement-level** nodes. +/// Produce a compact one-line IR summary +/// (`DynAlloc` is a statement-level node) for **statement-level** nodes. /// /// Returns `None` for leaf/value expressions (Const, Var, Bop, Uop, etc.) /// that don't merit their own comment in assembly output. diff --git a/crates/ir/src/schema.egg b/crates/ir/src/schema.egg index d956858..3cdd6bb 100644 --- a/crates/ir/src/schema.egg +++ b/crates/ir/src/schema.egg @@ -250,6 +250,10 @@ ;; Different region IDs are guaranteed non-overlapping. (constructor MemRegion (i64 i64) EvmExpr) +;; Dynamic memory allocation: (DynAlloc size_bytes) -> base address +;; Uses MSIZE at runtime. NOT pure — memory expansion is observable. +(constructor DynAlloc (EvmExpr) EvmExpr) + ;; ============================================================ ;; Rulesets ;; ============================================================ diff --git a/crates/ir/src/schema.rs b/crates/ir/src/schema.rs index b29d44a..ebbcd6d 100644 --- a/crates/ir/src/schema.rs +++ b/crates/ir/src/schema.rs @@ -350,6 +350,11 @@ pub enum EvmExpr { /// Different region IDs are guaranteed to be non-overlapping. /// Resolved to a concrete offset by `assign_memory_offsets` after egglog extraction. MemRegion(i64, i64), + + /// Dynamic memory allocation: size in bytes → base address. + /// Uses MSIZE to find the current memory high-water mark and expands memory. + /// NOT pure — memory expansion is an observable side effect. + DynAlloc(RcExpr), } // ============================================================ diff --git a/crates/ir/src/sexp.rs b/crates/ir/src/sexp.rs index 2dc58ce..5eed021 100644 --- a/crates/ir/src/sexp.rs +++ b/crates/ir/src/sexp.rs @@ -3,7 +3,7 @@ //! Converts between `EvmExpr` and egglog-compatible s-expression strings. //! Used to insert IR into an egglog `EGraph` and extract optimized results. -use std::rc::Rc; +use std::{collections::HashMap, rc::Rc}; use crate::{ schema::{ @@ -155,6 +155,7 @@ pub fn expr_to_sexp(expr: &EvmExpr) -> String { format!("(InlineAsm {list} \"{hex}\" {num_outputs})") } EvmExpr::MemRegion(id, size) => format!("(MemRegion {id} {size})"), + EvmExpr::DynAlloc(size) => format!("(DynAlloc {})", expr_to_sexp(size)), } } @@ -297,6 +298,330 @@ fn list_to_sexp(exprs: &[RcExpr]) -> String { }) } +// ============================================================ +// DAG-aware S-expression conversion +// ============================================================ +// +// The IR is a DAG (via Rc sharing), but expr_to_sexp expands it into a tree. +// For Vec with 5 push calls, this blows up from ~1,500 DAG nodes to +// ~867,000 expanded nodes (33 MB of s-expression text). +// +// This module detects shared Rc nodes and emits egglog `(let __sN ...)` bindings +// for them. Subsequent references use the binding name instead of re-expanding. +// This keeps the s-expression size proportional to the DAG, not the expanded tree. + +use std::collections::HashSet; + +/// Convert an `RcExpr` DAG to egglog s-expressions with `let`-bindings for shared nodes. +/// +/// Returns `(let_bindings, main_expr)` where `let_bindings` contains +/// `(let __sN )` declarations in dependency order. +/// +/// `id_offset` ensures unique names across multiple calls within the same +/// egglog program (e.g., runtime + internal functions). Returns the next +/// available ID after this call. +pub fn expr_to_sexp_dag(expr: &RcExpr, id_offset: usize) -> (String, String, usize) { + // Pass 1: count how many parent edges each Rc node has (DAG-aware traversal) + let mut ref_counts: HashMap = HashMap::new(); + let mut visited: HashSet = HashSet::new(); + count_refs_dag(expr, &mut ref_counts, &mut visited); + + // Check if any node is referenced more than once + let has_sharing = ref_counts.values().any(|&c| c > 1); + if !has_sharing { + return (String::new(), expr_to_sexp(expr), id_offset); + } + + // Pass 2: serialize with let-bindings for shared nodes + let mut ctx = DagSexpCtx { + ref_counts, + named: HashMap::new(), + let_bindings: Vec::new(), + next_id: id_offset, + }; + let main = dag_sexp_rec(expr, &mut ctx); + (ctx.let_bindings.join("\n"), main, ctx.next_id) +} + +struct DagSexpCtx { + ref_counts: HashMap, + named: HashMap, + let_bindings: Vec, + next_id: usize, +} + +fn ptr_id(expr: &RcExpr) -> usize { + Rc::as_ptr(expr) as usize +} + +/// Count references to each Rc node. Recurses into children only once per node. +fn count_refs_dag(expr: &RcExpr, counts: &mut HashMap, visited: &mut HashSet) { + let id = ptr_id(expr); + *counts.entry(id).or_default() += 1; + if !visited.insert(id) { + return; + } + macro_rules! visit { + ($e:expr) => { + count_refs_dag($e, counts, visited); + }; + } + match expr.as_ref() { + EvmExpr::Arg(..) + | EvmExpr::Const(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(..) + | EvmExpr::Drop(..) + | EvmExpr::StorageField(..) + | EvmExpr::MemRegion(..) + | EvmExpr::Selector(..) => {} + EvmExpr::Uop(_, a) + | EvmExpr::VarStore(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::EnvRead(_, a) + | EvmExpr::DynAlloc(a) => { + visit!(a); + } + EvmExpr::Bop(_, a, b) + | EvmExpr::Concat(a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::EnvRead1(_, a, b) => { + visit!(a); + visit!(b); + } + EvmExpr::LetBind(_, a, b) => { + visit!(a); + visit!(b); + } + EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { + visit!(a); + visit!(b); + visit!(c); + } + EvmExpr::If(a, b, c, d) => { + visit!(a); + visit!(b); + visit!(c); + visit!(d); + } + EvmExpr::Function(_, _, _, a) => { + visit!(a); + } + EvmExpr::Call(_, args) => { + for a in args { + visit!(a); + } + } + EvmExpr::Log(_, topics, a, b, c) => { + for t in topics { + visit!(t); + } + visit!(a); + visit!(b); + visit!(c); + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + visit!(a); + visit!(b); + visit!(c); + visit!(d); + visit!(e); + visit!(f); + visit!(g); + } + EvmExpr::InlineAsm(inputs, _, _) => { + for a in inputs { + visit!(a); + } + } + } +} + +/// Serialize a node, emitting let-bindings for shared sub-expressions. +fn dag_sexp_rec(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { + let id = ptr_id(expr); + + // If this shared node was already serialized, return its name + if let Some(name) = ctx.named.get(&id) { + return name.clone(); + } + + // Serialize the node itself (recurse into children via dag_sexp_rec) + let sexp = dag_sexp_node(expr, ctx); + + // If multiply-referenced, emit a let-binding and return the name + let is_shared = ctx.ref_counts.get(&id).copied().unwrap_or(0) > 1; + if is_shared { + // Don't share trivial leaf nodes (saves let-binding overhead) + let is_leaf = matches!( + expr.as_ref(), + EvmExpr::Arg(..) + | EvmExpr::Const(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(..) + | EvmExpr::Drop(..) + | EvmExpr::Selector(..) + | EvmExpr::MemRegion(..) + | EvmExpr::StorageField(..) + ); + if !is_leaf { + let name = format!("__s{}", ctx.next_id); + ctx.next_id += 1; + ctx.named.insert(id, name.clone()); + ctx.let_bindings.push(format!("(let {name} {sexp})")); + return name; + } + } + + sexp +} + +/// Serialize a single node's s-expression, using dag_sexp_rec for children. +fn dag_sexp_node(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { + match expr.as_ref() { + EvmExpr::Arg(ty, c) => format!("(Arg {} {})", type_sexp(ty), ctx_sexp(c)), + EvmExpr::Const(c, ty, cx) => { + format!( + "(Const {} {} {})", + const_sexp(c), + type_sexp(ty), + ctx_sexp(cx) + ) + } + EvmExpr::Empty(ty, c) => format!("(Empty {} {})", type_sexp(ty), ctx_sexp(c)), + EvmExpr::Bop(op, l, r) => { + format!( + "(Bop {} {} {})", + binop_sexp(op), + dag_sexp_rec(l, ctx), + dag_sexp_rec(r, ctx) + ) + } + EvmExpr::Uop(op, e) => format!("(Uop {} {})", unop_sexp(op), dag_sexp_rec(e, ctx)), + EvmExpr::Top(op, a, b, c) => { + format!( + "(Top {} {} {} {})", + ternop_sexp(op), + dag_sexp_rec(a, ctx), + dag_sexp_rec(b, ctx), + dag_sexp_rec(c, ctx) + ) + } + EvmExpr::Get(e, idx) => format!("(Get {} {})", dag_sexp_rec(e, ctx), idx), + EvmExpr::Concat(a, b) => { + format!("(Concat {} {})", dag_sexp_rec(a, ctx), dag_sexp_rec(b, ctx)) + } + EvmExpr::If(cond, inputs, t, e) => { + format!( + "(If {} {} {} {})", + dag_sexp_rec(cond, ctx), + dag_sexp_rec(inputs, ctx), + dag_sexp_rec(t, ctx), + dag_sexp_rec(e, ctx) + ) + } + EvmExpr::DoWhile(inputs, body) => { + format!( + "(DoWhile {} {})", + dag_sexp_rec(inputs, ctx), + dag_sexp_rec(body, ctx) + ) + } + EvmExpr::EnvRead(op, st) => { + format!("(EnvRead {} {})", envop_sexp(op), dag_sexp_rec(st, ctx)) + } + EvmExpr::EnvRead1(op, arg, st) => { + format!( + "(EnvRead1 {} {} {})", + envop_sexp(op), + dag_sexp_rec(arg, ctx), + dag_sexp_rec(st, ctx) + ) + } + EvmExpr::Log(n, topics, data_offset, data_size, st) => { + let topics_s = dag_list_to_sexp(topics, ctx); + format!( + "(Log {} {} {} {} {})", + n, + topics_s, + dag_sexp_rec(data_offset, ctx), + dag_sexp_rec(data_size, ctx), + dag_sexp_rec(st, ctx) + ) + } + EvmExpr::Revert(off, sz, st) => { + format!( + "(Revert {} {} {})", + dag_sexp_rec(off, ctx), + dag_sexp_rec(sz, ctx), + dag_sexp_rec(st, ctx) + ) + } + EvmExpr::ReturnOp(off, sz, st) => { + format!( + "(ReturnOp {} {} {})", + dag_sexp_rec(off, ctx), + dag_sexp_rec(sz, ctx), + dag_sexp_rec(st, ctx) + ) + } + EvmExpr::ExtCall(tgt, val, ao, al, ro, rl, st) => { + format!( + "(ExtCall {} {} {} {} {} {} {})", + dag_sexp_rec(tgt, ctx), + dag_sexp_rec(val, ctx), + dag_sexp_rec(ao, ctx), + dag_sexp_rec(al, ctx), + dag_sexp_rec(ro, ctx), + dag_sexp_rec(rl, ctx), + dag_sexp_rec(st, ctx) + ) + } + EvmExpr::Call(name, args) => { + let list = dag_list_to_sexp(args, ctx); + format!("(Call \"{name}\" {list})") + } + EvmExpr::Selector(sig) => format!("(Selector \"{sig}\")"), + EvmExpr::LetBind(name, value, body) => { + format!( + "(LetBind \"{}\" {} {})", + name, + dag_sexp_rec(value, ctx), + dag_sexp_rec(body, ctx) + ) + } + EvmExpr::Var(name) => format!("(Var \"{name}\")"), + EvmExpr::VarStore(name, value) => { + format!("(VarStore \"{}\" {})", name, dag_sexp_rec(value, ctx)) + } + EvmExpr::Drop(name) => format!("(Drop \"{name}\")"), + EvmExpr::Function(name, in_ty, out_ty, body) => { + format!( + "(Function \"{}\" {} {} {})", + name, + type_sexp(in_ty), + type_sexp(out_ty), + dag_sexp_rec(body, ctx) + ) + } + EvmExpr::StorageField(name, slot, ty) => { + format!("(StorageField \"{}\" {} {})", name, slot, type_sexp(ty)) + } + EvmExpr::InlineAsm(inputs, hex, num_outputs) => { + let list = dag_list_to_sexp(inputs, ctx); + format!("(InlineAsm {list} \"{hex}\" {num_outputs})") + } + EvmExpr::MemRegion(id, size) => format!("(MemRegion {id} {size})"), + EvmExpr::DynAlloc(size) => format!("(DynAlloc {})", dag_sexp_rec(size, ctx)), + } +} + +fn dag_list_to_sexp(exprs: &[RcExpr], ctx: &mut DagSexpCtx) -> String { + exprs.iter().rev().fold("(Nil)".to_owned(), |acc, e| { + format!("(Cons {} {})", dag_sexp_rec(e, ctx), acc) + }) +} + // ============================================================ // S-expression string → EvmExpr // ============================================================ @@ -551,6 +876,10 @@ fn sexp_to_evm_expr(sexp: &Sexp) -> Result { let size = atom_i64(&items[2])?; Ok(Rc::new(EvmExpr::MemRegion(id, size))) } + "DynAlloc" => { + let size = sexp_to_evm_expr(&items[1])?; + Ok(Rc::new(EvmExpr::DynAlloc(size))) + } other => Err(IrError::Extraction(format!( "unknown expression constructor: {other}" ))), @@ -1003,7 +1332,7 @@ fn is_leaf_form(tree: &STree) -> bool { // Context | "InFunction" // Leaves - | "Selector" | "Var" | "VarStore" | "StorageField" | "MemRegion" + | "Selector" | "Var" | "VarStore" | "StorageField" | "MemRegion" | "DynAlloc" // Empty/Arg (no sub-expressions) | "Arg" | "Empty" | "Const" ) diff --git a/crates/ir/src/storage_hoist.rs b/crates/ir/src/storage_hoist.rs index d69bf4d..72a8fef 100644 --- a/crates/ir/src/storage_hoist.rs +++ b/crates/ir/src/storage_hoist.rs @@ -312,6 +312,10 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE .collect(); Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let ns = replace_sloads_inline(size, known); + Rc::new(EvmExpr::DynAlloc(ns)) + } } } @@ -1057,6 +1061,10 @@ fn replace_storage(expr: &RcExpr, key: &SlotKey, var_name: &str, replace_stores: nb, )) } + EvmExpr::DynAlloc(size) => { + let ns = replace_storage(size, key, var_name, replace_stores); + Rc::new(EvmExpr::DynAlloc(ns)) + } // Leaf nodes — no children EvmExpr::Const(..) | EvmExpr::Arg(..) diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index 5bb6588..58261cf 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -34,37 +34,34 @@ impl AstToEgglog { } // Check for generic union types (e.g., Result::Ok(42) where Result was monomorphized) if self.generic_type_templates.contains_key(type_name) { - // First try to find an already-monomorphized version if let Some(mangled) = self.resolve_generic_type_name(type_name) { - return self.lower_union_instantiation_expr( - &mangled, - variant_name, - args, - Some(span), - ); - } - // No monomorphized version yet — try to infer type params from - // the constructor argument and monomorphize on the fly. - if let Some(mangled) = - self.try_monomorphize_union_from_constructor(type_name, variant_name, args)? - { - return self.lower_union_instantiation_expr( - &mangled, + // Only treat as union constructor if the monomorphized type is actually a union + if self.union_types.contains_key(&mangled) { + return self.lower_union_instantiation_expr( + &mangled, + variant_name, + args, + Some(span), + ); + } + // Not a union (e.g., generic struct) — fall through to qualified method call + } else { + // No monomorphized version yet — try to infer type params from + // the constructor argument and monomorphize on the fly. + if let Some(mangled) = self.try_monomorphize_union_from_constructor( + type_name, variant_name, args, - Some(span), - ); + )? { + return self.lower_union_instantiation_expr( + &mangled, + variant_name, + args, + Some(span), + ); + } + // Could not infer — fall through to qualified method call } - return Err(IrError::Diagnostic( - edge_diagnostics::Diagnostic::error(format!( - "cannot infer type parameters for generic type `{type_name}`", - )) - .with_label( - span.clone(), - format!("cannot infer type arguments from `{type_name}::{variant_name}(...)`"), - ) - .with_note("provide explicit type arguments, e.g. `{type_name}::{variant_name}(...)`".to_string()), - )); } } } @@ -147,6 +144,30 @@ impl AstToEgglog { .collect(); return self.inline_function_call(¶ms, &body, args); } + + // Try resolving generic type to monomorphized name (e.g., Vec → Vec__u256) + if let Some(mangled) = self.resolve_generic_type_name(type_or_trait) { + if let Some(method) = self.find_inherent_method(&mangled, method_name) { + let fn_decl = method.fn_decl.clone(); + let body = method.body; + let params: Vec<(String, edge_ast::ty::TypeSig)> = fn_decl + .params + .iter() + .map(|(id, ty)| (id.name.clone(), ty.clone())) + .collect(); + return self.inline_function_call(¶ms, &body, args); + } + if let Some((fn_decl, body)) = + self.find_trait_method_for_type(&mangled, method_name) + { + let params: Vec<(String, edge_ast::ty::TypeSig)> = fn_decl + .params + .iter() + .map(|(id, ty)| (id.name.clone(), ty.clone())) + .collect(); + return self.inline_function_call(¶ms, &body, args); + } + } } } @@ -292,20 +313,12 @@ impl AstToEgglog { let rhs = self.lower_expr(&args[0])?; return Ok(ast_helpers::bop(op, lhs, rhs)); } - - // Check compiler-provided stateful methods (derive_slot, sload, sstore) - if let Some(result) = - self.try_compiler_stateful_dispatch(receiver, method_name, args)? - { - return Ok(result); - } } // Default derive_slot for struct types without explicit UniqueSlot impl. - // Chains keccak256 over each field like Solidity nested mappings: - // slot = keccak256(field_0 . base_slot) - // slot = keccak256(field_1 . slot) - // ... + // Must come BEFORE try_compiler_stateful_dispatch because the generic + // derive_slot there treats the receiver as a single 32-byte key, whereas + // struct keys need per-field keccak chaining. if method_name == "derive_slot" && self.std_ops_traits.contains("UniqueSlot") && args.len() == 1 @@ -318,6 +331,13 @@ impl AstToEgglog { return Ok(result); } } + + // Check compiler-provided stateful methods (derive_slot, sload, sstore, mstore, mload, mcopy) + if let Some(result) = + self.try_compiler_stateful_dispatch(receiver, method_name, args)? + { + return Ok(result); + } } // If receiver type is known but no method found, give a clear error @@ -908,6 +928,34 @@ impl AstToEgglog { Some(store) } + // Mstore::mstore(self, offset) → MSTORE(offset, value, state) + "mstore" if self.std_ops_traits.contains("Mstore") => { + let value = receiver_ir?; + let offset = args_ir.first()?; + let store = + ast_helpers::mstore(Rc::clone(offset), value, Rc::clone(&self.current_state)); + self.current_state = Rc::clone(&store); + Some(store) + } + + // Mload::mload(offset) → MLOAD(offset, state) + "mload" if self.std_ops_traits.contains("Mload") => { + let offset = if let Some(recv) = receiver_ir { + recv + } else { + Rc::clone(args_ir.first()?) + }; + Some(ast_helpers::mload(offset, Rc::clone(&self.current_state))) + } + + // Mcopy::mcopy(self, dest, size) → Top(Mcopy, dest, src, size) + "mcopy" if self.std_ops_traits.contains("Mcopy") => { + let src = receiver_ir?; + let dest = args_ir.first()?; + let size = args_ir.get(1)?; + Some(ast_helpers::mcopy(Rc::clone(dest), src, Rc::clone(size))) + } + _ => None, } } @@ -990,7 +1038,7 @@ impl AstToEgglog { } edge_ast::Expr::Cast(_, target_type, _) => self.lower_type_sig(target_type), edge_ast::Expr::Paren(inner, _) => self.infer_expr_type(inner), - edge_ast::Expr::At(name, _, _) => match name.name.as_str() { + edge_ast::Expr::At(name, _, _, _) => match name.name.as_str() { "caller" | "origin" | "coinbase" | "address" => EvmType::Base(EvmBaseType::AddrT), _ => EvmType::Base(EvmBaseType::UIntT(256)), }, @@ -1122,6 +1170,39 @@ impl AstToEgglog { } } + // Detect &dm self aliasing — if param[0] is "self" and the receiver arg + // is a &dm variable, alias self to the caller's LetBind instead of copying. + // This allows mutations inside the method (e.g., self = new_ptr during Vec + // growth) to transparently update the caller's variable. + let self_alias_info: Option<(String, VarBinding)> = + if !params.is_empty() && params[0].0 == "self" && !args.is_empty() { + if let edge_ast::Expr::Ident(ident) = &args[0] { + // Look up receiver in current scope (before pushing new scope) + let binding = self + .scopes + .iter() + .rev() + .find_map(|s| s.bindings.get(&ident.name).cloned()); + if let Some(b) = binding { + if b.is_dynamic_memory { + if let Some(ref let_bind_name) = b.let_bind_name { + Some((let_bind_name.clone(), b)) + } else { + None + } + } else { + None + } + } else { + None + } + } else { + None + } + } else { + None + }; + self.scopes.push(Scope::new()); for (i, (param_name, param_ty)) in params.iter().enumerate() { let ty = self.lower_type_sig(param_ty); @@ -1206,6 +1287,39 @@ impl AstToEgglog { composite_type, composite_base.is_some() ); + + // If this is the `self` parameter and we detected &dm aliasing, + // create an aliased binding that references the caller's LetBind variable + // instead of copying the value. This way mutations to `self` inside the + // method (e.g., `self = new_ptr`) update the caller's variable. + if i == 0 && param_name == "self" { + if let Some((ref alias_name, ref alias_binding)) = self_alias_info { + let aliased = VarBinding { + value: ast_helpers::var(alias_name.clone()), + location: DataLocation::Memory, + storage_slot: None, + _ty: ty, + let_bind_name: Some(alias_name.clone()), + composite_type: alias_binding.composite_type.clone(), + composite_base: alias_binding.composite_base.clone(), + composite_type_args: alias_binding.composite_type_args.clone(), + is_dynamic_memory: true, + }; + self.scopes + .last_mut() + .expect("scope stack empty") + .bindings + .insert("self".to_string(), aliased); + continue; + } + } + + // Check if the parameter type has &dm annotation + let is_dm_param = matches!( + param_ty, + edge_ast::ty::TypeSig::Pointer(edge_ast::ty::Location::DynamicMemory, _) + ); + let binding = VarBinding { value: val, location: DataLocation::Stack, @@ -1215,6 +1329,7 @@ impl AstToEgglog { composite_type, composite_base, composite_type_args, + is_dynamic_memory: is_dm_param, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/composite.rs b/crates/ir/src/to_egglog/composite.rs index a56be4c..7e1bf0f 100644 --- a/crates/ir/src/to_egglog/composite.rs +++ b/crates/ir/src/to_egglog/composite.rs @@ -20,6 +20,7 @@ impl AstToEgglog { variant_name: &str, span: Option<&edge_types::span::Span>, ) -> Result { + tracing::trace!("variant_index: type_name={type_name}, variant_name={variant_name}"); // Try direct lookup first let variants = if let Some(v) = self.union_types.get(type_name) { v @@ -592,6 +593,14 @@ impl AstToEgglog { return Ok(Some(ast_helpers::concat(bounds_ir, load))); } } + // If the type has an Index trait impl, defer to trait dispatch + // instead of raw MLOAD (e.g., Vec should use Index::index, not raw field access). + if self + .trait_impls + .contains_key(&(type_name, "Index".to_string())) + { + return Ok(None); + } let word_size = ast_helpers::const_int(32, self.current_ctx.clone()); let offset = ast_helpers::add(base_expr, ast_helpers::mul(idx_ir, word_size)); return Ok(Some(ast_helpers::mload( diff --git a/crates/ir/src/to_egglog/control_flow.rs b/crates/ir/src/to_egglog/control_flow.rs index af8832a..ba8c883 100644 --- a/crates/ir/src/to_egglog/control_flow.rs +++ b/crates/ir/src/to_egglog/control_flow.rs @@ -67,6 +67,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }, ); } @@ -178,6 +179,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index d7a7d00..5c14858 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -107,8 +107,26 @@ impl AstToEgglog { // If the type annotation is a generic type (e.g., Result), // trigger monomorphization so the concrete type is registered. + // Also handle &dm Pointer wrapper around a generic type. let mut composite_type = None; - if let Some(edge_ast::ty::TypeSig::Named(name_ident, type_args)) = type_sig { + let is_dynamic_memory; + + // Unwrap &dm pointer to get inner type sig for monomorphization + let inner_type_sig = match type_sig { + Some(edge_ast::ty::TypeSig::Pointer( + edge_ast::ty::Location::DynamicMemory, + inner, + )) => { + is_dynamic_memory = true; + Some(inner.as_ref()) + } + _ => { + is_dynamic_memory = false; + type_sig.as_ref() + } + }; + + if let Some(edge_ast::ty::TypeSig::Named(name_ident, type_args)) = inner_type_sig { if !type_args.is_empty() && self.generic_type_templates.contains_key(&name_ident.name) { @@ -119,11 +137,22 @@ impl AstToEgglog { )? { composite_type = Some(mangled); } + } else if self.struct_types.contains_key(&name_ident.name) { + // Non-generic struct type (e.g., &dm MyStruct) + composite_type = Some(name_ident.name.clone()); } } let zero = ast_helpers::const_int(0, self.current_ctx.clone()); let var_name = format!("{}__local_{}", self.inline_prefix, ident.name); + + // For &dm bindings, composite_base is Var(let_bind_name) — the pointer itself + let composite_base = if is_dynamic_memory && composite_type.is_some() { + Some(ast_helpers::var(var_name.clone())) + } else { + None + }; + let binding = VarBinding { value: zero, location: DataLocation::Memory, @@ -131,8 +160,9 @@ impl AstToEgglog { _ty: ty, let_bind_name: Some(var_name.clone()), composite_type, - composite_base: None, + composite_base, composite_type_args: Vec::new(), + is_dynamic_memory, }; self.scopes .last_mut() @@ -147,14 +177,19 @@ impl AstToEgglog { self.type_sig_hint = type_sig.as_ref().cloned(); let rhs_ir = self.lower_expr(init)?; self.type_sig_hint = None; - // Track composite type from RHS if applicable - if let Some((comp_type, comp_base)) = self.last_composite_alloc.take() { - if let Some(scope) = self.scopes.last_mut() { - if let Some(binding) = scope.bindings.get_mut(&ident.name) { - binding.composite_type = Some(comp_type); - binding.composite_base = Some(comp_base); + // Track composite type from RHS if applicable. + // For &dm bindings, don't override — composite_base is Var(let_bind_name). + if !is_dynamic_memory { + if let Some((comp_type, comp_base)) = self.last_composite_alloc.take() { + if let Some(scope) = self.scopes.last_mut() { + if let Some(binding) = scope.bindings.get_mut(&ident.name) { + binding.composite_type = Some(comp_type); + binding.composite_base = Some(comp_base); + } } } + } else { + self.last_composite_alloc.take(); } Ok(ast_helpers::var_store(var_name, rhs_ir)) } else { @@ -240,6 +275,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -444,8 +480,8 @@ impl AstToEgglog { self.lower_function_call(callee, args, type_args, span) } - edge_ast::Expr::At(builtin_name, args, _span) => { - self.lower_builtin(&builtin_name.name, args) + edge_ast::Expr::At(builtin_name, type_args, args, _span) => { + self.lower_builtin(&builtin_name.name, type_args, args) } edge_ast::Expr::Assign(lhs, rhs, _span) => { @@ -913,14 +949,45 @@ impl AstToEgglog { )) } edge_ast::Expr::FieldAccess(obj, field, _span) => { - // Storage-backed packed struct sub-field write: self.color.r = 5 + // 1. Storage-backed packed struct sub-field write: self.color.r = 5 if let edge_ast::Expr::Ident(ident) = obj.as_ref() { - if let Some(result) = - self.try_lower_storage_packed_field_write(&ident.name, &field.name, rhs_ir)? - { + if let Some(result) = self.try_lower_storage_packed_field_write( + &ident.name, + &field.name, + Rc::clone(&rhs_ir), + )? { return Ok(result); } } + + // 2. Memory-backed struct field write: p.x = 10 + if let edge_ast::Expr::Ident(ident) = obj.as_ref() { + if let Some((type_name, base_expr)) = self.lookup_composite_info(&ident.name) { + if let Some(struct_info) = self.struct_types.get(&type_name).cloned() { + if let Some(field_idx) = struct_info + .fields + .iter() + .position(|(n, _)| n == &field.name) + { + let offset = ast_helpers::add( + base_expr, + ast_helpers::const_int( + (field_idx * 32) as i64, + self.current_ctx.clone(), + ), + ); + let store = ast_helpers::mstore( + offset, + rhs_ir, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&store); + return Ok(store); + } + } + } + } + Err(IrError::Unsupported( "field access assignment target not yet supported".to_owned(), )) @@ -1273,6 +1340,17 @@ impl AstToEgglog { if Self::is_primitive_type(type_name) { return Ok(None); } + // &dm types are u256 pointers — use primitive ops for arithmetic + if let edge_ast::Expr::Ident(ident) = lhs { + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(&ident.name) { + if binding.is_dynamic_memory { + return Ok(None); + } + break; + } + } + } // Only dispatch to operator traits from std::ops. // User-defined traits named "Add" etc. do NOT get operator overloading. if !self.std_ops_traits.contains(trait_name) { @@ -1405,12 +1483,54 @@ impl AstToEgglog { )) } - /// Lower a builtin call (@caller, @callvalue, etc.). + /// Lower a builtin call (`@caller`, `@callvalue`, `@size_of`, `@alloc`, etc.). pub(crate) fn lower_builtin( - &self, + &mut self, name: &str, - _args: &[edge_ast::Expr], + type_args: &[edge_ast::ty::TypeSig], + args: &[edge_ast::Expr], ) -> Result { + // Handle builtins that take type arguments or value arguments + match name { + "size_of" => { + if type_args.len() != 1 { + return Err(IrError::Unsupported( + "@size_of requires exactly 1 type argument".to_owned(), + )); + } + if !args.is_empty() { + return Err(IrError::Unsupported( + "@size_of takes no value arguments".to_owned(), + )); + } + tracing::trace!( + "lower_builtin size_of: type_arg={:?}, subst={:?}", + type_args[0], + self.type_param_subst + ); + let size = self.compute_type_size(&type_args[0])?; + return Ok(ast_helpers::const_int( + size as i64, + self.current_ctx.clone(), + )); + } + "alloc" => { + if !type_args.is_empty() { + return Err(IrError::Unsupported( + "@alloc takes no type arguments".to_owned(), + )); + } + if args.len() != 1 { + return Err(IrError::Unsupported( + "@alloc requires exactly 1 argument (size in bytes)".to_owned(), + )); + } + let size_ir = self.lower_expr(&args[0])?; + return Ok(ast_helpers::dyn_alloc(size_ir)); + } + _ => {} + } + let env_op = match name { "caller" => EvmEnvOp::Caller, "callvalue" | "value" => EvmEnvOp::CallValue, @@ -1438,6 +1558,84 @@ impl AstToEgglog { ))) } + /// Compute the byte size of a type for `@size_of`. + fn compute_type_size(&self, ty: &edge_ast::ty::TypeSig) -> Result { + match ty { + edge_ast::ty::TypeSig::Named(name, type_params) => { + // Resolve through type_param_subst (handles generic contexts) + let resolved = self + .type_param_subst + .get(&name.name) + .cloned() + .unwrap_or_else(|| name.name.clone()); + + // Check if it's a primitive type name + match resolved.as_str() { + "u256" | "u128" | "u64" | "u32" | "u16" | "u8" | "i256" | "i128" | "i64" + | "i32" | "i16" | "i8" | "bool" | "addr" | "address" => return Ok(32), + _ => {} + } + + // Try monomorphized name first + let mangled = if !type_params.is_empty() { + Self::type_sig_mangle(&edge_ast::ty::TypeSig::Named( + name.clone(), + type_params.clone(), + )) + } else { + resolved.clone() + }; + + // Look up in struct_types + self.struct_types + .get(&mangled) + .or_else(|| self.struct_types.get(&resolved)) + .map_or_else( + || { + if self.union_types.contains_key(&mangled) + || self.union_types.contains_key(&resolved) + { + Ok(64) // unions: 2 words (discriminant + data) + } else { + // Might be a type param - check type_param_subst + self.type_param_subst.get(&name.name).map_or_else( + || { + Err(IrError::Unsupported(format!( + "cannot compute size of unknown type: {}", + name.name + ))) + }, + |concrete| { + // Recursively resolve + let concrete_sig = edge_ast::ty::TypeSig::Named( + edge_ast::Ident { + name: concrete.clone(), + span: name.span.clone(), + }, + vec![], + ); + self.compute_type_size(&concrete_sig) + }, + ) + } + }, + |info| { + if info.is_packed { + info.packed_layout + .as_ref() + .map_or(Ok(32), |layout| Ok(layout.word_count * 32)) + } else { + Ok(info.fields.len() * 32) + } + }, + ) + } + edge_ast::ty::TypeSig::Tuple(fields) => Ok(fields.len() * 32), + edge_ast::ty::TypeSig::Pointer(_, inner) => self.compute_type_size(inner), + _ => Ok(32), // default to 32 for other types + } + } + /// Lower `return (a, b, c)` — MSTORE each element at sequential 32-byte /// offsets, then RETURN the entire memory range. /// @@ -1721,6 +1919,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; // Get the original name (without prefix) for scope lookup let orig_name = outputs diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index 3dbc509..e5f4bf6 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -68,6 +68,7 @@ impl AstToEgglog { composite_type: Some(format!("__array__{n}")), composite_base: Some(base_ir), composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -95,6 +96,7 @@ impl AstToEgglog { composite_type: Some(struct_name), composite_base: Some(base_ir), composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -134,6 +136,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -237,6 +240,7 @@ impl AstToEgglog { }, composite_base: None, // dynamic base — resolved at element access composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -362,6 +366,11 @@ impl AstToEgglog { // First pass: scan for VarDecl names to identify memory-backed locals. // We need this list BEFORE lowering to know which variables to wrap in LetBinds. + tracing::trace!( + "lower_code_block: inline_depth={}, n_items={}", + self.inline_depth, + block.stmts.len() + ); let var_decl_names: Vec = block .stmts .iter() @@ -422,6 +431,10 @@ impl AstToEgglog { let idx = stmts .iter() .position(|s| matches!(s.as_ref(), EvmExpr::VarStore(n, _) if n == &var_name)); + tracing::trace!( + "store-fwd: var={var_name}, found_idx={idx:?}, n_stmts={}", + stmts.len() + ); let Some(idx) = idx else { continue }; // All preceding statements must be Empty (uninit VarDecl) or VarStore @@ -451,6 +464,7 @@ impl AstToEgglog { continue; } + tracing::trace!("store-fwd: inserting init for {var_name}"); var_inits.insert(var_name, init_val); stmts.remove(idx); } @@ -475,6 +489,11 @@ impl AstToEgglog { // we insert Drops between the side-effect prefix and the return value. for name in var_decl_names.iter().rev() { let var_name = format!("{prefix}__local_{name}"); + tracing::trace!( + "letbind: wrapping {var_name}, has_init={}, inline_depth={}", + var_inits.contains_key(&var_name), + self.inline_depth + ); if self.inline_depth == 0 { // Normal (non-inline): append Drop after the body. result = ast_helpers::concat(result, ast_helpers::drop_var(var_name.clone())); @@ -570,6 +589,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index 291f1e5..90f2dcf 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -47,7 +47,9 @@ pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { references_any_var(a, names) || references_any_var(b, names) } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => references_any_var(a, names), + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + references_any_var(a, names) + } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { references_any_var(a, names) || references_any_var(b, names) @@ -98,6 +100,8 @@ pub(crate) struct VarBinding { pub composite_base: Option, /// For generic composite types: the concrete type arguments (e.g., [addr, u256] for Map) pub composite_type_args: Vec, + /// Whether this variable is a dynamically-allocated memory pointer (&dm type) + pub is_dynamic_memory: bool, } /// Scope for variable resolution during lowering. @@ -463,10 +467,21 @@ impl AstToEgglog { "Sload", "Sstore", "Index", + "Mstore", + "Mload", + "Mcopy", ]; // Storage/hashing traits are fundamental (auto-imported from globals). // Always enable them so compiler-provided impls work without explicit `use`. - for name in ["UniqueSlot", "Sload", "Sstore", "Index"] { + for name in [ + "UniqueSlot", + "Sload", + "Sstore", + "Index", + "Mstore", + "Mload", + "Mcopy", + ] { self.std_ops_traits.insert(name.to_string()); } for stmt in &program.stmts { @@ -606,6 +621,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -696,10 +712,12 @@ impl AstToEgglog { edge_ast::Stmt::ImplBlock(impl_block) => { let type_name = impl_block.ty_name.name.clone(); - // Store generic impl blocks for monomorphization - if !impl_block.type_params.is_empty() - || self.generic_type_templates.contains_key(&type_name) - { + // Store generic impl blocks for monomorphization. + // Skip normal registration for generic types — their methods + // are registered during monomorphization under the mangled name. + let is_generic = !impl_block.type_params.is_empty() + || self.generic_type_templates.contains_key(&type_name); + if is_generic { let trait_name = impl_block.trait_impl.as_ref().map(|(n, _)| n.name.clone()); let trait_type_params = impl_block @@ -718,7 +736,11 @@ impl AstToEgglog { }); } - if let Some((ref trait_name, _)) = impl_block.trait_impl { + // For generic impl blocks, don't register unsubstituted methods. + // They'll be registered under the mangled name during monomorphization. + if is_generic { + // Skip normal processing + } else if let Some((ref trait_name, _)) = impl_block.trait_impl { // Trait impl — collect methods and validate against trait definition let mut methods = IndexMap::new(); for item in &impl_block.items { @@ -1003,6 +1025,7 @@ impl AstToEgglog { composite_type, composite_base: None, composite_type_args, + is_dynamic_memory: false, }; self.scopes .last_mut() @@ -1028,6 +1051,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/pattern.rs b/crates/ir/src/to_egglog/pattern.rs index b7c2546..88a1312 100644 --- a/crates/ir/src/to_egglog/pattern.rs +++ b/crates/ir/src/to_egglog/pattern.rs @@ -135,6 +135,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }, ); } @@ -218,6 +219,7 @@ impl AstToEgglog { composite_type: None, composite_base: None, composite_type_args: Vec::new(), + is_dynamic_memory: false, }, ); } diff --git a/crates/ir/src/to_egglog/types.rs b/crates/ir/src/to_egglog/types.rs index 0f2d8ea..61c95b3 100644 --- a/crates/ir/src/to_egglog/types.rs +++ b/crates/ir/src/to_egglog/types.rs @@ -347,7 +347,7 @@ impl AstToEgglog { stmts: block .stmts .iter() - .map(|item| Self::substitute_block_item(item, &name_subst)) + .map(|item| Self::substitute_block_item(item, &name_subst, subst)) .collect(), span: block.span.clone(), } @@ -356,40 +356,84 @@ impl AstToEgglog { fn substitute_block_item( item: &edge_ast::stmt::BlockItem, subst: &HashMap<&str, String>, + type_subst: &HashMap, ) -> edge_ast::stmt::BlockItem { match item { - edge_ast::stmt::BlockItem::Stmt(stmt) => { - edge_ast::stmt::BlockItem::Stmt(Box::new(Self::substitute_stmt(stmt, subst))) - } + edge_ast::stmt::BlockItem::Stmt(stmt) => edge_ast::stmt::BlockItem::Stmt(Box::new( + Self::substitute_stmt(stmt, subst, type_subst), + )), edge_ast::stmt::BlockItem::Expr(expr) => { - edge_ast::stmt::BlockItem::Expr(Self::substitute_expr(expr, subst)) + edge_ast::stmt::BlockItem::Expr(Self::substitute_expr(expr, subst, type_subst)) } } } - fn substitute_stmt(stmt: &edge_ast::Stmt, subst: &HashMap<&str, String>) -> edge_ast::Stmt { + fn substitute_stmt( + stmt: &edge_ast::Stmt, + subst: &HashMap<&str, String>, + type_subst: &HashMap, + ) -> edge_ast::Stmt { match stmt { edge_ast::Stmt::VarDecl(ident, ty, init, span) => edge_ast::Stmt::VarDecl( ident.clone(), ty.clone(), init.as_ref() - .map(|e| Box::new(Self::substitute_expr(e, subst))), + .map(|e| Box::new(Self::substitute_expr(e, subst, type_subst))), span.clone(), ), edge_ast::Stmt::VarAssign(lhs, rhs, span) => edge_ast::Stmt::VarAssign( - Self::substitute_expr(lhs, subst), - Self::substitute_expr(rhs, subst), + Self::substitute_expr(lhs, subst, type_subst), + Self::substitute_expr(rhs, subst, type_subst), + span.clone(), + ), + edge_ast::Stmt::Return(Some(expr), span) => edge_ast::Stmt::Return( + Some(Self::substitute_expr(expr, subst, type_subst)), span.clone(), ), - edge_ast::Stmt::Return(Some(expr), span) => { - edge_ast::Stmt::Return(Some(Self::substitute_expr(expr, subst)), span.clone()) + edge_ast::Stmt::Expr(expr) => { + edge_ast::Stmt::Expr(Self::substitute_expr(expr, subst, type_subst)) + } + edge_ast::Stmt::IfElse(branches, else_block) => { + let new_branches: Vec<(edge_ast::Expr, edge_ast::CodeBlock)> = branches + .iter() + .map(|(cond, block)| { + ( + Self::substitute_expr(cond, subst, type_subst), + Self::substitute_code_block_with(block, subst, type_subst), + ) + }) + .collect(); + edge_ast::Stmt::IfElse( + new_branches, + else_block + .as_ref() + .map(|eb| Self::substitute_code_block_with(eb, subst, type_subst)), + ) } - edge_ast::Stmt::Expr(expr) => edge_ast::Stmt::Expr(Self::substitute_expr(expr, subst)), other => other.clone(), } } - fn substitute_expr(expr: &edge_ast::Expr, subst: &HashMap<&str, String>) -> edge_ast::Expr { + fn substitute_code_block_with( + block: &edge_ast::CodeBlock, + subst: &HashMap<&str, String>, + type_subst: &HashMap, + ) -> edge_ast::CodeBlock { + edge_ast::CodeBlock { + stmts: block + .stmts + .iter() + .map(|item| Self::substitute_block_item(item, subst, type_subst)) + .collect(), + span: block.span.clone(), + } + } + + fn substitute_expr( + expr: &edge_ast::Expr, + subst: &HashMap<&str, String>, + type_subst: &HashMap, + ) -> edge_ast::Expr { match expr { edge_ast::Expr::Path(components, span) => { let new_components: Vec = components @@ -407,29 +451,67 @@ impl AstToEgglog { edge_ast::Expr::Path(new_components, span.clone()) } edge_ast::Expr::FunctionCall(callee, args, turbofish, span) => { + let new_turbofish: Vec = turbofish + .iter() + .map(|ts| Self::substitute_type_params(ts, type_subst)) + .collect(); edge_ast::Expr::FunctionCall( - Box::new(Self::substitute_expr(callee, subst)), + Box::new(Self::substitute_expr(callee, subst, type_subst)), args.iter() - .map(|a| Self::substitute_expr(a, subst)) + .map(|a| Self::substitute_expr(a, subst, type_subst)) + .collect(), + new_turbofish, + span.clone(), + ) + } + edge_ast::Expr::At(name, type_args, args, span) => { + tracing::trace!( + "substitute_expr At: name={}, type_args={type_args:?}", + name.name + ); + let new_type_args: Vec = type_args + .iter() + .map(|ts| Self::substitute_type_params(ts, type_subst)) + .collect(); + tracing::trace!("substitute_expr At: new_type_args={new_type_args:?}"); + edge_ast::Expr::At( + name.clone(), + new_type_args, + args.iter() + .map(|a| Self::substitute_expr(a, subst, type_subst)) .collect(), - turbofish.clone(), span.clone(), ) } edge_ast::Expr::FieldAccess(obj, field, span) => edge_ast::Expr::FieldAccess( - Box::new(Self::substitute_expr(obj, subst)), + Box::new(Self::substitute_expr(obj, subst, type_subst)), field.clone(), span.clone(), ), edge_ast::Expr::Binary(lhs, op, rhs, span) => edge_ast::Expr::Binary( - Box::new(Self::substitute_expr(lhs, subst)), + Box::new(Self::substitute_expr(lhs, subst, type_subst)), *op, - Box::new(Self::substitute_expr(rhs, subst)), + Box::new(Self::substitute_expr(rhs, subst, type_subst)), span.clone(), ), - edge_ast::Expr::Paren(inner, span) => { - edge_ast::Expr::Paren(Box::new(Self::substitute_expr(inner, subst)), span.clone()) - } + edge_ast::Expr::Assign(lhs, rhs, span) => edge_ast::Expr::Assign( + Box::new(Self::substitute_expr(lhs, subst, type_subst)), + Box::new(Self::substitute_expr(rhs, subst, type_subst)), + span.clone(), + ), + edge_ast::Expr::Paren(inner, span) => edge_ast::Expr::Paren( + Box::new(Self::substitute_expr(inner, subst, type_subst)), + span.clone(), + ), + edge_ast::Expr::Ident(ident) => subst.get(ident.name.as_str()).map_or_else( + || expr.clone(), + |replacement| { + edge_ast::Expr::Ident(edge_ast::Ident { + name: replacement.clone(), + span: ident.span.clone(), + }) + }, + ), _ => expr.clone(), } } @@ -580,7 +662,11 @@ impl AstToEgglog { let satisfied = self.trait_impls.contains_key(&key) || mangled_key .as_ref() - .is_some_and(|k| self.trait_impls.contains_key(k)); + .is_some_and(|k| self.trait_impls.contains_key(k)) + // Compiler provides default derive_slot for struct types, + // so consider UniqueSlot satisfied for any known struct. + || (constraint.name == "UniqueSlot" + && self.struct_types.contains_key(&concrete_name)); if !satisfied { let mut diag = edge_diagnostics::Diagnostic::error(format!( "the trait bound `{}: {}` is not satisfied", diff --git a/crates/ir/src/var_opt.rs b/crates/ir/src/var_opt.rs index 34689ed..9654c3a 100644 --- a/crates/ir/src/var_opt.rs +++ b/crates/ir/src/var_opt.rs @@ -91,7 +91,7 @@ fn collect_allocations(expr: &RcExpr, result: &mut HashMap { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { collect_allocations(a, result); } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { @@ -158,6 +158,7 @@ fn collect_allocations(expr: &RcExpr, result: &mut HashMap= 1 { // Inline: substitute args, rename locals, splice body at call site. // Include both internal and free functions. @@ -170,9 +171,11 @@ pub fn optimize_program(program: &mut crate::schema::EvmProgram, optimization_le .cloned() .collect(); inline_calls(&mut contract.runtime, &all_functions); + tracing::debug!(" var_opt after inline_calls: {} DAG nodes", crate::dag_node_count(&contract.runtime)); } // Insert early Drops in halting branches for better dead-var-elim contract.runtime = insert_early_drops(&contract.runtime); + tracing::debug!(" var_opt after insert_early_drops: {} DAG nodes", crate::dag_node_count(&contract.runtime)); contract.constructor = insert_early_drops(&contract.constructor); // NOTE: tighten_drops runs LATER in the pipeline (after store forwarding // at O0, or after egglog at O1+) because store forwarding can expose new @@ -184,60 +187,80 @@ pub fn optimize_program(program: &mut crate::schema::EvmProgram, optimization_le } } -/// Optimize an expression tree, applying all variable optimizations bottom-up. +/// Optimize an expression DAG, applying all variable optimizations bottom-up. +/// Uses memoization to avoid re-processing shared subtrees. fn optimize_expr(expr: &RcExpr) -> RcExpr { - // Bottom-up: optimize children first, then apply transforms at this node. - let rebuilt = rebuild_children(expr); - apply_transforms(&rebuilt) + let mut cache: HashMap = HashMap::new(); + let result = optimize_expr_memo(expr, &mut cache); + tracing::debug!(" optimize_expr: cache_size={}, output_dag={}", cache.len(), crate::dag_node_count(&result)); + result +} + +fn optimize_expr_memo(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { + let id = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&id) { + return Rc::clone(cached); + } + let rebuilt = rebuild_children_memo(expr, cache); + let result = apply_transforms(&rebuilt); + cache.insert(id, Rc::clone(&result)); + result } /// Recursively rebuild an expression with optimized children. -fn rebuild_children(expr: &RcExpr) -> RcExpr { +fn rebuild_children_memo(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { match expr.as_ref() { EvmExpr::Bop(op, lhs, rhs) => { - let l = optimize_expr(lhs); - let r = optimize_expr(rhs); + let l = optimize_expr_memo(lhs, cache); + let r = optimize_expr_memo(rhs, cache); if Rc::ptr_eq(&l, lhs) && Rc::ptr_eq(&r, rhs) { return Rc::clone(expr); } Rc::new(EvmExpr::Bop(*op, l, r)) } EvmExpr::Uop(op, inner) => { - let i = optimize_expr(inner); + let i = optimize_expr_memo(inner, cache); if Rc::ptr_eq(&i, inner) { return Rc::clone(expr); } Rc::new(EvmExpr::Uop(*op, i)) } + EvmExpr::DynAlloc(size) => { + let new_size = optimize_expr_memo(size, cache); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } EvmExpr::Top(op, a, b, c) => { - let a2 = optimize_expr(a); - let b2 = optimize_expr(b); - let c2 = optimize_expr(c); + let a2 = optimize_expr_memo(a, cache); + let b2 = optimize_expr_memo(b, cache); + let c2 = optimize_expr_memo(c, cache); if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { return Rc::clone(expr); } Rc::new(EvmExpr::Top(*op, a2, b2, c2)) } EvmExpr::Get(inner, idx) => { - let i = optimize_expr(inner); + let i = optimize_expr_memo(inner, cache); if Rc::ptr_eq(&i, inner) { return Rc::clone(expr); } Rc::new(EvmExpr::Get(i, *idx)) } EvmExpr::Concat(a, b) => { - let a2 = optimize_expr(a); - let b2 = optimize_expr(b); + let a2 = optimize_expr_memo(a, cache); + let b2 = optimize_expr_memo(b, cache); if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Concat(a2, b2)) } EvmExpr::If(cond, inputs, then_body, else_body) => { - let c = optimize_expr(cond); - let i = optimize_expr(inputs); - let t = optimize_expr(then_body); - let e = optimize_expr(else_body); + let c = optimize_expr_memo(cond, cache); + let i = optimize_expr_memo(inputs, cache); + let t = optimize_expr_memo(then_body, cache); + let e = optimize_expr_memo(else_body, cache); if Rc::ptr_eq(&c, cond) && Rc::ptr_eq(&i, inputs) && Rc::ptr_eq(&t, then_body) @@ -248,65 +271,78 @@ fn rebuild_children(expr: &RcExpr) -> RcExpr { Rc::new(EvmExpr::If(c, i, t, e)) } EvmExpr::DoWhile(inputs, body) => { - let i = optimize_expr(inputs); - let b = optimize_expr(body); + let i = optimize_expr_memo(inputs, cache); + let b = optimize_expr_memo(body, cache); if Rc::ptr_eq(&i, inputs) && Rc::ptr_eq(&b, body) { return Rc::clone(expr); } Rc::new(EvmExpr::DoWhile(i, b)) } EvmExpr::EnvRead(op, state) => { - let s = optimize_expr(state); + let s = optimize_expr_memo(state, cache); if Rc::ptr_eq(&s, state) { return Rc::clone(expr); } Rc::new(EvmExpr::EnvRead(*op, s)) } EvmExpr::EnvRead1(op, arg, state) => { - let a = optimize_expr(arg); - let s = optimize_expr(state); + let a = optimize_expr_memo(arg, cache); + let s = optimize_expr_memo(state, cache); if Rc::ptr_eq(&a, arg) && Rc::ptr_eq(&s, state) { return Rc::clone(expr); } Rc::new(EvmExpr::EnvRead1(*op, a, s)) } EvmExpr::Log(count, topics, data_offset, data_size, state) => { - let ts: Vec<_> = topics.iter().map(optimize_expr).collect(); - let doff = optimize_expr(data_offset); - let dsz = optimize_expr(data_size); - let s = optimize_expr(state); + let ts: Vec<_> = topics.iter().map(|t| optimize_expr_memo(t, cache)).collect(); + let doff = optimize_expr_memo(data_offset, cache); + let dsz = optimize_expr_memo(data_size, cache); + let s = optimize_expr_memo(state, cache); + if ts.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&doff, data_offset) + && Rc::ptr_eq(&dsz, data_size) + && Rc::ptr_eq(&s, state) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::Log(*count, ts, doff, dsz, s)) } EvmExpr::Revert(off, sz, state) => { - let o = optimize_expr(off); - let s = optimize_expr(sz); - let st = optimize_expr(state); + let o = optimize_expr_memo(off, cache); + let s = optimize_expr_memo(sz, cache); + let st = optimize_expr_memo(state, cache); if Rc::ptr_eq(&o, off) && Rc::ptr_eq(&s, sz) && Rc::ptr_eq(&st, state) { return Rc::clone(expr); } Rc::new(EvmExpr::Revert(o, s, st)) } EvmExpr::ReturnOp(off, sz, state) => { - let o = optimize_expr(off); - let s = optimize_expr(sz); - let st = optimize_expr(state); + let o = optimize_expr_memo(off, cache); + let s = optimize_expr_memo(sz, cache); + let st = optimize_expr_memo(state, cache); if Rc::ptr_eq(&o, off) && Rc::ptr_eq(&s, sz) && Rc::ptr_eq(&st, state) { return Rc::clone(expr); } Rc::new(EvmExpr::ReturnOp(o, s, st)) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { - let a2 = optimize_expr(a); - let b2 = optimize_expr(b); - let c2 = optimize_expr(c); - let d2 = optimize_expr(d); - let e2 = optimize_expr(e); - let f2 = optimize_expr(f); - let g2 = optimize_expr(g); + let a2 = optimize_expr_memo(a, cache); + let b2 = optimize_expr_memo(b, cache); + let c2 = optimize_expr_memo(c, cache); + let d2 = optimize_expr_memo(d, cache); + let e2 = optimize_expr_memo(e, cache); + let f2 = optimize_expr_memo(f, cache); + let g2 = optimize_expr_memo(g, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + && Rc::ptr_eq(&g2, g) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::ExtCall(a2, b2, c2, d2, e2, f2, g2)) } EvmExpr::Call(name, args) => { - let new_args: Vec<_> = args.iter().map(optimize_expr).collect(); + let new_args: Vec<_> = args.iter().map(|a| optimize_expr_memo(a, cache)).collect(); if new_args .iter() .zip(args.iter()) @@ -317,20 +353,20 @@ fn rebuild_children(expr: &RcExpr) -> RcExpr { Rc::new(EvmExpr::Call(name.clone(), new_args)) } EvmExpr::LetBind(name, value, body) => { - let v = optimize_expr(value); - let b = optimize_expr(body); + let v = optimize_expr_memo(value, cache); + let b = optimize_expr_memo(body, cache); // Don't short-circuit here — apply_transforms handles LetBind optimizations Rc::new(EvmExpr::LetBind(name.clone(), v, b)) } EvmExpr::VarStore(name, value) => { - let v = optimize_expr(value); + let v = optimize_expr_memo(value, cache); if Rc::ptr_eq(&v, value) { return Rc::clone(expr); } Rc::new(EvmExpr::VarStore(name.clone(), v)) } EvmExpr::Function(name, in_ty, out_ty, body) => { - let b = optimize_expr(body); + let b = optimize_expr_memo(body, cache); if Rc::ptr_eq(&b, body) { return Rc::clone(expr); } @@ -351,7 +387,7 @@ fn rebuild_children(expr: &RcExpr) -> RcExpr { | EvmExpr::StorageField(..) | EvmExpr::MemRegion(..) => Rc::clone(expr), EvmExpr::InlineAsm(inputs, hex, num_outputs) => { - let new_inputs: Vec<_> = inputs.iter().map(optimize_expr).collect(); + let new_inputs: Vec<_> = inputs.iter().map(|i| optimize_expr_memo(i, cache)).collect(); if new_inputs .iter() .zip(inputs.iter()) @@ -385,34 +421,45 @@ fn apply_letbind_opts( // 1. Dead variable elimination: never read → remove LetBind if info.read_count == 0 && info.write_count == 0 { if is_pure(init) { - return Rc::clone(body); + let r = Rc::clone(body); + tracing::debug!(" letbind_opt: dead-var-elim (pure) '{name}' body_dag={}", crate::dag_node_count(&r)); + return r; } // Keep side effects - return Rc::new(EvmExpr::Concat(Rc::clone(init), Rc::clone(body))); + let r = Rc::new(EvmExpr::Concat(Rc::clone(init), Rc::clone(body))); + tracing::debug!(" letbind_opt: dead-var-elim (side-effect) '{name}' result_dag={}", crate::dag_node_count(&r)); + return r; } // 2. Single-use inlining: read once, never written, not in loop, pure init if info.read_count == 1 && info.write_count == 0 && !info.in_loop && is_pure(init) { - return substitute_var(name, init, body); + let body_dag = crate::dag_node_count(body); + let r = substitute_var(name, init, body); + tracing::debug!(" letbind_opt: single-use-inline '{name}' init_dag={} body_dag={} result_dag={}", crate::dag_node_count(init), body_dag, crate::dag_node_count(&r)); + return r; } // 2b. Last-store forwarding: exactly one VarStore, one Var read, not in loop. - // Pattern: Concat(VarStore(x, val), ...Var(x)...) → substitute val for Var(x) - // and remove the VarStore. The LetBind becomes dead (no reads or writes). - // This handles "c = expr; return c;" → "return expr;". if info.write_count == 1 && info.read_count == 1 && !info.in_loop { if let Some(new_body) = forward_last_store(name, body) { + let body_dag = crate::dag_node_count(body); // LetBind is now dead — eliminate it if is_pure(init) { + tracing::debug!(" letbind_opt: last-store-fwd (pure) '{name}' body_dag={} result_dag={}", body_dag, crate::dag_node_count(&new_body)); return new_body; } - return Rc::new(EvmExpr::Concat(Rc::clone(init), new_body)); + let r = Rc::new(EvmExpr::Concat(Rc::clone(init), new_body)); + tracing::debug!(" letbind_opt: last-store-fwd (side-effect) '{name}' body_dag={} result_dag={}", body_dag, crate::dag_node_count(&r)); + return r; } } // 3. Multi-use constant propagation: constant init, never written if info.write_count == 0 && !info.in_loop && is_const(init) { - return substitute_var(name, init, body); + let body_dag = crate::dag_node_count(body); + let r = substitute_var(name, init, body); + tracing::debug!(" letbind_opt: const-prop '{name}' reads={} body_dag={} result_dag={}", info.read_count, body_dag, crate::dag_node_count(&r)); + return r; } Rc::clone(expr) @@ -479,11 +526,24 @@ fn analyze_var_inner(name: &str, expr: &RcExpr, in_loop: bool, info: &mut VarInf analyze_var_inner(name, inputs, in_loop, info); analyze_var_inner(name, body, true, info); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { analyze_var_inner(name, a, in_loop, info); } - // Top/ReturnOp/Revert: last arg (c) is the state parameter — skip it. - EvmExpr::Top(_, a, b, _c) | EvmExpr::Revert(a, b, _c) | EvmExpr::ReturnOp(a, b, _c) => { + // Top: last arg may be state OR operand depending on the op. + EvmExpr::Top(op, a, b, c) => { + analyze_var_inner(name, a, in_loop, info); + analyze_var_inner(name, b, in_loop, info); + // Select, CalldataCopy, Mcopy use all 3 positions as operands (no state param). + // Others (SStore, TStore, MStore, MStore8, Keccak256) have state as 3rd arg. + match op { + EvmTernaryOp::Select | EvmTernaryOp::CalldataCopy | EvmTernaryOp::Mcopy => { + analyze_var_inner(name, c, in_loop, info); + } + _ => {} + } + } + // ReturnOp/Revert: last arg (c) is the state parameter — skip it. + EvmExpr::Revert(a, b, _c) | EvmExpr::ReturnOp(a, b, _c) => { analyze_var_inner(name, a, in_loop, info); analyze_var_inner(name, b, in_loop, info); } @@ -613,7 +673,8 @@ fn is_pure(expr: &RcExpr) -> bool { | EvmExpr::ReturnOp(..) | EvmExpr::ExtCall(..) | EvmExpr::DoWhile(..) - | EvmExpr::Call(..) => false, + | EvmExpr::Call(..) + | EvmExpr::DynAlloc(..) => false, } } @@ -660,7 +721,7 @@ fn collect_immutable_vars_rec( collect_immutable_vars_rec(t, immutable, mutable); collect_immutable_vars_rec(e, immutable, mutable); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { collect_immutable_vars_rec(a, immutable, mutable); } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { @@ -783,6 +844,13 @@ fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { new_body, )) } + EvmExpr::DynAlloc(size) => { + let new_size = insert_drops_rec(size, vars_in_scope); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } // Leaf and other nodes: no structural changes needed _ => Rc::clone(expr), } @@ -847,7 +915,9 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { }; a_ref || b_ref } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => references_var_inner(a, name, follow_state), + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + references_var_inner(a, name, follow_state) + } EvmExpr::Top(op, a, b, c) => { use crate::schema::EvmTernaryOp::*; let c_is_state = matches!( @@ -1014,6 +1084,13 @@ fn tighten_drops_rec(expr: &RcExpr) -> RcExpr { new_body, )) } + EvmExpr::DynAlloc(size) => { + let new_size = tighten_drops_rec(size); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } // Leaf and other nodes: no structural changes needed _ => Rc::clone(expr), } @@ -1275,6 +1352,31 @@ fn stmt_references_var_deep_in_body(body: &RcExpr, name: &str) -> bool { /// Substitute all occurrences of `Var(name)` with `replacement` in `expr`. fn substitute_var(name: &str, replacement: &RcExpr, expr: &RcExpr) -> RcExpr { + let mut cache: HashMap = HashMap::new(); + substitute_var_memo(name, replacement, expr, &mut cache) +} + +fn substitute_var_memo( + name: &str, + replacement: &RcExpr, + expr: &RcExpr, + cache: &mut HashMap, +) -> RcExpr { + let id = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&id) { + return Rc::clone(cached); + } + let result = substitute_var_inner(name, replacement, expr, cache); + cache.insert(id, Rc::clone(&result)); + result +} + +fn substitute_var_inner( + name: &str, + replacement: &RcExpr, + expr: &RcExpr, + cache: &mut HashMap, +) -> RcExpr { match expr.as_ref() { EvmExpr::Var(n) if n == name => Rc::clone(replacement), // Leaf nodes @@ -1289,113 +1391,184 @@ fn substitute_var(name: &str, replacement: &RcExpr, expr: &RcExpr) -> RcExpr { EvmExpr::InlineAsm(inputs, hex, num_outputs) => { let new_inputs: Vec<_> = inputs .iter() - .map(|i| substitute_var(name, replacement, i)) + .map(|i| substitute_var_memo(name, replacement, i, cache)) .collect(); + if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let new_size = substitute_var_memo(name, replacement, size, cache); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } // Stop at shadowing LetBind EvmExpr::LetBind(n, init, body) => { - let new_init = substitute_var(name, replacement, init); + let new_init = substitute_var_memo(name, replacement, init, cache); if n == name { + if Rc::ptr_eq(&new_init, init) { + return Rc::clone(expr); + } Rc::new(EvmExpr::LetBind(n.clone(), new_init, Rc::clone(body))) } else { - let new_body = substitute_var(name, replacement, body); + let new_body = substitute_var_memo(name, replacement, body, cache); + if Rc::ptr_eq(&new_init, init) && Rc::ptr_eq(&new_body, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::LetBind(n.clone(), new_init, new_body)) } } EvmExpr::VarStore(n, val) => { - let new_val = substitute_var(name, replacement, val); + let new_val = substitute_var_memo(name, replacement, val, cache); + if Rc::ptr_eq(&new_val, val) { + return Rc::clone(expr); + } Rc::new(EvmExpr::VarStore(n.clone(), new_val)) } EvmExpr::Bop(op, a, b) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Bop(*op, a2, b2)) } EvmExpr::Uop(op, a) => { - let a2 = substitute_var(name, replacement, a); + let a2 = substitute_var_memo(name, replacement, a, cache); + if Rc::ptr_eq(&a2, a) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Uop(*op, a2)) } EvmExpr::Top(op, a, b, c) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); - let c2 = substitute_var(name, replacement, c); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + let c2 = substitute_var_memo(name, replacement, c, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Top(*op, a2, b2, c2)) } EvmExpr::Get(a, idx) => { - let a2 = substitute_var(name, replacement, a); + let a2 = substitute_var_memo(name, replacement, a, cache); + if Rc::ptr_eq(&a2, a) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Get(a2, *idx)) } EvmExpr::Concat(a, b) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Concat(a2, b2)) } EvmExpr::If(c, i, t, e) => { - let c2 = substitute_var(name, replacement, c); - let i2 = substitute_var(name, replacement, i); - let t2 = substitute_var(name, replacement, t); - let e2 = substitute_var(name, replacement, e); + let c2 = substitute_var_memo(name, replacement, c, cache); + let i2 = substitute_var_memo(name, replacement, i, cache); + let t2 = substitute_var_memo(name, replacement, t, cache); + let e2 = substitute_var_memo(name, replacement, e, cache); + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + return Rc::clone(expr); + } Rc::new(EvmExpr::If(c2, i2, t2, e2)) } EvmExpr::DoWhile(inputs, body) => { - let i2 = substitute_var(name, replacement, inputs); - let b2 = substitute_var(name, replacement, body); + let i2 = substitute_var_memo(name, replacement, inputs, cache); + let b2 = substitute_var_memo(name, replacement, body, cache); + if Rc::ptr_eq(&i2, inputs) && Rc::ptr_eq(&b2, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::DoWhile(i2, b2)) } EvmExpr::EnvRead(op, state) => { - let s2 = substitute_var(name, replacement, state); + let s2 = substitute_var_memo(name, replacement, state, cache); + if Rc::ptr_eq(&s2, state) { + return Rc::clone(expr); + } Rc::new(EvmExpr::EnvRead(*op, s2)) } EvmExpr::EnvRead1(op, arg, state) => { - let a2 = substitute_var(name, replacement, arg); - let s2 = substitute_var(name, replacement, state); + let a2 = substitute_var_memo(name, replacement, arg, cache); + let s2 = substitute_var_memo(name, replacement, state, cache); + if Rc::ptr_eq(&a2, arg) && Rc::ptr_eq(&s2, state) { + return Rc::clone(expr); + } Rc::new(EvmExpr::EnvRead1(*op, a2, s2)) } EvmExpr::Log(count, topics, data_offset, data_size, state) => { let ts: Vec<_> = topics .iter() - .map(|t| substitute_var(name, replacement, t)) + .map(|t| substitute_var_memo(name, replacement, t, cache)) .collect(); - let doff = substitute_var(name, replacement, data_offset); - let dsz = substitute_var(name, replacement, data_size); - let s2 = substitute_var(name, replacement, state); + let doff = substitute_var_memo(name, replacement, data_offset, cache); + let dsz = substitute_var_memo(name, replacement, data_size, cache); + let s2 = substitute_var_memo(name, replacement, state, cache); + if ts.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&doff, data_offset) + && Rc::ptr_eq(&dsz, data_size) + && Rc::ptr_eq(&s2, state) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::Log(*count, ts, doff, dsz, s2)) } EvmExpr::Revert(a, b, c) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); - let c2 = substitute_var(name, replacement, c); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + let c2 = substitute_var_memo(name, replacement, c, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Revert(a2, b2, c2)) } EvmExpr::ReturnOp(a, b, c) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); - let c2 = substitute_var(name, replacement, c); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + let c2 = substitute_var_memo(name, replacement, c, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::ReturnOp(a2, b2, c2)) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { - let a2 = substitute_var(name, replacement, a); - let b2 = substitute_var(name, replacement, b); - let c2 = substitute_var(name, replacement, c); - let d2 = substitute_var(name, replacement, d); - let e2 = substitute_var(name, replacement, e); - let f2 = substitute_var(name, replacement, f); - let g2 = substitute_var(name, replacement, g); + let a2 = substitute_var_memo(name, replacement, a, cache); + let b2 = substitute_var_memo(name, replacement, b, cache); + let c2 = substitute_var_memo(name, replacement, c, cache); + let d2 = substitute_var_memo(name, replacement, d, cache); + let e2 = substitute_var_memo(name, replacement, e, cache); + let f2 = substitute_var_memo(name, replacement, f, cache); + let g2 = substitute_var_memo(name, replacement, g, cache); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + && Rc::ptr_eq(&g2, g) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::ExtCall(a2, b2, c2, d2, e2, f2, g2)) } EvmExpr::Call(n, args) => { let new_args: Vec<_> = args .iter() - .map(|a| substitute_var(name, replacement, a)) + .map(|a| substitute_var_memo(name, replacement, a, cache)) .collect(); + if new_args.iter().zip(args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Call(n.clone(), new_args)) } EvmExpr::Function(n, in_ty, out_ty, body) => { - let b2 = substitute_var(name, replacement, body); + let b2 = substitute_var_memo(name, replacement, body, cache); + if Rc::ptr_eq(&b2, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Function( n.clone(), in_ty.clone(), @@ -1573,6 +1746,13 @@ fn monomorphize_rec( .collect(); Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let new_size = monomorphize_rec(size, funcs, site_counter, new_functions); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } // Leaves (EnvRead, EnvRead1, Function, Const, Var, Arg, etc.) _ => Rc::clone(expr), } @@ -1604,15 +1784,24 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { EvmExpr::Concat(a, b) => { let a2 = substitute_args(a, in_ty, args); let b2 = substitute_args(b, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(body); + } Rc::new(EvmExpr::Concat(a2, b2)) } EvmExpr::Bop(op, a, b) => { let a2 = substitute_args(a, in_ty, args); let b2 = substitute_args(b, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(body); + } Rc::new(EvmExpr::Bop(*op, a2, b2)) } EvmExpr::Uop(op, a) => { let a2 = substitute_args(a, in_ty, args); + if Rc::ptr_eq(&a2, a) { + return Rc::clone(body); + } Rc::new(EvmExpr::Uop(*op, a2)) } EvmExpr::If(c, i, t, e) => { @@ -1620,38 +1809,59 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let i2 = substitute_args(i, in_ty, args); let t2 = substitute_args(t, in_ty, args); let e2 = substitute_args(e, in_ty, args); + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + return Rc::clone(body); + } Rc::new(EvmExpr::If(c2, i2, t2, e2)) } EvmExpr::LetBind(name, init, body_inner) => { let i2 = substitute_args(init, in_ty, args); let b2 = substitute_args(body_inner, in_ty, args); + if Rc::ptr_eq(&i2, init) && Rc::ptr_eq(&b2, body_inner) { + return Rc::clone(body); + } Rc::new(EvmExpr::LetBind(name.clone(), i2, b2)) } EvmExpr::VarStore(name, val) => { let v2 = substitute_args(val, in_ty, args); + if Rc::ptr_eq(&v2, val) { + return Rc::clone(body); + } Rc::new(EvmExpr::VarStore(name.clone(), v2)) } EvmExpr::Top(op, a, b, c) => { let a2 = substitute_args(a, in_ty, args); let b2 = substitute_args(b, in_ty, args); let c2 = substitute_args(c, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(body); + } Rc::new(EvmExpr::Top(*op, a2, b2, c2)) } EvmExpr::DoWhile(inputs, body_inner) => { let i2 = substitute_args(inputs, in_ty, args); let b2 = substitute_args(body_inner, in_ty, args); + if Rc::ptr_eq(&i2, inputs) && Rc::ptr_eq(&b2, body_inner) { + return Rc::clone(body); + } Rc::new(EvmExpr::DoWhile(i2, b2)) } EvmExpr::Revert(a, b, c) => { let a2 = substitute_args(a, in_ty, args); let b2 = substitute_args(b, in_ty, args); let c2 = substitute_args(c, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(body); + } Rc::new(EvmExpr::Revert(a2, b2, c2)) } EvmExpr::ReturnOp(a, b, c) => { let a2 = substitute_args(a, in_ty, args); let b2 = substitute_args(b, in_ty, args); let c2 = substitute_args(c, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(body); + } Rc::new(EvmExpr::ReturnOp(a2, b2, c2)) } EvmExpr::Call(name, call_args) => { @@ -1659,6 +1869,9 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { .iter() .map(|a| substitute_args(a, in_ty, args)) .collect(); + if new_args.iter().zip(call_args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(body); + } Rc::new(EvmExpr::Call(name.clone(), new_args)) } EvmExpr::Log(count, topics, data_off, data_sz, state) => { @@ -1669,6 +1882,13 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let d2 = substitute_args(data_off, in_ty, args); let s2 = substitute_args(data_sz, in_ty, args); let st2 = substitute_args(state, in_ty, args); + if topics2.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&d2, data_off) + && Rc::ptr_eq(&s2, data_sz) + && Rc::ptr_eq(&st2, state) + { + return Rc::clone(body); + } Rc::new(EvmExpr::Log(*count, topics2, d2, s2, st2)) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { @@ -1679,6 +1899,12 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let e2 = substitute_args(e, in_ty, args); let f2 = substitute_args(f, in_ty, args); let g2 = substitute_args(g, in_ty, args); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + && Rc::ptr_eq(&g2, g) + { + return Rc::clone(body); + } Rc::new(EvmExpr::ExtCall(a2, b2, c2, d2, e2, f2, g2)) } EvmExpr::InlineAsm(inputs, hex, num_outputs) => { @@ -1686,8 +1912,18 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { .iter() .map(|i| substitute_args(i, in_ty, args)) .collect(); + if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(body); + } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let new_size = substitute_args(size, in_ty, args); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(body); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } // Leaves _ => Rc::clone(body), } @@ -1718,7 +1954,10 @@ fn collect_letbind_names(expr: &RcExpr, names: &mut std::collections::HashSet { + EvmExpr::Uop(_, a) + | EvmExpr::VarStore(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::DynAlloc(a) => { collect_letbind_names(a, names); } EvmExpr::If(c, i, t, e) => { @@ -1770,13 +2009,17 @@ fn rename_locals_rec( ) -> RcExpr { match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { - let new_name = if defined.contains(name) { + let needs_rename = defined.contains(name); + let i2 = rename_locals_rec(init, suffix, defined); + let b2 = rename_locals_rec(body, suffix, defined); + if !needs_rename && Rc::ptr_eq(&i2, init) && Rc::ptr_eq(&b2, body) { + return Rc::clone(expr); + } + let new_name = if needs_rename { format!("{name}{suffix}") } else { name.clone() }; - let i2 = rename_locals_rec(init, suffix, defined); - let b2 = rename_locals_rec(body, suffix, defined); Rc::new(EvmExpr::LetBind(new_name, i2, b2)) } EvmExpr::Var(name) => { @@ -1790,6 +2033,8 @@ fn rename_locals_rec( let v2 = rename_locals_rec(val, suffix, defined); if defined.contains(name) { Rc::new(EvmExpr::VarStore(format!("{name}{suffix}"), v2)) + } else if Rc::ptr_eq(&v2, val) { + return Rc::clone(expr); } else { Rc::new(EvmExpr::VarStore(name.clone(), v2)) } @@ -1804,15 +2049,24 @@ fn rename_locals_rec( EvmExpr::Concat(a, b) => { let a2 = rename_locals_rec(a, suffix, defined); let b2 = rename_locals_rec(b, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Concat(a2, b2)) } EvmExpr::Bop(op, a, b) => { let a2 = rename_locals_rec(a, suffix, defined); let b2 = rename_locals_rec(b, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Bop(*op, a2, b2)) } EvmExpr::Uop(op, a) => { let a2 = rename_locals_rec(a, suffix, defined); + if Rc::ptr_eq(&a2, a) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Uop(*op, a2)) } EvmExpr::If(c, i, t, e) => { @@ -1820,33 +2074,51 @@ fn rename_locals_rec( let i2 = rename_locals_rec(i, suffix, defined); let t2 = rename_locals_rec(t, suffix, defined); let e2 = rename_locals_rec(e, suffix, defined); + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + return Rc::clone(expr); + } Rc::new(EvmExpr::If(c2, i2, t2, e2)) } EvmExpr::Top(op, a, b, c) => { let a2 = rename_locals_rec(a, suffix, defined); let b2 = rename_locals_rec(b, suffix, defined); let c2 = rename_locals_rec(c, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Top(*op, a2, b2, c2)) } EvmExpr::DoWhile(inputs, body) => { let i2 = rename_locals_rec(inputs, suffix, defined); let b2 = rename_locals_rec(body, suffix, defined); + if Rc::ptr_eq(&i2, inputs) && Rc::ptr_eq(&b2, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::DoWhile(i2, b2)) } EvmExpr::Revert(a, b, c) => { let a2 = rename_locals_rec(a, suffix, defined); let b2 = rename_locals_rec(b, suffix, defined); let c2 = rename_locals_rec(c, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Revert(a2, b2, c2)) } EvmExpr::ReturnOp(a, b, c) => { let a2 = rename_locals_rec(a, suffix, defined); let b2 = rename_locals_rec(b, suffix, defined); let c2 = rename_locals_rec(c, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::ReturnOp(a2, b2, c2)) } EvmExpr::Get(a, idx) => { let a2 = rename_locals_rec(a, suffix, defined); + if Rc::ptr_eq(&a2, a) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Get(a2, *idx)) } EvmExpr::Call(name, call_args) => { @@ -1854,6 +2126,9 @@ fn rename_locals_rec( .iter() .map(|a| rename_locals_rec(a, suffix, defined)) .collect(); + if new_args.iter().zip(call_args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Call(name.clone(), new_args)) } EvmExpr::Log(count, topics, data_off, data_sz, state) => { @@ -1864,6 +2139,13 @@ fn rename_locals_rec( let d2 = rename_locals_rec(data_off, suffix, defined); let s2 = rename_locals_rec(data_sz, suffix, defined); let st2 = rename_locals_rec(state, suffix, defined); + if topics2.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&d2, data_off) + && Rc::ptr_eq(&s2, data_sz) + && Rc::ptr_eq(&st2, state) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::Log(*count, topics2, d2, s2, st2)) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { @@ -1874,6 +2156,12 @@ fn rename_locals_rec( let e2 = rename_locals_rec(e, suffix, defined); let f2 = rename_locals_rec(f, suffix, defined); let g2 = rename_locals_rec(g, suffix, defined); + if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + && Rc::ptr_eq(&g2, g) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::ExtCall(a2, b2, c2, d2, e2, f2, g2)) } EvmExpr::InlineAsm(inputs, hex, num_outputs) => { @@ -1881,8 +2169,18 @@ fn rename_locals_rec( .iter() .map(|i| rename_locals_rec(i, suffix, defined)) .collect(); + if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::DynAlloc(size) => { + let new_size = rename_locals_rec(size, suffix, defined); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } _ => Rc::clone(expr), } } @@ -1940,7 +2238,7 @@ fn reads_var(expr: &RcExpr, name: &str) -> bool { let b_is_state = matches!(op, SLoad | TLoad | MLoad | CalldataLoad); reads_var(a, name) || (!b_is_state && reads_var(b, name)) } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => reads_var(a, name), + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => reads_var(a, name), EvmExpr::Top(op, a, b, c) => { use crate::schema::EvmTernaryOp::*; let c_is_state = matches!( @@ -2097,7 +2395,9 @@ fn references_any_in_set(expr: &RcExpr, names: &HashSet<&str>) -> bool { | EvmExpr::DoWhile(a, b) => { references_any_in_set(a, names) || references_any_in_set(b, names) } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => references_any_in_set(a, names), + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + references_any_in_set(a, names) + } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { references_any_in_set(a, names) || references_any_in_set(b, names) @@ -2358,7 +2658,10 @@ fn count_calldataloads(expr: &RcExpr, counts: &mut HashMap) { count_calldataloads(a, counts); count_calldataloads(b, counts); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::Function(_, _, _, a) => { + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::Function(_, _, _, a) + | EvmExpr::DynAlloc(a) => { count_calldataloads(a, counts); } EvmExpr::Top(_, a, b, c) => { @@ -2563,6 +2866,13 @@ fn replace_calldataloads(expr: &RcExpr, hoisted: &HashMap) -> RcExp } Rc::new(EvmExpr::Log(*count, new_topics, nd, nz, Rc::clone(state))) } + EvmExpr::DynAlloc(size) => { + let new_size = replace_calldataloads(size, hoisted); + if Rc::ptr_eq(&new_size, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(new_size)) + } _ => Rc::clone(expr), } } diff --git a/crates/lexer/src/lexer.rs b/crates/lexer/src/lexer.rs index f727b11..a41f914 100644 --- a/crates/lexer/src/lexer.rs +++ b/crates/lexer/src/lexer.rs @@ -637,6 +637,18 @@ impl<'a> Lexer<'a> { self.consume(); self.single_char_token(TokenKind::Pointer(Location::TransientStorage)) } + 'd' => { + let start = self.position; + self.consume(); // consume 'd' + if self.peek() == Some('m') { + self.consume(); // consume 'm' + self.single_char_token(TokenKind::Pointer(Location::DynamicMemory)) + } else { + // Not &dm, just &d — return AND operator + Ok(TokenKind::Operator(Operator::Bitwise(BitwiseOperator::And)) + .into_single_span(start)) + } + } 'm' => { self.consume(); self.single_char_token(TokenKind::Pointer(Location::Memory)) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index e1ba434..180e899 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -1592,6 +1592,33 @@ impl Parser { }; left = Expr::Assign(Box::new(left), Box::new(right), span); + } else if matches!( + self.peek().kind, + TokenKind::Operator(edge_types::tokens::Operator::CompoundAssignment(_)) + ) { + let compound_op = if let TokenKind::Operator( + edge_types::tokens::Operator::CompoundAssignment(op), + ) = self.peek().kind + { + op + } else { + unreachable!() + }; + self.advance(); + let right = self.parse_binary_expr(prec + 1)?; + let bin_op = compound_to_bin_op(compound_op); + let span = Span { + start: left.span().start, + end: right.span().end, + file: left.span().file.clone(), + }; + let binary = Expr::Binary( + Box::new(left.clone()), + bin_op, + Box::new(right), + span.clone(), + ); + left = Expr::Assign(Box::new(left), Box::new(binary), span); } else { let op = self.parse_bin_op()?; let next_min_prec = if is_right_assoc { prec } else { prec + 1 }; @@ -1957,6 +1984,25 @@ impl Parser { span: name_token.span, }; + // Check for turbofish type arguments: @builtin::() + let type_args = if self.check(&TokenKind::DoubleColon) { + if self.cursor + 1 < self.tokens.len() + && matches!( + self.tokens[self.cursor + 1].kind, + TokenKind::Operator(Operator::Comparison( + ComparisonOperator::LessThan + )) + ) + { + self.advance(); // consume :: + self.parse_turbofish_type_args()? + } else { + Vec::new() + } + } else { + Vec::new() + }; + // Parse arguments if there are parentheses let args = if self.check(&TokenKind::OpenParen) { self.advance(); @@ -1979,7 +2025,7 @@ impl Parser { file: start.file, }; - Ok(Expr::At(builtin_ident, args, span)) + Ok(Expr::At(builtin_ident, type_args, args, span)) } else { Err(ParseError::InvalidExpr { message: "Expected identifier after @".to_string(), @@ -2637,6 +2683,7 @@ impl Parser { edge_types::tokens::Location::Returndata => edge_ast::ty::Location::Returndata, edge_types::tokens::Location::InternalCode => edge_ast::ty::Location::ImmutableCode, edge_types::tokens::Location::ExternalCode => edge_ast::ty::Location::ExternalCode, + edge_types::tokens::Location::DynamicMemory => edge_ast::ty::Location::DynamicMemory, } } @@ -2726,7 +2773,9 @@ impl Parser { }; match &self.peek().kind { - TokenKind::Operator(Operator::Assignment) => Some((0, true)), // Lowest precedence, right-associative + // Lowest precedence, right-associative + TokenKind::Operator(Operator::Assignment) + | TokenKind::Operator(Operator::CompoundAssignment(_)) => Some((0, true)), TokenKind::Operator(Operator::Logical(op)) => Some(match op { LogicalOperator::Or => (1, false), LogicalOperator::And => (2, false), @@ -3141,3 +3190,20 @@ fn is_evm_opcode(name: &str) -> bool { | "BLOBBASEFEE" ) } + +const fn compound_to_bin_op(op: edge_types::tokens::CompoundAssignmentOperator) -> BinOp { + use edge_types::tokens::CompoundAssignmentOperator; + match op { + CompoundAssignmentOperator::AddAssign => BinOp::Add, + CompoundAssignmentOperator::SubAssign => BinOp::Sub, + CompoundAssignmentOperator::MulAssign => BinOp::Mul, + CompoundAssignmentOperator::DivAssign => BinOp::Div, + CompoundAssignmentOperator::ModAssign => BinOp::Mod, + CompoundAssignmentOperator::ExpAssign => BinOp::Exp, + CompoundAssignmentOperator::AndAssign => BinOp::BitwiseAnd, + CompoundAssignmentOperator::OrAssign => BinOp::BitwiseOr, + CompoundAssignmentOperator::XorAssign => BinOp::BitwiseXor, + CompoundAssignmentOperator::ShrAssign => BinOp::Shr, + CompoundAssignmentOperator::ShlAssign => BinOp::Shl, + } +} diff --git a/crates/types/src/tokens/locations.rs b/crates/types/src/tokens/locations.rs index c680cc6..ebd4d99 100644 --- a/crates/types/src/tokens/locations.rs +++ b/crates/types/src/tokens/locations.rs @@ -89,4 +89,10 @@ pub enum Location { /// May only be copied to memory. #[display("&ec")] ExternalCode, + /// Dynamic Memory + /// + /// A runtime-allocated memory region. + /// Pointer value is the base address, accessed via MLOAD/MSTORE. + #[display("&dm")] + DynamicMemory, } diff --git a/examples/tests/test_vec.edge b/examples/tests/test_vec.edge new file mode 100644 index 0000000..9151fec --- /dev/null +++ b/examples/tests/test_vec.edge @@ -0,0 +1,66 @@ +// test_vec.edge — Vec dynamic memory allocation tests + +abi ITestVec { + fn test_new_and_push() -> (u256); + fn test_get() -> (u256); + fn test_set() -> (u256); + fn test_grow() -> (u256); + fn test_index() -> (u256); +} + +contract TestVec { + // Create a vec, push elements, verify len + pub fn test_new_and_push() -> (u256) { + let v: &dm Vec = Vec::new(4); + v.push(10); + v.push(20); + v.push(30); + // Return len (should be 3) + return v.len; + } + + // Push and get elements back + pub fn test_get() -> (u256) { + let v: &dm Vec = Vec::new(4); + v.push(100); + v.push(200); + v.push(300); + // Return second element (should be 200) + return v.get(1); + } + + // Set an element and verify the change + pub fn test_set() -> (u256) { + let v: &dm Vec = Vec::new(4); + v.push(10); + v.push(20); + v.push(30); + v.set(1, 999); + // Return modified element (should be 999) + return v.get(1); + } + + // Push beyond initial capacity to trigger growth + pub fn test_grow() -> (u256) { + let v: &dm Vec = Vec::new(2); + v.push(1); + v.push(2); + // This push exceeds capacity=2, triggers grow + v.push(3); + v.push(4); + v.push(5); + // Verify all elements survived the growth + // Return: get(0) + get(1) + get(2) + get(3) + get(4) + // = 1 + 2 + 3 + 4 + 5 = 15 + return v.get(0) + v.get(1) + v.get(2) + v.get(3) + v.get(4); + } + + // Test Index trait (v[i] syntax) + pub fn test_index() -> (u256) { + let v: &dm Vec = Vec::new(4); + v.push(42); + v.push(84); + // Return v[1] (should be 84) + return v[1]; + } +} diff --git a/std/globals/ops.edge b/std/globals/ops.edge index 247586f..2399d8a 100644 --- a/std/globals/ops.edge +++ b/std/globals/ops.edge @@ -77,3 +77,15 @@ trait Sstore { trait Sload { fn sload(base_slot: u256) -> Self; } + +trait Mstore { + fn mstore(self, offset: u256); +} + +trait Mload { + fn mload(offset: u256) -> Self; +} + +trait Mcopy { + fn mcopy(self, dest: u256, size: u256); +} diff --git a/std/globals/vec.edge b/std/globals/vec.edge new file mode 100644 index 0000000..38c8346 --- /dev/null +++ b/std/globals/vec.edge @@ -0,0 +1,71 @@ + + +// Vec — a dynamically-allocated, growable array. +// +// Memory layout (inline, no indirection): +// [len, capacity, elem0, elem1, ...] +// ^--- pointer on stack +// +// Element i at: ptr + 64 + i * @size_of::() +// +// Uses &dm (dynamic memory) pointer annotation for pass-by-reference +// semantics on methods, enabling transparent pointer updates on growth. + +type Vec = { + len: u256, + capacity: u256, +}; + + +impl Vec { + fn new(cap: u256) -> u256 { + let total = 64 + cap * @size_of::(); + let ptr = @alloc(total); + // len = 0 + 0.mstore(ptr); + // capacity = cap + cap.mstore(ptr + 32); + ptr + } + + fn push(self: &dm Self, val: T) { + if (self.len >= self.capacity) { + self.grow(self.capacity + 3); + } + let offset = self + 64 + self.len * @size_of::(); + val.mstore(offset); + self.len = self.len + 1; + } + + fn get(self: &dm Self, index: u256) -> T { + if (index >= self.len) { + revert(); + } + (self + 64 + index * @size_of::()).mload() + } + + fn set(self: &dm Self, index: u256, val: T) { + if (index >= self.len) { + revert(); + } + val.mstore(self + 64 + index * @size_of::()); + } + + fn grow(self: &dm Self, new_cap: u256) { + let old_size = 64 + self.capacity * @size_of::(); + let new_size = 64 + new_cap * @size_of::(); + let new_ptr = @alloc(new_size); + // copy old header + data to new location + self.mcopy(new_ptr, old_size); + // update capacity at new location + new_cap.mstore(new_ptr + 32); + // transparent update via &dm aliasing + self = new_ptr; + } +} + +impl Vec: Index { + fn index(self: &dm Self, idx: u256) -> T { + self.get(idx) + } +} From 3898efc4e812afa9604078fde24ab1dd13711947 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:16:01 -0600 Subject: [PATCH 07/13] perf: bypass 32MB string round-trip in egglog extraction (6800x faster) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use egglog's structured TermDag API (get_extract_report) instead of parsing the tree-expanded s-expression string output. The TermDag is already hash-consed, so extract_report_to_expr produces RcExpr with perfect Rc sharing in ~70µs vs 471ms for string parsing. Also call disable_messages() to skip generating the 32MB string entirely, cutting egglog run time from ~500ms to ~42ms. Total IR pipeline drops from ~600ms to ~105ms. Also adds memoization to replace_regions in mem_region.rs to preserve Rc sharing during memory region assignment. Co-Authored-By: Claude Opus 4.6 --- crates/ir/src/lib.rs | 111 ++++++--- crates/ir/src/mem_region.rs | 163 ++++++++++---- crates/ir/src/sexp.rs | 438 ++++++++++++++++++++++++++++++++++++ 3 files changed, 640 insertions(+), 72 deletions(-) diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 631a599..3bde523 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -965,7 +965,11 @@ fn ir_stats_dag( // Recurse into children let d = depth + 1; - macro_rules! go { ($e:expr) => { ir_stats_dag($e, stats, d, visited) }; } + macro_rules! go { + ($e:expr) => { + ir_stats_dag($e, stats, d, visited) + }; + } match expr.as_ref() { EvmExpr::Arg(..) | EvmExpr::Const(..) @@ -983,22 +987,53 @@ fn ir_stats_dag( EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) - | EvmExpr::EnvRead1(_, a, b) => { go!(a); go!(b); } - EvmExpr::LetBind(_, a, b) => { go!(a); go!(b); } + | EvmExpr::EnvRead1(_, a, b) => { + go!(a); + go!(b); + } + EvmExpr::LetBind(_, a, b) => { + go!(a); + go!(b); + } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - go!(a); go!(b); go!(c); + go!(a); + go!(b); + go!(c); + } + EvmExpr::If(a, b, c, e) => { + go!(a); + go!(b); + go!(c); + go!(e); } - EvmExpr::If(a, b, c, e) => { go!(a); go!(b); go!(c); go!(e); } EvmExpr::Function(_, _, _, body) => go!(body), - EvmExpr::Call(_, args) => { for a in args { go!(a); } } + EvmExpr::Call(_, args) => { + for a in args { + go!(a); + } + } EvmExpr::Log(_, topics, doff, dsz, state) => { - for t in topics { go!(t); } - go!(doff); go!(dsz); go!(state); + for t in topics { + go!(t); + } + go!(doff); + go!(dsz); + go!(state); } EvmExpr::ExtCall(a, b, c, e, f, g, h) => { - go!(a); go!(b); go!(c); go!(e); go!(f); go!(g); go!(h); + go!(a); + go!(b); + go!(c); + go!(e); + go!(f); + go!(g); + go!(h); + } + EvmExpr::InlineAsm(inputs, _, _) => { + for inp in inputs { + go!(inp); + } } - EvmExpr::InlineAsm(inputs, _, _) => { for inp in inputs { go!(inp); } } } } @@ -1087,7 +1122,12 @@ pub fn lower_and_optimize( tracing::debug!(" lowering: {:?}", t.elapsed()); for c in &ir_program.contracts { let dag = dag_node_count(&c.runtime); - tracing::debug!(" [{}] after lowering: {} DAG nodes, {} fns", c.name, dag, c.internal_functions.len()); + tracing::debug!( + " [{}] after lowering: {} DAG nodes, {} fns", + c.name, + dag, + c.internal_functions.len() + ); if tracing::enabled!(tracing::Level::TRACE) { let stats = ir_stats(&c.runtime); tracing::trace!(" [{}] IR stats after lowering:\n{stats}", c.name); @@ -1102,7 +1142,12 @@ pub fn lower_and_optimize( tracing::debug!(" var_opt: {:?}", t.elapsed()); for c in &ir_program.contracts { let dag = dag_node_count(&c.runtime); - tracing::debug!(" [{}] after var_opt: {} DAG nodes, {} fns", c.name, dag, c.internal_functions.len()); + tracing::debug!( + " [{}] after var_opt: {} DAG nodes, {} fns", + c.name, + dag, + c.internal_functions.len() + ); if tracing::enabled!(tracing::Level::TRACE) { let stats = ir_stats(&c.runtime); tracing::trace!(" [{}] IR stats after var_opt:\n{stats}", c.name); @@ -1146,6 +1191,7 @@ pub fn lower_and_optimize( let schedule = schedule::make_schedule(optimization_level); let mut optimized_contracts = Vec::new(); + // TODO: trying to remove this by fixing `replace_regions` w memoization instead // Re-establish Rc sharing broken by var_opt/storage_hoist/mem_region. // Must run right before serialization, after all IR transform passes. let t = std::time::Instant::now(); @@ -1206,25 +1252,26 @@ pub fn lower_and_optimize( let t_egg = std::time::Instant::now(); let mut egraph = create_egraph(); - let outputs = egraph + egraph.disable_messages(); // skip 32MB string generation + let _ = egraph .parse_and_run_program(None, &egglog_program) .map_err(|e| IrError::Egglog(format!("{e}")))?; tracing::debug!(" egglog run ({}): {:?}", contract.name, t_egg.elapsed()); - // The last output is the extracted expression from (extract __runtime) - let extracted_sexp = outputs - .last() - .ok_or_else(|| IrError::Extraction("no output from extract".to_owned()))?; - tracing::info!( "Optimized contract {} at -O{}", contract.name, optimization_level ); + // Extract directly from egglog's hash-consed TermDag (no string round-trip) let t_phase = std::time::Instant::now(); - let mut optimized_runtime = sexp::sexp_to_expr(extracted_sexp)?; - tracing::debug!(" sexp_to_expr: {:?}", t_phase.elapsed()); + let report = egraph + .get_extract_report() + .as_ref() + .ok_or_else(|| IrError::Extraction("no extract report from egglog".to_owned()))?; + let mut optimized_runtime = sexp::extract_report_to_expr(report)?; + tracing::debug!(" extract_report_to_expr: {:?}", t_phase.elapsed()); // Check for compile-time-detectable constant overflows in narrow types. // This catches overflow revealed by egglog const-folding (e.g. through @@ -1250,7 +1297,11 @@ pub fn lower_and_optimize( let t_phase = std::time::Instant::now(); optimized_runtime = hash_cons_expr(&optimized_runtime); - tracing::debug!(" post-egglog hash_cons: {:?} (dag={})", t_phase.elapsed(), dag_node_count(&optimized_runtime)); + tracing::debug!( + " post-egglog hash_cons: {:?} (dag={})", + t_phase.elapsed(), + dag_node_count(&optimized_runtime) + ); // Only keep internal functions still referenced (directly or transitively) // by Call nodes in the optimized runtime. Monomorphized functions that @@ -1294,17 +1345,23 @@ pub fn lower_and_optimize( schedule ); let mut func_egraph = create_egraph(); - let func_outputs = func_egraph + func_egraph.disable_messages(); + let _ = func_egraph .parse_and_run_program(None, &func_program) .map_err(|e| IrError::Egglog(format!("{e}")))?; - let func_extracted = func_outputs - .last() - .ok_or_else(|| IrError::Extraction("no output from func extract".to_owned()))?; - let optimized_func = sexp::sexp_to_expr(func_extracted)?; + let func_report = func_egraph + .get_extract_report() + .as_ref() + .ok_or_else(|| IrError::Extraction("no extract report from func egglog".to_owned()))?; + let optimized_func = sexp::extract_report_to_expr(func_report)?; let optimized_func = cleanup::cleanup_expr_pub(&optimized_func); optimized_functions.push(optimized_func); } - tracing::debug!(" collect+optimize fns: {:?} ({} kept)", t_phase.elapsed(), optimized_functions.len()); + tracing::debug!( + " collect+optimize fns: {:?} ({} kept)", + t_phase.elapsed(), + optimized_functions.len() + ); tracing::debug!( " contract {} total: {:?}", diff --git a/crates/ir/src/mem_region.rs b/crates/ir/src/mem_region.rs index cd767e0..fa61c64 100644 --- a/crates/ir/src/mem_region.rs +++ b/crates/ir/src/mem_region.rs @@ -249,6 +249,29 @@ pub fn assign_program_offsets(program: &mut crate::schema::EvmProgram) { /// Replace all `MemRegion(id, _)` with `Const(SmallInt(offset))`. fn replace_regions(expr: &RcExpr, assignments: &BTreeMap) -> RcExpr { + let mut cache = std::collections::HashMap::new(); + replace_regions_memo(expr, assignments, &mut cache) +} + +fn replace_regions_memo( + expr: &RcExpr, + assignments: &BTreeMap, + cache: &mut std::collections::HashMap, +) -> RcExpr { + let id = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&id) { + return Rc::clone(cached); + } + let result = replace_regions_inner(expr, assignments, cache); + cache.insert(id, Rc::clone(&result)); + result +} + +fn replace_regions_inner( + expr: &RcExpr, + assignments: &BTreeMap, + cache: &mut std::collections::HashMap, +) -> RcExpr { match expr.as_ref() { EvmExpr::MemRegion(id, _size) => { let offset = assignments[id]; @@ -258,101 +281,142 @@ fn replace_regions(expr: &RcExpr, assignments: &BTreeMap) -> RcExpr EvmContext::InFunction("__mem__".to_owned()), )) } - // Recurse into all children (same structure as collect_regions) EvmExpr::Bop(op, a, b) => { - let na = replace_regions(a, assignments); - let nb = replace_regions(b, assignments); + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Bop(*op, na, nb)) } EvmExpr::Uop(op, a) => { - let na = replace_regions(a, assignments); + let na = replace_regions_memo(a, assignments, cache); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Uop(*op, na)) } EvmExpr::Top(op, a, b, c) => { - let na = replace_regions(a, assignments); - let nb = replace_regions(b, assignments); - let nc = replace_regions(c, assignments); + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); + let nc = replace_regions_memo(c, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Top(*op, na, nb, nc)) } EvmExpr::Concat(a, b) => { - let na = replace_regions(a, assignments); - let nb = replace_regions(b, assignments); + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Concat(na, nb)) } EvmExpr::Get(a, idx) => { - let na = replace_regions(a, assignments); + let na = replace_regions_memo(a, assignments, cache); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Get(na, *idx)) } EvmExpr::If(c, i, t, e) => { - let nc = replace_regions(c, assignments); - let ni = replace_regions(i, assignments); - let nt = replace_regions(t, assignments); - let ne = replace_regions(e, assignments); + let nc = replace_regions_memo(c, assignments, cache); + let ni = replace_regions_memo(i, assignments, cache); + let nt = replace_regions_memo(t, assignments, cache); + let ne = replace_regions_memo(e, assignments, cache); + if Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&ni, i) && Rc::ptr_eq(&nt, t) && Rc::ptr_eq(&ne, e) { + return Rc::clone(expr); + } Rc::new(EvmExpr::If(nc, ni, nt, ne)) } EvmExpr::DoWhile(inputs, body) => { - let ni = replace_regions(inputs, assignments); - let nb = replace_regions(body, assignments); + let ni = replace_regions_memo(inputs, assignments, cache); + let nb = replace_regions_memo(body, assignments, cache); + if Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::DoWhile(ni, nb)) } EvmExpr::LetBind(name, init, body) => { - let ni = replace_regions(init, assignments); - let nb = replace_regions(body, assignments); + let ni = replace_regions_memo(init, assignments, cache); + let nb = replace_regions_memo(body, assignments, cache); + if Rc::ptr_eq(&ni, init) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::LetBind(name.clone(), ni, nb)) } EvmExpr::VarStore(name, val) => { - let nv = replace_regions(val, assignments); + let nv = replace_regions_memo(val, assignments, cache); + if Rc::ptr_eq(&nv, val) { + return Rc::clone(expr); + } Rc::new(EvmExpr::VarStore(name.clone(), nv)) } EvmExpr::Revert(a, b, c) => { - let na = replace_regions(a, assignments); - let nb = replace_regions(b, assignments); - let nc = replace_regions(c, assignments); + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); + let nc = replace_regions_memo(c, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Revert(na, nb, nc)) } EvmExpr::ReturnOp(a, b, c) => { - let na = replace_regions(a, assignments); - let nb = replace_regions(b, assignments); - let nc = replace_regions(c, assignments); + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); + let nc = replace_regions_memo(c, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } Rc::new(EvmExpr::ReturnOp(na, nb, nc)) } EvmExpr::Log(count, topics, d, s, st) => { let nt: Vec<_> = topics .iter() - .map(|t| replace_regions(t, assignments)) + .map(|t| replace_regions_memo(t, assignments, cache)) .collect(); - let nd = replace_regions(d, assignments); - let ns = replace_regions(s, assignments); - let nst = replace_regions(st, assignments); + let nd = replace_regions_memo(d, assignments, cache); + let ns = replace_regions_memo(s, assignments, cache); + let nst = replace_regions_memo(st, assignments, cache); + if nt.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ns, s) && Rc::ptr_eq(&nst, st) + { + return Rc::clone(expr); + } Rc::new(EvmExpr::Log(*count, nt, nd, ns, nst)) } - EvmExpr::ExtCall(a, b, c, d, e, f, g) => Rc::new(EvmExpr::ExtCall( - replace_regions(a, assignments), - replace_regions(b, assignments), - replace_regions(c, assignments), - replace_regions(d, assignments), - replace_regions(e, assignments), - replace_regions(f, assignments), - replace_regions(g, assignments), - )), + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + let na = replace_regions_memo(a, assignments, cache); + let nb = replace_regions_memo(b, assignments, cache); + let nc = replace_regions_memo(c, assignments, cache); + let nd = replace_regions_memo(d, assignments, cache); + let ne = replace_regions_memo(e, assignments, cache); + let nf = replace_regions_memo(f, assignments, cache); + let ng = replace_regions_memo(g, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) + && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ne, e) && Rc::ptr_eq(&nf, f) + && Rc::ptr_eq(&ng, g) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, ng)) + } EvmExpr::Call(name, args) => { let new_args: Vec<_> = args .iter() - .map(|a| replace_regions(a, assignments)) + .map(|a| replace_regions_memo(a, assignments, cache)) .collect(); + if new_args.iter().zip(args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Call(name.clone(), new_args)) } EvmExpr::Function(name, in_ty, out_ty, body) => { - let nb = replace_regions(body, assignments); + let nb = replace_regions_memo(body, assignments, cache); + if Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } Rc::new(EvmExpr::Function( name.clone(), in_ty.clone(), @@ -361,23 +425,32 @@ fn replace_regions(expr: &RcExpr, assignments: &BTreeMap) -> RcExpr )) } EvmExpr::EnvRead(op, s) => { - let ns = replace_regions(s, assignments); + let ns = replace_regions_memo(s, assignments, cache); + if Rc::ptr_eq(&ns, s) { + return Rc::clone(expr); + } Rc::new(EvmExpr::EnvRead(*op, ns)) } EvmExpr::EnvRead1(op, a, s) => { - let na = replace_regions(a, assignments); - let ns = replace_regions(s, assignments); + let na = replace_regions_memo(a, assignments, cache); + let ns = replace_regions_memo(s, assignments, cache); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&ns, s) { + return Rc::clone(expr); + } Rc::new(EvmExpr::EnvRead1(*op, na, ns)) } EvmExpr::InlineAsm(inputs, hex, num_outputs) => { let ni: Vec<_> = inputs .iter() - .map(|i| replace_regions(i, assignments)) + .map(|i| replace_regions_memo(i, assignments, cache)) .collect(); + if ni.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } Rc::new(EvmExpr::InlineAsm(ni, hex.clone(), *num_outputs)) } EvmExpr::DynAlloc(size) => { - let ns = replace_regions(size, assignments); + let ns = replace_regions_memo(size, assignments, cache); if Rc::ptr_eq(&ns, size) { return Rc::clone(expr); } diff --git a/crates/ir/src/sexp.rs b/crates/ir/src/sexp.rs index 5eed021..23e4bd0 100644 --- a/crates/ir/src/sexp.rs +++ b/crates/ir/src/sexp.rs @@ -5,6 +5,11 @@ use std::{collections::HashMap, rc::Rc}; +use egglog::{ + ast::Literal, + ExtractReport, Term, TermDag, TermId, +}; + use crate::{ schema::{ EvmBaseType, EvmBinaryOp, EvmConstant, EvmContext, EvmEnvOp, EvmExpr, EvmTernaryOp, @@ -1156,6 +1161,439 @@ fn sexp_to_list(sexp: &Sexp) -> Result, IrError> { Ok(result) } +// ============================================================ +// TermDag → EvmExpr (structured extraction, no string round-trip) +// ============================================================ + +/// Convert an egglog `ExtractReport` directly to an `RcExpr`. +/// +/// This bypasses the 32MB string round-trip through `sexp_to_expr` by walking +/// egglog's hash-consed `TermDag` directly. Each `TermId` maps to exactly one +/// `RcExpr`, preserving perfect Rc sharing. +pub fn extract_report_to_expr(report: &ExtractReport) -> Result { + match report { + ExtractReport::Best { termdag, term, .. } => { + let root_id = termdag.lookup(term); + let mut cache: HashMap = HashMap::new(); + termdag_convert(termdag, root_id, &mut cache) + } + ExtractReport::Variants { .. } => { + Err(IrError::Extraction("expected Best extract, got Variants".to_owned())) + } + } +} + +fn termdag_convert( + dag: &TermDag, + id: TermId, + cache: &mut HashMap, +) -> Result { + if let Some(cached) = cache.get(&id) { + return Ok(Rc::clone(cached)); + } + let term = dag.get(id); + let result = match term { + Term::App(sym, args) => { + let head = sym.as_str(); + match head { + "Arg" => { + let ty = td_type(dag, args[0])?; + let ctx = td_ctx(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::Arg(ty, ctx))) + } + "Const" => { + let c = td_const(dag, args[0])?; + let ty = td_type(dag, args[1])?; + let ctx = td_ctx(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::Const(c, ty, ctx))) + } + "Empty" => { + let ty = td_type(dag, args[0])?; + let ctx = td_ctx(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::Empty(ty, ctx))) + } + "Bop" => { + let op = td_binop(dag, args[0])?; + let l = termdag_convert(dag, args[1], cache)?; + let r = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::Bop(op, l, r))) + } + "Uop" => { + let op = td_unop(dag, args[0])?; + let e = termdag_convert(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::Uop(op, e))) + } + "Top" => { + let op = td_ternop(dag, args[0])?; + let a = termdag_convert(dag, args[1], cache)?; + let b = termdag_convert(dag, args[2], cache)?; + let c = termdag_convert(dag, args[3], cache)?; + Ok(Rc::new(EvmExpr::Top(op, a, b, c))) + } + "Get" => { + let e = termdag_convert(dag, args[0], cache)?; + let idx = td_i64(dag, args[1])? as usize; + Ok(Rc::new(EvmExpr::Get(e, idx))) + } + "Concat" => { + let a = termdag_convert(dag, args[0], cache)?; + let b = termdag_convert(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::Concat(a, b))) + } + "If" => { + let cond = termdag_convert(dag, args[0], cache)?; + let inputs = termdag_convert(dag, args[1], cache)?; + let t = termdag_convert(dag, args[2], cache)?; + let e = termdag_convert(dag, args[3], cache)?; + Ok(Rc::new(EvmExpr::If(cond, inputs, t, e))) + } + "DoWhile" => { + let inputs = termdag_convert(dag, args[0], cache)?; + let body = termdag_convert(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::DoWhile(inputs, body))) + } + "EnvRead" => { + let op = td_envop(dag, args[0])?; + let st = termdag_convert(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::EnvRead(op, st))) + } + "EnvRead1" => { + let op = td_envop(dag, args[0])?; + let arg = termdag_convert(dag, args[1], cache)?; + let st = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::EnvRead1(op, arg, st))) + } + "Log" => { + let n = td_i64(dag, args[0])? as usize; + let topics = td_list(dag, args[1], cache)?; + let data_offset = termdag_convert(dag, args[2], cache)?; + let data_size = termdag_convert(dag, args[3], cache)?; + let st = termdag_convert(dag, args[4], cache)?; + Ok(Rc::new(EvmExpr::Log(n, topics, data_offset, data_size, st))) + } + "Revert" => { + let off = termdag_convert(dag, args[0], cache)?; + let sz = termdag_convert(dag, args[1], cache)?; + let st = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::Revert(off, sz, st))) + } + "ReturnOp" => { + let off = termdag_convert(dag, args[0], cache)?; + let sz = termdag_convert(dag, args[1], cache)?; + let st = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::ReturnOp(off, sz, st))) + } + "ExtCall" => { + let tgt = termdag_convert(dag, args[0], cache)?; + let val = termdag_convert(dag, args[1], cache)?; + let ao = termdag_convert(dag, args[2], cache)?; + let al = termdag_convert(dag, args[3], cache)?; + let ro = termdag_convert(dag, args[4], cache)?; + let rl = termdag_convert(dag, args[5], cache)?; + let st = termdag_convert(dag, args[6], cache)?; + Ok(Rc::new(EvmExpr::ExtCall(tgt, val, ao, al, ro, rl, st))) + } + "Call" => { + let name = td_string(dag, args[0])?; + let list = td_list(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::Call(name, list))) + } + "Selector" => { + let sig = td_string(dag, args[0])?; + Ok(Rc::new(EvmExpr::Selector(sig))) + } + "LetBind" => { + let name = td_string(dag, args[0])?; + let value = termdag_convert(dag, args[1], cache)?; + let body = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::LetBind(name, value, body))) + } + "Var" => { + let name = td_string(dag, args[0])?; + Ok(Rc::new(EvmExpr::Var(name))) + } + "VarStore" => { + let name = td_string(dag, args[0])?; + let value = termdag_convert(dag, args[1], cache)?; + Ok(Rc::new(EvmExpr::VarStore(name, value))) + } + "Drop" => { + let name = td_string(dag, args[0])?; + Ok(Rc::new(EvmExpr::Drop(name))) + } + "Function" => { + let name = td_string(dag, args[0])?; + let in_ty = td_type(dag, args[1])?; + let out_ty = td_type(dag, args[2])?; + let body = termdag_convert(dag, args[3], cache)?; + Ok(Rc::new(EvmExpr::Function(name, in_ty, out_ty, body))) + } + "StorageField" => { + let name = td_string(dag, args[0])?; + let slot = td_i64(dag, args[1])? as usize; + let ty = td_type(dag, args[2])?; + Ok(Rc::new(EvmExpr::StorageField(name, slot, ty))) + } + "InlineAsm" => { + let inputs = td_list(dag, args[0], cache)?; + let hex = td_string(dag, args[1])?; + let num_outputs = td_i64(dag, args[2])? as i32; + Ok(Rc::new(EvmExpr::InlineAsm(inputs, hex, num_outputs))) + } + "MemRegion" => { + let id = td_i64(dag, args[0])?; + let size = td_i64(dag, args[1])?; + Ok(Rc::new(EvmExpr::MemRegion(id, size))) + } + "DynAlloc" => { + let size = termdag_convert(dag, args[0], cache)?; + Ok(Rc::new(EvmExpr::DynAlloc(size))) + } + other => Err(IrError::Extraction(format!( + "termdag: unknown constructor: {other}" + ))), + } + } + Term::Lit(_) | Term::Var(_) => { + Err(IrError::Extraction(format!("termdag: unexpected term: {term:?}"))) + } + }?; + cache.insert(id, Rc::clone(&result)); + Ok(result) +} + +/// Extract an i64 literal from a TermDag node. +fn td_i64(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::Lit(Literal::Int(n)) => Ok(*n), + other => Err(IrError::Extraction(format!("expected i64 literal, got: {other:?}"))), + } +} + +/// Extract a string literal from a TermDag node. +fn td_string(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::Lit(Literal::String(sym)) => Ok(sym.as_str().to_owned()), + other => Err(IrError::Extraction(format!("expected string literal, got: {other:?}"))), + } +} + +/// Extract a bool literal from a TermDag node. +fn td_bool(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::Lit(Literal::Bool(b)) => Ok(*b), + other => Err(IrError::Extraction(format!("expected bool literal, got: {other:?}"))), + } +} + +fn td_const(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, args) => match sym.as_str() { + "SmallInt" => Ok(EvmConstant::SmallInt(td_i64(dag, args[0])?)), + "LargeInt" => Ok(EvmConstant::LargeInt(td_string(dag, args[0])?)), + "ConstBool" => Ok(EvmConstant::Bool(td_bool(dag, args[0])?)), + "ConstAddr" => Ok(EvmConstant::Addr(td_string(dag, args[0])?)), + other => Err(IrError::Extraction(format!("termdag: unknown constant: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected constant, got: {other:?}"))), + } +} + +fn td_type(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, args) => match sym.as_str() { + "Base" => Ok(EvmType::Base(td_basetype(dag, args[0])?)), + "TupleT" => { + let types = td_type_list(dag, args[0])?; + Ok(EvmType::TupleT(types)) + } + "ArrayT" => { + let elem = td_basetype(dag, args[0])?; + let len = td_i64(dag, args[1])? as usize; + Ok(EvmType::ArrayT(elem, len)) + } + other => Err(IrError::Extraction(format!("termdag: unknown type: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected type, got: {other:?}"))), + } +} + +fn td_basetype(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, args) => match sym.as_str() { + "UIntT" => Ok(EvmBaseType::UIntT(td_i64(dag, args[0])? as u16)), + "IntT" => Ok(EvmBaseType::IntT(td_i64(dag, args[0])? as u16)), + "BytesT" => Ok(EvmBaseType::BytesT(td_i64(dag, args[0])? as u8)), + "AddrT" => Ok(EvmBaseType::AddrT), + "BoolT" => Ok(EvmBaseType::BoolT), + "UnitT" => Ok(EvmBaseType::UnitT), + "StateT" => Ok(EvmBaseType::StateT), + other => Err(IrError::Extraction(format!("termdag: unknown base type: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected base type, got: {other:?}"))), + } +} + +fn td_type_list(dag: &TermDag, id: TermId) -> Result, IrError> { + let mut result = Vec::new(); + let mut cur = id; + loop { + match dag.get(cur) { + Term::App(sym, args) => match sym.as_str() { + "TLNil" => break, + "TLCons" => { + result.push(td_basetype(dag, args[0])?); + cur = args[1]; + } + other => return Err(IrError::Extraction(format!("termdag: expected TLCons/TLNil, got: {other}"))), + }, + other => return Err(IrError::Extraction(format!("termdag: expected type list, got: {other:?}"))), + } + } + Ok(result) +} + +fn td_ctx(dag: &TermDag, id: TermId, cache: &mut HashMap) -> Result { + match dag.get(id) { + Term::App(sym, args) => match sym.as_str() { + "InFunction" => Ok(EvmContext::InFunction(td_string(dag, args[0])?)), + "InBranch" => { + let b = td_bool(dag, args[0])?; + let pred = termdag_convert(dag, args[1], cache)?; + let input = termdag_convert(dag, args[2], cache)?; + Ok(EvmContext::InBranch(b, pred, input)) + } + "InLoop" => { + let input = termdag_convert(dag, args[0], cache)?; + let pred = termdag_convert(dag, args[1], cache)?; + Ok(EvmContext::InLoop(input, pred)) + } + other => Err(IrError::Extraction(format!("termdag: unknown context: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected context, got: {other:?}"))), + } +} + +fn td_binop(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, _) => match sym.as_str() { + "OpAdd" => Ok(EvmBinaryOp::Add), + "OpSub" => Ok(EvmBinaryOp::Sub), + "OpMul" => Ok(EvmBinaryOp::Mul), + "OpDiv" => Ok(EvmBinaryOp::Div), + "OpSDiv" => Ok(EvmBinaryOp::SDiv), + "OpMod" => Ok(EvmBinaryOp::Mod), + "OpSMod" => Ok(EvmBinaryOp::SMod), + "OpExp" => Ok(EvmBinaryOp::Exp), + "OpCheckedAdd" => Ok(EvmBinaryOp::CheckedAdd), + "OpCheckedSub" => Ok(EvmBinaryOp::CheckedSub), + "OpCheckedMul" => Ok(EvmBinaryOp::CheckedMul), + "OpLt" => Ok(EvmBinaryOp::Lt), + "OpGt" => Ok(EvmBinaryOp::Gt), + "OpSLt" => Ok(EvmBinaryOp::SLt), + "OpSGt" => Ok(EvmBinaryOp::SGt), + "OpEq" => Ok(EvmBinaryOp::Eq), + "OpAnd" => Ok(EvmBinaryOp::And), + "OpOr" => Ok(EvmBinaryOp::Or), + "OpXor" => Ok(EvmBinaryOp::Xor), + "OpShl" => Ok(EvmBinaryOp::Shl), + "OpShr" => Ok(EvmBinaryOp::Shr), + "OpSar" => Ok(EvmBinaryOp::Sar), + "OpByte" => Ok(EvmBinaryOp::Byte), + "OpLogAnd" => Ok(EvmBinaryOp::LogAnd), + "OpLogOr" => Ok(EvmBinaryOp::LogOr), + "OpSLoad" => Ok(EvmBinaryOp::SLoad), + "OpTLoad" => Ok(EvmBinaryOp::TLoad), + "OpMLoad" => Ok(EvmBinaryOp::MLoad), + "OpCalldataLoad" => Ok(EvmBinaryOp::CalldataLoad), + other => Err(IrError::Extraction(format!("termdag: unknown binary op: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected binary op, got: {other:?}"))), + } +} + +fn td_unop(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, _) => match sym.as_str() { + "OpIsZero" => Ok(EvmUnaryOp::IsZero), + "OpNot" => Ok(EvmUnaryOp::Not), + "OpNeg" => Ok(EvmUnaryOp::Neg), + "OpSignExtend" => Ok(EvmUnaryOp::SignExtend), + "OpClz" => Ok(EvmUnaryOp::Clz), + other => Err(IrError::Extraction(format!("termdag: unknown unary op: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected unary op, got: {other:?}"))), + } +} + +fn td_ternop(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, _) => match sym.as_str() { + "OpSStore" => Ok(EvmTernaryOp::SStore), + "OpTStore" => Ok(EvmTernaryOp::TStore), + "OpMStore" => Ok(EvmTernaryOp::MStore), + "OpMStore8" => Ok(EvmTernaryOp::MStore8), + "OpKeccak256" => Ok(EvmTernaryOp::Keccak256), + "OpSelect" => Ok(EvmTernaryOp::Select), + "OpCalldataCopy" => Ok(EvmTernaryOp::CalldataCopy), + "OpMcopy" => Ok(EvmTernaryOp::Mcopy), + other => Err(IrError::Extraction(format!("termdag: unknown ternary op: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected ternary op, got: {other:?}"))), + } +} + +fn td_envop(dag: &TermDag, id: TermId) -> Result { + match dag.get(id) { + Term::App(sym, _) => match sym.as_str() { + "EnvCaller" => Ok(EvmEnvOp::Caller), + "EnvCallValue" => Ok(EvmEnvOp::CallValue), + "EnvCallDataSize" => Ok(EvmEnvOp::CallDataSize), + "EnvOrigin" => Ok(EvmEnvOp::Origin), + "EnvGasPrice" => Ok(EvmEnvOp::GasPrice), + "EnvBlockHash" => Ok(EvmEnvOp::BlockHash), + "EnvCoinbase" => Ok(EvmEnvOp::Coinbase), + "EnvTimestamp" => Ok(EvmEnvOp::Timestamp), + "EnvNumber" => Ok(EvmEnvOp::Number), + "EnvGasLimit" => Ok(EvmEnvOp::GasLimit), + "EnvChainId" => Ok(EvmEnvOp::ChainId), + "EnvSelfBalance" => Ok(EvmEnvOp::SelfBalance), + "EnvBaseFee" => Ok(EvmEnvOp::BaseFee), + "EnvGas" => Ok(EvmEnvOp::Gas), + "EnvAddress" => Ok(EvmEnvOp::Address), + "EnvBalance" => Ok(EvmEnvOp::Balance), + "EnvCodeSize" => Ok(EvmEnvOp::CodeSize), + "EnvReturnDataSize" => Ok(EvmEnvOp::ReturnDataSize), + other => Err(IrError::Extraction(format!("termdag: unknown env op: {other}"))), + }, + other => Err(IrError::Extraction(format!("termdag: expected env op, got: {other:?}"))), + } +} + +/// Convert a Cons/Nil list in TermDag to Vec. +fn td_list( + dag: &TermDag, + id: TermId, + cache: &mut HashMap, +) -> Result, IrError> { + let mut result = Vec::new(); + let mut cur = id; + loop { + match dag.get(cur) { + Term::App(sym, args) => match sym.as_str() { + "Nil" => break, + "Cons" => { + result.push(termdag_convert(dag, args[0], cache)?); + cur = args[1]; + } + other => return Err(IrError::Extraction(format!("termdag: expected Cons/Nil, got: {other}"))), + }, + other => return Err(IrError::Extraction(format!("termdag: expected list, got: {other:?}"))), + } + } + Ok(result) +} + // ---- Atom helpers ---- fn atom_str(sexp: &Sexp) -> Result { From a3fca114f30a9bd362a1f53976a9fd3490a48743 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Thu, 12 Mar 2026 23:34:26 -0600 Subject: [PATCH 08/13] feat: region store forwarding + unchecked Vec arithmetic (35-49% gas reduction) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Region forwarding pass (region_forward.rs) walks IR in program order, tracks known (region_id, field_idx) → value mappings, and replaces RegionLoad with forwarded constants. Enables egglog to constant-fold Vec capacity checks and eliminate dead growth branches. Vec methods now use UnsafeAdd/UnsafeMul for internal pointer/offset calculations that cannot overflow u256. compiler_provided_method extended to work on composite types (struct pointers), and infer_receiver_type handles FieldAccess expressions. UnsafeAdd/UnsafeSub/UnsafeMul added to auto-imported traits so globals (vec.edge) can use them without explicit imports. Co-Authored-By: Claude Opus 4.6 --- crates/codegen/src/dispatcher.rs | 6 + crates/codegen/src/expr_compiler.rs | 27 + crates/e2e/.gas-snapshot | 10 +- crates/e2e/tests/main.rs | 3 + crates/e2e/tests/suites/bench_vec.rs | 55 ++ crates/e2e/tests/suites/helpers.rs | 2 +- crates/e2e/tests/suites/vec_exec.rs | 8 + crates/ir/src/ast_helpers.rs | 16 + crates/ir/src/cleanup.rs | 17 + crates/ir/src/lib.rs | 94 ++- crates/ir/src/mem_region.rs | 473 ++++++++++++-- .../ir/src/optimizations/range_analysis.egg | 96 +++ crates/ir/src/optimizations/region_memory.egg | 42 ++ crates/ir/src/pretty.rs | 44 +- crates/ir/src/region_forward.rs | 595 ++++++++++++++++++ crates/ir/src/schema.egg | 12 + crates/ir/src/schema.rs | 17 + crates/ir/src/sexp.rs | 80 +++ crates/ir/src/storage_hoist.rs | 26 + crates/ir/src/to_egglog/calls.rs | 49 +- crates/ir/src/to_egglog/composite.rs | 20 + crates/ir/src/to_egglog/control_flow.rs | 2 + crates/ir/src/to_egglog/expr.rs | 25 + crates/ir/src/to_egglog/function.rs | 5 + crates/ir/src/to_egglog/mod.rs | 28 +- crates/ir/src/to_egglog/pattern.rs | 2 + crates/ir/src/var_opt.rs | 215 ++++++- examples/tests/test_vec.edge | 22 + std/globals/vec.edge | 30 +- 29 files changed, 1917 insertions(+), 104 deletions(-) create mode 100644 crates/e2e/tests/suites/bench_vec.rs create mode 100644 crates/ir/src/optimizations/region_memory.egg create mode 100644 crates/ir/src/region_forward.rs diff --git a/crates/codegen/src/dispatcher.rs b/crates/codegen/src/dispatcher.rs index 369b280..7b1451c 100644 --- a/crates/codegen/src/dispatcher.rs +++ b/crates/codegen/src/dispatcher.rs @@ -45,6 +45,12 @@ fn contains_dyn_alloc(expr: &edge_ir::schema::RcExpr) -> bool { EvmExpr::Call(_, args) => args.iter().any(contains_dyn_alloc), EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(contains_dyn_alloc), EvmExpr::Get(inner, _) => contains_dyn_alloc(inner), + EvmExpr::AllocRegion(_, _, true) => true, + EvmExpr::AllocRegion(_, nf, false) => contains_dyn_alloc(nf), + EvmExpr::RegionStore(_, _, val, state) => { + contains_dyn_alloc(val) || contains_dyn_alloc(state) + } + EvmExpr::RegionLoad(_, _, state) => contains_dyn_alloc(state), EvmExpr::Const(..) | EvmExpr::Var(_) | EvmExpr::Drop(_) diff --git a/crates/codegen/src/expr_compiler.rs b/crates/codegen/src/expr_compiler.rs index bdd3376..1c7a777 100644 --- a/crates/codegen/src/expr_compiler.rs +++ b/crates/codegen/src/expr_compiler.rs @@ -284,6 +284,27 @@ impl<'a> ExprCompiler<'a> { // stack: [base] — the returned pointer } + EvmExpr::AllocRegion(id, _, _) => { + panic!( + "AllocRegion({id}) reached codegen without being resolved. \ + Run resolve_regions() after egglog extraction." + ); + } + + EvmExpr::RegionStore(id, field, _, _) => { + panic!( + "RegionStore({id}, {field}) reached codegen without being resolved to MStore. \ + Run resolve_regions() after egglog extraction." + ); + } + + EvmExpr::RegionLoad(id, field, _) => { + panic!( + "RegionLoad({id}, {field}) reached codegen without being resolved to MLoad. \ + Run resolve_regions() after egglog extraction." + ); + } + EvmExpr::Empty(_, _) | EvmExpr::StorageField(_, _, _) => { // Empty: unit — no value on stack. // StorageField: declarations don't emit code. @@ -1756,8 +1777,14 @@ impl<'a> LetOffsetSim<'a> { } EvmExpr::Uop(_, a) | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) => self.walk(a), + EvmExpr::RegionStore(_, _, val, state) => { + self.walk(val); + self.walk(state); + } + EvmExpr::RegionLoad(_, _, state) => self.walk(state), EvmExpr::Log(_, topics, offset, size, state) => { for t in topics { self.walk(t); diff --git a/crates/e2e/.gas-snapshot b/crates/e2e/.gas-snapshot index abf4eda..a41145c 100644 --- a/crates/e2e/.gas-snapshot +++ b/crates/e2e/.gas-snapshot @@ -141,8 +141,8 @@ test_unsafe_arith::test_sub_underflow(), 108, 108, 108, 108 test_unsafe_arith::test_unsafe_add(), 135, 129, 129, 129 test_unsafe_arith::test_unsafe_mul(), 112, 104, 104, 104 test_unsafe_arith::test_unsafe_sub(), 161, 155, 155, 155 -test_vec::test_get(), 1329, 1072, 1072, 1072 -test_vec::test_grow(), 3115, 2491, 2491, 2491 -test_vec::test_index(), 1034, 798, 798, 798 -test_vec::test_new_and_push(), 1082, 903, 903, 903 -test_vec::test_set(), 1514, 1174, 1106, 1106 +test_vec::test_get(), 593, 347, 347, 347 +test_vec::test_grow(), 1545, 1251, 1251, 1251 +test_vec::test_index(), 453, 290, 290, 290 +test_vec::test_new_and_push(), 465, 257, 257, 257 +test_vec::test_set(), 665, 364, 352, 352 diff --git a/crates/e2e/tests/main.rs b/crates/e2e/tests/main.rs index bcb9924..7369347 100644 --- a/crates/e2e/tests/main.rs +++ b/crates/e2e/tests/main.rs @@ -60,6 +60,9 @@ mod map_std_exec; #[path = "suites/vec_exec.rs"] mod vec_exec; +#[path = "suites/bench_vec.rs"] +mod bench_vec; + #[path = "suites/int_widths_exec.rs"] mod int_widths_exec; #[path = "suites/large_int_literals.rs"] diff --git a/crates/e2e/tests/suites/bench_vec.rs b/crates/e2e/tests/suites/bench_vec.rs new file mode 100644 index 0000000..8ba34f7 --- /dev/null +++ b/crates/e2e/tests/suites/bench_vec.rs @@ -0,0 +1,55 @@ +#![allow(missing_docs)] + +//! Gas benchmarking for Vec operations using the existing test_vec.edge contract. +//! Uses the test harness to measure actual execution gas at each opt level. + +use crate::helpers::*; + +const CONTRACT: &str = "examples/tests/test_vec.edge"; + +/// Detailed gas analysis of Vec test functions. +#[test] +fn bench_vec_gas_breakdown() { + let sigs = [ + "test_new_and_push()", + "test_get()", + "test_set()", + "test_grow()", + "test_index()", + ]; + + eprintln!("\n╔═══════════════════════════════════════════════════════════════╗"); + eprintln!("║ Vec Gas Analysis — Execution Gas Only ║"); + eprintln!("║ (tx base + calldata intrinsic stripped) ║"); + eprintln!("╠════════════════════╦════════╦════════╦════════╦═════════════╣"); + eprintln!("║ Function ║ O0 ║ O1 ║ O2 ║ O3 ║"); + eprintln!("╠════════════════════╬════════╬════════╬════════╬═════════════╣"); + + for sig in &sigs { + let mut gases = [0u64; 4]; + for opt in 0..=3u8 { + let bc = compile_contract_opt(CONTRACT, opt); + let mut h = EvmHandle::new(bc); + let sel = selector(sig); + let cd = calldata(sel, &[]); + let r = h.call(cd.clone()); + assert!(r.success, "{sig} reverted at O{opt}; gas={}", r.gas_used); + gases[opt as usize] = execution_gas(r.gas_used, &cd); + } + eprintln!( + "║ {:18} ║ {:>6} ║ {:>6} ║ {:>6} ║ {:>6} ║", + sig.trim_end_matches("()"), + gases[0], gases[1], gases[2], gases[3] + ); + } + eprintln!("╚════════════════════╩════════╩════════╩════════╩═════════════╝"); + eprintln!(); + + // Now show what operations each test does for context: + eprintln!("Operation breakdown:"); + eprintln!(" test_new_and_push: new(4) + 3×push + len read"); + eprintln!(" test_get: new(4) + 3×push + get(1)"); + eprintln!(" test_set: new(4) + 3×push + set(1,999) + get(1)"); + eprintln!(" test_grow: new(2) + 5×push (triggers grow) + 5×get + 4×add"); + eprintln!(" test_index: new(4) + 2×push + v[1] (Index trait)"); +} diff --git a/crates/e2e/tests/suites/helpers.rs b/crates/e2e/tests/suites/helpers.rs index afa5a62..65bd1ed 100644 --- a/crates/e2e/tests/suites/helpers.rs +++ b/crates/e2e/tests/suites/helpers.rs @@ -74,7 +74,7 @@ fn calldata_intrinsic_gas(cd: &[u8]) -> u64 { cd.iter().map(|&b| if b == 0 { 4u64 } else { 16u64 }).sum() } -fn execution_gas(gas_used: u64, cd: &[u8]) -> u64 { +pub(crate) fn execution_gas(gas_used: u64, cd: &[u8]) -> u64 { gas_used .saturating_sub(21000) .saturating_sub(calldata_intrinsic_gas(cd)) diff --git a/crates/e2e/tests/suites/vec_exec.rs b/crates/e2e/tests/suites/vec_exec.rs index 68071b5..f5399d0 100644 --- a/crates/e2e/tests/suites/vec_exec.rs +++ b/crates/e2e/tests/suites/vec_exec.rs @@ -62,6 +62,14 @@ fn test_vec_grow() { }); } +#[test] +fn test_vec_zero() { + for_all_opt_levels(CONTRACT, |evm, opt| { + let r = evm.call(calldata(selector("test_zero_array()"), &[])); + assert!(r.success, "test_zero_array() reverted at O{opt}"); + }); +} + #[test] fn test_vec_index() { for_all_opt_levels(CONTRACT, |evm, opt| { diff --git a/crates/ir/src/ast_helpers.rs b/crates/ir/src/ast_helpers.rs index 33b2b56..b508ebf 100644 --- a/crates/ir/src/ast_helpers.rs +++ b/crates/ir/src/ast_helpers.rs @@ -289,6 +289,22 @@ pub fn dyn_alloc(size: RcExpr) -> RcExpr { Rc::new(EvmExpr::DynAlloc(size)) } +/// Allocate a memory region with a unique region ID. +/// `is_dynamic`: true for runtime MSIZE-based allocation, false for static offset assignment. +pub fn alloc_region(region_id: i64, num_fields: RcExpr, is_dynamic: bool) -> RcExpr { + Rc::new(EvmExpr::AllocRegion(region_id, num_fields, is_dynamic)) +} + +/// Store a value to a specific field of a memory region. +pub fn region_store(region_id: i64, field_idx: i64, value: RcExpr, state: RcExpr) -> RcExpr { + Rc::new(EvmExpr::RegionStore(region_id, field_idx, value, state)) +} + +/// Load a value from a specific field of a memory region. +pub fn region_load(region_id: i64, field_idx: i64, state: RcExpr) -> RcExpr { + Rc::new(EvmExpr::RegionLoad(region_id, field_idx, state)) +} + // ---- Integer width helpers ---- /// Create a mask constant for the given bit width: `(1 << bit_width) - 1`. diff --git a/crates/ir/src/cleanup.rs b/crates/ir/src/cleanup.rs index 3ef91d1..7b7fdda 100644 --- a/crates/ir/src/cleanup.rs +++ b/crates/ir/src/cleanup.rs @@ -165,6 +165,23 @@ fn cleanup_expr(expr: &RcExpr) -> RcExpr { Rc::new(EvmExpr::DynAlloc(ns)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = cleanup_expr(num_fields); + if Rc::ptr_eq(&nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + + EvmExpr::RegionStore(id, field_idx, val, _state) => { + let nv = cleanup_expr(val); + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, state_sentinel())) + } + + EvmExpr::RegionLoad(id, field_idx, _state) => { + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, state_sentinel())) + } + // Leaf nodes — no children to clean EvmExpr::Arg(..) | EvmExpr::Const(..) diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 3bde523..46ef6ad 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -27,6 +27,7 @@ pub mod cleanup; pub mod costs; pub mod mem_region; pub mod optimizations; +pub mod region_forward; pub mod pretty; pub mod schedule; pub mod schema; @@ -617,6 +618,53 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE Rc::new(EvmExpr::DynAlloc(ns)) } } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = child!(num_fields); + k.tag(27); + k.i64(*id); + k.ptr(&nf); + k.u8(*is_dynamic as u8); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nf, num_fields) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + } + EvmExpr::RegionStore(id, field_idx, val, state) => { + let nv = child!(val); + let ns = child!(state); + k.tag(28); + k.i64(*id); + k.i64(*field_idx); + k.ptr(&nv); + k.ptr(&ns); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&nv, val) && Rc::ptr_eq(&ns, state) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) + } + } + EvmExpr::RegionLoad(id, field_idx, state) => { + let ns = child!(state); + k.tag(29); + k.i64(*id); + k.i64(*field_idx); + k.ptr(&ns); + if let Some(cached) = cache.get(&k) { + return Rc::clone(cached); + } + if Rc::ptr_eq(&ns, state) { + Rc::clone(expr) + } else { + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) + } + } }; cache.insert(k, Rc::clone(&result)); @@ -663,7 +711,8 @@ fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) | EvmExpr::VarStore(_, a) | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) - | EvmExpr::DynAlloc(a) => { + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => { add!(a); } EvmExpr::Bop(_, a, b) @@ -673,6 +722,13 @@ fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) add!(a); add!(b); } + EvmExpr::RegionStore(_, _, a, b) => { + add!(a); + add!(b); + } + EvmExpr::RegionLoad(_, _, a) => { + add!(a); + } EvmExpr::LetBind(_, a, b) => { add!(a); add!(b); @@ -960,6 +1016,9 @@ fn ir_stats_dag( stats.dyn_allocs += 1; "DynAlloc" } + EvmExpr::AllocRegion(..) => "AllocRegion", + EvmExpr::RegionStore(..) => "RegionStore", + EvmExpr::RegionLoad(..) => "RegionLoad", }; *stats.node_counts.entry(variant_name).or_default() += 1; @@ -983,7 +1042,8 @@ fn ir_stats_dag( | EvmExpr::VarStore(_, a) | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) - | EvmExpr::DynAlloc(a) => go!(a), + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => go!(a), EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) @@ -991,6 +1051,13 @@ fn ir_stats_dag( go!(a); go!(b); } + EvmExpr::RegionStore(_, _, a, b) => { + go!(a); + go!(b); + } + EvmExpr::RegionLoad(_, _, a) => { + go!(a); + } EvmExpr::LetBind(_, a, b) => { go!(a); go!(b); @@ -1077,6 +1144,7 @@ pub fn prologue(optimize_for: OptimizeFor) -> String { include_str!("optimizations/arithmetic.egg"), include_str!("optimizations/storage.egg"), include_str!("optimizations/memory.egg"), + include_str!("optimizations/region_memory.egg"), include_str!("optimizations/dead_code.egg"), include_str!("optimizations/range_analysis.egg"), include_str!("optimizations/u256_const_fold.egg"), @@ -1160,14 +1228,26 @@ pub fn lower_and_optimize( storage_hoist::hoist_program(&mut ir_program); tracing::debug!(" storage_hoist: {:?}", t.elapsed()); - // 4. Resolve symbolic MemRegion nodes to concrete offsets. + // 4. Forward RegionStore → RegionLoad in straight-line code. + // Walks IR in program order, forwarding known field values through + // struct field access. Enables compile-time resolution of Vec len/cap. + let t = std::time::Instant::now(); + region_forward::forward_region_stores_program(&mut ir_program, &lowering.region_var_map); + tracing::debug!(" region_forward: {:?}", t.elapsed()); + + // 5. Resolve symbolic MemRegion nodes to concrete offsets. // Runs before egglog so that Add(Const, Const) patterns from // region+field offsets get folded by egglog's constant folding. let t = std::time::Instant::now(); - mem_region::assign_program_offsets(&mut ir_program); + mem_region::assign_program_offsets(&mut ir_program, &lowering.region_var_map); tracing::debug!(" mem_region: {:?}", t.elapsed()); if optimization_level == 0 { + // Resolve RegionStore/RegionLoad → MStore/MLoad (no egglog to forward through) + let t = std::time::Instant::now(); + mem_region::resolve_regions_post_egglog(&mut ir_program, &lowering.region_var_map); + tracing::debug!(" resolve_regions: {:?}", t.elapsed()); + // b) At O0 only: forward SStore→SLoad in straight-line code (no egglog) let t = std::time::Instant::now(); storage_hoist::forward_stores_program(&mut ir_program); @@ -1385,6 +1465,12 @@ pub fn lower_and_optimize( warnings: ir_program.warnings, }; + // Post-egglog: resolve RegionStore/RegionLoad → MStore/MLoad. + // These survived into egglog for symbolic forwarding; now lower to concrete memory ops. + let t = std::time::Instant::now(); + mem_region::resolve_regions_post_egglog(&mut result, &lowering.region_var_map); + tracing::debug!(" resolve_regions: {:?}", t.elapsed()); + // Post-egglog: forward SStore→SLoad and eliminate dead stores in straight-line code. // Egglog's storage-opt rules only handle state-threaded SStore chains, not Concat-chained // SStores (which use Arg(StateT) as state). This pass handles the Concat case. diff --git a/crates/ir/src/mem_region.rs b/crates/ir/src/mem_region.rs index fa61c64..d6eac5c 100644 --- a/crates/ir/src/mem_region.rs +++ b/crates/ir/src/mem_region.rs @@ -14,7 +14,9 @@ use std::{collections::BTreeMap, rc::Rc}; -use crate::schema::{EvmBaseType, EvmConstant, EvmContext, EvmExpr, EvmType, RcExpr}; +use crate::schema::{ + EvmBaseType, EvmConstant, EvmContext, EvmExpr, EvmProgram, EvmType, RcExpr, +}; /// Scope tree node for memory region allocation. /// @@ -33,20 +35,27 @@ enum RegionScope { /// /// Returns `(rewritten_expr, memory_high_water)` where `memory_high_water` is /// the first free byte offset after all allocated regions. -pub fn assign_memory_offsets(expr: &RcExpr) -> (RcExpr, usize) { +pub fn assign_memory_offsets( + expr: &RcExpr, + region_var_map: &indexmap::IndexMap, +) -> (RcExpr, usize) { let scope = collect_region_scopes(expr); let scope = simplify_scope(scope); let mut assignments = BTreeMap::new(); let hw = assign_scoped_offsets(&scope, 0, &mut assignments); - if assignments.is_empty() { + if assignments.is_empty() && region_var_map.is_empty() { return (Rc::clone(expr), 0); } tracing::debug!(" mem_region hw={hw} ({} regions)", assignments.len()); - let rewritten = replace_regions(expr, &assignments); + let ctx = RegionResolveCtx { + assignments, + region_var_map: region_var_map.clone(), + }; + let rewritten = replace_regions(expr, &ctx); (rewritten, hw) } @@ -136,7 +145,8 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::VarStore(_, a) - | EvmExpr::DynAlloc(a) => collect_region_scopes(a), + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => collect_region_scopes(a), // Multi-child nodes EvmExpr::Log(_, topics, d, s, st) => { @@ -155,6 +165,10 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { EvmExpr::Call(_, args) => { RegionScope::Sequential(args.iter().map(collect_region_scopes).collect()) } + EvmExpr::RegionStore(_, _, val, state) => { + RegionScope::Sequential(vec![collect_region_scopes(val), collect_region_scopes(state)]) + } + EvmExpr::RegionLoad(_, _, state) => collect_region_scopes(state), EvmExpr::EnvRead(_, s) => collect_region_scopes(s), EvmExpr::EnvRead1(_, a, s) => { RegionScope::Sequential(vec![collect_region_scopes(a), collect_region_scopes(s)]) @@ -225,21 +239,24 @@ fn simplify_scope(scope: RegionScope) -> RegionScope { /// Assign memory offsets for an entire program. /// /// Updates `memory_high_water` on each contract. -pub fn assign_program_offsets(program: &mut crate::schema::EvmProgram) { +pub fn assign_program_offsets( + program: &mut crate::schema::EvmProgram, + region_var_map: &indexmap::IndexMap, +) { for contract in &mut program.contracts { - let (new_runtime, hw) = assign_memory_offsets(&contract.runtime); + let (new_runtime, hw) = assign_memory_offsets(&contract.runtime, region_var_map); contract.runtime = new_runtime; // Also process internal functions let mut max_hw = hw; for func in &mut contract.internal_functions { - let (new_func, func_hw) = assign_memory_offsets(func); + let (new_func, func_hw) = assign_memory_offsets(func, region_var_map); *func = new_func; max_hw = max_hw.max(func_hw); } // Also process constructor - let (new_ctor, ctor_hw) = assign_memory_offsets(&contract.constructor); + let (new_ctor, ctor_hw) = assign_memory_offsets(&contract.constructor, region_var_map); contract.constructor = new_ctor; max_hw = max_hw.max(ctor_hw); @@ -247,34 +264,52 @@ pub fn assign_program_offsets(program: &mut crate::schema::EvmProgram) { } } -/// Replace all `MemRegion(id, _)` with `Const(SmallInt(offset))`. -fn replace_regions(expr: &RcExpr, assignments: &BTreeMap) -> RcExpr { +/// Context for region resolution: both MemRegion offset assignments and +/// RegionStore/RegionLoad → MStore/MLoad variable mappings. +#[derive(Debug)] +pub struct RegionResolveCtx { + /// MemRegion id → concrete byte offset + pub assignments: BTreeMap, + /// Region id → LetBind variable name (for &dm struct field access) + pub region_var_map: indexmap::IndexMap, +} + +/// Replace all `MemRegion(id, _)` with `Const(SmallInt(offset))` and +/// all `RegionStore`/`RegionLoad` with `MStore`/`MLoad` using the variable base pointer. +fn replace_regions(expr: &RcExpr, ctx: &RegionResolveCtx) -> RcExpr { let mut cache = std::collections::HashMap::new(); - replace_regions_memo(expr, assignments, &mut cache) + replace_regions_memo(expr, ctx, &mut cache) } fn replace_regions_memo( expr: &RcExpr, - assignments: &BTreeMap, + ctx: &RegionResolveCtx, cache: &mut std::collections::HashMap, ) -> RcExpr { let id = Rc::as_ptr(expr) as usize; if let Some(cached) = cache.get(&id) { return Rc::clone(cached); } - let result = replace_regions_inner(expr, assignments, cache); + let result = replace_regions_inner(expr, ctx, cache); cache.insert(id, Rc::clone(&result)); result } fn replace_regions_inner( expr: &RcExpr, - assignments: &BTreeMap, + ctx: &RegionResolveCtx, cache: &mut std::collections::HashMap, ) -> RcExpr { + // Shorthand for recursive calls + macro_rules! rec { + ($e:expr) => { + replace_regions_memo($e, ctx, cache) + }; + } + match expr.as_ref() { EvmExpr::MemRegion(id, _size) => { - let offset = assignments[id]; + let offset = ctx.assignments[id]; Rc::new(EvmExpr::Const( EvmConstant::SmallInt(offset as i64), EvmType::Base(EvmBaseType::UIntT(256)), @@ -282,90 +317,90 @@ fn replace_regions_inner( )) } EvmExpr::Bop(op, a, b) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); + let na = rec!(a); + let nb = rec!(b); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Bop(*op, na, nb)) } EvmExpr::Uop(op, a) => { - let na = replace_regions_memo(a, assignments, cache); + let na = rec!(a); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Uop(*op, na)) } EvmExpr::Top(op, a, b, c) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); - let nc = replace_regions_memo(c, assignments, cache); + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } Rc::new(EvmExpr::Top(*op, na, nb, nc)) } EvmExpr::Concat(a, b) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); + let na = rec!(a); + let nb = rec!(b); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Concat(na, nb)) } EvmExpr::Get(a, idx) => { - let na = replace_regions_memo(a, assignments, cache); + let na = rec!(a); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Get(na, *idx)) } EvmExpr::If(c, i, t, e) => { - let nc = replace_regions_memo(c, assignments, cache); - let ni = replace_regions_memo(i, assignments, cache); - let nt = replace_regions_memo(t, assignments, cache); - let ne = replace_regions_memo(e, assignments, cache); + let nc = rec!(c); + let ni = rec!(i); + let nt = rec!(t); + let ne = rec!(e); if Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&ni, i) && Rc::ptr_eq(&nt, t) && Rc::ptr_eq(&ne, e) { return Rc::clone(expr); } Rc::new(EvmExpr::If(nc, ni, nt, ne)) } EvmExpr::DoWhile(inputs, body) => { - let ni = replace_regions_memo(inputs, assignments, cache); - let nb = replace_regions_memo(body, assignments, cache); + let ni = rec!(inputs); + let nb = rec!(body); if Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } Rc::new(EvmExpr::DoWhile(ni, nb)) } EvmExpr::LetBind(name, init, body) => { - let ni = replace_regions_memo(init, assignments, cache); - let nb = replace_regions_memo(body, assignments, cache); + let ni = rec!(init); + let nb = rec!(body); if Rc::ptr_eq(&ni, init) && Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } Rc::new(EvmExpr::LetBind(name.clone(), ni, nb)) } EvmExpr::VarStore(name, val) => { - let nv = replace_regions_memo(val, assignments, cache); + let nv = rec!(val); if Rc::ptr_eq(&nv, val) { return Rc::clone(expr); } Rc::new(EvmExpr::VarStore(name.clone(), nv)) } EvmExpr::Revert(a, b, c) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); - let nc = replace_regions_memo(c, assignments, cache); + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } Rc::new(EvmExpr::Revert(na, nb, nc)) } EvmExpr::ReturnOp(a, b, c) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); - let nc = replace_regions_memo(c, assignments, cache); + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } @@ -374,11 +409,11 @@ fn replace_regions_inner( EvmExpr::Log(count, topics, d, s, st) => { let nt: Vec<_> = topics .iter() - .map(|t| replace_regions_memo(t, assignments, cache)) + .map(|t| rec!(t)) .collect(); - let nd = replace_regions_memo(d, assignments, cache); - let ns = replace_regions_memo(s, assignments, cache); - let nst = replace_regions_memo(st, assignments, cache); + let nd = rec!(d); + let ns = rec!(s); + let nst = rec!(st); if nt.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ns, s) && Rc::ptr_eq(&nst, st) { @@ -387,13 +422,13 @@ fn replace_regions_inner( Rc::new(EvmExpr::Log(*count, nt, nd, ns, nst)) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { - let na = replace_regions_memo(a, assignments, cache); - let nb = replace_regions_memo(b, assignments, cache); - let nc = replace_regions_memo(c, assignments, cache); - let nd = replace_regions_memo(d, assignments, cache); - let ne = replace_regions_memo(e, assignments, cache); - let nf = replace_regions_memo(f, assignments, cache); - let ng = replace_regions_memo(g, assignments, cache); + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); + let nd = rec!(d); + let ne = rec!(e); + let nf = rec!(f); + let ng = rec!(g); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ne, e) && Rc::ptr_eq(&nf, f) && Rc::ptr_eq(&ng, g) @@ -405,7 +440,7 @@ fn replace_regions_inner( EvmExpr::Call(name, args) => { let new_args: Vec<_> = args .iter() - .map(|a| replace_regions_memo(a, assignments, cache)) + .map(|a| rec!(a)) .collect(); if new_args.iter().zip(args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { return Rc::clone(expr); @@ -413,7 +448,7 @@ fn replace_regions_inner( Rc::new(EvmExpr::Call(name.clone(), new_args)) } EvmExpr::Function(name, in_ty, out_ty, body) => { - let nb = replace_regions_memo(body, assignments, cache); + let nb = rec!(body); if Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } @@ -425,15 +460,15 @@ fn replace_regions_inner( )) } EvmExpr::EnvRead(op, s) => { - let ns = replace_regions_memo(s, assignments, cache); + let ns = rec!(s); if Rc::ptr_eq(&ns, s) { return Rc::clone(expr); } Rc::new(EvmExpr::EnvRead(*op, ns)) } EvmExpr::EnvRead1(op, a, s) => { - let na = replace_regions_memo(a, assignments, cache); - let ns = replace_regions_memo(s, assignments, cache); + let na = rec!(a); + let ns = rec!(s); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&ns, s) { return Rc::clone(expr); } @@ -442,7 +477,7 @@ fn replace_regions_inner( EvmExpr::InlineAsm(inputs, hex, num_outputs) => { let ni: Vec<_> = inputs .iter() - .map(|i| replace_regions_memo(i, assignments, cache)) + .map(|i| rec!(i)) .collect(); if ni.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { return Rc::clone(expr); @@ -450,12 +485,37 @@ fn replace_regions_inner( Rc::new(EvmExpr::InlineAsm(ni, hex.clone(), *num_outputs)) } EvmExpr::DynAlloc(size) => { - let ns = replace_regions_memo(size, assignments, cache); + let ns = rec!(size); if Rc::ptr_eq(&ns, size) { return Rc::clone(expr); } Rc::new(EvmExpr::DynAlloc(ns)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = rec!(num_fields); + if Rc::ptr_eq(&nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + // RegionStore/RegionLoad: just recurse, don't resolve here. + // These survive into egglog for symbolic forwarding and get resolved + // post-egglog by `resolve_regions_post_egglog`. + EvmExpr::RegionStore(id, field_idx, val, state) => { + let nv = rec!(val); + let ns = rec!(state); + if Rc::ptr_eq(&nv, val) && Rc::ptr_eq(&ns, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) + } + EvmExpr::RegionLoad(id, field_idx, state) => { + let ns = rec!(state); + if Rc::ptr_eq(&ns, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) + } // Leaf nodes — no MemRegion possible EvmExpr::Const(..) | EvmExpr::Arg(..) @@ -467,6 +527,293 @@ fn replace_regions_inner( } } +/// Resolve RegionStore/RegionLoad → MStore/MLoad after egglog optimization. +/// This runs post-egglog so that egglog forwarding rules can fire first. +pub fn resolve_regions_post_egglog( + program: &mut EvmProgram, + region_var_map: &indexmap::IndexMap, +) { + if region_var_map.is_empty() { + return; + } + for contract in &mut program.contracts { + contract.runtime = resolve_region_expr(&contract.runtime, region_var_map); + for func in &mut contract.internal_functions { + *func = resolve_region_expr(func, region_var_map); + } + contract.constructor = resolve_region_expr(&contract.constructor, region_var_map); + } +} + +fn resolve_region_expr( + expr: &RcExpr, + region_var_map: &indexmap::IndexMap, +) -> RcExpr { + let mut cache = std::collections::HashMap::new(); + resolve_region_memo(expr, region_var_map, &mut cache) +} + +fn resolve_region_memo( + expr: &RcExpr, + rvm: &indexmap::IndexMap, + cache: &mut std::collections::HashMap, +) -> RcExpr { + let id = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&id) { + return Rc::clone(cached); + } + let result = resolve_region_inner(expr, rvm, cache); + cache.insert(id, Rc::clone(&result)); + result +} + +fn resolve_region_inner( + expr: &RcExpr, + rvm: &indexmap::IndexMap, + cache: &mut std::collections::HashMap, +) -> RcExpr { + macro_rules! rec { + ($e:expr) => { + resolve_region_memo($e, rvm, cache) + }; + } + + fn region_offset(var_name: &str, field_idx: i64) -> RcExpr { + let base = Rc::new(EvmExpr::Var(var_name.to_string())); + if field_idx == 0 { + base + } else { + Rc::new(EvmExpr::Bop( + crate::schema::EvmBinaryOp::Add, + base, + Rc::new(EvmExpr::Const( + EvmConstant::SmallInt(field_idx * 32), + EvmType::Base(EvmBaseType::UIntT(256)), + EvmContext::InFunction("__mem__".to_owned()), + )), + )) + } + } + + match expr.as_ref() { + EvmExpr::RegionStore(id, field_idx, val, state) => { + let nv = rec!(val); + let ns = rec!(state); + if let Some(var_name) = rvm.get(id) { + let offset = region_offset(var_name, *field_idx); + Rc::new(EvmExpr::Top( + crate::schema::EvmTernaryOp::MStore, + offset, + nv, + ns, + )) + } else { + // Unknown region — shouldn't happen, but pass through + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) + } + } + EvmExpr::RegionLoad(id, field_idx, state) => { + let ns = rec!(state); + if let Some(var_name) = rvm.get(id) { + let offset = region_offset(var_name, *field_idx); + Rc::new(EvmExpr::Bop( + crate::schema::EvmBinaryOp::MLoad, + offset, + ns, + )) + } else { + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) + } + } + // For all other nodes, just recurse + EvmExpr::Bop(op, a, b) => { + let na = rec!(a); + let nb = rec!(b); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Bop(*op, na, nb)) + } + EvmExpr::Uop(op, a) => { + let na = rec!(a); + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Uop(*op, na)) + } + EvmExpr::Top(op, a, b, c) => { + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Top(*op, na, nb, nc)) + } + EvmExpr::Concat(a, b) => { + let na = rec!(a); + let nb = rec!(b); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Concat(na, nb)) + } + EvmExpr::Get(a, idx) => { + let na = rec!(a); + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Get(na, *idx)) + } + EvmExpr::If(c, i, t, e) => { + let nc = rec!(c); + let ni = rec!(i); + let nt = rec!(t); + let ne = rec!(e); + if Rc::ptr_eq(&nc, c) + && Rc::ptr_eq(&ni, i) + && Rc::ptr_eq(&nt, t) + && Rc::ptr_eq(&ne, e) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::If(nc, ni, nt, ne)) + } + EvmExpr::DoWhile(inputs, body) => { + let ni = rec!(inputs); + let nb = rec!(body); + if Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DoWhile(ni, nb)) + } + EvmExpr::LetBind(name, init, body) => { + let ni = rec!(init); + let nb = rec!(body); + if Rc::ptr_eq(&ni, init) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::LetBind(name.clone(), ni, nb)) + } + EvmExpr::VarStore(name, val) => { + let nv = rec!(val); + if Rc::ptr_eq(&nv, val) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::VarStore(name.clone(), nv)) + } + EvmExpr::Revert(a, b, c) => { + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Revert(na, nb, nc)) + } + EvmExpr::ReturnOp(a, b, c) => { + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::ReturnOp(na, nb, nc)) + } + EvmExpr::Log(count, topics, d, s, st) => { + let nt: Vec<_> = topics.iter().map(|t| rec!(t)).collect(); + let nd = rec!(d); + let ns = rec!(s); + let nst = rec!(st); + if nt.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ns, s) + && Rc::ptr_eq(&nst, st) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Log(*count, nt, nd, ns, nst)) + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + let na = rec!(a); + let nb = rec!(b); + let nc = rec!(c); + let nd = rec!(d); + let ne = rec!(e); + let nf = rec!(f); + let ng = rec!(g); + if Rc::ptr_eq(&na, a) + && Rc::ptr_eq(&nb, b) + && Rc::ptr_eq(&nc, c) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ne, e) + && Rc::ptr_eq(&nf, f) + && Rc::ptr_eq(&ng, g) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, ng)) + } + EvmExpr::Call(name, args) => { + let new_args: Vec<_> = args.iter().map(|a| rec!(a)).collect(); + if new_args + .iter() + .zip(args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Call(name.clone(), new_args)) + } + EvmExpr::Function(name, in_ty, out_ty, body) => { + let nb = rec!(body); + if Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Function(name.clone(), in_ty.clone(), out_ty.clone(), nb)) + } + EvmExpr::EnvRead(op, s) => { + let ns = rec!(s); + if Rc::ptr_eq(&ns, s) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::EnvRead(*op, ns)) + } + EvmExpr::EnvRead1(op, a, s) => { + let na = rec!(a); + let ns = rec!(s); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&ns, s) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::EnvRead1(*op, na, ns)) + } + EvmExpr::InlineAsm(inputs, hex, num_outputs) => { + let ni: Vec<_> = inputs.iter().map(|i| rec!(i)).collect(); + if ni.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::InlineAsm(ni, hex.clone(), *num_outputs)) + } + EvmExpr::DynAlloc(size) => { + let ns = rec!(size); + if Rc::ptr_eq(&ns, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(ns)) + } + // Leaves — no children to recurse + EvmExpr::AllocRegion(..) + | EvmExpr::Const(..) + | EvmExpr::Arg(..) + | EvmExpr::MemRegion(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(_) + | EvmExpr::Drop(_) + | EvmExpr::Selector(_) + | EvmExpr::StorageField(..) => Rc::clone(expr), + } +} + #[cfg(test)] mod tests { use super::*; @@ -481,7 +828,7 @@ mod tests { let state = Rc::new(EvmExpr::Arg(EvmType::Base(EvmBaseType::StateT), ctx)); let mstore = ast_helpers::mstore(Rc::clone(®ion), val, state); - let (result, hw) = assign_memory_offsets(&mstore); + let (result, hw) = assign_memory_offsets(&mstore, &indexmap::IndexMap::new()); assert_eq!(hw, 96); // 3 words * 32 bytes // The MemRegion should be replaced with Const(0) @@ -509,7 +856,7 @@ mod tests { let ms1 = ast_helpers::mstore(r1, val, state); let expr = ast_helpers::concat(ms0, ms1); - let (result, hw) = assign_memory_offsets(&expr); + let (result, hw) = assign_memory_offsets(&expr, &indexmap::IndexMap::new()); assert_eq!(hw, 160); // 2*32 + 3*32 = 160 // Verify the offsets are 0 and 64 @@ -533,7 +880,7 @@ mod tests { fn test_no_regions_passthrough() { let ctx = EvmContext::InFunction("test".to_owned()); let expr = ast_helpers::const_int(42, ctx); - let (result, hw) = assign_memory_offsets(&expr); + let (result, hw) = assign_memory_offsets(&expr, &indexmap::IndexMap::new()); assert_eq!(hw, 0); assert_eq!(*result, *expr); } @@ -555,7 +902,7 @@ mod tests { let else_br = ast_helpers::mstore(r1, val, state); let if_expr = Rc::new(EvmExpr::If(cond, inputs, then_br, else_br)); - let (result, hw) = assign_memory_offsets(&if_expr); + let (result, hw) = assign_memory_offsets(&if_expr, &indexmap::IndexMap::new()); // Branches are exclusive: hw = max(64, 96) = 96, NOT 64+96=160 assert_eq!(hw, 96); @@ -596,7 +943,7 @@ mod tests { let if_expr = Rc::new(EvmExpr::If(cond, inputs, then_br, else_br)); let expr = ast_helpers::concat(pre, if_expr); - let (_result, hw) = assign_memory_offsets(&expr); + let (_result, hw) = assign_memory_offsets(&expr, &indexmap::IndexMap::new()); // r_shared=32 bytes, then branches max(64,96)=96 → total 32+96=128 assert_eq!(hw, 128); } diff --git a/crates/ir/src/optimizations/range_analysis.egg b/crates/ir/src/optimizations/range_analysis.egg index b5eb822..4e922bb 100644 --- a/crates/ir/src/optimizations/range_analysis.egg +++ b/crates/ir/src/optimizations/range_analysis.egg @@ -234,6 +234,102 @@ ((set (lower-bound e) 0)) :ruleset range-analysis) +;; ============================================================ +;; Pointer & Environment Bounds (EVM Architecture) +;; ============================================================ +;; EVM memory is bounded to ~2^32 bytes (gas-limited), calldata/returndata +;; similarly. These bounds enable checked arithmetic elision on pointer math. +;; +;; | Region | Key width | Bound | +;; |------------------|-----------|------------| +;; | memory | 32-bit | 2^32 | +;; | calldata | 32-bit | 2^32 | +;; | returndata | 32-bit | 2^32 | +;; | internal code | 16-bit | 2^16 | +;; | selector | 32-bit | 2^32 | + +;; DynAlloc returns a memory pointer (bounded by 2^32) +(rule ((= e (DynAlloc size))) + ((set (lower-bound e) 0) + (set (upper-bound e) 4294967296) + (set (u256-lower-bound e) (u256-from-i64 0)) + (set (u256-upper-bound e) (u256-from-i64 4294967296)) + (set (max-bits e) 32)) + :ruleset range-analysis) + +;; CallDataSize: max ~2^32 +(rule ((= e (EnvRead (EnvCallDataSize) st))) + ((set (upper-bound e) 4294967296) + (set (u256-upper-bound e) (u256-from-i64 4294967296)) + (set (max-bits e) 32)) + :ruleset range-analysis) + +;; ReturnDataSize: max ~2^32 +(rule ((= e (EnvRead (EnvReturnDataSize) st))) + ((set (upper-bound e) 4294967296) + (set (u256-upper-bound e) (u256-from-i64 4294967296)) + (set (max-bits e) 32)) + :ruleset range-analysis) + +;; CodeSize: max 2^16 (EVM contract code limit is 24576, but 2^16 is safe) +(rule ((= e (EnvRead (EnvCodeSize) st))) + ((set (upper-bound e) 65536) + (set (u256-upper-bound e) (u256-from-i64 65536)) + (set (max-bits e) 16)) + :ruleset range-analysis) + +;; Selector: 4-byte function signature, fits in u32 +(rule ((= e (Selector s))) + ((set (lower-bound e) 0) + (set (upper-bound e) 4294967295) + (set (u256-lower-bound e) (u256-from-i64 0)) + (set (u256-upper-bound e) (u256-from-i64 4294967295)) + (set (max-bits e) 32)) + :ruleset range-analysis) + +;; Address: 160-bit (20 bytes) +(rule ((= e (EnvRead (EnvAddress) st))) + ((set (max-bits e) 160)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvCaller) st))) + ((set (max-bits e) 160)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvOrigin) st))) + ((set (max-bits e) 160)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvCoinbase) st))) + ((set (max-bits e) 160)) + :ruleset range-analysis) + +;; Gas: bounded by block gas limit (~2^30 in practice, use 2^64 to be safe) +(rule ((= e (EnvRead (EnvGas) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvGasLimit) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvGasPrice) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) + +;; Timestamp, block number: bounded by ~2^64 +(rule ((= e (EnvRead (EnvTimestamp) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) +(rule ((= e (EnvRead (EnvNumber) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) + +;; BaseFee: bounded by ~2^64 +(rule ((= e (EnvRead (EnvBaseFee) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) + +;; ChainId: bounded by ~2^64 +(rule ((= e (EnvRead (EnvChainId) st))) + ((set (max-bits e) 64)) + :ruleset range-analysis) + ;; ============================================================ ;; NonZero Derivation ;; ============================================================ diff --git a/crates/ir/src/optimizations/region_memory.egg b/crates/ir/src/optimizations/region_memory.egg new file mode 100644 index 0000000..ef1a3db --- /dev/null +++ b/crates/ir/src/optimizations/region_memory.egg @@ -0,0 +1,42 @@ +;; ============================================================ +;; Region-Based Memory Optimizations +;; ============================================================ +;; Symbolic forwarding rules for RegionStore/RegionLoad. +;; Different region IDs are guaranteed non-overlapping (structural). +;; Same region + different field indices are guaranteed non-overlapping. +;; +;; NOTE: These rules are currently DISABLED because the IR does not +;; properly thread state through RegionStore/RegionLoad nodes. +;; All operations use the same Arg(StateT) as their state parameter, +;; so egglog cannot distinguish between the state BEFORE and AFTER +;; a store. The forwarding rule would match a RegionLoad with ANY +;; RegionStore to the same region+field, not necessarily the most +;; recent one in program order. +;; +;; To enable these rules, we need proper state threading where each +;; RegionStore produces a unique state token consumed by subsequent +;; operations. Until then, the region nodes serve as symbolic markers +;; that pass through egglog unchanged and get resolved to MStore/MLoad +;; post-egglog. +;; +;; Planned forwarding rules (for when state threading is implemented): +;; +;; ;; Same-region, same-field: load after store → stored value +;; (rewrite (RegionLoad r f (RegionStore r f val state)) +;; val :subsume :ruleset memory-opt) +;; +;; ;; Same-region, different-field: skip store +;; (rule ((= e (RegionLoad r f1 (RegionStore r f2 val state))) +;; (!= f1 f2)) +;; ((union e (RegionLoad r f1 state))) +;; :ruleset memory-opt) +;; +;; ;; Cross-region: skip store +;; (rule ((= e (RegionLoad r1 f1 (RegionStore r2 f2 val state))) +;; (!= r1 r2)) +;; ((union e (RegionLoad r1 f1 state))) +;; :ruleset memory-opt) +;; +;; ;; Dead store elimination +;; (rewrite (RegionStore r f val1 (RegionStore r f val2 state)) +;; (RegionStore r f val1 state) :subsume :ruleset memory-opt) diff --git a/crates/ir/src/pretty.rs b/crates/ir/src/pretty.rs index 0133d2b..db08251 100644 --- a/crates/ir/src/pretty.rs +++ b/crates/ir/src/pretty.rs @@ -280,7 +280,10 @@ fn inline_width(expr: &RcExpr) -> Option { | EvmExpr::Call(..) | EvmExpr::VarStore(..) | EvmExpr::InlineAsm(..) - | EvmExpr::DynAlloc(_) => None, // never inline — has sub-expression + | EvmExpr::DynAlloc(_) + | EvmExpr::AllocRegion(..) + | EvmExpr::RegionStore(..) + | EvmExpr::RegionLoad(..) => None, // never inline — has sub-expression EvmExpr::MemRegion(id, sz) => Some(format!("region({id}, {sz})").len()), } } @@ -654,6 +657,32 @@ fn pp(expr: &RcExpr, depth: usize, buf: &mut String) { } buf.push(')'); } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + indent(depth, buf); + buf.push_str(&format!("@alloc_region({id}, ")); + if fits_inline(num_fields, budget(depth + 1)) { + pp_inline(num_fields, buf); + } else { + buf.push('\n'); + pp(num_fields, depth + 1, buf); + } + buf.push_str(&format!(", {is_dynamic})")); + } + EvmExpr::RegionStore(id, field_idx, val, _state) => { + indent(depth, buf); + buf.push_str(&format!("region_store({id}, {field_idx}, ")); + if fits_inline(val, budget(depth + 1)) { + pp_inline(val, buf); + } else { + buf.push('\n'); + pp(val, depth + 1, buf); + } + buf.push_str(", state)"); + } + EvmExpr::RegionLoad(id, field_idx, _state) => { + indent(depth, buf); + buf.push_str(&format!("region_load({id}, {field_idx}, state)")); + } } } @@ -783,6 +812,19 @@ fn pp_oneline(expr: &RcExpr, buf: &mut String) { pp_oneline(size, buf); buf.push(')'); } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + buf.push_str(&format!("@alloc_region({id}, ")); + pp_oneline(num_fields, buf); + buf.push_str(&format!(", {is_dynamic})")); + } + EvmExpr::RegionStore(id, field_idx, val, _) => { + buf.push_str(&format!("region_store({id}, {field_idx}, ")); + pp_oneline(val, buf); + buf.push_str(", state)"); + } + EvmExpr::RegionLoad(id, field_idx, _) => { + buf.push_str(&format!("region_load({id}, {field_idx}, state)")); + } } } diff --git a/crates/ir/src/region_forward.rs b/crates/ir/src/region_forward.rs new file mode 100644 index 0000000..532c153 --- /dev/null +++ b/crates/ir/src/region_forward.rs @@ -0,0 +1,595 @@ +//! Region store forwarding pass. +//! +//! Walks the IR in program order (Concat chains) and forwards known +//! RegionStore values to subsequent RegionLoad reads. This enables +//! compile-time resolution of struct field access patterns like Vec's +//! len/capacity fields. +//! +//! Runs after lowering, before egglog. Forwarded constants enable +//! egglog's constant folding and dead-branch elimination. + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::schema::{EvmBinaryOp, EvmConstant, EvmExpr, EvmProgram, EvmUnaryOp, RcExpr}; + +/// Forward region stores across the entire program. +pub fn forward_region_stores_program( + program: &mut EvmProgram, + region_var_map: &indexmap::IndexMap, +) { + if region_var_map.is_empty() { + return; + } + + tracing::debug!("region_forward: region_var_map = {:?}", region_var_map); + + // Reverse map: variable name → set of region_ids. + // Multiple region_ids can map to the same variable name (e.g., different + // test functions each declare `let v = Vec::new()`). + let mut reverse_map: HashMap> = HashMap::new(); + for (&rid, name) in region_var_map { + reverse_map.entry(name.clone()).or_default().push(rid); + } + tracing::debug!("region_forward: reverse_map = {:?}", reverse_map); + + for contract in &mut program.contracts { + let mut state = ForwardState { + known: HashMap::new(), + reverse_map: &reverse_map, + }; + contract.runtime = forward_expr(&contract.runtime, &mut state); + + for func in &mut contract.internal_functions { + let mut state = ForwardState { + known: HashMap::new(), + reverse_map: &reverse_map, + }; + *func = forward_expr(func, &mut state); + } + } +} + +struct ForwardState<'a> { + /// Known values for (region_id, field_idx). + known: HashMap<(i64, i64), RcExpr>, + /// Variable name → all region_ids for that variable name. + reverse_map: &'a HashMap>, +} + +impl ForwardState<'_> { + /// Clear all known values for a specific region. + fn clear_region(&mut self, rid: i64) { + self.known.retain(|&(r, _), _| r != rid); + } + + /// Clear all known values. + fn clear_all(&mut self) { + self.known.clear(); + } +} + +fn forward_expr(expr: &RcExpr, state: &mut ForwardState<'_>) -> RcExpr { + match expr.as_ref() { + // Concat: process left (side effects) then right (more effects or result). + // This is the key ordering construct in the IR. + EvmExpr::Concat(a, b) => { + let na = forward_expr(a, state); + let nb = forward_expr(b, state); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Concat(na, nb)) + } + + // LetBind: process init first (establishes values), then body. + // If this LetBind is for a region variable, extract initial field values. + EvmExpr::LetBind(name, init, body) => { + let ni = forward_expr(init, state); + + // Check if this is a region variable by looking at all candidate region_ids + // for this variable name and finding the one used in the body. + if let Some(rids) = state.reverse_map.get(name.as_str()) { + // Find which region_id is actually used in this LetBind's body + let rid = if rids.len() == 1 { + Some(rids[0]) + } else { + // Multiple region_ids map to this name — find which one + // appears in the body of this LetBind + find_region_id_in_expr(body, rids) + }; + + if let Some(rid) = rid { + // Try to extract initial field values from the init expression. + if let Some(inner_var) = find_return_var(&ni) { + let field_values = extract_init_field_values(&ni, &inner_var); + for (field_idx, val) in field_values { + tracing::trace!( + "region_forward: init field ({}, {}) = {:?}", + rid, + field_idx, + val + ); + state.known.insert((rid, field_idx), val); + } + } + } + } + + let nb = forward_expr(body, state); + if Rc::ptr_eq(&ni, init) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::LetBind(name.clone(), ni, nb)) + } + + // RegionLoad: forward if known value exists. + // NOTE: Do NOT recurse into state parameter. Codegen ignores state params, + // and recursing would traverse the entire state chain (all prior operations), + // causing stores/loads to be re-processed with wrong forwarding context. + EvmExpr::RegionLoad(rid, fid, _st) => { + if let Some(val) = state.known.get(&(*rid, *fid)) { + tracing::trace!("region_forward: load ({}, {}) → forwarded", rid, fid); + return Rc::clone(val); + } + // Not forwarded — keep as-is (don't recurse into state) + Rc::clone(expr) + } + + // RegionStore: process value (forwarding inner loads), record, keep the store. + // NOTE: Do NOT recurse into state parameter — same reason as RegionLoad. + EvmExpr::RegionStore(rid, fid, val, st) => { + let nv = forward_expr(val, state); + // Record the (already-forwarded) value + state.known.insert((*rid, *fid), Rc::clone(&nv)); + tracing::trace!("region_forward: store ({}, {}) = {:?}", rid, fid, nv); + if Rc::ptr_eq(&nv, val) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*rid, *fid, nv, Rc::clone(st))) + } + + // VarStore: if storing to a region variable (pointer reassignment, e.g. growth), + // clear all known values for that region. + EvmExpr::VarStore(name, val) => { + let nv = forward_expr(val, state); + if let Some(rids) = state.reverse_map.get(name.as_str()) { + for &rid in rids { + if state.known.keys().any(|&(r, _)| r == rid) { + tracing::trace!("region_forward: VarStore to region var '{}' → clear region {}", name, rid); + state.clear_region(rid); + } + } + } + if Rc::ptr_eq(&nv, val) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::VarStore(name.clone(), nv)) + } + + // If: process cond, then try compile-time branch evaluation. + // If branch is known dead, skip it and preserve known values. + EvmExpr::If(cond, inputs, then_br, else_br) => { + let nc = forward_expr(cond, state); + let ni = forward_expr(inputs, state); + + // Try compile-time evaluation of the condition + if let Some(cond_val) = try_eval_const(&nc) { + tracing::trace!("region_forward: If condition evaluated to {}", cond_val); + if cond_val { + // Condition is true → only then-branch executes + let nt = forward_expr(then_br, state); + // Emit the If with the original condition (egglog will fold it) + return Rc::new(EvmExpr::If(nc, ni, nt, Rc::clone(else_br))); + } else { + // Condition is false → only else-branch executes + let ne = forward_expr(else_br, state); + return Rc::new(EvmExpr::If(nc, ni, Rc::clone(then_br), ne)); + } + } + + // Can't evaluate → conservative: process both, clear modified regions + tracing::trace!("region_forward: If condition could NOT be evaluated: {:?}", nc); + let saved = state.known.clone(); + let nt = forward_expr(then_br, state); + let then_known = state.known.clone(); + state.known = saved; + let ne = forward_expr(else_br, state); + // After if: only keep values that are identical in both branches + let mut merged = HashMap::new(); + for (key, then_val) in &then_known { + if let Some(else_val) = state.known.get(key) { + if Rc::ptr_eq(then_val, else_val) { + merged.insert(*key, Rc::clone(then_val)); + } + } + } + state.known = merged; + + if Rc::ptr_eq(&nc, cond) + && Rc::ptr_eq(&ni, inputs) + && Rc::ptr_eq(&nt, then_br) + && Rc::ptr_eq(&ne, else_br) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::If(nc, ni, nt, ne)) + } + + // DoWhile: clear all known values (can't reason about loops). + EvmExpr::DoWhile(inputs, body) => { + state.clear_all(); + let ni = forward_expr(inputs, state); + let nb = forward_expr(body, state); + state.clear_all(); + if Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DoWhile(ni, nb)) + } + + // --- Recurse into all other node types --- + // For state-carrying Bop ops (MLoad, CalldataLoad, SLoad, TLoad), + // second arg is state — skip it. For others, recurse into both. + EvmExpr::Bop(op, a, b) => { + let is_state_carrying = matches!( + op, + EvmBinaryOp::MLoad + | EvmBinaryOp::CalldataLoad + | EvmBinaryOp::SLoad + | EvmBinaryOp::TLoad + ); + let na = forward_expr(a, state); + if is_state_carrying { + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + return Rc::new(EvmExpr::Bop(*op, na, Rc::clone(b))); + } + let nb = forward_expr(b, state); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Bop(*op, na, nb)) + } + EvmExpr::Uop(op, a) => { + let na = forward_expr(a, state); + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Uop(*op, na)) + } + // Top: third arg is state parameter — skip recursion into it. + EvmExpr::Top(op, a, b, _c) => { + let na = forward_expr(a, state); + let nb = forward_expr(b, state); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Top(*op, na, nb, Rc::clone(_c))) + } + EvmExpr::Get(a, idx) => { + let na = forward_expr(a, state); + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Get(na, *idx)) + } + // Revert/ReturnOp: third arg is state — skip it. + EvmExpr::Revert(a, b, _c) => { + let na = forward_expr(a, state); + let nb = forward_expr(b, state); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Revert(na, nb, Rc::clone(_c))) + } + EvmExpr::ReturnOp(a, b, _c) => { + let na = forward_expr(a, state); + let nb = forward_expr(b, state); + if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::ReturnOp(na, nb, Rc::clone(_c))) + } + // Log: last arg is state — skip it. + EvmExpr::Log(count, topics, d, s, st) => { + let nt: Vec<_> = topics.iter().map(|t| forward_expr(t, state)).collect(); + let nd = forward_expr(d, state); + let ns = forward_expr(s, state); + if nt.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ns, s) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Log(*count, nt, nd, ns, Rc::clone(st))) + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + // ExtCall: last arg is state — skip it. Clear all known values. + let na = forward_expr(a, state); + let nb = forward_expr(b, state); + let nc = forward_expr(c, state); + let nd = forward_expr(d, state); + let ne = forward_expr(e, state); + let nf = forward_expr(f, state); + state.clear_all(); + if Rc::ptr_eq(&na, a) + && Rc::ptr_eq(&nb, b) + && Rc::ptr_eq(&nc, c) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ne, e) + && Rc::ptr_eq(&nf, f) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, Rc::clone(g))) + } + EvmExpr::Call(name, args) => { + let new_args: Vec<_> = args.iter().map(|a| forward_expr(a, state)).collect(); + // Call could modify memory — clear known values + state.clear_all(); + if new_args + .iter() + .zip(args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Call(name.clone(), new_args)) + } + EvmExpr::Function(name, in_ty, out_ty, body) => { + let nb = forward_expr(body, state); + if Rc::ptr_eq(&nb, body) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::Function(name.clone(), in_ty.clone(), out_ty.clone(), nb)) + } + // EnvRead: state parameter — skip it. + EvmExpr::EnvRead(_op, _s) => Rc::clone(expr), + EvmExpr::EnvRead1(op, a, _s) => { + let na = forward_expr(a, state); + if Rc::ptr_eq(&na, a) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::EnvRead1(*op, na, Rc::clone(_s))) + } + EvmExpr::InlineAsm(inputs, hex, num_outputs) => { + let ni: Vec<_> = inputs.iter().map(|i| forward_expr(i, state)).collect(); + state.clear_all(); // inline asm could do anything + if ni.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::InlineAsm(ni, hex.clone(), *num_outputs)) + } + EvmExpr::DynAlloc(size) => { + let ns = forward_expr(size, state); + if Rc::ptr_eq(&ns, size) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::DynAlloc(ns)) + } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = forward_expr(num_fields, state); + if Rc::ptr_eq(&nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + + // Leaves — no children + EvmExpr::Const(..) + | EvmExpr::Arg(..) + | EvmExpr::MemRegion(..) + | EvmExpr::Empty(..) + | EvmExpr::Var(_) + | EvmExpr::Drop(_) + | EvmExpr::Selector(_) + | EvmExpr::StorageField(..) => Rc::clone(expr), + } +} + +/// Find which region_id from the candidate list appears in the expression. +/// Used to disambiguate when multiple region_ids map to the same variable name. +fn find_region_id_in_expr(expr: &RcExpr, candidates: &[i64]) -> Option { + match expr.as_ref() { + EvmExpr::RegionLoad(rid, _, _) | EvmExpr::RegionStore(rid, _, _, _) => { + if candidates.contains(rid) { + return Some(*rid); + } + // Recurse into children + match expr.as_ref() { + EvmExpr::RegionLoad(_, _, st) => find_region_id_in_expr(st, candidates), + EvmExpr::RegionStore(_, _, val, st) => find_region_id_in_expr(val, candidates) + .or_else(|| find_region_id_in_expr(st, candidates)), + _ => None, + } + } + EvmExpr::Concat(a, b) => find_region_id_in_expr(a, candidates) + .or_else(|| find_region_id_in_expr(b, candidates)), + EvmExpr::LetBind(_, init, body) => find_region_id_in_expr(init, candidates) + .or_else(|| find_region_id_in_expr(body, candidates)), + EvmExpr::If(cond, _, then_br, else_br) => find_region_id_in_expr(cond, candidates) + .or_else(|| find_region_id_in_expr(then_br, candidates)) + .or_else(|| find_region_id_in_expr(else_br, candidates)), + EvmExpr::Bop(_, a, b) => find_region_id_in_expr(a, candidates) + .or_else(|| find_region_id_in_expr(b, candidates)), + EvmExpr::Uop(_, a) => find_region_id_in_expr(a, candidates), + EvmExpr::Top(_, a, b, c) => find_region_id_in_expr(a, candidates) + .or_else(|| find_region_id_in_expr(b, candidates)) + .or_else(|| find_region_id_in_expr(c, candidates)), + _ => None, + } +} + +/// Find the "return variable" of an expression — the variable whose value +/// is the result of evaluating the expression. Traces through Concat chains +/// (which return b) and LetBind (which returns body). +fn find_return_var(expr: &RcExpr) -> Option { + match expr.as_ref() { + EvmExpr::Var(name) => Some(name.clone()), + EvmExpr::Concat(_, b) => find_return_var(b), + EvmExpr::LetBind(_, _, body) => find_return_var(body), + _ => None, + } +} + +/// Extract initial field values from an init expression. +/// +/// Scans for MStore patterns that write to fields of the given inner variable: +/// - `MStore(Var(inner), val, _)` → field 0 +/// - `MStore(Add(Var(inner), Const(32)), val, _)` → field 1 +/// - `MStore(Add(Var(inner), Const(64)), val, _)` → field 2 +/// - etc. +fn extract_init_field_values(expr: &RcExpr, inner_var: &str) -> HashMap { + let mut result = HashMap::new(); + collect_mstore_fields(expr, inner_var, &mut result); + result +} + +fn collect_mstore_fields(expr: &RcExpr, inner_var: &str, out: &mut HashMap) { + match expr.as_ref() { + EvmExpr::Top(crate::schema::EvmTernaryOp::MStore, offset, val, _state) => { + // Check: MStore(Var(inner), val, _) → field 0 + if let EvmExpr::Var(name) = offset.as_ref() { + if name == inner_var { + out.insert(0, Rc::clone(val)); + return; + } + } + // Check: MStore(Add(Var(inner), Const(N)), val, _) → field N/32 + if let EvmExpr::Bop(EvmBinaryOp::Add, a, b) = offset.as_ref() { + let (base, offset_val) = if matches!(a.as_ref(), EvmExpr::Var(_)) { + (a, b) + } else if matches!(b.as_ref(), EvmExpr::Var(_)) { + (b, a) + } else { + return; + }; + if let EvmExpr::Var(name) = base.as_ref() { + if name == inner_var { + if let Some(off) = const_value(offset_val) { + if off >= 0 && off % 32 == 0 { + let field_idx = off / 32; + out.insert(field_idx, Rc::clone(val)); + } + } + } + } + } + // Check: MStore(CheckedAdd(Var(inner), Const(N)), val, _) → field N/32 + if let EvmExpr::Bop(EvmBinaryOp::CheckedAdd, a, b) = offset.as_ref() { + let (base, offset_val) = if matches!(a.as_ref(), EvmExpr::Var(_)) { + (a, b) + } else if matches!(b.as_ref(), EvmExpr::Var(_)) { + (b, a) + } else { + return; + }; + if let EvmExpr::Var(name) = base.as_ref() { + if name == inner_var { + if let Some(off) = const_value(offset_val) { + if off >= 0 && off % 32 == 0 { + let field_idx = off / 32; + out.insert(field_idx, Rc::clone(val)); + } + } + } + } + } + } + // Recurse into Concat, LetBind to find nested MStores + EvmExpr::Concat(a, b) => { + collect_mstore_fields(a, inner_var, out); + collect_mstore_fields(b, inner_var, out); + } + EvmExpr::LetBind(_, init, body) => { + collect_mstore_fields(init, inner_var, out); + collect_mstore_fields(body, inner_var, out); + } + _ => {} + } +} + +/// Extract a constant integer value from an expression. +fn const_value(expr: &RcExpr) -> Option { + match expr.as_ref() { + EvmExpr::Const(EvmConstant::SmallInt(n), _, _) => Some(*n), + _ => None, + } +} + +/// Try to evaluate an expression as a boolean constant (true/false). +/// Returns `Some(true)` if the expression is known to be non-zero, +/// `Some(false)` if known to be zero, `None` if unknown. +fn try_eval_const(expr: &RcExpr) -> Option { + match expr.as_ref() { + EvmExpr::Const(EvmConstant::SmallInt(n), _, _) => Some(*n != 0), + EvmExpr::Const(EvmConstant::LargeInt(b), _, _) => { + // LargeInt is stored as a hex string + Some(b != "0" && b != "0x0" && !b.chars().all(|c| c == '0' || c == 'x')) + } + EvmExpr::Uop(EvmUnaryOp::IsZero, a) => { + try_eval_const(a).map(|v| !v) + } + EvmExpr::Bop(EvmBinaryOp::Lt, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + Some(av < bv) + } + EvmExpr::Bop(EvmBinaryOp::Gt, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + Some(av > bv) + } + EvmExpr::Bop(EvmBinaryOp::Eq, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + Some(av == bv) + } + _ => None, + } +} + +/// Try to evaluate an expression as a U256 value. +fn try_eval_u256(expr: &RcExpr) -> Option { + match expr.as_ref() { + EvmExpr::Const(EvmConstant::SmallInt(n), _, _) => { + if *n >= 0 { + Some(*n as u64) + } else { + None + } + } + EvmExpr::Bop(EvmBinaryOp::Add | EvmBinaryOp::CheckedAdd, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + av.checked_add(bv) + } + EvmExpr::Bop(EvmBinaryOp::Sub | EvmBinaryOp::CheckedSub, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + av.checked_sub(bv) + } + EvmExpr::Bop(EvmBinaryOp::Mul | EvmBinaryOp::CheckedMul, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + av.checked_mul(bv) + } + EvmExpr::Uop(EvmUnaryOp::IsZero, a) => { + let av = try_eval_u256(a)?; + Some(if av == 0 { 1 } else { 0 }) + } + EvmExpr::Bop(EvmBinaryOp::Lt, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + Some(if av < bv { 1 } else { 0 }) + } + EvmExpr::Bop(EvmBinaryOp::Gt, a, b) => { + let av = try_eval_u256(a)?; + let bv = try_eval_u256(b)?; + Some(if av > bv { 1 } else { 0 }) + } + _ => None, + } +} diff --git a/crates/ir/src/schema.egg b/crates/ir/src/schema.egg index 3cdd6bb..a6f0e1e 100644 --- a/crates/ir/src/schema.egg +++ b/crates/ir/src/schema.egg @@ -254,6 +254,18 @@ ;; Uses MSIZE at runtime. NOT pure — memory expansion is observable. (constructor DynAlloc (EvmExpr) EvmExpr) +;; Region-based memory allocation: (AllocRegion region_id num_fields is_dynamic) -> base address +;; Symbolic allocation resolved post-egglog. Different region IDs guaranteed non-overlapping. +(constructor AllocRegion (i64 EvmExpr bool) EvmExpr) + +;; Store to a region field: (RegionStore region_id field_index value state) -> state +;; Enables symbolic forwarding — same region+field stores/loads can be forwarded. +(constructor RegionStore (i64 i64 EvmExpr EvmExpr) EvmExpr) + +;; Load from a region field: (RegionLoad region_id field_index state) -> value +;; Symmetric to RegionStore. +(constructor RegionLoad (i64 i64 EvmExpr) EvmExpr) + ;; ============================================================ ;; Rulesets ;; ============================================================ diff --git a/crates/ir/src/schema.rs b/crates/ir/src/schema.rs index ebbcd6d..14296a0 100644 --- a/crates/ir/src/schema.rs +++ b/crates/ir/src/schema.rs @@ -355,6 +355,23 @@ pub enum EvmExpr { /// Uses MSIZE to find the current memory high-water mark and expands memory. /// NOT pure — memory expansion is an observable side effect. DynAlloc(RcExpr), + + /// Allocate a memory region: (`region_id`, `num_fields`, `is_dynamic`) → base address. + /// `region_id` is a compile-time unique identifier for this allocation site. + /// `num_fields` is the number of word-sized fields (may be a constant or expression). + /// `is_dynamic`: true → runtime MSIZE-based allocation, false → static offset assigned later. + AllocRegion(i64, RcExpr, bool), + + /// Store to a region field: (`region_id`, `field_index`, value, state) → state. + /// `field_index` is a compile-time constant (0, 1, 2, ...). + /// Different region IDs are guaranteed non-overlapping; same region + different field is + /// guaranteed non-overlapping. Enables symbolic forwarding in egglog. + RegionStore(i64, i64, RcExpr, RcExpr), + + /// Load from a region field: (`region_id`, `field_index`, state) → value. + /// Symmetric to `RegionStore`. Egglog can forward through intervening stores to + /// different regions or different fields of the same region. + RegionLoad(i64, i64, RcExpr), } // ============================================================ diff --git a/crates/ir/src/sexp.rs b/crates/ir/src/sexp.rs index 23e4bd0..71a5f02 100644 --- a/crates/ir/src/sexp.rs +++ b/crates/ir/src/sexp.rs @@ -161,6 +161,19 @@ pub fn expr_to_sexp(expr: &EvmExpr) -> String { } EvmExpr::MemRegion(id, size) => format!("(MemRegion {id} {size})"), EvmExpr::DynAlloc(size) => format!("(DynAlloc {})", expr_to_sexp(size)), + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + format!("(AllocRegion {id} {} {is_dynamic})", expr_to_sexp(num_fields)) + } + EvmExpr::RegionStore(id, field, val, state) => { + format!( + "(RegionStore {id} {field} {} {})", + expr_to_sexp(val), + expr_to_sexp(state) + ) + } + EvmExpr::RegionLoad(id, field, state) => { + format!("(RegionLoad {id} {field} {})", expr_to_sexp(state)) + } } } @@ -439,6 +452,16 @@ fn count_refs_dag(expr: &RcExpr, counts: &mut HashMap, visited: &m visit!(a); } } + EvmExpr::AllocRegion(_, num_fields, _) => { + visit!(num_fields); + } + EvmExpr::RegionStore(_, _, val, state) => { + visit!(val); + visit!(state); + } + EvmExpr::RegionLoad(_, _, state) => { + visit!(state); + } } } @@ -618,6 +641,25 @@ fn dag_sexp_node(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { } EvmExpr::MemRegion(id, size) => format!("(MemRegion {id} {size})"), EvmExpr::DynAlloc(size) => format!("(DynAlloc {})", dag_sexp_rec(size, ctx)), + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + format!( + "(AllocRegion {id} {} {is_dynamic})", + dag_sexp_rec(num_fields, ctx) + ) + } + EvmExpr::RegionStore(id, field, val, state) => { + format!( + "(RegionStore {id} {field} {} {})", + dag_sexp_rec(val, ctx), + dag_sexp_rec(state, ctx) + ) + } + EvmExpr::RegionLoad(id, field, state) => { + format!( + "(RegionLoad {id} {field} {})", + dag_sexp_rec(state, ctx) + ) + } } } @@ -885,6 +927,25 @@ fn sexp_to_evm_expr(sexp: &Sexp) -> Result { let size = sexp_to_evm_expr(&items[1])?; Ok(Rc::new(EvmExpr::DynAlloc(size))) } + "AllocRegion" => { + let id = atom_i64(&items[1])?; + let num_fields = sexp_to_evm_expr(&items[2])?; + let is_dynamic = atom_bool(&items[3])?; + Ok(Rc::new(EvmExpr::AllocRegion(id, num_fields, is_dynamic))) + } + "RegionStore" => { + let id = atom_i64(&items[1])?; + let field = atom_i64(&items[2])?; + let val = sexp_to_evm_expr(&items[3])?; + let state = sexp_to_evm_expr(&items[4])?; + Ok(Rc::new(EvmExpr::RegionStore(id, field, val, state))) + } + "RegionLoad" => { + let id = atom_i64(&items[1])?; + let field = atom_i64(&items[2])?; + let state = sexp_to_evm_expr(&items[3])?; + Ok(Rc::new(EvmExpr::RegionLoad(id, field, state))) + } other => Err(IrError::Extraction(format!( "unknown expression constructor: {other}" ))), @@ -1349,6 +1410,25 @@ fn termdag_convert( let size = termdag_convert(dag, args[0], cache)?; Ok(Rc::new(EvmExpr::DynAlloc(size))) } + "AllocRegion" => { + let id = td_i64(dag, args[0])?; + let num_fields = termdag_convert(dag, args[1], cache)?; + let is_dynamic = td_bool(dag, args[2])?; + Ok(Rc::new(EvmExpr::AllocRegion(id, num_fields, is_dynamic))) + } + "RegionStore" => { + let id = td_i64(dag, args[0])?; + let field = td_i64(dag, args[1])?; + let val = termdag_convert(dag, args[2], cache)?; + let state = termdag_convert(dag, args[3], cache)?; + Ok(Rc::new(EvmExpr::RegionStore(id, field, val, state))) + } + "RegionLoad" => { + let id = td_i64(dag, args[0])?; + let field = td_i64(dag, args[1])?; + let state = termdag_convert(dag, args[2], cache)?; + Ok(Rc::new(EvmExpr::RegionLoad(id, field, state))) + } other => Err(IrError::Extraction(format!( "termdag: unknown constructor: {other}" ))), diff --git a/crates/ir/src/storage_hoist.rs b/crates/ir/src/storage_hoist.rs index 72a8fef..c0fdcfd 100644 --- a/crates/ir/src/storage_hoist.rs +++ b/crates/ir/src/storage_hoist.rs @@ -316,6 +316,19 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE let ns = replace_sloads_inline(size, known); Rc::new(EvmExpr::DynAlloc(ns)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = replace_sloads_inline(num_fields, known); + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field_idx, val, state) => { + let nv = replace_sloads_inline(val, known); + let ns = replace_sloads_inline(state, known); + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) + } + EvmExpr::RegionLoad(id, field_idx, state) => { + let ns = replace_sloads_inline(state, known); + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) + } } } @@ -1065,6 +1078,19 @@ fn replace_storage(expr: &RcExpr, key: &SlotKey, var_name: &str, replace_stores: let ns = replace_storage(size, key, var_name, replace_stores); Rc::new(EvmExpr::DynAlloc(ns)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let nf = replace_storage(num_fields, key, var_name, replace_stores); + Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field_idx, val, state) => { + let nv = replace_storage(val, key, var_name, replace_stores); + let ns = replace_storage(state, key, var_name, replace_stores); + Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) + } + EvmExpr::RegionLoad(id, field_idx, state) => { + let ns = replace_storage(state, key, var_name, replace_stores); + Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) + } // Leaf nodes — no children EvmExpr::Const(..) | EvmExpr::Arg(..) diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index 58261cf..335468c 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -447,27 +447,32 @@ impl AstToEgglog { )); } - // Compiler-provided trait methods for primitive types + // Compiler-provided trait methods for primitive types. + // Pure arithmetic methods (unsafe_add, unsafe_sub, unsafe_mul) are also allowed + // on non-primitive types since all EVM values are u256 at runtime — this enables + // unchecked pointer arithmetic on struct pointers like Vec. { let receiver_type = self.infer_receiver_type(&args[0]); let is_primitive = receiver_type .as_ref() .is_none_or(|t| Self::is_primitive_type(t)); - if is_primitive { - if let Some(op) = self.compiler_provided_method(method_name) { - if args.len() != 2 { - return Err(IrError::Diagnostic( - edge_diagnostics::Diagnostic::error(format!( - "`{trait_name}::{method_name}` expects exactly 2 arguments", - )) - .with_label(span.clone(), "expected 2 arguments"), - )); - } - let lhs = self.lower_expr(&args[0])?; - let rhs = self.lower_expr(&args[1])?; - return Ok(ast_helpers::bop(op, lhs, rhs)); + + // Check pure arithmetic methods first — these work on any type + if let Some(op) = self.compiler_provided_method(method_name) { + if args.len() != 2 { + return Err(IrError::Diagnostic( + edge_diagnostics::Diagnostic::error(format!( + "`{trait_name}::{method_name}` expects exactly 2 arguments", + )) + .with_label(span.clone(), "expected 2 arguments"), + )); } + let lhs = self.lower_expr(&args[0])?; + let rhs = self.lower_expr(&args[1])?; + return Ok(ast_helpers::bop(op, lhs, rhs)); + } + if is_primitive { // Compiler-provided stateful methods (sload, sstore, derive_slot) // For qualified calls: Sload::sload(slot) has no receiver (first arg is slot) // Sstore::sstore(value, slot) has receiver as first arg @@ -698,6 +703,20 @@ impl AstToEgglog { } None } + // FieldAccess: obj.field — return the field's type from the struct definition + edge_ast::Expr::FieldAccess(obj, field, _) => { + let base_type = self.infer_receiver_type(obj); + if let Some(ref bt) = base_type { + if let Some(struct_info) = self.struct_types.get(bt) { + for (fname, fty) in &struct_info.fields { + if *fname == field.name { + return Self::evm_type_to_name(fty); + } + } + } + } + None + } _ => None, } } @@ -1304,6 +1323,7 @@ impl AstToEgglog { composite_base: alias_binding.composite_base.clone(), composite_type_args: alias_binding.composite_type_args.clone(), is_dynamic_memory: true, + region_id: alias_binding.region_id, }; self.scopes .last_mut() @@ -1330,6 +1350,7 @@ impl AstToEgglog { composite_base, composite_type_args, is_dynamic_memory: is_dm_param, + region_id: None, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/composite.rs b/crates/ir/src/to_egglog/composite.rs index 7e1bf0f..a266a08 100644 --- a/crates/ir/src/to_egglog/composite.rs +++ b/crates/ir/src/to_egglog/composite.rs @@ -407,6 +407,14 @@ impl AstToEgglog { } } // Unpacked struct field read + // Use RegionLoad if the binding has a region ID (symbolic forwarding) + if let Some(rid) = self.lookup_region_id(&ident.name) { + return Ok(ast_helpers::region_load( + rid, + field_idx as i64, + Rc::clone(&self.current_state), + )); + } let offset = ast_helpers::add( base_expr, ast_helpers::const_int( @@ -505,6 +513,18 @@ impl AstToEgglog { None } + /// Look up a variable's region ID for symbolic field access. + pub(crate) fn lookup_region_id(&self, var_name: &str) -> Option { + for scope in self.scopes.iter().rev() { + if let Some(binding) = scope.bindings.get(var_name) { + if binding.region_id.is_some() { + return binding.region_id; + } + } + } + None + } + /// Check if a variable is an array parameter with dynamic base address. pub(crate) fn lookup_array_param_binding(&self, var_name: &str) -> Option { for scope in self.scopes.iter().rev() { diff --git a/crates/ir/src/to_egglog/control_flow.rs b/crates/ir/src/to_egglog/control_flow.rs index ba8c883..1c9711c 100644 --- a/crates/ir/src/to_egglog/control_flow.rs +++ b/crates/ir/src/to_egglog/control_flow.rs @@ -68,6 +68,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }, ); } @@ -180,6 +181,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/expr.rs b/crates/ir/src/to_egglog/expr.rs index 5c14858..c231a3e 100644 --- a/crates/ir/src/to_egglog/expr.rs +++ b/crates/ir/src/to_egglog/expr.rs @@ -153,6 +153,17 @@ impl AstToEgglog { None }; + // Assign a region ID for &dm struct bindings so field access + // uses symbolic RegionStore/RegionLoad instead of raw MSTORE/MLOAD. + let region_id = if is_dynamic_memory && composite_type.is_some() { + let rid = self.fresh_region_id(); + // Register the mapping so post-egglog resolution can convert back + self.region_var_map.insert(rid, var_name.clone()); + Some(rid) + } else { + None + }; + let binding = VarBinding { value: zero, location: DataLocation::Memory, @@ -163,6 +174,7 @@ impl AstToEgglog { composite_base, composite_type_args: Vec::new(), is_dynamic_memory, + region_id, }; self.scopes .last_mut() @@ -276,6 +288,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -969,6 +982,17 @@ impl AstToEgglog { .iter() .position(|(n, _)| n == &field.name) { + // Use RegionStore if the binding has a region ID + if let Some(rid) = self.lookup_region_id(&ident.name) { + let store = ast_helpers::region_store( + rid, + field_idx as i64, + rhs_ir, + Rc::clone(&self.current_state), + ); + self.current_state = Rc::clone(&store); + return Ok(store); + } let offset = ast_helpers::add( base_expr, ast_helpers::const_int( @@ -1920,6 +1944,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; // Get the original name (without prefix) for scope lookup let orig_name = outputs diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index e5f4bf6..cc791c1 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -69,6 +69,7 @@ impl AstToEgglog { composite_base: Some(base_ir), composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -97,6 +98,7 @@ impl AstToEgglog { composite_base: Some(base_ir), composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -137,6 +139,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -241,6 +244,7 @@ impl AstToEgglog { composite_base: None, // dynamic base — resolved at element access composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -590,6 +594,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index 90f2dcf..b86df0b 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -47,7 +47,8 @@ pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { references_any_var(a, names) || references_any_var(b, names) } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => { references_any_var(a, names) } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { @@ -55,6 +56,10 @@ pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { || references_any_var(b, names) || references_any_var(c, names) } + EvmExpr::RegionStore(_, _, val, state) => { + references_any_var(val, names) || references_any_var(state, names) + } + EvmExpr::RegionLoad(_, _, state) => references_any_var(state, names), EvmExpr::If(c, i, t, e) => { references_any_var(c, names) || references_any_var(i, names) @@ -102,6 +107,9 @@ pub(crate) struct VarBinding { pub composite_type_args: Vec, /// Whether this variable is a dynamically-allocated memory pointer (&dm type) pub is_dynamic_memory: bool, + /// Symbolic region ID for region-based field access (RegionStore/RegionLoad). + /// Set when the struct instance has a known allocation site. + pub region_id: Option, } /// Scope for variable resolution during lowering. @@ -311,6 +319,9 @@ pub struct AstToEgglog { pub(crate) storage_array_fields: IndexMap, /// Next available region ID for symbolic memory allocation. pub(crate) next_region_id: i64, + /// Mapping from region ID to the LetBind variable name that holds the base pointer. + /// Used by the post-egglog resolution pass to convert RegionStore/RegionLoad to MStore/MLoad. + pub(crate) region_var_map: IndexMap, /// Tracks the last composite allocation `(type_name, base_expr)` for wiring /// struct/array assignments to variable bindings. pub(crate) last_composite_alloc: Option<(String, RcExpr)>, @@ -383,6 +394,7 @@ impl AstToEgglog { type_aliases: IndexMap::new(), storage_array_fields: IndexMap::new(), next_region_id: 0, + region_var_map: IndexMap::new(), last_composite_alloc: None, module_prefixes: HashSet::new(), generic_type_templates: IndexMap::new(), @@ -410,6 +422,14 @@ impl AstToEgglog { crate::ast_helpers::mem_region(id, size_words as i64) } + /// Allocate a fresh region ID without creating a MemRegion node. + /// Used for symbolic field access tracking on &dm struct instances. + pub(crate) fn fresh_region_id(&mut self) -> i64 { + let id = self.next_region_id; + self.next_region_id += 1; + id + } + /// Extract the type name and type args from a Named type sig, unwrapping Pointer wrappers. /// Returns (`base_name`, `type_args`), e.g., ("Map", [addr, u256]) from `&s Map`. fn extract_named_type( @@ -481,6 +501,9 @@ impl AstToEgglog { "Mstore", "Mload", "Mcopy", + "UnsafeAdd", + "UnsafeSub", + "UnsafeMul", ] { self.std_ops_traits.insert(name.to_string()); } @@ -622,6 +645,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -1026,6 +1050,7 @@ impl AstToEgglog { composite_base: None, composite_type_args, is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() @@ -1052,6 +1077,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }; self.scopes .last_mut() diff --git a/crates/ir/src/to_egglog/pattern.rs b/crates/ir/src/to_egglog/pattern.rs index 88a1312..186808c 100644 --- a/crates/ir/src/to_egglog/pattern.rs +++ b/crates/ir/src/to_egglog/pattern.rs @@ -136,6 +136,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }, ); } @@ -220,6 +221,7 @@ impl AstToEgglog { composite_base: None, composite_type_args: Vec::new(), is_dynamic_memory: false, + region_id: None, }, ); } diff --git a/crates/ir/src/var_opt.rs b/crates/ir/src/var_opt.rs index 9654c3a..96796d7 100644 --- a/crates/ir/src/var_opt.rs +++ b/crates/ir/src/var_opt.rs @@ -147,6 +147,16 @@ fn collect_allocations(expr: &RcExpr, result: &mut HashMap { + collect_allocations(num_fields, result); + } + EvmExpr::RegionStore(_, _, val, state) => { + collect_allocations(val, result); + collect_allocations(state, result); + } + EvmExpr::RegionLoad(_, _, state) => { + collect_allocations(state, result); + } } } @@ -397,6 +407,28 @@ fn rebuild_children_memo(expr: &RcExpr, cache: &mut HashMap) -> R } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = optimize_expr_memo(num_fields, cache); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = optimize_expr_memo(val, cache); + let new_state = optimize_expr_memo(state, cache); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = optimize_expr_memo(state, cache); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } } } @@ -526,9 +558,15 @@ fn analyze_var_inner(name: &str, expr: &RcExpr, in_loop: bool, info: &mut VarInf analyze_var_inner(name, inputs, in_loop, info); analyze_var_inner(name, body, true, info); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { analyze_var_inner(name, a, in_loop, info); } + // RegionStore: last arg (state) is state — skip it. + EvmExpr::RegionStore(_, _, val, _state) => { + analyze_var_inner(name, val, in_loop, info); + } + // RegionLoad: last arg (state) is state — skip it. + EvmExpr::RegionLoad(_, _, _state) => {} // Top: last arg may be state OR operand depending on the op. EvmExpr::Top(op, a, b, c) => { analyze_var_inner(name, a, in_loop, info); @@ -674,7 +712,10 @@ fn is_pure(expr: &RcExpr) -> bool { | EvmExpr::ExtCall(..) | EvmExpr::DoWhile(..) | EvmExpr::Call(..) - | EvmExpr::DynAlloc(..) => false, + | EvmExpr::DynAlloc(..) + | EvmExpr::AllocRegion(..) + | EvmExpr::RegionStore(..) + | EvmExpr::RegionLoad(..) => false, } } @@ -721,9 +762,16 @@ fn collect_immutable_vars_rec( collect_immutable_vars_rec(t, immutable, mutable); collect_immutable_vars_rec(e, immutable, mutable); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { collect_immutable_vars_rec(a, immutable, mutable); } + EvmExpr::RegionStore(_, _, val, state) => { + collect_immutable_vars_rec(val, immutable, mutable); + collect_immutable_vars_rec(state, immutable, mutable); + } + EvmExpr::RegionLoad(_, _, state) => { + collect_immutable_vars_rec(state, immutable, mutable); + } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { collect_immutable_vars_rec(a, immutable, mutable); collect_immutable_vars_rec(b, immutable, mutable); @@ -851,6 +899,28 @@ fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = insert_drops_rec(num_fields, vars_in_scope); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = insert_drops_rec(val, vars_in_scope); + let new_state = insert_drops_rec(state, vars_in_scope); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = insert_drops_rec(state, vars_in_scope); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } // Leaf and other nodes: no structural changes needed _ => Rc::clone(expr), } @@ -915,9 +985,26 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { }; a_ref || b_ref } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { references_var_inner(a, name, follow_state) } + // RegionStore: state is last arg + EvmExpr::RegionStore(_, _, val, state) => { + references_var_inner(val, name, follow_state) + || if follow_state { + references_var_inner(state, name, follow_state) + } else { + false + } + } + // RegionLoad: state is last arg + EvmExpr::RegionLoad(_, _, state) => { + if follow_state { + references_var_inner(state, name, follow_state) + } else { + false + } + } EvmExpr::Top(op, a, b, c) => { use crate::schema::EvmTernaryOp::*; let c_is_state = matches!( @@ -1091,6 +1178,28 @@ fn tighten_drops_rec(expr: &RcExpr) -> RcExpr { } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = tighten_drops_rec(num_fields); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = tighten_drops_rec(val); + let new_state = tighten_drops_rec(state); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = tighten_drops_rec(state); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } // Leaf and other nodes: no structural changes needed _ => Rc::clone(expr), } @@ -1405,6 +1514,28 @@ fn substitute_var_inner( } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = substitute_var_memo(name, replacement, num_fields, cache); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = substitute_var_memo(name, replacement, val, cache); + let new_state = substitute_var_memo(name, replacement, state, cache); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = substitute_var_memo(name, replacement, state, cache); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } // Stop at shadowing LetBind EvmExpr::LetBind(n, init, body) => { @@ -1753,6 +1884,28 @@ fn monomorphize_rec( } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = monomorphize_rec(num_fields, funcs, site_counter, new_functions); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = monomorphize_rec(val, funcs, site_counter, new_functions); + let new_state = monomorphize_rec(state, funcs, site_counter, new_functions); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = monomorphize_rec(state, funcs, site_counter, new_functions); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } // Leaves (EnvRead, EnvRead1, Function, Const, Var, Arg, etc.) _ => Rc::clone(expr), } @@ -1924,6 +2077,28 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = substitute_args(num_fields, in_ty, args); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(body); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = substitute_args(val, in_ty, args); + let new_state = substitute_args(state, in_ty, args); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(body); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = substitute_args(state, in_ty, args); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(body); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } // Leaves _ => Rc::clone(body), } @@ -1957,9 +2132,17 @@ fn collect_letbind_names(expr: &RcExpr, names: &mut std::collections::HashSet { + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => { collect_letbind_names(a, names); } + EvmExpr::RegionStore(_, _, val, state) => { + collect_letbind_names(val, names); + collect_letbind_names(state, names); + } + EvmExpr::RegionLoad(_, _, state) => { + collect_letbind_names(state, names); + } EvmExpr::If(c, i, t, e) => { collect_letbind_names(c, names); collect_letbind_names(i, names); @@ -2181,6 +2364,28 @@ fn rename_locals_rec( } Rc::new(EvmExpr::DynAlloc(new_size)) } + EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { + let new_nf = rename_locals_rec(num_fields, suffix, defined); + if Rc::ptr_eq(&new_nf, num_fields) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) + } + EvmExpr::RegionStore(id, field, val, state) => { + let new_val = rename_locals_rec(val, suffix, defined); + let new_state = rename_locals_rec(state, suffix, defined); + if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) + } + EvmExpr::RegionLoad(id, field, state) => { + let new_state = rename_locals_rec(state, suffix, defined); + if Rc::ptr_eq(&new_state, state) { + return Rc::clone(expr); + } + Rc::new(EvmExpr::RegionLoad(*id, *field, new_state)) + } _ => Rc::clone(expr), } } diff --git a/examples/tests/test_vec.edge b/examples/tests/test_vec.edge index 9151fec..c02d01b 100644 --- a/examples/tests/test_vec.edge +++ b/examples/tests/test_vec.edge @@ -63,4 +63,26 @@ contract TestVec { // Return v[1] (should be 84) return v[1]; } + + pub fn test_zero_array() { + let v: &dm Vec = Vec::new(0); + // safe pushes + v.push(125); + v.push(126); + v.push(127); + v.push(128); + v.push(129); + v.push(130); + v.push(131); + v.push(132); + v.push(133); + v.push(134); + v.push(135); + let i = 0; + while (i < 11) { + if (v.get(i) != 125 + i) { + revert(); + } + } + } } diff --git a/std/globals/vec.edge b/std/globals/vec.edge index 38c8346..0f0d7d3 100644 --- a/std/globals/vec.edge +++ b/std/globals/vec.edge @@ -19,46 +19,56 @@ type Vec = { impl Vec { fn new(cap: u256) -> u256 { - let total = 64 + cap * @size_of::(); + let total = UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(cap, @size_of::())); let ptr = @alloc(total); // len = 0 0.mstore(ptr); // capacity = cap - cap.mstore(ptr + 32); + cap.mstore(UnsafeAdd::unsafe_add(ptr, 32)); ptr } + fn len(self: &dm Self) -> u256 { + self.len + } + + fn capacity(self: &dm Self) -> u256 { + self.capacity + } + fn push(self: &dm Self, val: T) { if (self.len >= self.capacity) { - self.grow(self.capacity + 3); + self.grow(UnsafeAdd::unsafe_add(self.capacity, 3)); } - let offset = self + 64 + self.len * @size_of::(); + // Unchecked arithmetic — internal pointer calculations cannot overflow u256. + let offset = UnsafeAdd::unsafe_add(self, UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(self.len, @size_of::()))); val.mstore(offset); - self.len = self.len + 1; + self.len = UnsafeAdd::unsafe_add(self.len, 1); } fn get(self: &dm Self, index: u256) -> T { if (index >= self.len) { revert(); } - (self + 64 + index * @size_of::()).mload() + UnsafeAdd::unsafe_add(self, UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(index, @size_of::()))).mload() } fn set(self: &dm Self, index: u256, val: T) { if (index >= self.len) { revert(); } - val.mstore(self + 64 + index * @size_of::()); + val.mstore(UnsafeAdd::unsafe_add(self, UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(index, @size_of::())))); } fn grow(self: &dm Self, new_cap: u256) { - let old_size = 64 + self.capacity * @size_of::(); - let new_size = 64 + new_cap * @size_of::(); + let old_cap = self.capacity; + let old_size = UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(old_cap, @size_of::())); + let new_size = UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(new_cap, @size_of::())); let new_ptr = @alloc(new_size); // copy old header + data to new location self.mcopy(new_ptr, old_size); // update capacity at new location - new_cap.mstore(new_ptr + 32); + new_cap.mstore(UnsafeAdd::unsafe_add(new_ptr, 32)); // transparent update via &dm aliasing self = new_ptr; } From 8f43ac6db7ac22cdfc639990b2f441198ff5f8e5 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Thu, 12 Mar 2026 23:53:24 -0600 Subject: [PATCH 09/13] perf: memoize hash_cons and mem_region traversals (76,000x faster on Vec) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hash_cons_rec traversed the full tree form instead of the DAG — with heavy Rc sharing (2069 DAG nodes), this caused exponential blowup (20s). Added visited map keyed by input Rc pointer for O(DAG) traversal. collect_region_scopes had no memoization at all (17s). Added has_mem_region pre-filter memoized by Rc pointer — subtrees without MemRegion nodes (the vast majority) are skipped entirely. Also fix test_zero_array: add missing loop increment. Co-Authored-By: Claude Opus 4.6 --- crates/e2e/.gas-snapshot | 11 +-- crates/ir/src/lib.rs | 99 +++++++++++++--------- crates/ir/src/mem_region.rs | 159 +++++++++++++++++++++++++++-------- examples/tests/test_vec.edge | 1 + 4 files changed, 191 insertions(+), 79 deletions(-) diff --git a/crates/e2e/.gas-snapshot b/crates/e2e/.gas-snapshot index a41145c..29bca7c 100644 --- a/crates/e2e/.gas-snapshot +++ b/crates/e2e/.gas-snapshot @@ -141,8 +141,9 @@ test_unsafe_arith::test_sub_underflow(), 108, 108, 108, 108 test_unsafe_arith::test_unsafe_add(), 135, 129, 129, 129 test_unsafe_arith::test_unsafe_mul(), 112, 104, 104, 104 test_unsafe_arith::test_unsafe_sub(), 161, 155, 155, 155 -test_vec::test_get(), 593, 347, 347, 347 -test_vec::test_grow(), 1545, 1251, 1251, 1251 -test_vec::test_index(), 453, 290, 290, 290 -test_vec::test_new_and_push(), 465, 257, 257, 257 -test_vec::test_set(), 665, 364, 352, 352 +test_vec::test_get(), 624, 378, 378, 378 +test_vec::test_grow(), 1552, 1258, 1258, 1258 +test_vec::test_index(), 459, 296, 296, 296 +test_vec::test_new_and_push(), 471, 263, 263, 263 +test_vec::test_set(), 671, 370, 358, 358 +test_vec::test_zero_array(), 4457, 4280, 4280, 4280 diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 46ef6ad..2b0820e 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -61,28 +61,31 @@ pub use schema::{EvmContract, EvmExpr, EvmProgram, RcExpr}; /// subtrees into shared `Rc` pointers. pub fn hash_cons(expr: &RcExpr) -> RcExpr { let mut cache: HashMap = HashMap::new(); - hash_cons_rec(expr, &mut cache) + let mut visited: HashMap = HashMap::new(); + hash_cons_rec(expr, &mut cache, &mut visited) } /// Hash-cons all expressions in a program. pub fn hash_cons_program(program: &mut EvmProgram) { let mut cache: HashMap = HashMap::new(); + let mut visited: HashMap = HashMap::new(); for contract in &mut program.contracts { - contract.runtime = hash_cons_rec(&contract.runtime, &mut cache); + contract.runtime = hash_cons_rec(&contract.runtime, &mut cache, &mut visited); for func in &mut contract.internal_functions { - *func = hash_cons_rec(func, &mut cache); + *func = hash_cons_rec(func, &mut cache, &mut visited); } - contract.constructor = hash_cons_rec(&contract.constructor, &mut cache); + contract.constructor = hash_cons_rec(&contract.constructor, &mut cache, &mut visited); } for func in &mut program.free_functions { - *func = hash_cons_rec(func, &mut cache); + *func = hash_cons_rec(func, &mut cache, &mut visited); } } /// Hash-cons a single expression tree, restoring Rc sharing for structurally identical subtrees. pub fn hash_cons_expr(expr: &RcExpr) -> RcExpr { let mut cache: HashMap = HashMap::new(); - hash_cons_rec(expr, &mut cache) + let mut visited: HashMap = HashMap::new(); + hash_cons_rec(expr, &mut cache, &mut visited) } /// A hash key that captures node identity by type + inline data + child Rc pointers. @@ -229,23 +232,42 @@ pub(crate) fn key_for_ctx(k: &mut HashConsKey, ctx: &schema::EvmContext) { } } -fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { +fn hash_cons_rec( + expr: &RcExpr, + cache: &mut HashMap, + visited: &mut HashMap, +) -> RcExpr { + // Fast path: if we've already processed this exact Rc node, return the result. + // This prevents exponential blowup when the DAG has heavy sharing. + let ptr = Rc::as_ptr(expr) as usize; + if let Some(result) = visited.get(&ptr) { + return Rc::clone(result); + } + // Build key and hash-cons children first let mut k = HashConsKey::new(); macro_rules! child { ($e:expr) => { - hash_cons_rec($e, cache) + hash_cons_rec($e, cache, visited) }; } + macro_rules! cache_hit { + ($cached:expr) => {{ + let r = Rc::clone($cached); + visited.insert(ptr, Rc::clone(&r)); + return r; + }}; + } + let result: RcExpr = match expr.as_ref() { EvmExpr::Arg(ty, ctx) => { k.tag(0); key_for_type(&mut k, ty); key_for_ctx(&mut k, ctx); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -255,7 +277,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE key_for_type(&mut k, ty); key_for_ctx(&mut k, ctx); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -264,7 +286,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE key_for_type(&mut k, ty); key_for_ctx(&mut k, ctx); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -276,7 +298,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nl); k.ptr(&nr); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nl, l) && Rc::ptr_eq(&nr, r) { Rc::clone(expr) @@ -290,7 +312,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.u8(*op as u8); k.ptr(&na); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) { Rc::clone(expr) @@ -308,7 +330,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nb); k.ptr(&nc); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { Rc::clone(expr) @@ -322,7 +344,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&na); k.usize(*idx); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) { Rc::clone(expr) @@ -337,7 +359,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&na); k.ptr(&nb); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { Rc::clone(expr) @@ -356,7 +378,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nt); k.ptr(&ne); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nc, cond) && Rc::ptr_eq(&ni, inputs) @@ -375,7 +397,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&na); k.ptr(&nb); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { Rc::clone(expr) @@ -389,7 +411,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.u8(*op as u8); k.ptr(&ns); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&ns, st) { Rc::clone(expr) @@ -405,7 +427,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&na); k.ptr(&ns); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, arg) && Rc::ptr_eq(&ns, st) { Rc::clone(expr) @@ -427,7 +449,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&ns); k.ptr(&nst); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::new(EvmExpr::Log(*n, new_topics, nd, ns, nst)) } @@ -440,7 +462,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nb); k.ptr(&nc); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { Rc::clone(expr) @@ -457,7 +479,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nb); k.ptr(&nc); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { Rc::clone(expr) @@ -482,7 +504,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nf); k.ptr(&ng); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, ng)) } @@ -494,7 +516,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(a); } if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::new(EvmExpr::Call(name.clone(), new_args)) } @@ -502,7 +524,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.tag(17); k.str(s); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -514,7 +536,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nv); k.ptr(&nb); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nv, value) && Rc::ptr_eq(&nb, body) { Rc::clone(expr) @@ -526,7 +548,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.tag(19); k.str(name); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -536,7 +558,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.str(name); k.ptr(&nv); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nv, val) { Rc::clone(expr) @@ -548,7 +570,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.tag(21); k.str(name); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -560,7 +582,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE key_for_type(&mut k, out_ty); k.ptr(&nb); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nb, body) { Rc::clone(expr) @@ -579,7 +601,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.usize(*slot); key_for_type(&mut k, ty); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -592,7 +614,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(a); } if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } @@ -601,7 +623,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.i64(*id); k.i64(*size); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } Rc::clone(expr) } @@ -610,7 +632,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.tag(26); k.ptr(&ns); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&ns, size) { Rc::clone(expr) @@ -625,7 +647,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nf); k.u8(*is_dynamic as u8); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nf, num_fields) { Rc::clone(expr) @@ -642,7 +664,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.ptr(&nv); k.ptr(&ns); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&nv, val) && Rc::ptr_eq(&ns, state) { Rc::clone(expr) @@ -657,7 +679,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE k.i64(*field_idx); k.ptr(&ns); if let Some(cached) = cache.get(&k) { - return Rc::clone(cached); + cache_hit!(cached); } if Rc::ptr_eq(&ns, state) { Rc::clone(expr) @@ -668,6 +690,7 @@ fn hash_cons_rec(expr: &RcExpr, cache: &mut HashMap) -> RcE }; cache.insert(k, Rc::clone(&result)); + visited.insert(ptr, Rc::clone(&result)); result } diff --git a/crates/ir/src/mem_region.rs b/crates/ir/src/mem_region.rs index d6eac5c..a7f45c4 100644 --- a/crates/ir/src/mem_region.rs +++ b/crates/ir/src/mem_region.rs @@ -12,7 +12,10 @@ //! The assigned offsets become the `memory_high_water` value that codegen uses //! to start `LetBind` variable slots above all IR-allocated regions. -use std::{collections::BTreeMap, rc::Rc}; +use std::{ + collections::{BTreeMap, HashMap}, + rc::Rc, +}; use crate::schema::{ EvmBaseType, EvmConstant, EvmContext, EvmExpr, EvmProgram, EvmType, RcExpr, @@ -100,8 +103,81 @@ fn assign_scoped_offsets( } } +/// Check whether a subtree contains any MemRegion nodes (memoized by Rc pointer). +fn has_mem_region(expr: &RcExpr, cache: &mut HashMap) -> bool { + let ptr = Rc::as_ptr(expr) as usize; + if let Some(&result) = cache.get(&ptr) { + return result; + } + let result = match expr.as_ref() { + EvmExpr::MemRegion(..) => true, + EvmExpr::Concat(a, b) + | EvmExpr::Bop(_, a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::EnvRead1(_, a, b) => { + has_mem_region(a, cache) || has_mem_region(b, cache) + } + EvmExpr::If(a, b, c, d) => { + has_mem_region(a, cache) + || has_mem_region(b, cache) + || has_mem_region(c, cache) + || has_mem_region(d, cache) + } + EvmExpr::LetBind(_, init, body) => { + has_mem_region(init, cache) || has_mem_region(body, cache) + } + EvmExpr::Top(_, a, b, c) + | EvmExpr::Revert(a, b, c) + | EvmExpr::ReturnOp(a, b, c) => { + has_mem_region(a, cache) + || has_mem_region(b, cache) + || has_mem_region(c, cache) + } + EvmExpr::Function(_, _, _, body) => has_mem_region(body, cache), + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::VarStore(_, a) + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) + | EvmExpr::EnvRead(_, a) => has_mem_region(a, cache), + EvmExpr::RegionStore(_, _, val, state) => { + has_mem_region(val, cache) || has_mem_region(state, cache) + } + EvmExpr::RegionLoad(_, _, state) => has_mem_region(state, cache), + EvmExpr::Log(_, topics, d, s, st) => { + topics.iter().any(|t| has_mem_region(t, cache)) + || has_mem_region(d, cache) + || has_mem_region(s, cache) + || has_mem_region(st, cache) + } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => { + [a, b, c, d, e, f, g] + .iter() + .any(|x| has_mem_region(x, cache)) + } + EvmExpr::Call(_, args) => args.iter().any(|a| has_mem_region(a, cache)), + EvmExpr::InlineAsm(inputs, ..) => inputs.iter().any(|a| has_mem_region(a, cache)), + _ => false, + }; + cache.insert(ptr, result); + result +} + /// Build a scope tree from an IR expression. fn collect_region_scopes(expr: &RcExpr) -> RegionScope { + let mut hmr_cache = HashMap::new(); + collect_region_scopes_inner(expr, &mut hmr_cache) +} + +fn collect_region_scopes_inner( + expr: &RcExpr, + hmr_cache: &mut HashMap, +) -> RegionScope { + // Fast path: if this subtree contains no MemRegion nodes, skip traversal. + if !has_mem_region(expr, hmr_cache) { + return RegionScope::Sequential(vec![]); + } + match expr.as_ref() { EvmExpr::MemRegion(id, size_words) => RegionScope::Leaf { region_id: *id, @@ -110,34 +186,35 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { // If: condition+inputs sequential, then/else exclusive EvmExpr::If(cond, inputs, then_br, else_br) => RegionScope::Sequential(vec![ - collect_region_scopes(cond), - collect_region_scopes(inputs), + collect_region_scopes_inner(cond, hmr_cache), + collect_region_scopes_inner(inputs, hmr_cache), RegionScope::Exclusive(vec![ - collect_region_scopes(then_br), - collect_region_scopes(else_br), + collect_region_scopes_inner(then_br, hmr_cache), + collect_region_scopes_inner(else_br, hmr_cache), ]), ]), // Sequential composition - EvmExpr::Concat(a, b) | EvmExpr::Bop(_, a, b) => { - RegionScope::Sequential(vec![collect_region_scopes(a), collect_region_scopes(b)]) - } + EvmExpr::Concat(a, b) | EvmExpr::Bop(_, a, b) => RegionScope::Sequential(vec![ + collect_region_scopes_inner(a, hmr_cache), + collect_region_scopes_inner(b, hmr_cache), + ]), EvmExpr::LetBind(_, init, body) => RegionScope::Sequential(vec![ - collect_region_scopes(init), - collect_region_scopes(body), + collect_region_scopes_inner(init, hmr_cache), + collect_region_scopes_inner(body, hmr_cache), ]), EvmExpr::DoWhile(inputs, body) => RegionScope::Sequential(vec![ - collect_region_scopes(inputs), - collect_region_scopes(body), + collect_region_scopes_inner(inputs, hmr_cache), + collect_region_scopes_inner(body, hmr_cache), ]), - EvmExpr::Function(_, _, _, body) => collect_region_scopes(body), + EvmExpr::Function(_, _, _, body) => collect_region_scopes_inner(body, hmr_cache), // Ternary children — sequential EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { RegionScope::Sequential(vec![ - collect_region_scopes(a), - collect_region_scopes(b), - collect_region_scopes(c), + collect_region_scopes_inner(a, hmr_cache), + collect_region_scopes_inner(b, hmr_cache), + collect_region_scopes_inner(c, hmr_cache), ]) } @@ -146,36 +223,46 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { | EvmExpr::Get(a, _) | EvmExpr::VarStore(_, a) | EvmExpr::DynAlloc(a) - | EvmExpr::AllocRegion(_, a, _) => collect_region_scopes(a), + | EvmExpr::AllocRegion(_, a, _) => collect_region_scopes_inner(a, hmr_cache), // Multi-child nodes EvmExpr::Log(_, topics, d, s, st) => { - let mut children: Vec<_> = topics.iter().map(collect_region_scopes).collect(); - children.push(collect_region_scopes(d)); - children.push(collect_region_scopes(s)); - children.push(collect_region_scopes(st)); + let mut children: Vec<_> = topics + .iter() + .map(|t| collect_region_scopes_inner(t, hmr_cache)) + .collect(); + children.push(collect_region_scopes_inner(d, hmr_cache)); + children.push(collect_region_scopes_inner(s, hmr_cache)); + children.push(collect_region_scopes_inner(st, hmr_cache)); RegionScope::Sequential(children) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => RegionScope::Sequential( [a, b, c, d, e, f, g] .into_iter() - .map(collect_region_scopes) + .map(|x| collect_region_scopes_inner(x, hmr_cache)) + .collect(), + ), + EvmExpr::Call(_, args) => RegionScope::Sequential( + args.iter() + .map(|a| collect_region_scopes_inner(a, hmr_cache)) + .collect(), + ), + EvmExpr::RegionStore(_, _, val, state) => RegionScope::Sequential(vec![ + collect_region_scopes_inner(val, hmr_cache), + collect_region_scopes_inner(state, hmr_cache), + ]), + EvmExpr::RegionLoad(_, _, state) => collect_region_scopes_inner(state, hmr_cache), + EvmExpr::EnvRead(_, s) => collect_region_scopes_inner(s, hmr_cache), + EvmExpr::EnvRead1(_, a, s) => RegionScope::Sequential(vec![ + collect_region_scopes_inner(a, hmr_cache), + collect_region_scopes_inner(s, hmr_cache), + ]), + EvmExpr::InlineAsm(inputs, ..) => RegionScope::Sequential( + inputs + .iter() + .map(|a| collect_region_scopes_inner(a, hmr_cache)) .collect(), ), - EvmExpr::Call(_, args) => { - RegionScope::Sequential(args.iter().map(collect_region_scopes).collect()) - } - EvmExpr::RegionStore(_, _, val, state) => { - RegionScope::Sequential(vec![collect_region_scopes(val), collect_region_scopes(state)]) - } - EvmExpr::RegionLoad(_, _, state) => collect_region_scopes(state), - EvmExpr::EnvRead(_, s) => collect_region_scopes(s), - EvmExpr::EnvRead1(_, a, s) => { - RegionScope::Sequential(vec![collect_region_scopes(a), collect_region_scopes(s)]) - } - EvmExpr::InlineAsm(inputs, ..) => { - RegionScope::Sequential(inputs.iter().map(collect_region_scopes).collect()) - } // Leaf nodes — no regions EvmExpr::Const(..) diff --git a/examples/tests/test_vec.edge b/examples/tests/test_vec.edge index c02d01b..6820a0e 100644 --- a/examples/tests/test_vec.edge +++ b/examples/tests/test_vec.edge @@ -83,6 +83,7 @@ contract TestVec { if (v.get(i) != 125 + i) { revert(); } + i = i + 1; } } } From 5e4ee6a0cbbf06f0edc09f720ddb131c47d88b90 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Fri, 13 Mar 2026 10:33:11 -0600 Subject: [PATCH 10/13] perf: memoize insert_early_drops and references_var (570x faster) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit insert_drops_rec traversed the full tree without memoization — with heavy Rc sharing (2050 DAG nodes) this caused exponential blowup (715ms). Added visited cache keyed by Rc pointer when vars_in_scope is empty (the common case for most of the tree). references_var_inner also traversed without memoization, compounding the cost. Added per-call HashMap cache. var_opt: 715ms → 1.3ms on test_vec.edge. Co-Authored-By: Claude Opus 4.6 --- crates/ir/src/var_opt.rs | 165 +++++++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 57 deletions(-) diff --git a/crates/ir/src/var_opt.rs b/crates/ir/src/var_opt.rs index 96796d7..a7763ae 100644 --- a/crates/ir/src/var_opt.rs +++ b/crates/ir/src/var_opt.rs @@ -836,24 +836,39 @@ fn collect_immutable_vars_rec( /// /// Call this on the full expression tree after other `var_opt` passes. pub fn insert_early_drops(expr: &RcExpr) -> RcExpr { - insert_drops_rec(expr, &[]) + let mut cache = HashMap::new(); + insert_drops_rec(expr, &[], &mut cache) } -fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { - match expr.as_ref() { +fn insert_drops_rec( + expr: &RcExpr, + vars_in_scope: &[String], + cache: &mut HashMap, +) -> RcExpr { + // When vars_in_scope is empty, the only thing that can change the result is + // encountering a LetBind (which adds to scope). For a given Rc-shared subtree + // with empty scope, the result is always the same, so we can memoize. + let ptr_key = Rc::as_ptr(expr) as usize; + if vars_in_scope.is_empty() { + if let Some(cached) = cache.get(&ptr_key) { + return Rc::clone(cached); + } + } + + let result = match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { - let new_init = insert_drops_rec(init, vars_in_scope); + let new_init = insert_drops_rec(init, vars_in_scope, cache); let mut new_scope = vars_in_scope.to_vec(); new_scope.push(name.clone()); - let new_body = insert_drops_rec(body, &new_scope); + let new_body = insert_drops_rec(body, &new_scope, cache); Rc::new(EvmExpr::LetBind(name.clone(), new_init, new_body)) } EvmExpr::If(cond, inputs, then_body, else_body) => { // Recurse first into sub-expressions - let new_cond = insert_drops_rec(cond, vars_in_scope); - let new_inputs = insert_drops_rec(inputs, vars_in_scope); - let mut new_then = insert_drops_rec(then_body, vars_in_scope); - let mut new_else = insert_drops_rec(else_body, vars_in_scope); + let new_cond = insert_drops_rec(cond, vars_in_scope, cache); + let new_inputs = insert_drops_rec(inputs, vars_in_scope, cache); + let mut new_then = insert_drops_rec(then_body, vars_in_scope, cache); + let mut new_else = insert_drops_rec(else_body, vars_in_scope, cache); // For each halting branch, add Drops for unreferenced in-scope vars if expr_definitely_halts(&new_then) { @@ -874,17 +889,17 @@ fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { Rc::new(EvmExpr::If(new_cond, new_inputs, new_then, new_else)) } EvmExpr::Concat(a, b) => { - let new_a = insert_drops_rec(a, vars_in_scope); - let new_b = insert_drops_rec(b, vars_in_scope); + let new_a = insert_drops_rec(a, vars_in_scope, cache); + let new_b = insert_drops_rec(b, vars_in_scope, cache); Rc::new(EvmExpr::Concat(new_a, new_b)) } EvmExpr::DoWhile(inputs, body) => { - let new_inputs = insert_drops_rec(inputs, vars_in_scope); - let new_body = insert_drops_rec(body, vars_in_scope); + let new_inputs = insert_drops_rec(inputs, vars_in_scope, cache); + let new_body = insert_drops_rec(body, vars_in_scope, cache); Rc::new(EvmExpr::DoWhile(new_inputs, new_body)) } EvmExpr::Function(name, in_ty, out_ty, body) => { - let new_body = insert_drops_rec(body, vars_in_scope); + let new_body = insert_drops_rec(body, vars_in_scope, cache); Rc::new(EvmExpr::Function( name.clone(), in_ty.clone(), @@ -893,29 +908,29 @@ fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { )) } EvmExpr::DynAlloc(size) => { - let new_size = insert_drops_rec(size, vars_in_scope); + let new_size = insert_drops_rec(size, vars_in_scope, cache); if Rc::ptr_eq(&new_size, size) { return Rc::clone(expr); } Rc::new(EvmExpr::DynAlloc(new_size)) } EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { - let new_nf = insert_drops_rec(num_fields, vars_in_scope); + let new_nf = insert_drops_rec(num_fields, vars_in_scope, cache); if Rc::ptr_eq(&new_nf, num_fields) { return Rc::clone(expr); } Rc::new(EvmExpr::AllocRegion(*id, new_nf, *is_dynamic)) } EvmExpr::RegionStore(id, field, val, state) => { - let new_val = insert_drops_rec(val, vars_in_scope); - let new_state = insert_drops_rec(state, vars_in_scope); + let new_val = insert_drops_rec(val, vars_in_scope, cache); + let new_state = insert_drops_rec(state, vars_in_scope, cache); if Rc::ptr_eq(&new_val, val) && Rc::ptr_eq(&new_state, state) { return Rc::clone(expr); } Rc::new(EvmExpr::RegionStore(*id, *field, new_val, new_state)) } EvmExpr::RegionLoad(id, field, state) => { - let new_state = insert_drops_rec(state, vars_in_scope); + let new_state = insert_drops_rec(state, vars_in_scope, cache); if Rc::ptr_eq(&new_state, state) { return Rc::clone(expr); } @@ -923,7 +938,13 @@ fn insert_drops_rec(expr: &RcExpr, vars_in_scope: &[String]) -> RcExpr { } // Leaf and other nodes: no structural changes needed _ => Rc::clone(expr), + }; + + if vars_in_scope.is_empty() { + cache.insert(ptr_key, Rc::clone(&result)); } + + result } /// Check if an expression is guaranteed to halt (ends with RETURN or REVERT). @@ -947,7 +968,8 @@ fn expr_definitely_halts(expr: &RcExpr) -> bool { /// This follows ALL sub-expressions including state parameters. /// Used by `insert_early_drops` which needs full reachability. fn references_var(expr: &RcExpr, name: &str) -> bool { - references_var_inner(expr, name, true) + let mut cache = HashMap::new(); + references_var_inner(expr, name, true, &mut cache) } /// Check if an expression references a variable in a data-flow sense. @@ -957,42 +979,69 @@ fn references_var(expr: &RcExpr, name: &str) -> bool { /// Also ignores Drop nodes, which are lifetime markers, not data uses. /// Used by `tighten_drops` to find the last actual use of a variable. fn references_var_dataflow(expr: &RcExpr, name: &str) -> bool { - references_var_inner(expr, name, false) + let mut cache = HashMap::new(); + references_var_inner(expr, name, false, &mut cache) } -fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { +fn references_var_inner( + expr: &RcExpr, + name: &str, + follow_state: bool, + cache: &mut HashMap, +) -> bool { + let ptr_key = Rc::as_ptr(expr) as usize; + if let Some(&cached) = cache.get(&ptr_key) { + return cached; + } + + let result = references_var_inner_uncached(expr, name, follow_state, cache); + cache.insert(ptr_key, result); + result +} + +fn references_var_inner_uncached( + expr: &RcExpr, + name: &str, + follow_state: bool, + cache: &mut HashMap, +) -> bool { match expr.as_ref() { EvmExpr::Var(n) => n == name, EvmExpr::Drop(n) => follow_state && n == name, - EvmExpr::VarStore(n, val) => n == name || references_var_inner(val, name, follow_state), + EvmExpr::VarStore(n, val) => { + n == name || references_var_inner(val, name, follow_state, cache) + } EvmExpr::LetBind(n, init, body) => { - references_var_inner(init, name, follow_state) - || (n != name && references_var_inner(body, name, follow_state)) + references_var_inner(init, name, follow_state, cache) + || (n != name && references_var_inner(body, name, follow_state, cache)) } EvmExpr::Concat(a, b) => { - references_var_inner(a, name, follow_state) - || references_var_inner(b, name, follow_state) + references_var_inner(a, name, follow_state, cache) + || references_var_inner(b, name, follow_state, cache) } EvmExpr::Bop(op, a, b) => { use crate::schema::EvmBinaryOp::*; - let a_ref = references_var_inner(a, name, follow_state); + let a_ref = references_var_inner(a, name, follow_state, cache); // For state-consuming binary ops, b is the state parameter let b_is_state = matches!(op, SLoad | TLoad | MLoad | CalldataLoad); let b_ref = if b_is_state && !follow_state { false } else { - references_var_inner(b, name, follow_state) + references_var_inner(b, name, follow_state, cache) }; a_ref || b_ref } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { - references_var_inner(a, name, follow_state) + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => { + references_var_inner(a, name, follow_state, cache) } // RegionStore: state is last arg EvmExpr::RegionStore(_, _, val, state) => { - references_var_inner(val, name, follow_state) + references_var_inner(val, name, follow_state, cache) || if follow_state { - references_var_inner(state, name, follow_state) + references_var_inner(state, name, follow_state, cache) } else { false } @@ -1000,7 +1049,7 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { // RegionLoad: state is last arg EvmExpr::RegionLoad(_, _, state) => { if follow_state { - references_var_inner(state, name, follow_state) + references_var_inner(state, name, follow_state, cache) } else { false } @@ -1011,45 +1060,45 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { op, SStore | TStore | MStore | MStore8 | Keccak256 | CalldataCopy | Mcopy ); - references_var_inner(a, name, follow_state) - || references_var_inner(b, name, follow_state) + references_var_inner(a, name, follow_state, cache) + || references_var_inner(b, name, follow_state, cache) || if c_is_state && !follow_state { false } else { - references_var_inner(c, name, follow_state) + references_var_inner(c, name, follow_state, cache) } } EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { // c is always state for Revert/ReturnOp - references_var_inner(a, name, follow_state) - || references_var_inner(b, name, follow_state) + references_var_inner(a, name, follow_state, cache) + || references_var_inner(b, name, follow_state, cache) || if follow_state { - references_var_inner(c, name, follow_state) + references_var_inner(c, name, follow_state, cache) } else { false } } EvmExpr::If(c, i, t, e) => { - references_var_inner(c, name, follow_state) - || references_var_inner(i, name, follow_state) - || references_var_inner(t, name, follow_state) - || references_var_inner(e, name, follow_state) + references_var_inner(c, name, follow_state, cache) + || references_var_inner(i, name, follow_state, cache) + || references_var_inner(t, name, follow_state, cache) + || references_var_inner(e, name, follow_state, cache) } EvmExpr::DoWhile(inputs, body) => { - references_var_inner(inputs, name, follow_state) - || references_var_inner(body, name, follow_state) + references_var_inner(inputs, name, follow_state, cache) + || references_var_inner(body, name, follow_state, cache) } EvmExpr::EnvRead(_, s) => { if follow_state { - references_var_inner(s, name, follow_state) + references_var_inner(s, name, follow_state, cache) } else { false } } EvmExpr::EnvRead1(_, a, s) => { - references_var_inner(a, name, follow_state) + references_var_inner(a, name, follow_state, cache) || if follow_state { - references_var_inner(s, name, follow_state) + references_var_inner(s, name, follow_state, cache) } else { false } @@ -1057,11 +1106,11 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { EvmExpr::Log(_, topics, data_offset, data_size, state) => { topics .iter() - .any(|t| references_var_inner(t, name, follow_state)) - || references_var_inner(data_offset, name, follow_state) - || references_var_inner(data_size, name, follow_state) + .any(|t| references_var_inner(t, name, follow_state, cache)) + || references_var_inner(data_offset, name, follow_state, cache) + || references_var_inner(data_size, name, follow_state, cache) || if follow_state { - references_var_inner(state, name, follow_state) + references_var_inner(state, name, follow_state, cache) } else { false } @@ -1074,12 +1123,14 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { &[a, b, c, d, e, f] }; args.iter() - .any(|x| references_var_inner(x, name, follow_state)) + .any(|x| references_var_inner(x, name, follow_state, cache)) } EvmExpr::Call(_, args) => args .iter() - .any(|a| references_var_inner(a, name, follow_state)), - EvmExpr::Function(_, _, _, body) => references_var_inner(body, name, follow_state), + .any(|a| references_var_inner(a, name, follow_state, cache)), + EvmExpr::Function(_, _, _, body) => { + references_var_inner(body, name, follow_state, cache) + } EvmExpr::Const(..) | EvmExpr::Arg(..) | EvmExpr::Empty(..) @@ -1088,7 +1139,7 @@ fn references_var_inner(expr: &RcExpr, name: &str, follow_state: bool) -> bool { | EvmExpr::MemRegion(..) => false, EvmExpr::InlineAsm(inputs, ..) => inputs .iter() - .any(|i| references_var_inner(i, name, follow_state)), + .any(|i| references_var_inner(i, name, follow_state, cache)), } } From d46710b0fd18ce6adfe5854ed140664866b07111 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:22:36 -0600 Subject: [PATCH 11/13] perf: memoize all exponential DAG traversals in IR pipeline and codegen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix exponential-time traversals across the compiler by adding Rc::as_ptr memoization to functions that recursively walk DAG-shared IR trees. Key fixes: - var_opt: analyze_var uses return-value caching keyed by (ptr, in_loop) to preserve correct tree-expanded read counts for allocation decisions. collect_allocations, collect_immutable_vars, dead_store_elim use visited sets or HashMap caches. - storage_hoist: forward_stores_expr, replace_sloads_inline, might_modify_storage, collect_sload_slots all memoized. - to_egglog: references_any_var uses visited set. - codegen: contains_dyn_alloc uses visited set, LetOffsetSim::walk uses visited set, collect_allocations_inner uses visited set. Performance on test_vec.edge O0 (before → after): - forward_stores: 2.45s → 492µs (4,980x) - dead_store_elim: 3.40s → 267µs (12,734x) - analyze_allocations: 799ms → 637µs (1,254x) - codegen total: 1.42s → 1.6ms (888x) - total compile: ~7.3s → ~10ms Zero gas regression — snapshot identical. Co-Authored-By: Claude Opus 4.6 --- crates/codegen/src/contract.rs | 4 + crates/codegen/src/dispatcher.rs | 81 +++++-- crates/codegen/src/expr_compiler.rs | 8 +- crates/ir/src/lib.rs | 8 + crates/ir/src/storage_hoist.rs | 259 ++++++++++++-------- crates/ir/src/to_egglog/calls.rs | 2 + crates/ir/src/to_egglog/function.rs | 7 + crates/ir/src/to_egglog/mod.rs | 65 ++++-- crates/ir/src/var_opt.rs | 350 +++++++++++++++++----------- std/globals/vec.edge | 9 + 10 files changed, 513 insertions(+), 280 deletions(-) diff --git a/crates/codegen/src/contract.rs b/crates/codegen/src/contract.rs index 51d8724..a1520bd 100644 --- a/crates/codegen/src/contract.rs +++ b/crates/codegen/src/contract.rs @@ -124,7 +124,9 @@ fn generate_runtime_bytecode( // reads from 0x40. This saves 6 bytes + 9 gas per call. // Function dispatcher + let t = std::time::Instant::now(); dispatcher::generate_dispatcher(&mut asm, contract); + tracing::debug!(" dispatcher: {:?}", t.elapsed()); // 4. Function bodies are compiled inline within the dispatcher's // call targets. For now, the dispatcher already contains the @@ -132,7 +134,9 @@ fn generate_runtime_bytecode( // 5. Optimize runtime bytecode let instructions = asm.take_instructions(); + let t = std::time::Instant::now(); let optimized = bytecode_opt::optimize(instructions, optimization_level, optimize_for)?; + tracing::debug!(" bytecode_opt: {:?}", t.elapsed()); // 6. Extract repeated sequences into subroutines (size mode only). // Subroutine extraction trades ~30 gas per call for significant code size diff --git a/crates/codegen/src/dispatcher.rs b/crates/codegen/src/dispatcher.rs index 7b1451c..06bba24 100644 --- a/crates/codegen/src/dispatcher.rs +++ b/crates/codegen/src/dispatcher.rs @@ -9,48 +9,67 @@ use crate::{assembler::Assembler, expr_compiler::ExprCompiler}; /// Recursively check if an IR tree contains any `DynAlloc` nodes. fn contains_dyn_alloc(expr: &edge_ir::schema::RcExpr) -> bool { + let mut visited = std::collections::HashSet::new(); + contains_dyn_alloc_inner(expr, &mut visited) +} + +fn contains_dyn_alloc_inner( + expr: &edge_ir::schema::RcExpr, + visited: &mut std::collections::HashSet, +) -> bool { + if !visited.insert(std::rc::Rc::as_ptr(expr) as usize) { + return false; + } use edge_ir::schema::EvmExpr; match expr.as_ref() { EvmExpr::DynAlloc(_) => true, EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { - contains_dyn_alloc(a) || contains_dyn_alloc(b) + contains_dyn_alloc_inner(a, visited) || contains_dyn_alloc_inner(b, visited) } - EvmExpr::Uop(_, a) => contains_dyn_alloc(a), + EvmExpr::Uop(_, a) => contains_dyn_alloc_inner(a, visited), EvmExpr::Top(_, a, b, c) | EvmExpr::If(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - contains_dyn_alloc(a) || contains_dyn_alloc(b) || contains_dyn_alloc(c) + contains_dyn_alloc_inner(a, visited) + || contains_dyn_alloc_inner(b, visited) + || contains_dyn_alloc_inner(c, visited) + } + EvmExpr::LetBind(_, init, body) => { + contains_dyn_alloc_inner(init, visited) || contains_dyn_alloc_inner(body, visited) + } + EvmExpr::VarStore(_, val) => contains_dyn_alloc_inner(val, visited), + EvmExpr::EnvRead(_, state) => contains_dyn_alloc_inner(state, visited), + EvmExpr::EnvRead1(_, arg, state) => { + contains_dyn_alloc_inner(arg, visited) || contains_dyn_alloc_inner(state, visited) } - EvmExpr::LetBind(_, init, body) => contains_dyn_alloc(init) || contains_dyn_alloc(body), - EvmExpr::VarStore(_, val) => contains_dyn_alloc(val), - EvmExpr::EnvRead(_, state) => contains_dyn_alloc(state), - EvmExpr::EnvRead1(_, arg, state) => contains_dyn_alloc(arg) || contains_dyn_alloc(state), EvmExpr::Log(_, topics, offset, size, state) => { - topics.iter().any(contains_dyn_alloc) - || contains_dyn_alloc(offset) - || contains_dyn_alloc(size) - || contains_dyn_alloc(state) + topics.iter().any(|t| contains_dyn_alloc_inner(t, visited)) + || contains_dyn_alloc_inner(offset, visited) + || contains_dyn_alloc_inner(size, visited) + || contains_dyn_alloc_inner(state, visited) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { - contains_dyn_alloc(a) - || contains_dyn_alloc(b) - || contains_dyn_alloc(c) - || contains_dyn_alloc(d) - || contains_dyn_alloc(e) - || contains_dyn_alloc(f) - || contains_dyn_alloc(g) + contains_dyn_alloc_inner(a, visited) + || contains_dyn_alloc_inner(b, visited) + || contains_dyn_alloc_inner(c, visited) + || contains_dyn_alloc_inner(d, visited) + || contains_dyn_alloc_inner(e, visited) + || contains_dyn_alloc_inner(f, visited) + || contains_dyn_alloc_inner(g, visited) + } + EvmExpr::Function(_, _, _, body) => contains_dyn_alloc_inner(body, visited), + EvmExpr::Call(_, args) => args.iter().any(|a| contains_dyn_alloc_inner(a, visited)), + EvmExpr::InlineAsm(inputs, _, _) => { + inputs.iter().any(|i| contains_dyn_alloc_inner(i, visited)) } - EvmExpr::Function(_, _, _, body) => contains_dyn_alloc(body), - EvmExpr::Call(_, args) => args.iter().any(contains_dyn_alloc), - EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(contains_dyn_alloc), - EvmExpr::Get(inner, _) => contains_dyn_alloc(inner), + EvmExpr::Get(inner, _) => contains_dyn_alloc_inner(inner, visited), EvmExpr::AllocRegion(_, _, true) => true, - EvmExpr::AllocRegion(_, nf, false) => contains_dyn_alloc(nf), + EvmExpr::AllocRegion(_, nf, false) => contains_dyn_alloc_inner(nf, visited), EvmExpr::RegionStore(_, _, val, state) => { - contains_dyn_alloc(val) || contains_dyn_alloc(state) + contains_dyn_alloc_inner(val, visited) || contains_dyn_alloc_inner(state, visited) } - EvmExpr::RegionLoad(_, _, state) => contains_dyn_alloc(state), + EvmExpr::RegionLoad(_, _, state) => contains_dyn_alloc_inner(state, visited), EvmExpr::Const(..) | EvmExpr::Var(_) | EvmExpr::Drop(_) @@ -69,6 +88,7 @@ fn contains_dyn_alloc(expr: &edge_ir::schema::RcExpr) -> bool { /// Each branch loads the selector from calldata, compares it, and /// executes the matching function body (which terminates with RETURN/STOP). pub fn generate_dispatcher(asm: &mut Assembler, contract: &EvmContract) { + let t = std::time::Instant::now(); // Analyze variable allocations to decide stack vs memory let mut allocations = var_opt::analyze_allocations(&contract.runtime); // Also analyze internal function bodies @@ -87,6 +107,9 @@ pub fn generate_dispatcher(asm: &mut Assembler, contract: &EvmContract) { .or_insert(alloc); } } + tracing::debug!(" analyze_allocations: {:?}", t.elapsed()); + + let t = std::time::Instant::now(); // Compute the DynAlloc floor: the minimum address DynAlloc may return. // Without this, DynAlloc (which uses MSIZE) could return pointers that // overlap with LetBind slots whose MSTORE hasn't happened yet. @@ -105,6 +128,7 @@ pub fn generate_dispatcher(asm: &mut Assembler, contract: &EvmContract) { } else { 0 }; + tracing::debug!(" dyn_alloc_floor: {:?}", t.elapsed()); // Start LetBind slots after IR-allocated memory regions (arrays, structs) let mut compiler = ExprCompiler::with_allocations_base_and_floor( @@ -113,16 +137,23 @@ pub fn generate_dispatcher(asm: &mut Assembler, contract: &EvmContract) { contract.memory_high_water, dyn_alloc_floor, ); + let t = std::time::Instant::now(); // Collect fn_info from both runtime and internal functions compiler.collect_fn_info(&contract.runtime); for func in &contract.internal_functions { compiler.collect_fn_info(func); } + tracing::debug!(" collect_fn_info: {:?}", t.elapsed()); + + let t = std::time::Instant::now(); compiler.compile_expr(&contract.runtime); + tracing::debug!(" compile_expr(runtime): {:?}", t.elapsed()); // Compile internal function subroutines + let t = std::time::Instant::now(); for func in &contract.internal_functions { compiler.compile_expr(func); } + tracing::debug!(" compile_expr(fns): {:?}", t.elapsed()); compiler.emit_overflow_revert_trampoline(); } diff --git a/crates/codegen/src/expr_compiler.rs b/crates/codegen/src/expr_compiler.rs index 1c7a777..a03dc95 100644 --- a/crates/codegen/src/expr_compiler.rs +++ b/crates/codegen/src/expr_compiler.rs @@ -4,7 +4,8 @@ //! Since the EVM is a stack machine, we compile in postorder: children //! first, then the operator. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::rc::Rc; use edge_ir::{ schema::{ @@ -631,6 +632,7 @@ impl<'a> ExprCompiler<'a> { peak: memory_base, allocation_modes, stack_var_count: 0, + visited: HashSet::new(), }; for expr in exprs { state.walk(expr); @@ -1673,6 +1675,7 @@ struct LetOffsetSim<'a> { peak: usize, allocation_modes: &'a HashMap, stack_var_count: usize, + visited: HashSet, } impl<'a> LetOffsetSim<'a> { @@ -1688,6 +1691,9 @@ impl<'a> LetOffsetSim<'a> { } fn walk(&mut self, expr: &RcExpr) { + if !self.visited.insert(Rc::as_ptr(expr) as usize) { + return; + } match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { self.walk(init); diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 2b0820e..23a9166 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -1309,17 +1309,22 @@ pub fn lower_and_optimize( let t_contract = std::time::Instant::now(); // DAG-aware serialization: emit shared sub-expressions as egglog let-bindings + let t_phase = std::time::Instant::now(); let (shared_lets, runtime_sexp, mut next_id) = sexp::expr_to_sexp_dag(&contract.runtime, 0); + tracing::debug!(" sexp_dag: {:?}", t_phase.elapsed()); // Collect immutable variable names for bound propagation in egglog + let t_phase = std::time::Instant::now(); let immutable_vars = var_opt::collect_immutable_vars(&contract.runtime); let immutable_facts: String = immutable_vars .iter() .map(|name| format!("(ImmutableVar \"{name}\")\n")) .collect(); + tracing::debug!(" immutable_vars: {:?}", t_phase.elapsed()); // Include internal function definitions in the same egraph so that // the inline rule (Call + Function → body) can fire. + let t_phase = std::time::Instant::now(); let mut func_lets = String::new(); for (i, func) in contract.internal_functions.iter().enumerate() { let (func_shared, func_sexp, new_next_id) = sexp::expr_to_sexp_dag(func, next_id); @@ -1330,7 +1335,9 @@ pub fn lower_and_optimize( } func_lets.push_str(&format!("(let __fn_{i} {func_sexp})\n")); } + tracing::debug!(" func_lets: {:?}", t_phase.elapsed()); + let t_phase = std::time::Instant::now(); let egglog_program = format!( "{}\n\n{}\n(let __runtime {})\n{}\n{}\n{}\n\n(extract __runtime)\n", prologue(optimize_for), @@ -1340,6 +1347,7 @@ pub fn lower_and_optimize( immutable_facts, schedule ); + tracing::debug!(" format_program: {:?}", t_phase.elapsed()); let prologue_len = prologue(optimize_for).len(); tracing::debug!( " [{}] egglog input: {} bytes (prologue: {}, shared_lets: {}, runtime_sexp: {}, func_lets: {}, immutable: {}, schedule: {})", diff --git a/crates/ir/src/storage_hoist.rs b/crates/ir/src/storage_hoist.rs index c0fdcfd..977c1a0 100644 --- a/crates/ir/src/storage_hoist.rs +++ b/crates/ir/src/storage_hoist.rs @@ -5,7 +5,10 @@ //! SLoad/SStore with Var/VarStore inside the loop. Write-backs are //! emitted after the loop exits. -use std::{collections::HashMap, rc::Rc}; +use std::{ + collections::{HashMap, HashSet}, + rc::Rc, +}; use crate::schema::{ EvmBaseType, EvmBinaryOp, EvmConstant, EvmContext, EvmExpr, EvmTernaryOp, EvmType, RcExpr, @@ -65,7 +68,20 @@ pub fn forward_stores_program(program: &mut crate::schema::EvmProgram) { /// Top-level entry: find Concat chains and apply forwarding. fn forward_stores_expr(expr: &RcExpr) -> RcExpr { - match expr.as_ref() { + let mut cache = HashMap::new(); + forward_stores_expr_inner(expr, &mut cache) +} + +fn forward_stores_expr_inner( + expr: &RcExpr, + cache: &mut HashMap, +) -> RcExpr { + let ptr = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&ptr) { + return Rc::clone(cached); + } + + let result = match expr.as_ref() { EvmExpr::Concat(..) => { // Flatten this Concat chain let mut stmts = Vec::new(); @@ -77,41 +93,55 @@ fn forward_stores_expr(expr: &RcExpr) -> RcExpr { // Phase 3: Recurse into structural sub-bodies of each statement let recursed: Vec = processed .into_iter() - .map(|s| recurse_substructures(&s)) + .map(|s| recurse_substructures_inner(&s, cache)) .collect(); rebuild_concat(&recursed) } // For non-Concat nodes, just recurse into sub-structures - _ => recurse_substructures(expr), - } + _ => recurse_substructures_inner(expr, cache), + }; + + cache.insert(ptr, Rc::clone(&result)); + result } /// Recurse into structural sub-bodies (If branches, `DoWhile` body, `LetBind` body). /// These get their own independent forwarding context. -fn recurse_substructures(expr: &RcExpr) -> RcExpr { - match expr.as_ref() { +fn recurse_substructures_inner( + expr: &RcExpr, + cache: &mut HashMap, +) -> RcExpr { + let ptr = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&ptr) { + return Rc::clone(cached); + } + + let result = match expr.as_ref() { EvmExpr::If(c, i, t, e) => Rc::new(EvmExpr::If( Rc::clone(c), Rc::clone(i), - forward_stores_expr(t), - forward_stores_expr(e), + forward_stores_expr_inner(t, cache), + forward_stores_expr_inner(e, cache), )), EvmExpr::LetBind(name, init, body) => Rc::new(EvmExpr::LetBind( name.clone(), - forward_stores_expr(init), - forward_stores_expr(body), + forward_stores_expr_inner(init, cache), + forward_stores_expr_inner(body, cache), )), EvmExpr::Function(name, in_ty, out_ty, body) => Rc::new(EvmExpr::Function( name.clone(), in_ty.clone(), out_ty.clone(), - forward_stores_expr(body), + forward_stores_expr_inner(body, cache), )), // Don't forward inside DoWhile bodies — they're cyclic (SStores from // iteration N affect SLoads at iteration N+1). Loop hoisting handles these. _ => Rc::clone(expr), - } + }; + + cache.insert(ptr, Rc::clone(&result)); + result } /// Forward `SStore` values and eliminate dead stores in a flat statement list. @@ -184,7 +214,30 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE if known.is_empty() { return Rc::clone(expr); } + let mut cache = HashMap::new(); + replace_sloads_inline_inner(expr, known, &mut cache) +} + +fn replace_sloads_inline_inner( + expr: &RcExpr, + known: &HashMap, + cache: &mut HashMap, +) -> RcExpr { + let ptr = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&ptr) { + return Rc::clone(cached); + } + + let result = replace_sloads_inline_match(expr, known, cache); + cache.insert(ptr, Rc::clone(&result)); + result +} +fn replace_sloads_inline_match( + expr: &RcExpr, + known: &HashMap, + cache: &mut HashMap, +) -> RcExpr { match expr.as_ref() { // SLoad/TLoad → known value EvmExpr::Bop(op @ (EvmBinaryOp::SLoad | EvmBinaryOp::TLoad), slot, _state) => { @@ -202,98 +255,98 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE return Rc::clone(val); } } - let ns = replace_sloads_inline(slot, known); + let ns = replace_sloads_inline_inner(slot, known, cache); Rc::new(EvmExpr::Bop(*op, ns, Rc::clone(_state))) } // Structural nodes: forward into inline parts ONLY EvmExpr::If(cond, inputs, then_b, else_b) => Rc::new(EvmExpr::If( - replace_sloads_inline(cond, known), - replace_sloads_inline(inputs, known), + replace_sloads_inline_inner(cond, known, cache), + replace_sloads_inline_inner(inputs, known, cache), Rc::clone(then_b), // don't forward into branches Rc::clone(else_b), )), EvmExpr::DoWhile(inputs, body) => Rc::new(EvmExpr::DoWhile( - replace_sloads_inline(inputs, known), + replace_sloads_inline_inner(inputs, known, cache), Rc::clone(body), // don't forward into loop body )), EvmExpr::LetBind(name, init, body) => Rc::new(EvmExpr::LetBind( name.clone(), - replace_sloads_inline(init, known), + replace_sloads_inline_inner(init, known, cache), Rc::clone(body), // don't forward into body )), // All other nodes: recurse normally EvmExpr::Bop(op, a, b) => Rc::new(EvmExpr::Bop( *op, - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), )), - EvmExpr::Uop(op, a) => Rc::new(EvmExpr::Uop(*op, replace_sloads_inline(a, known))), + EvmExpr::Uop(op, a) => Rc::new(EvmExpr::Uop(*op, replace_sloads_inline_inner(a, known, cache))), EvmExpr::Top(op, a, b, c) => Rc::new(EvmExpr::Top( *op, - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), - replace_sloads_inline(c, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), + replace_sloads_inline_inner(c, known, cache), )), EvmExpr::Concat(a, b) => Rc::new(EvmExpr::Concat( - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), )), - EvmExpr::Get(a, idx) => Rc::new(EvmExpr::Get(replace_sloads_inline(a, known), *idx)), + EvmExpr::Get(a, idx) => Rc::new(EvmExpr::Get(replace_sloads_inline_inner(a, known, cache), *idx)), EvmExpr::VarStore(name, val) => Rc::new(EvmExpr::VarStore( name.clone(), - replace_sloads_inline(val, known), + replace_sloads_inline_inner(val, known, cache), )), EvmExpr::Revert(a, b, c) => Rc::new(EvmExpr::Revert( - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), - replace_sloads_inline(c, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), + replace_sloads_inline_inner(c, known, cache), )), EvmExpr::ReturnOp(a, b, c) => Rc::new(EvmExpr::ReturnOp( - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), - replace_sloads_inline(c, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), + replace_sloads_inline_inner(c, known, cache), )), EvmExpr::Log(count, topics, data_offset, data_size, state) => { let ts: Vec<_> = topics .iter() - .map(|t| replace_sloads_inline(t, known)) + .map(|t| replace_sloads_inline_inner(t, known, cache)) .collect(); Rc::new(EvmExpr::Log( *count, ts, - replace_sloads_inline(data_offset, known), - replace_sloads_inline(data_size, known), - replace_sloads_inline(state, known), + replace_sloads_inline_inner(data_offset, known, cache), + replace_sloads_inline_inner(data_size, known, cache), + replace_sloads_inline_inner(state, known, cache), )) } - EvmExpr::EnvRead(op, s) => Rc::new(EvmExpr::EnvRead(*op, replace_sloads_inline(s, known))), + EvmExpr::EnvRead(op, s) => Rc::new(EvmExpr::EnvRead(*op, replace_sloads_inline_inner(s, known, cache))), EvmExpr::EnvRead1(op, a, s) => Rc::new(EvmExpr::EnvRead1( *op, - replace_sloads_inline(a, known), - replace_sloads_inline(s, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(s, known, cache), )), EvmExpr::ExtCall(a, b, c, d, e, f, g) => Rc::new(EvmExpr::ExtCall( - replace_sloads_inline(a, known), - replace_sloads_inline(b, known), - replace_sloads_inline(c, known), - replace_sloads_inline(d, known), - replace_sloads_inline(e, known), - replace_sloads_inline(f, known), - replace_sloads_inline(g, known), + replace_sloads_inline_inner(a, known, cache), + replace_sloads_inline_inner(b, known, cache), + replace_sloads_inline_inner(c, known, cache), + replace_sloads_inline_inner(d, known, cache), + replace_sloads_inline_inner(e, known, cache), + replace_sloads_inline_inner(f, known, cache), + replace_sloads_inline_inner(g, known, cache), )), EvmExpr::Call(name, args) => Rc::new(EvmExpr::Call( name.clone(), args.iter() - .map(|a| replace_sloads_inline(a, known)) + .map(|a| replace_sloads_inline_inner(a, known, cache)) .collect(), )), EvmExpr::Function(name, in_ty, out_ty, body) => Rc::new(EvmExpr::Function( name.clone(), in_ty.clone(), out_ty.clone(), - replace_sloads_inline(body, known), + replace_sloads_inline_inner(body, known, cache), )), // Leaf nodes @@ -308,25 +361,25 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE EvmExpr::InlineAsm(inputs, hex, num_outputs) => { let new_inputs: Vec<_> = inputs .iter() - .map(|i| replace_sloads_inline(i, known)) + .map(|i| replace_sloads_inline_inner(i, known, cache)) .collect(); Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) } EvmExpr::DynAlloc(size) => { - let ns = replace_sloads_inline(size, known); + let ns = replace_sloads_inline_inner(size, known, cache); Rc::new(EvmExpr::DynAlloc(ns)) } EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { - let nf = replace_sloads_inline(num_fields, known); + let nf = replace_sloads_inline_inner(num_fields, known, cache); Rc::new(EvmExpr::AllocRegion(*id, nf, *is_dynamic)) } EvmExpr::RegionStore(id, field_idx, val, state) => { - let nv = replace_sloads_inline(val, known); - let ns = replace_sloads_inline(state, known); + let nv = replace_sloads_inline_inner(val, known, cache); + let ns = replace_sloads_inline_inner(state, known, cache); Rc::new(EvmExpr::RegionStore(*id, *field_idx, nv, ns)) } EvmExpr::RegionLoad(id, field_idx, state) => { - let ns = replace_sloads_inline(state, known); + let ns = replace_sloads_inline_inner(state, known, cache); Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) } } @@ -334,19 +387,35 @@ fn replace_sloads_inline(expr: &RcExpr, known: &HashMap) -> RcE /// Check if an expression (non-top-level `SStore`) might modify storage. fn might_modify_storage(expr: &RcExpr) -> bool { + let mut visited = HashSet::new(); + might_modify_storage_inner(expr, &mut visited) +} + +fn might_modify_storage_inner(expr: &RcExpr, visited: &mut HashSet) -> bool { + let ptr = Rc::as_ptr(expr) as usize; + if !visited.insert(ptr) { + return false; + } + match expr.as_ref() { EvmExpr::ExtCall(..) | EvmExpr::InlineAsm(..) | EvmExpr::Top(EvmTernaryOp::SStore | EvmTernaryOp::TStore, ..) => true, EvmExpr::If(c, i, t, e) => { - might_modify_storage(c) - || might_modify_storage(i) - || might_modify_storage(t) - || might_modify_storage(e) - } - EvmExpr::DoWhile(i, b) => might_modify_storage(i) || might_modify_storage(b), - EvmExpr::LetBind(_, init, body) => might_modify_storage(init) || might_modify_storage(body), - EvmExpr::Concat(a, b) => might_modify_storage(a) || might_modify_storage(b), + might_modify_storage_inner(c, visited) + || might_modify_storage_inner(i, visited) + || might_modify_storage_inner(t, visited) + || might_modify_storage_inner(e, visited) + } + EvmExpr::DoWhile(i, b) => { + might_modify_storage_inner(i, visited) || might_modify_storage_inner(b, visited) + } + EvmExpr::LetBind(_, init, body) => { + might_modify_storage_inner(init, visited) || might_modify_storage_inner(body, visited) + } + EvmExpr::Concat(a, b) => { + might_modify_storage_inner(a, visited) || might_modify_storage_inner(b, visited) + } _ => false, } } @@ -364,11 +433,17 @@ fn might_observe_storage(expr: &RcExpr) -> bool { /// Collect all SLoad/TLoad slot keys anywhere in an expression tree (deep recursive). fn collect_sload_slots_deep(expr: &RcExpr) -> Vec { let mut result = Vec::new(); - collect_sload_slots_inner(expr, &mut result); + let mut visited = HashSet::new(); + collect_sload_slots_inner(expr, &mut result, &mut visited); result } -fn collect_sload_slots_inner(expr: &RcExpr, out: &mut Vec) { +fn collect_sload_slots_inner(expr: &RcExpr, out: &mut Vec, visited: &mut HashSet) { + let ptr = Rc::as_ptr(expr) as usize; + if !visited.insert(ptr) { + return; + } + match expr.as_ref() { EvmExpr::Bop(op @ (EvmBinaryOp::SLoad | EvmBinaryOp::TLoad), slot, _) => { let kind = if *op == EvmBinaryOp::SLoad { @@ -382,57 +457,57 @@ fn collect_sload_slots_inner(expr: &RcExpr, out: &mut Vec) { slot_value: sv, }); } - collect_sload_slots_inner(slot, out); + collect_sload_slots_inner(slot, out, visited); } EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) => { - collect_sload_slots_inner(a, out); - collect_sload_slots_inner(b, out); + collect_sload_slots_inner(a, out, visited); + collect_sload_slots_inner(b, out, visited); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => collect_sload_slots_inner(a, out), + EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) => collect_sload_slots_inner(a, out, visited), EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - collect_sload_slots_inner(a, out); - collect_sload_slots_inner(b, out); - collect_sload_slots_inner(c, out); + collect_sload_slots_inner(a, out, visited); + collect_sload_slots_inner(b, out, visited); + collect_sload_slots_inner(c, out, visited); } EvmExpr::If(c, i, t, e) => { - collect_sload_slots_inner(c, out); - collect_sload_slots_inner(i, out); - collect_sload_slots_inner(t, out); - collect_sload_slots_inner(e, out); + collect_sload_slots_inner(c, out, visited); + collect_sload_slots_inner(i, out, visited); + collect_sload_slots_inner(t, out, visited); + collect_sload_slots_inner(e, out, visited); } EvmExpr::DoWhile(i, b) => { - collect_sload_slots_inner(i, out); - collect_sload_slots_inner(b, out); + collect_sload_slots_inner(i, out, visited); + collect_sload_slots_inner(b, out, visited); } EvmExpr::LetBind(_, init, body) => { - collect_sload_slots_inner(init, out); - collect_sload_slots_inner(body, out); + collect_sload_slots_inner(init, out, visited); + collect_sload_slots_inner(body, out, visited); } - EvmExpr::VarStore(_, val) => collect_sload_slots_inner(val, out), + EvmExpr::VarStore(_, val) => collect_sload_slots_inner(val, out, visited), EvmExpr::Log(_, topics, data_offset, data_size, state) => { for t in topics { - collect_sload_slots_inner(t, out); + collect_sload_slots_inner(t, out, visited); } - collect_sload_slots_inner(data_offset, out); - collect_sload_slots_inner(data_size, out); - collect_sload_slots_inner(state, out); + collect_sload_slots_inner(data_offset, out, visited); + collect_sload_slots_inner(data_size, out, visited); + collect_sload_slots_inner(state, out, visited); } - EvmExpr::EnvRead(_, s) => collect_sload_slots_inner(s, out), + EvmExpr::EnvRead(_, s) => collect_sload_slots_inner(s, out, visited), EvmExpr::EnvRead1(_, a, s) => { - collect_sload_slots_inner(a, out); - collect_sload_slots_inner(s, out); + collect_sload_slots_inner(a, out, visited); + collect_sload_slots_inner(s, out, visited); } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { for x in [a, b, c, d, e, f, g] { - collect_sload_slots_inner(x, out); + collect_sload_slots_inner(x, out, visited); } } EvmExpr::Call(_, args) => { for arg in args { - collect_sload_slots_inner(arg, out); + collect_sload_slots_inner(arg, out, visited); } } - EvmExpr::Function(_, _, _, body) => collect_sload_slots_inner(body, out), + EvmExpr::Function(_, _, _, body) => collect_sload_slots_inner(body, out, visited), _ => {} } } diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index 335468c..c79fa25 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -1363,7 +1363,9 @@ impl AstToEgglog { let old_prefix = std::mem::replace(&mut self.inline_prefix, new_prefix); self.inline_counter += 1; self.inline_depth += 1; + let t_inline = std::time::Instant::now(); let result = self.lower_code_block(body)?; + tracing::debug!(" inline depth={} prefix={} elapsed={:?}", self.inline_depth, &self.inline_prefix, t_inline.elapsed()); self.inline_depth -= 1; self.inline_prefix = old_prefix; self.scopes.pop(); diff --git a/crates/ir/src/to_egglog/function.rs b/crates/ir/src/to_egglog/function.rs index cc791c1..d6a45a1 100644 --- a/crates/ir/src/to_egglog/function.rs +++ b/crates/ir/src/to_egglog/function.rs @@ -388,6 +388,7 @@ impl AstToEgglog { .collect(); // Lower all statements + let t_block = std::time::Instant::now(); let mut stmts: Vec = Vec::new(); let last_idx = block.stmts.len().saturating_sub(1); for (idx, item) in block.stmts.iter().enumerate() { @@ -407,6 +408,7 @@ impl AstToEgglog { }; stmts.push(ir); } + let lower_stmts_time = t_block.elapsed(); if stmts.is_empty() { return Ok(ast_helpers::empty( @@ -536,6 +538,11 @@ impl AstToEgglog { .unwrap_or_else(|| ast_helpers::const_int(0, self.current_ctx.clone())); result = ast_helpers::let_bind(var_name, init, result); } + if block.stmts.len() > 5 { + let dag_size = crate::dag_node_count(&result); + tracing::debug!(" lower_code_block: n_stmts={}, dag={}, lower_stmts={:?}, store_fwd+letbind={:?}", + block.stmts.len(), dag_size, lower_stmts_time, t_block.elapsed() - lower_stmts_time); + } Ok(result) } diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index b86df0b..6f4c2cf 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -34,6 +34,15 @@ use crate::{ /// Used during lowering to ensure a `LetBind` init expression doesn't reference /// variables whose `LetBinds` are inner (not yet allocated). pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { + let mut visited = HashSet::new(); + references_any_var_inner(expr, names, &mut visited) +} + +fn references_any_var_inner(expr: &RcExpr, names: &HashSet<&str>, visited: &mut HashSet) -> bool { + let ptr = Rc::as_ptr(expr) as usize; + if !visited.insert(ptr) { + return false; + } match expr.as_ref() { EvmExpr::Var(n) => names.contains(n.as_str()), EvmExpr::Const(..) @@ -43,46 +52,46 @@ pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { | EvmExpr::StorageField(..) | EvmExpr::Drop(_) | EvmExpr::MemRegion(..) => false, - EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(|inp| references_any_var(inp, names)), + EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(|inp| references_any_var_inner(inp, names, visited)), EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { - references_any_var(a, names) || references_any_var(b, names) + references_any_var_inner(a, names, visited) || references_any_var_inner(b, names, visited) } EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { - references_any_var(a, names) + references_any_var_inner(a, names, visited) } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - references_any_var(a, names) - || references_any_var(b, names) - || references_any_var(c, names) + references_any_var_inner(a, names, visited) + || references_any_var_inner(b, names, visited) + || references_any_var_inner(c, names, visited) } EvmExpr::RegionStore(_, _, val, state) => { - references_any_var(val, names) || references_any_var(state, names) + references_any_var_inner(val, names, visited) || references_any_var_inner(state, names, visited) } - EvmExpr::RegionLoad(_, _, state) => references_any_var(state, names), + EvmExpr::RegionLoad(_, _, state) => references_any_var_inner(state, names, visited), EvmExpr::If(c, i, t, e) => { - references_any_var(c, names) - || references_any_var(i, names) - || references_any_var(t, names) - || references_any_var(e, names) + references_any_var_inner(c, names, visited) + || references_any_var_inner(i, names, visited) + || references_any_var_inner(t, names, visited) + || references_any_var_inner(e, names, visited) } - EvmExpr::VarStore(_, v) => references_any_var(v, names), + EvmExpr::VarStore(_, v) => references_any_var_inner(v, names, visited), EvmExpr::LetBind(_, init, body) => { - references_any_var(init, names) || references_any_var(body, names) + references_any_var_inner(init, names, visited) || references_any_var_inner(body, names, visited) } - EvmExpr::EnvRead(_, s) => references_any_var(s, names), - EvmExpr::EnvRead1(_, a, s) => references_any_var(a, names) || references_any_var(s, names), + EvmExpr::EnvRead(_, s) => references_any_var_inner(s, names, visited), + EvmExpr::EnvRead1(_, a, s) => references_any_var_inner(a, names, visited) || references_any_var_inner(s, names, visited), EvmExpr::Log(_, topics, data_offset, data_size, state) => { - topics.iter().any(|t| references_any_var(t, names)) - || references_any_var(data_offset, names) - || references_any_var(data_size, names) - || references_any_var(state, names) + topics.iter().any(|t| references_any_var_inner(t, names, visited)) + || references_any_var_inner(data_offset, names, visited) + || references_any_var_inner(data_size, names, visited) + || references_any_var_inner(state, names, visited) } EvmExpr::ExtCall(a, b, c, d, e, f, g) => [a, b, c, d, e, f, g] .iter() - .any(|x| references_any_var(x, names)), - EvmExpr::Call(_, args) => args.iter().any(|a| references_any_var(a, names)), - EvmExpr::Function(_, _, _, body) => references_any_var(body, names), + .any(|x| references_any_var_inner(x, names, visited)), + EvmExpr::Call(_, args) => args.iter().any(|a| references_any_var_inner(a, names, visited)), + EvmExpr::Function(_, _, _, body) => references_any_var_inner(body, names, visited), } } @@ -444,6 +453,7 @@ impl AstToEgglog { /// Lower an entire program. pub fn lower_program(&mut self, program: &edge_ast::Program) -> Result { + let t_lower = std::time::Instant::now(); let mut contracts = Vec::new(); let mut free_functions = Vec::new(); @@ -895,7 +905,10 @@ impl AstToEgglog { // Fifth pass: eagerly monomorphize generic types used with concrete type args // anywhere in the program (function params, return types, variable decls, etc.) + tracing::debug!(" lower_program passes 1-4: {:?}", t_lower.elapsed()); + let t_phase = std::time::Instant::now(); self.monomorphize_all_type_usages(program)?; + tracing::debug!(" monomorphize_all: {:?}", t_phase.elapsed()); // Save top-level const bindings to inject into each contract scope let toplevel_consts: IndexMap = self @@ -907,6 +920,7 @@ impl AstToEgglog { // Collect free function declarations for potential synthetic contract let mut fn_stmts: Vec<(&edge_ast::FnDecl, &edge_ast::CodeBlock)> = Vec::new(); + let t_phase = std::time::Instant::now(); for stmt in &program.stmts { match stmt { edge_ast::Stmt::ContractDecl(contract) => { @@ -939,6 +953,7 @@ impl AstToEgglog { } } + tracing::debug!(" lower_contracts+fns: {:?}", t_phase.elapsed()); Ok(EvmProgram { contracts, free_functions, @@ -1110,7 +1125,9 @@ impl AstToEgglog { let mut fn_bodies: Vec<(&edge_ast::ContractFnDecl, Option)> = Vec::new(); for fn_decl in &contract.functions { if let Some(body) = &fn_decl.body { + let t_fn = std::time::Instant::now(); let body_ir = self.lower_contract_fn_body(&contract_name, fn_decl, body)?; + tracing::debug!(" lower_fn {}: {:?}", fn_decl.name.name, t_fn.elapsed()); fn_bodies.push((fn_decl, Some(body_ir))); } else { fn_bodies.push((fn_decl, None)); @@ -1118,7 +1135,9 @@ impl AstToEgglog { } // Build dispatcher (runtime entry point) with inlined function bodies + let t_disp = std::time::Instant::now(); let runtime = self.build_dispatcher(&contract_name, &fn_bodies)?; + tracing::debug!(" build_dispatcher: {:?}", t_disp.elapsed()); // Internal functions are stored separately (not Concat'd to runtime) // so they survive halting-DCE in the cleanup pass. diff --git a/crates/ir/src/var_opt.rs b/crates/ir/src/var_opt.rs index a7763ae..73b3401 100644 --- a/crates/ir/src/var_opt.rs +++ b/crates/ir/src/var_opt.rs @@ -46,6 +46,14 @@ struct VarInfo { in_loop: bool, } +impl VarInfo { + fn merge(&mut self, other: &VarInfo) { + self.read_count += other.read_count; + self.write_count += other.write_count; + self.in_loop |= other.in_loop; + } +} + /// Analyze all `LetBind` variables in an expression and decide allocation mode. /// /// Returns a map from variable name to `VarAllocation`. Variables not in the @@ -61,9 +69,21 @@ pub fn analyze_allocations(expr: &RcExpr) -> HashMap { } fn collect_allocations(expr: &RcExpr, result: &mut HashMap) { + let mut visited = HashSet::new(); + collect_allocations_inner(expr, result, &mut visited); +} + +fn collect_allocations_inner( + expr: &RcExpr, + result: &mut HashMap, + visited: &mut HashSet, +) { + if !visited.insert(Rc::as_ptr(expr) as usize) { + return; + } match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { - collect_allocations(init, result); + collect_allocations_inner(init, result, visited); let info = analyze_var(name, body); let mode = if !info.in_loop && info.read_count <= 16 { AllocationMode::Stack @@ -85,55 +105,55 @@ fn collect_allocations(expr: &RcExpr, result: &mut HashMap { - collect_allocations(a, result); - collect_allocations(b, result); + collect_allocations_inner(a, result, visited); + collect_allocations_inner(b, result, visited); } EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) => { - collect_allocations(a, result); + collect_allocations_inner(a, result, visited); } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - collect_allocations(a, result); - collect_allocations(b, result); - collect_allocations(c, result); + collect_allocations_inner(a, result, visited); + collect_allocations_inner(b, result, visited); + collect_allocations_inner(c, result, visited); } EvmExpr::If(c, i, t, e) => { - collect_allocations(c, result); - collect_allocations(i, result); - collect_allocations(t, result); - collect_allocations(e, result); + collect_allocations_inner(c, result, visited); + collect_allocations_inner(i, result, visited); + collect_allocations_inner(t, result, visited); + collect_allocations_inner(e, result, visited); } EvmExpr::DoWhile(inputs, body) => { - collect_allocations(inputs, result); - collect_allocations(body, result); + collect_allocations_inner(inputs, result, visited); + collect_allocations_inner(body, result, visited); } - EvmExpr::EnvRead(_, s) => collect_allocations(s, result), + EvmExpr::EnvRead(_, s) => collect_allocations_inner(s, result, visited), EvmExpr::EnvRead1(_, a, s) => { - collect_allocations(a, result); - collect_allocations(s, result); + collect_allocations_inner(a, result, visited); + collect_allocations_inner(s, result, visited); } EvmExpr::Log(_, topics, data_offset, data_size, state) => { for t in topics { - collect_allocations(t, result); + collect_allocations_inner(t, result, visited); } - collect_allocations(data_offset, result); - collect_allocations(data_size, result); - collect_allocations(state, result); + collect_allocations_inner(data_offset, result, visited); + collect_allocations_inner(data_size, result, visited); + collect_allocations_inner(state, result, visited); } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { for x in [a, b, c, d, e, f, g] { - collect_allocations(x, result); + collect_allocations_inner(x, result, visited); } } EvmExpr::Call(_, args) => { for arg in args { - collect_allocations(arg, result); + collect_allocations_inner(arg, result, visited); } } - EvmExpr::VarStore(_, val) => collect_allocations(val, result), - EvmExpr::Function(_, _, _, body) => collect_allocations(body, result), + EvmExpr::VarStore(_, val) => collect_allocations_inner(val, result, visited), + EvmExpr::Function(_, _, _, body) => collect_allocations_inner(body, result, visited), EvmExpr::Const(..) | EvmExpr::Arg(..) | EvmExpr::Empty(..) @@ -144,18 +164,18 @@ fn collect_allocations(expr: &RcExpr, result: &mut HashMap {} EvmExpr::InlineAsm(inputs, ..) => { for input in inputs { - collect_allocations(input, result); + collect_allocations_inner(input, result, visited); } } EvmExpr::AllocRegion(_, num_fields, _) => { - collect_allocations(num_fields, result); + collect_allocations_inner(num_fields, result, visited); } EvmExpr::RegionStore(_, _, val, state) => { - collect_allocations(val, result); - collect_allocations(state, result); + collect_allocations_inner(val, result, visited); + collect_allocations_inner(state, result, visited); } EvmExpr::RegionLoad(_, _, state) => { - collect_allocations(state, result); + collect_allocations_inner(state, result, visited); } } } @@ -498,36 +518,60 @@ fn apply_letbind_opts( } /// Analyze how a variable is used within an expression. +/// +/// Uses memoization keyed by (node_ptr, in_loop) to avoid exponential DAG traversal +/// while returning correct tree-expanded read/write counts (matching codegen's traversal). fn analyze_var(name: &str, expr: &RcExpr) -> VarInfo { - let mut info = VarInfo::default(); - analyze_var_inner(name, expr, false, &mut info); - info + let mut cache = HashMap::new(); + analyze_var_cached(name, expr, false, &mut cache) } -fn analyze_var_inner(name: &str, expr: &RcExpr, in_loop: bool, info: &mut VarInfo) { +fn analyze_var_cached( + name: &str, + expr: &RcExpr, + in_loop: bool, + cache: &mut HashMap<(usize, bool), VarInfo>, +) -> VarInfo { + let key = (Rc::as_ptr(expr) as usize, in_loop); + if let Some(cached) = cache.get(&key) { + return cached.clone(); + } + let result = analyze_var_compute(name, expr, in_loop, cache); + cache.insert(key, result.clone()); + result +} + +fn analyze_var_compute( + name: &str, + expr: &RcExpr, + in_loop: bool, + cache: &mut HashMap<(usize, bool), VarInfo>, +) -> VarInfo { match expr.as_ref() { EvmExpr::Var(n) if n == name => { - info.read_count += 1; + let mut info = VarInfo::default(); + info.read_count = 1; if in_loop { info.in_loop = true; } + info } EvmExpr::VarStore(n, val) if n == name => { + let mut info = analyze_var_cached(name, val, in_loop, cache); info.write_count += 1; if in_loop { info.in_loop = true; } - analyze_var_inner(name, val, in_loop, info); - } - EvmExpr::VarStore(_, val) => { - analyze_var_inner(name, val, in_loop, info); + info } + EvmExpr::VarStore(_, val) => analyze_var_cached(name, val, in_loop, cache), EvmExpr::LetBind(n, init, body) => { - analyze_var_inner(name, init, in_loop, info); + let mut info = analyze_var_cached(name, init, in_loop, cache); // If this LetBind shadows our variable, don't count refs in its body if n != name { - analyze_var_inner(name, body, in_loop, info); + info.merge(&analyze_var_cached(name, body, in_loop, cache)); } + info } EvmExpr::Var(_) | EvmExpr::Const(..) @@ -536,88 +580,88 @@ fn analyze_var_inner(name: &str, expr: &RcExpr, in_loop: bool, info: &mut VarInf | EvmExpr::Selector(_) | EvmExpr::Drop(_) | EvmExpr::StorageField(..) - | EvmExpr::MemRegion(..) => {} + | EvmExpr::MemRegion(..) => VarInfo::default(), EvmExpr::InlineAsm(inputs, ..) => { + let mut info = VarInfo::default(); for input in inputs { - analyze_var_inner(name, input, in_loop, info); + info.merge(&analyze_var_cached(name, input, in_loop, cache)); } + info } - // For Bop: skip the state parameter (2nd arg) of stateful ops. - // Codegen ignores state parameters, so Var refs there are phantom. EvmExpr::Bop(op, a, b) => { - analyze_var_inner(name, a, in_loop, info); + let mut info = analyze_var_cached(name, a, in_loop, cache); if !op.has_state() { - analyze_var_inner(name, b, in_loop, info); + info.merge(&analyze_var_cached(name, b, in_loop, cache)); } + info } EvmExpr::Concat(a, b) => { - analyze_var_inner(name, a, in_loop, info); - analyze_var_inner(name, b, in_loop, info); + let mut info = analyze_var_cached(name, a, in_loop, cache); + info.merge(&analyze_var_cached(name, b, in_loop, cache)); + info } EvmExpr::DoWhile(inputs, body) => { - analyze_var_inner(name, inputs, in_loop, info); - analyze_var_inner(name, body, true, info); + let mut info = analyze_var_cached(name, inputs, in_loop, cache); + info.merge(&analyze_var_cached(name, body, true, cache)); + info } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { - analyze_var_inner(name, a, in_loop, info); - } - // RegionStore: last arg (state) is state — skip it. + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => analyze_var_cached(name, a, in_loop, cache), EvmExpr::RegionStore(_, _, val, _state) => { - analyze_var_inner(name, val, in_loop, info); + analyze_var_cached(name, val, in_loop, cache) } - // RegionLoad: last arg (state) is state — skip it. - EvmExpr::RegionLoad(_, _, _state) => {} - // Top: last arg may be state OR operand depending on the op. + EvmExpr::RegionLoad(_, _, _state) => VarInfo::default(), EvmExpr::Top(op, a, b, c) => { - analyze_var_inner(name, a, in_loop, info); - analyze_var_inner(name, b, in_loop, info); - // Select, CalldataCopy, Mcopy use all 3 positions as operands (no state param). - // Others (SStore, TStore, MStore, MStore8, Keccak256) have state as 3rd arg. + let mut info = analyze_var_cached(name, a, in_loop, cache); + info.merge(&analyze_var_cached(name, b, in_loop, cache)); match op { EvmTernaryOp::Select | EvmTernaryOp::CalldataCopy | EvmTernaryOp::Mcopy => { - analyze_var_inner(name, c, in_loop, info); + info.merge(&analyze_var_cached(name, c, in_loop, cache)); } _ => {} } + info } - // ReturnOp/Revert: last arg (c) is the state parameter — skip it. EvmExpr::Revert(a, b, _c) | EvmExpr::ReturnOp(a, b, _c) => { - analyze_var_inner(name, a, in_loop, info); - analyze_var_inner(name, b, in_loop, info); + let mut info = analyze_var_cached(name, a, in_loop, cache); + info.merge(&analyze_var_cached(name, b, in_loop, cache)); + info } EvmExpr::If(c, i, t, e) => { - analyze_var_inner(name, c, in_loop, info); - analyze_var_inner(name, i, in_loop, info); - analyze_var_inner(name, t, in_loop, info); - analyze_var_inner(name, e, in_loop, info); - } - // EnvRead/EnvRead1: last arg is state — skip it. - EvmExpr::EnvRead(_, _s) => {} - EvmExpr::EnvRead1(_, a, _s) => { - analyze_var_inner(name, a, in_loop, info); - } - // Log: last arg is state — skip it. + let mut info = analyze_var_cached(name, c, in_loop, cache); + info.merge(&analyze_var_cached(name, i, in_loop, cache)); + info.merge(&analyze_var_cached(name, t, in_loop, cache)); + info.merge(&analyze_var_cached(name, e, in_loop, cache)); + info + } + EvmExpr::EnvRead(_, _s) => VarInfo::default(), + EvmExpr::EnvRead1(_, a, _s) => analyze_var_cached(name, a, in_loop, cache), EvmExpr::Log(_, topics, data_offset, data_size, _state) => { + let mut info = VarInfo::default(); for t in topics { - analyze_var_inner(name, t, in_loop, info); + info.merge(&analyze_var_cached(name, t, in_loop, cache)); } - analyze_var_inner(name, data_offset, in_loop, info); - analyze_var_inner(name, data_size, in_loop, info); + info.merge(&analyze_var_cached(name, data_offset, in_loop, cache)); + info.merge(&analyze_var_cached(name, data_size, in_loop, cache)); + info } - // ExtCall: last arg (g) is state — skip it. EvmExpr::ExtCall(a, b, c, d, e, f, _g) => { + let mut info = VarInfo::default(); for x in [a, b, c, d, e, f] { - analyze_var_inner(name, x, in_loop, info); + info.merge(&analyze_var_cached(name, x, in_loop, cache)); } + info } EvmExpr::Call(_, args) => { + let mut info = VarInfo::default(); for arg in args { - analyze_var_inner(name, arg, in_loop, info); + info.merge(&analyze_var_cached(name, arg, in_loop, cache)); } + info } - EvmExpr::Function(_, _, _, body) => { - analyze_var_inner(name, body, in_loop, info); - } + EvmExpr::Function(_, _, _, body) => analyze_var_cached(name, body, in_loop, cache), } } @@ -741,6 +785,19 @@ fn collect_immutable_vars_rec( immutable: &mut HashSet, mutable: &mut HashSet, ) { + let mut visited = HashSet::new(); + collect_immutable_vars_inner(expr, immutable, mutable, &mut visited); +} + +fn collect_immutable_vars_inner( + expr: &RcExpr, + immutable: &mut HashSet, + mutable: &mut HashSet, + visited: &mut HashSet, +) { + if !visited.insert(std::rc::Rc::as_ptr(expr) as usize) { + return; + } match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { let info = analyze_var(name, body); @@ -749,64 +806,64 @@ fn collect_immutable_vars_rec( } else { mutable.insert(name.clone()); } - collect_immutable_vars_rec(init, immutable, mutable); - collect_immutable_vars_rec(body, immutable, mutable); + collect_immutable_vars_inner(init, immutable, mutable, visited); + collect_immutable_vars_inner(body, immutable, mutable, visited); } EvmExpr::Concat(a, b) | EvmExpr::Bop(_, a, b) | EvmExpr::DoWhile(a, b) => { - collect_immutable_vars_rec(a, immutable, mutable); - collect_immutable_vars_rec(b, immutable, mutable); + collect_immutable_vars_inner(a, immutable, mutable, visited); + collect_immutable_vars_inner(b, immutable, mutable, visited); } EvmExpr::If(c, i, t, e) => { - collect_immutable_vars_rec(c, immutable, mutable); - collect_immutable_vars_rec(i, immutable, mutable); - collect_immutable_vars_rec(t, immutable, mutable); - collect_immutable_vars_rec(e, immutable, mutable); + collect_immutable_vars_inner(c, immutable, mutable, visited); + collect_immutable_vars_inner(i, immutable, mutable, visited); + collect_immutable_vars_inner(t, immutable, mutable, visited); + collect_immutable_vars_inner(e, immutable, mutable, visited); } EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { - collect_immutable_vars_rec(a, immutable, mutable); + collect_immutable_vars_inner(a, immutable, mutable, visited); } EvmExpr::RegionStore(_, _, val, state) => { - collect_immutable_vars_rec(val, immutable, mutable); - collect_immutable_vars_rec(state, immutable, mutable); + collect_immutable_vars_inner(val, immutable, mutable, visited); + collect_immutable_vars_inner(state, immutable, mutable, visited); } EvmExpr::RegionLoad(_, _, state) => { - collect_immutable_vars_rec(state, immutable, mutable); + collect_immutable_vars_inner(state, immutable, mutable, visited); } EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { - collect_immutable_vars_rec(a, immutable, mutable); - collect_immutable_vars_rec(b, immutable, mutable); - collect_immutable_vars_rec(c, immutable, mutable); + collect_immutable_vars_inner(a, immutable, mutable, visited); + collect_immutable_vars_inner(b, immutable, mutable, visited); + collect_immutable_vars_inner(c, immutable, mutable, visited); } EvmExpr::Log(_, topics, data_offset, data_size, state) => { for t in topics { - collect_immutable_vars_rec(t, immutable, mutable); + collect_immutable_vars_inner(t, immutable, mutable, visited); } - collect_immutable_vars_rec(data_offset, immutable, mutable); - collect_immutable_vars_rec(data_size, immutable, mutable); - collect_immutable_vars_rec(state, immutable, mutable); + collect_immutable_vars_inner(data_offset, immutable, mutable, visited); + collect_immutable_vars_inner(data_size, immutable, mutable, visited); + collect_immutable_vars_inner(state, immutable, mutable, visited); } EvmExpr::ExtCall(a, b, c, d, e, f, g) => { for x in [a, b, c, d, e, f, g] { - collect_immutable_vars_rec(x, immutable, mutable); + collect_immutable_vars_inner(x, immutable, mutable, visited); } } EvmExpr::VarStore(_, val) => { - collect_immutable_vars_rec(val, immutable, mutable); + collect_immutable_vars_inner(val, immutable, mutable, visited); } EvmExpr::Call(_, args) => { for arg in args { - collect_immutable_vars_rec(arg, immutable, mutable); + collect_immutable_vars_inner(arg, immutable, mutable, visited); } } EvmExpr::Function(_, _, _, body) => { - collect_immutable_vars_rec(body, immutable, mutable); + collect_immutable_vars_inner(body, immutable, mutable, visited); } EvmExpr::EnvRead(_, s) => { - collect_immutable_vars_rec(s, immutable, mutable); + collect_immutable_vars_inner(s, immutable, mutable, visited); } EvmExpr::EnvRead1(_, a, s) => { - collect_immutable_vars_rec(a, immutable, mutable); - collect_immutable_vars_rec(s, immutable, mutable); + collect_immutable_vars_inner(a, immutable, mutable, visited); + collect_immutable_vars_inner(s, immutable, mutable, visited); } EvmExpr::Const(..) | EvmExpr::Arg(..) @@ -818,7 +875,7 @@ fn collect_immutable_vars_rec( | EvmExpr::MemRegion(..) => {} EvmExpr::InlineAsm(inputs, ..) => { for input in inputs { - collect_immutable_vars_rec(input, immutable, mutable); + collect_immutable_vars_inner(input, immutable, mutable, visited); } } } @@ -2693,10 +2750,25 @@ fn rebuild_nested_letbinds(body: &RcExpr, depth: usize, new_innermost: &RcExpr) } fn dead_store_elim_rec(expr: &RcExpr) -> RcExpr { + let mut cache = HashMap::new(); + dead_store_elim_rec_inner(expr, &mut cache) +} + +fn dead_store_elim_rec_inner(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { + let ptr = Rc::as_ptr(expr) as usize; + if let Some(cached) = cache.get(&ptr) { + return Rc::clone(cached); + } + let result = dead_store_elim_rec_body(expr, cache); + cache.insert(ptr, Rc::clone(&result)); + result +} + +fn dead_store_elim_rec_body(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { match expr.as_ref() { EvmExpr::LetBind(name, init, body) => { - let new_init = dead_store_elim_rec(init); - let new_body = dead_store_elim_rec(body); + let new_init = dead_store_elim_rec_inner(init, cache); + let new_body = dead_store_elim_rec_inner(body, cache); // Flatten body into statement list let mut stmts = Vec::new(); @@ -2782,18 +2854,18 @@ fn dead_store_elim_rec(expr: &RcExpr) -> RcExpr { } } EvmExpr::Concat(a, b) => { - let na = dead_store_elim_rec(a); - let nb = dead_store_elim_rec(b); + let na = dead_store_elim_rec_inner(a, cache); + let nb = dead_store_elim_rec_inner(b, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Concat(na, nb)) } EvmExpr::If(cond, inputs, then_b, else_b) => { - let nc = dead_store_elim_rec(cond); - let ni = dead_store_elim_rec(inputs); - let nt = dead_store_elim_rec(then_b); - let ne = dead_store_elim_rec(else_b); + let nc = dead_store_elim_rec_inner(cond, cache); + let ni = dead_store_elim_rec_inner(inputs, cache); + let nt = dead_store_elim_rec_inner(then_b, cache); + let ne = dead_store_elim_rec_inner(else_b, cache); if Rc::ptr_eq(&nc, cond) && Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nt, then_b) @@ -2804,15 +2876,15 @@ fn dead_store_elim_rec(expr: &RcExpr) -> RcExpr { Rc::new(EvmExpr::If(nc, ni, nt, ne)) } EvmExpr::DoWhile(inputs, body) => { - let ni = dead_store_elim_rec(inputs); - let nb = dead_store_elim_rec(body); + let ni = dead_store_elim_rec_inner(inputs, cache); + let nb = dead_store_elim_rec_inner(body, cache); if Rc::ptr_eq(&ni, inputs) && Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } Rc::new(EvmExpr::DoWhile(ni, nb)) } EvmExpr::Function(name, in_ty, out_ty, body) => { - let nb = dead_store_elim_rec(body); + let nb = dead_store_elim_rec_inner(body, cache); if Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } @@ -2824,56 +2896,56 @@ fn dead_store_elim_rec(expr: &RcExpr) -> RcExpr { )) } EvmExpr::VarStore(name, val) => { - let nv = dead_store_elim_rec(val); + let nv = dead_store_elim_rec_inner(val, cache); if Rc::ptr_eq(&nv, val) { return Rc::clone(expr); } Rc::new(EvmExpr::VarStore(name.clone(), nv)) } EvmExpr::Bop(op, a, b) => { - let na = dead_store_elim_rec(a); - let nb = dead_store_elim_rec(b); + let na = dead_store_elim_rec_inner(a, cache); + let nb = dead_store_elim_rec_inner(b, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) { return Rc::clone(expr); } Rc::new(EvmExpr::Bop(*op, na, nb)) } EvmExpr::Uop(op, a) => { - let na = dead_store_elim_rec(a); + let na = dead_store_elim_rec_inner(a, cache); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Uop(*op, na)) } EvmExpr::Top(op, a, b, c) => { - let na = dead_store_elim_rec(a); - let nb = dead_store_elim_rec(b); - let nc = dead_store_elim_rec(c); + let na = dead_store_elim_rec_inner(a, cache); + let nb = dead_store_elim_rec_inner(b, cache); + let nc = dead_store_elim_rec_inner(c, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } Rc::new(EvmExpr::Top(*op, na, nb, nc)) } EvmExpr::Get(a, idx) => { - let na = dead_store_elim_rec(a); + let na = dead_store_elim_rec_inner(a, cache); if Rc::ptr_eq(&na, a) { return Rc::clone(expr); } Rc::new(EvmExpr::Get(na, *idx)) } EvmExpr::ReturnOp(a, b, c) => { - let na = dead_store_elim_rec(a); - let nb = dead_store_elim_rec(b); - let nc = dead_store_elim_rec(c); + let na = dead_store_elim_rec_inner(a, cache); + let nb = dead_store_elim_rec_inner(b, cache); + let nc = dead_store_elim_rec_inner(c, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } Rc::new(EvmExpr::ReturnOp(na, nb, nc)) } EvmExpr::Revert(a, b, c) => { - let na = dead_store_elim_rec(a); - let nb = dead_store_elim_rec(b); - let nc = dead_store_elim_rec(c); + let na = dead_store_elim_rec_inner(a, cache); + let nb = dead_store_elim_rec_inner(b, cache); + let nc = dead_store_elim_rec_inner(c, cache); if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) { return Rc::clone(expr); } diff --git a/std/globals/vec.edge b/std/globals/vec.edge index 0f0d7d3..1ae65ef 100644 --- a/std/globals/vec.edge +++ b/std/globals/vec.edge @@ -46,6 +46,15 @@ impl Vec { self.len = UnsafeAdd::unsafe_add(self.len, 1); } + fn pop(self: &dm Self) -> T { + if (self.len == 0) { + revert(); + } + self.len = UnsafeSub::unsafe_sub(self.len, 1); + let offset = UnsafeAdd::unsafe_add(self, UnsafeAdd::unsafe_add(64, UnsafeMul::unsafe_mul(self.len, @size_of::()))); + offset.mload() + } + fn get(self: &dm Self, index: u256) -> T { if (index >= self.len) { revert(); From 76aa8aee9d2189b4decc70553ca84d0c74a92032 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:33:12 -0600 Subject: [PATCH 12/13] chore: fix clippy warnings and format Co-Authored-By: Claude Opus 4.6 --- crates/codegen/src/dispatcher.rs | 31 ++--- crates/codegen/src/expr_compiler.rs | 6 +- crates/e2e/tests/suites/bench_vec.rs | 8 +- crates/ir/src/lib.rs | 63 ++++------ crates/ir/src/mem_region.rs | 94 ++++++-------- crates/ir/src/region_forward.rs | 56 +++++---- crates/ir/src/sexp.rs | 156 +++++++++++++++-------- crates/ir/src/storage_hoist.rs | 25 ++-- crates/ir/src/to_egglog/calls.rs | 7 +- crates/ir/src/to_egglog/mod.rs | 44 ++++--- crates/ir/src/var_opt.rs | 177 ++++++++++++++++++++------- 11 files changed, 402 insertions(+), 265 deletions(-) diff --git a/crates/codegen/src/dispatcher.rs b/crates/codegen/src/dispatcher.rs index 06bba24..2df3a7d 100644 --- a/crates/codegen/src/dispatcher.rs +++ b/crates/codegen/src/dispatcher.rs @@ -22,11 +22,21 @@ fn contains_dyn_alloc_inner( } use edge_ir::schema::EvmExpr; match expr.as_ref() { - EvmExpr::DynAlloc(_) => true, - EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { + EvmExpr::DynAlloc(_) | EvmExpr::AllocRegion(_, _, true) => true, + EvmExpr::Bop(_, a, b) + | EvmExpr::Concat(a, b) + | EvmExpr::DoWhile(a, b) + | EvmExpr::LetBind(_, a, b) + | EvmExpr::EnvRead1(_, a, b) + | EvmExpr::RegionStore(_, _, a, b) => { contains_dyn_alloc_inner(a, visited) || contains_dyn_alloc_inner(b, visited) } - EvmExpr::Uop(_, a) => contains_dyn_alloc_inner(a, visited), + EvmExpr::Uop(_, a) + | EvmExpr::VarStore(_, a) + | EvmExpr::EnvRead(_, a) + | EvmExpr::Function(_, _, _, a) + | EvmExpr::Get(a, _) + | EvmExpr::RegionLoad(_, _, a) => contains_dyn_alloc_inner(a, visited), EvmExpr::Top(_, a, b, c) | EvmExpr::If(_, a, b, c) | EvmExpr::Revert(a, b, c) @@ -35,14 +45,6 @@ fn contains_dyn_alloc_inner( || contains_dyn_alloc_inner(b, visited) || contains_dyn_alloc_inner(c, visited) } - EvmExpr::LetBind(_, init, body) => { - contains_dyn_alloc_inner(init, visited) || contains_dyn_alloc_inner(body, visited) - } - EvmExpr::VarStore(_, val) => contains_dyn_alloc_inner(val, visited), - EvmExpr::EnvRead(_, state) => contains_dyn_alloc_inner(state, visited), - EvmExpr::EnvRead1(_, arg, state) => { - contains_dyn_alloc_inner(arg, visited) || contains_dyn_alloc_inner(state, visited) - } EvmExpr::Log(_, topics, offset, size, state) => { topics.iter().any(|t| contains_dyn_alloc_inner(t, visited)) || contains_dyn_alloc_inner(offset, visited) @@ -58,18 +60,11 @@ fn contains_dyn_alloc_inner( || contains_dyn_alloc_inner(f, visited) || contains_dyn_alloc_inner(g, visited) } - EvmExpr::Function(_, _, _, body) => contains_dyn_alloc_inner(body, visited), EvmExpr::Call(_, args) => args.iter().any(|a| contains_dyn_alloc_inner(a, visited)), EvmExpr::InlineAsm(inputs, _, _) => { inputs.iter().any(|i| contains_dyn_alloc_inner(i, visited)) } - EvmExpr::Get(inner, _) => contains_dyn_alloc_inner(inner, visited), - EvmExpr::AllocRegion(_, _, true) => true, EvmExpr::AllocRegion(_, nf, false) => contains_dyn_alloc_inner(nf, visited), - EvmExpr::RegionStore(_, _, val, state) => { - contains_dyn_alloc_inner(val, visited) || contains_dyn_alloc_inner(state, visited) - } - EvmExpr::RegionLoad(_, _, state) => contains_dyn_alloc_inner(state, visited), EvmExpr::Const(..) | EvmExpr::Var(_) | EvmExpr::Drop(_) diff --git a/crates/codegen/src/expr_compiler.rs b/crates/codegen/src/expr_compiler.rs index a03dc95..83cc077 100644 --- a/crates/codegen/src/expr_compiler.rs +++ b/crates/codegen/src/expr_compiler.rs @@ -4,8 +4,10 @@ //! Since the EVM is a stack machine, we compile in postorder: children //! first, then the operator. -use std::collections::{HashMap, HashSet}; -use std::rc::Rc; +use std::{ + collections::{HashMap, HashSet}, + rc::Rc, +}; use edge_ir::{ schema::{ diff --git a/crates/e2e/tests/suites/bench_vec.rs b/crates/e2e/tests/suites/bench_vec.rs index 8ba34f7..24519e5 100644 --- a/crates/e2e/tests/suites/bench_vec.rs +++ b/crates/e2e/tests/suites/bench_vec.rs @@ -1,6 +1,6 @@ #![allow(missing_docs)] -//! Gas benchmarking for Vec operations using the existing test_vec.edge contract. +//! Gas benchmarking for `Vec` operations using the existing `test_vec.edge` contract. //! Uses the test harness to measure actual execution gas at each opt level. use crate::helpers::*; @@ -9,6 +9,7 @@ const CONTRACT: &str = "examples/tests/test_vec.edge"; /// Detailed gas analysis of Vec test functions. #[test] +#[ignore = "gas benchmark doesnt need to run for e2e"] fn bench_vec_gas_breakdown() { let sigs = [ "test_new_and_push()", @@ -39,7 +40,10 @@ fn bench_vec_gas_breakdown() { eprintln!( "║ {:18} ║ {:>6} ║ {:>6} ║ {:>6} ║ {:>6} ║", sig.trim_end_matches("()"), - gases[0], gases[1], gases[2], gases[3] + gases[0], + gases[1], + gases[2], + gases[3] ); } eprintln!("╚════════════════════╩════════╩════════╩════════╩═════════════╝"); diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 23a9166..f62552c 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -27,8 +27,8 @@ pub mod cleanup; pub mod costs; pub mod mem_region; pub mod optimizations; -pub mod region_forward; pub mod pretty; +pub mod region_forward; pub mod schedule; pub mod schema; pub mod sexp; @@ -735,24 +735,17 @@ fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) | EvmExpr::DynAlloc(a) - | EvmExpr::AllocRegion(_, a, _) => { + | EvmExpr::AllocRegion(_, a, _) + | EvmExpr::RegionLoad(_, _, a) + | EvmExpr::Function(_, _, _, a) => { add!(a); } EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) - | EvmExpr::EnvRead1(_, a, b) => { - add!(a); - add!(b); - } - EvmExpr::RegionStore(_, _, a, b) => { - add!(a); - add!(b); - } - EvmExpr::RegionLoad(_, _, a) => { - add!(a); - } - EvmExpr::LetBind(_, a, b) => { + | EvmExpr::EnvRead1(_, a, b) + | EvmExpr::RegionStore(_, _, a, b) + | EvmExpr::LetBind(_, a, b) => { add!(a); add!(b); } @@ -767,9 +760,6 @@ fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) add!(c); add!(d); } - EvmExpr::Function(_, _, _, a) => { - add!(a); - } EvmExpr::Call(_, args) => { for a in args { add!(a); @@ -802,7 +792,7 @@ fn dag_count_rec(expr: &RcExpr, visited: &mut std::collections::HashSet) } /// Walk the top-level Concat spine and record each child's DAG size + label. -/// Recurses into LetBind bodies and If branches to break down the dispatcher. +/// Recurses into `LetBind` bodies and If branches to break down the dispatcher. fn collect_top_concat_sizes_dag(expr: &RcExpr, out: &mut Vec<(String, usize)>, depth: usize) { if depth > 6 { out.push(( @@ -864,7 +854,7 @@ pub struct IrStats { pub total_nodes: usize, /// Maximum tree depth pub max_depth: usize, - /// Count of LetBind nodes (proxy for variable allocations) + /// Count of `LetBind` nodes (proxy for variable allocations) pub let_binds: usize, /// Count of Function nodes pub functions: usize, @@ -874,17 +864,17 @@ pub struct IrStats { pub concats: usize, /// Count of If nodes pub ifs: usize, - /// Count of VarStore nodes + /// Count of `VarStore` nodes pub var_stores: usize, /// Count of Var nodes (reads) pub var_reads: usize, - /// Count of DynAlloc nodes + /// Count of `DynAlloc` nodes pub dyn_allocs: usize, /// Per-variable Var read counts pub var_read_names: HashMap, - /// Per-variable LetBind counts + /// Per-variable `LetBind` counts pub let_bind_names: HashMap, - /// Per-variable VarStore counts + /// Per-variable `VarStore` counts pub var_store_names: HashMap, /// Subtree sizes for top-level Concat children (to identify where bulk lives) pub top_concat_child_sizes: Vec<(String, usize)>, @@ -1066,22 +1056,15 @@ fn ir_stats_dag( | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) | EvmExpr::DynAlloc(a) - | EvmExpr::AllocRegion(_, a, _) => go!(a), + | EvmExpr::AllocRegion(_, a, _) + | EvmExpr::RegionLoad(_, _, a) + | EvmExpr::Function(_, _, _, a) => go!(a), EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) - | EvmExpr::EnvRead1(_, a, b) => { - go!(a); - go!(b); - } - EvmExpr::RegionStore(_, _, a, b) => { - go!(a); - go!(b); - } - EvmExpr::RegionLoad(_, _, a) => { - go!(a); - } - EvmExpr::LetBind(_, a, b) => { + | EvmExpr::EnvRead1(_, a, b) + | EvmExpr::RegionStore(_, _, a, b) + | EvmExpr::LetBind(_, a, b) => { go!(a); go!(b); } @@ -1096,7 +1079,6 @@ fn ir_stats_dag( go!(c); go!(e); } - EvmExpr::Function(_, _, _, body) => go!(body), EvmExpr::Call(_, args) => { for a in args { go!(a); @@ -1460,10 +1442,9 @@ pub fn lower_and_optimize( let _ = func_egraph .parse_and_run_program(None, &func_program) .map_err(|e| IrError::Egglog(format!("{e}")))?; - let func_report = func_egraph - .get_extract_report() - .as_ref() - .ok_or_else(|| IrError::Extraction("no extract report from func egglog".to_owned()))?; + let func_report = func_egraph.get_extract_report().as_ref().ok_or_else(|| { + IrError::Extraction("no extract report from func egglog".to_owned()) + })?; let optimized_func = sexp::extract_report_to_expr(func_report)?; let optimized_func = cleanup::cleanup_expr_pub(&optimized_func); optimized_functions.push(optimized_func); diff --git a/crates/ir/src/mem_region.rs b/crates/ir/src/mem_region.rs index a7f45c4..7ebf20f 100644 --- a/crates/ir/src/mem_region.rs +++ b/crates/ir/src/mem_region.rs @@ -17,9 +17,7 @@ use std::{ rc::Rc, }; -use crate::schema::{ - EvmBaseType, EvmConstant, EvmContext, EvmExpr, EvmProgram, EvmType, RcExpr, -}; +use crate::schema::{EvmBaseType, EvmConstant, EvmContext, EvmExpr, EvmProgram, EvmType, RcExpr}; /// Scope tree node for memory region allocation. /// @@ -103,7 +101,7 @@ fn assign_scoped_offsets( } } -/// Check whether a subtree contains any MemRegion nodes (memoized by Rc pointer). +/// Check whether a subtree contains any `MemRegion` nodes (memoized by Rc pointer). fn has_mem_region(expr: &RcExpr, cache: &mut HashMap) -> bool { let ptr = Rc::as_ptr(expr) as usize; if let Some(&result) = cache.get(&ptr) { @@ -114,9 +112,7 @@ fn has_mem_region(expr: &RcExpr, cache: &mut HashMap) -> bool { EvmExpr::Concat(a, b) | EvmExpr::Bop(_, a, b) | EvmExpr::DoWhile(a, b) - | EvmExpr::EnvRead1(_, a, b) => { - has_mem_region(a, cache) || has_mem_region(b, cache) - } + | EvmExpr::EnvRead1(_, a, b) => has_mem_region(a, cache) || has_mem_region(b, cache), EvmExpr::If(a, b, c, d) => { has_mem_region(a, cache) || has_mem_region(b, cache) @@ -126,12 +122,8 @@ fn has_mem_region(expr: &RcExpr, cache: &mut HashMap) -> bool { EvmExpr::LetBind(_, init, body) => { has_mem_region(init, cache) || has_mem_region(body, cache) } - EvmExpr::Top(_, a, b, c) - | EvmExpr::Revert(a, b, c) - | EvmExpr::ReturnOp(a, b, c) => { - has_mem_region(a, cache) - || has_mem_region(b, cache) - || has_mem_region(c, cache) + EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { + has_mem_region(a, cache) || has_mem_region(b, cache) || has_mem_region(c, cache) } EvmExpr::Function(_, _, _, body) => has_mem_region(body, cache), EvmExpr::Uop(_, a) @@ -150,11 +142,9 @@ fn has_mem_region(expr: &RcExpr, cache: &mut HashMap) -> bool { || has_mem_region(s, cache) || has_mem_region(st, cache) } - EvmExpr::ExtCall(a, b, c, d, e, f, g) => { - [a, b, c, d, e, f, g] - .iter() - .any(|x| has_mem_region(x, cache)) - } + EvmExpr::ExtCall(a, b, c, d, e, f, g) => [a, b, c, d, e, f, g] + .iter() + .any(|x| has_mem_region(x, cache)), EvmExpr::Call(_, args) => args.iter().any(|a| has_mem_region(a, cache)), EvmExpr::InlineAsm(inputs, ..) => inputs.iter().any(|a| has_mem_region(a, cache)), _ => false, @@ -169,10 +159,7 @@ fn collect_region_scopes(expr: &RcExpr) -> RegionScope { collect_region_scopes_inner(expr, &mut hmr_cache) } -fn collect_region_scopes_inner( - expr: &RcExpr, - hmr_cache: &mut HashMap, -) -> RegionScope { +fn collect_region_scopes_inner(expr: &RcExpr, hmr_cache: &mut HashMap) -> RegionScope { // Fast path: if this subtree contains no MemRegion nodes, skip traversal. if !has_mem_region(expr, hmr_cache) { return RegionScope::Sequential(vec![]); @@ -351,13 +338,13 @@ pub fn assign_program_offsets( } } -/// Context for region resolution: both MemRegion offset assignments and +/// Context for region resolution: both `MemRegion` offset assignments and /// RegionStore/RegionLoad → MStore/MLoad variable mappings. #[derive(Debug)] pub struct RegionResolveCtx { - /// MemRegion id → concrete byte offset + /// `MemRegion` id → concrete byte offset pub assignments: BTreeMap, - /// Region id → LetBind variable name (for &dm struct field access) + /// Region id → `LetBind` variable name (for &dm struct field access) pub region_var_map: indexmap::IndexMap, } @@ -447,7 +434,8 @@ fn replace_regions_inner( let ni = rec!(i); let nt = rec!(t); let ne = rec!(e); - if Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&ni, i) && Rc::ptr_eq(&nt, t) && Rc::ptr_eq(&ne, e) { + if Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&ni, i) && Rc::ptr_eq(&nt, t) && Rc::ptr_eq(&ne, e) + { return Rc::clone(expr); } Rc::new(EvmExpr::If(nc, ni, nt, ne)) @@ -494,15 +482,14 @@ fn replace_regions_inner( Rc::new(EvmExpr::ReturnOp(na, nb, nc)) } EvmExpr::Log(count, topics, d, s, st) => { - let nt: Vec<_> = topics - .iter() - .map(|t| rec!(t)) - .collect(); + let nt: Vec<_> = topics.iter().map(|t| rec!(t)).collect(); let nd = rec!(d); let ns = rec!(s); let nst = rec!(st); if nt.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) - && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ns, s) && Rc::ptr_eq(&nst, st) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ns, s) + && Rc::ptr_eq(&nst, st) { return Rc::clone(expr); } @@ -516,8 +503,12 @@ fn replace_regions_inner( let ne = rec!(e); let nf = rec!(f); let ng = rec!(g); - if Rc::ptr_eq(&na, a) && Rc::ptr_eq(&nb, b) && Rc::ptr_eq(&nc, c) - && Rc::ptr_eq(&nd, d) && Rc::ptr_eq(&ne, e) && Rc::ptr_eq(&nf, f) + if Rc::ptr_eq(&na, a) + && Rc::ptr_eq(&nb, b) + && Rc::ptr_eq(&nc, c) + && Rc::ptr_eq(&nd, d) + && Rc::ptr_eq(&ne, e) + && Rc::ptr_eq(&nf, f) && Rc::ptr_eq(&ng, g) { return Rc::clone(expr); @@ -525,11 +516,12 @@ fn replace_regions_inner( Rc::new(EvmExpr::ExtCall(na, nb, nc, nd, ne, nf, ng)) } EvmExpr::Call(name, args) => { - let new_args: Vec<_> = args + let new_args: Vec<_> = args.iter().map(|a| rec!(a)).collect(); + if new_args .iter() - .map(|a| rec!(a)) - .collect(); - if new_args.iter().zip(args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + .zip(args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(expr); } Rc::new(EvmExpr::Call(name.clone(), new_args)) @@ -562,10 +554,7 @@ fn replace_regions_inner( Rc::new(EvmExpr::EnvRead1(*op, na, ns)) } EvmExpr::InlineAsm(inputs, hex, num_outputs) => { - let ni: Vec<_> = inputs - .iter() - .map(|i| rec!(i)) - .collect(); + let ni: Vec<_> = inputs.iter().map(|i| rec!(i)).collect(); if ni.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { return Rc::clone(expr); } @@ -632,10 +621,7 @@ pub fn resolve_regions_post_egglog( } } -fn resolve_region_expr( - expr: &RcExpr, - region_var_map: &indexmap::IndexMap, -) -> RcExpr { +fn resolve_region_expr(expr: &RcExpr, region_var_map: &indexmap::IndexMap) -> RcExpr { let mut cache = std::collections::HashMap::new(); resolve_region_memo(expr, region_var_map, &mut cache) } @@ -703,11 +689,7 @@ fn resolve_region_inner( let ns = rec!(state); if let Some(var_name) = rvm.get(id) { let offset = region_offset(var_name, *field_idx); - Rc::new(EvmExpr::Bop( - crate::schema::EvmBinaryOp::MLoad, - offset, - ns, - )) + Rc::new(EvmExpr::Bop(crate::schema::EvmBinaryOp::MLoad, offset, ns)) } else { Rc::new(EvmExpr::RegionLoad(*id, *field_idx, ns)) } @@ -757,10 +739,7 @@ fn resolve_region_inner( let ni = rec!(i); let nt = rec!(t); let ne = rec!(e); - if Rc::ptr_eq(&nc, c) - && Rc::ptr_eq(&ni, i) - && Rc::ptr_eq(&nt, t) - && Rc::ptr_eq(&ne, e) + if Rc::ptr_eq(&nc, c) && Rc::ptr_eq(&ni, i) && Rc::ptr_eq(&nt, t) && Rc::ptr_eq(&ne, e) { return Rc::clone(expr); } @@ -857,7 +836,12 @@ fn resolve_region_inner( if Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } - Rc::new(EvmExpr::Function(name.clone(), in_ty.clone(), out_ty.clone(), nb)) + Rc::new(EvmExpr::Function( + name.clone(), + in_ty.clone(), + out_ty.clone(), + nb, + )) } EvmExpr::EnvRead(op, s) => { let ns = rec!(s); diff --git a/crates/ir/src/region_forward.rs b/crates/ir/src/region_forward.rs index 532c153..d0c58af 100644 --- a/crates/ir/src/region_forward.rs +++ b/crates/ir/src/region_forward.rs @@ -1,15 +1,14 @@ //! Region store forwarding pass. //! //! Walks the IR in program order (Concat chains) and forwards known -//! RegionStore values to subsequent RegionLoad reads. This enables +//! `RegionStore` values to subsequent `RegionLoad` reads. This enables //! compile-time resolution of struct field access patterns like Vec's //! len/capacity fields. //! //! Runs after lowering, before egglog. Forwarded constants enable //! egglog's constant folding and dead-branch elimination. -use std::collections::HashMap; -use std::rc::Rc; +use std::{collections::HashMap, rc::Rc}; use crate::schema::{EvmBinaryOp, EvmConstant, EvmExpr, EvmProgram, EvmUnaryOp, RcExpr}; @@ -51,9 +50,9 @@ pub fn forward_region_stores_program( } struct ForwardState<'a> { - /// Known values for (region_id, field_idx). + /// Known values for (`region_id`, `field_idx`). known: HashMap<(i64, i64), RcExpr>, - /// Variable name → all region_ids for that variable name. + /// Variable name → all `region_ids` for that variable name. reverse_map: &'a HashMap>, } @@ -156,7 +155,11 @@ fn forward_expr(expr: &RcExpr, state: &mut ForwardState<'_>) -> RcExpr { if let Some(rids) = state.reverse_map.get(name.as_str()) { for &rid in rids { if state.known.keys().any(|&(r, _)| r == rid) { - tracing::trace!("region_forward: VarStore to region var '{}' → clear region {}", name, rid); + tracing::trace!( + "region_forward: VarStore to region var '{}' → clear region {}", + name, + rid + ); state.clear_region(rid); } } @@ -181,15 +184,17 @@ fn forward_expr(expr: &RcExpr, state: &mut ForwardState<'_>) -> RcExpr { let nt = forward_expr(then_br, state); // Emit the If with the original condition (egglog will fold it) return Rc::new(EvmExpr::If(nc, ni, nt, Rc::clone(else_br))); - } else { - // Condition is false → only else-branch executes - let ne = forward_expr(else_br, state); - return Rc::new(EvmExpr::If(nc, ni, Rc::clone(then_br), ne)); } + // Condition is false → only else-branch executes + let ne = forward_expr(else_br, state); + return Rc::new(EvmExpr::If(nc, ni, Rc::clone(then_br), ne)); } // Can't evaluate → conservative: process both, clear modified regions - tracing::trace!("region_forward: If condition could NOT be evaluated: {:?}", nc); + tracing::trace!( + "region_forward: If condition could NOT be evaluated: {:?}", + nc + ); let saved = state.known.clone(); let nt = forward_expr(then_br, state); let then_known = state.known.clone(); @@ -343,7 +348,12 @@ fn forward_expr(expr: &RcExpr, state: &mut ForwardState<'_>) -> RcExpr { if Rc::ptr_eq(&nb, body) { return Rc::clone(expr); } - Rc::new(EvmExpr::Function(name.clone(), in_ty.clone(), out_ty.clone(), nb)) + Rc::new(EvmExpr::Function( + name.clone(), + in_ty.clone(), + out_ty.clone(), + nb, + )) } // EnvRead: state parameter — skip it. EvmExpr::EnvRead(_op, _s) => Rc::clone(expr), @@ -389,8 +399,8 @@ fn forward_expr(expr: &RcExpr, state: &mut ForwardState<'_>) -> RcExpr { } } -/// Find which region_id from the candidate list appears in the expression. -/// Used to disambiguate when multiple region_ids map to the same variable name. +/// Find which `region_id` from the candidate list appears in the expression. +/// Used to disambiguate when multiple `region_ids` map to the same variable name. fn find_region_id_in_expr(expr: &RcExpr, candidates: &[i64]) -> Option { match expr.as_ref() { EvmExpr::RegionLoad(rid, _, _) | EvmExpr::RegionStore(rid, _, _, _) => { @@ -405,15 +415,17 @@ fn find_region_id_in_expr(expr: &RcExpr, candidates: &[i64]) -> Option { _ => None, } } - EvmExpr::Concat(a, b) => find_region_id_in_expr(a, candidates) - .or_else(|| find_region_id_in_expr(b, candidates)), + EvmExpr::Concat(a, b) => { + find_region_id_in_expr(a, candidates).or_else(|| find_region_id_in_expr(b, candidates)) + } EvmExpr::LetBind(_, init, body) => find_region_id_in_expr(init, candidates) .or_else(|| find_region_id_in_expr(body, candidates)), EvmExpr::If(cond, _, then_br, else_br) => find_region_id_in_expr(cond, candidates) .or_else(|| find_region_id_in_expr(then_br, candidates)) .or_else(|| find_region_id_in_expr(else_br, candidates)), - EvmExpr::Bop(_, a, b) => find_region_id_in_expr(a, candidates) - .or_else(|| find_region_id_in_expr(b, candidates)), + EvmExpr::Bop(_, a, b) => { + find_region_id_in_expr(a, candidates).or_else(|| find_region_id_in_expr(b, candidates)) + } EvmExpr::Uop(_, a) => find_region_id_in_expr(a, candidates), EvmExpr::Top(_, a, b, c) => find_region_id_in_expr(a, candidates) .or_else(|| find_region_id_in_expr(b, candidates)) @@ -424,7 +436,7 @@ fn find_region_id_in_expr(expr: &RcExpr, candidates: &[i64]) -> Option { /// Find the "return variable" of an expression — the variable whose value /// is the result of evaluating the expression. Traces through Concat chains -/// (which return b) and LetBind (which returns body). +/// (which return b) and `LetBind` (which returns body). fn find_return_var(expr: &RcExpr) -> Option { match expr.as_ref() { EvmExpr::Var(name) => Some(name.clone()), @@ -436,7 +448,7 @@ fn find_return_var(expr: &RcExpr) -> Option { /// Extract initial field values from an init expression. /// -/// Scans for MStore patterns that write to fields of the given inner variable: +/// Scans for `MStore` patterns that write to fields of the given inner variable: /// - `MStore(Var(inner), val, _)` → field 0 /// - `MStore(Add(Var(inner), Const(32)), val, _)` → field 1 /// - `MStore(Add(Var(inner), Const(64)), val, _)` → field 2 @@ -529,9 +541,7 @@ fn try_eval_const(expr: &RcExpr) -> Option { // LargeInt is stored as a hex string Some(b != "0" && b != "0x0" && !b.chars().all(|c| c == '0' || c == 'x')) } - EvmExpr::Uop(EvmUnaryOp::IsZero, a) => { - try_eval_const(a).map(|v| !v) - } + EvmExpr::Uop(EvmUnaryOp::IsZero, a) => try_eval_const(a).map(|v| !v), EvmExpr::Bop(EvmBinaryOp::Lt, a, b) => { let av = try_eval_u256(a)?; let bv = try_eval_u256(b)?; diff --git a/crates/ir/src/sexp.rs b/crates/ir/src/sexp.rs index 71a5f02..276c539 100644 --- a/crates/ir/src/sexp.rs +++ b/crates/ir/src/sexp.rs @@ -5,10 +5,7 @@ use std::{collections::HashMap, rc::Rc}; -use egglog::{ - ast::Literal, - ExtractReport, Term, TermDag, TermId, -}; +use egglog::{ast::Literal, ExtractReport, Term, TermDag, TermId}; use crate::{ schema::{ @@ -162,7 +159,10 @@ pub fn expr_to_sexp(expr: &EvmExpr) -> String { EvmExpr::MemRegion(id, size) => format!("(MemRegion {id} {size})"), EvmExpr::DynAlloc(size) => format!("(DynAlloc {})", expr_to_sexp(size)), EvmExpr::AllocRegion(id, num_fields, is_dynamic) => { - format!("(AllocRegion {id} {} {is_dynamic})", expr_to_sexp(num_fields)) + format!( + "(AllocRegion {id} {} {is_dynamic})", + expr_to_sexp(num_fields) + ) } EvmExpr::RegionStore(id, field, val, state) => { format!( @@ -397,17 +397,15 @@ fn count_refs_dag(expr: &RcExpr, counts: &mut HashMap, visited: &m | EvmExpr::VarStore(_, a) | EvmExpr::Get(a, _) | EvmExpr::EnvRead(_, a) - | EvmExpr::DynAlloc(a) => { + | EvmExpr::DynAlloc(a) + | EvmExpr::Function(_, _, _, a) => { visit!(a); } EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) - | EvmExpr::EnvRead1(_, a, b) => { - visit!(a); - visit!(b); - } - EvmExpr::LetBind(_, a, b) => { + | EvmExpr::EnvRead1(_, a, b) + | EvmExpr::LetBind(_, a, b) => { visit!(a); visit!(b); } @@ -422,9 +420,6 @@ fn count_refs_dag(expr: &RcExpr, counts: &mut HashMap, visited: &m visit!(c); visit!(d); } - EvmExpr::Function(_, _, _, a) => { - visit!(a); - } EvmExpr::Call(_, args) => { for a in args { visit!(a); @@ -504,7 +499,7 @@ fn dag_sexp_rec(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { sexp } -/// Serialize a single node's s-expression, using dag_sexp_rec for children. +/// Serialize a single node's s-expression, using `dag_sexp_rec` for children. fn dag_sexp_node(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { match expr.as_ref() { EvmExpr::Arg(ty, c) => format!("(Arg {} {})", type_sexp(ty), ctx_sexp(c)), @@ -655,10 +650,7 @@ fn dag_sexp_node(expr: &RcExpr, ctx: &mut DagSexpCtx) -> String { ) } EvmExpr::RegionLoad(id, field, state) => { - format!( - "(RegionLoad {id} {field} {})", - dag_sexp_rec(state, ctx) - ) + format!("(RegionLoad {id} {field} {})", dag_sexp_rec(state, ctx)) } } } @@ -1238,9 +1230,9 @@ pub fn extract_report_to_expr(report: &ExtractReport) -> Result let mut cache: HashMap = HashMap::new(); termdag_convert(termdag, root_id, &mut cache) } - ExtractReport::Variants { .. } => { - Err(IrError::Extraction("expected Best extract, got Variants".to_owned())) - } + ExtractReport::Variants { .. } => Err(IrError::Extraction( + "expected Best extract, got Variants".to_owned(), + )), } } @@ -1434,35 +1426,41 @@ fn termdag_convert( ))), } } - Term::Lit(_) | Term::Var(_) => { - Err(IrError::Extraction(format!("termdag: unexpected term: {term:?}"))) - } + Term::Lit(_) | Term::Var(_) => Err(IrError::Extraction(format!( + "termdag: unexpected term: {term:?}" + ))), }?; cache.insert(id, Rc::clone(&result)); Ok(result) } -/// Extract an i64 literal from a TermDag node. +/// Extract an i64 literal from a `TermDag` node. fn td_i64(dag: &TermDag, id: TermId) -> Result { match dag.get(id) { Term::Lit(Literal::Int(n)) => Ok(*n), - other => Err(IrError::Extraction(format!("expected i64 literal, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "expected i64 literal, got: {other:?}" + ))), } } -/// Extract a string literal from a TermDag node. +/// Extract a string literal from a `TermDag` node. fn td_string(dag: &TermDag, id: TermId) -> Result { match dag.get(id) { Term::Lit(Literal::String(sym)) => Ok(sym.as_str().to_owned()), - other => Err(IrError::Extraction(format!("expected string literal, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "expected string literal, got: {other:?}" + ))), } } -/// Extract a bool literal from a TermDag node. +/// Extract a bool literal from a `TermDag` node. fn td_bool(dag: &TermDag, id: TermId) -> Result { match dag.get(id) { Term::Lit(Literal::Bool(b)) => Ok(*b), - other => Err(IrError::Extraction(format!("expected bool literal, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "expected bool literal, got: {other:?}" + ))), } } @@ -1473,9 +1471,13 @@ fn td_const(dag: &TermDag, id: TermId) -> Result { "LargeInt" => Ok(EvmConstant::LargeInt(td_string(dag, args[0])?)), "ConstBool" => Ok(EvmConstant::Bool(td_bool(dag, args[0])?)), "ConstAddr" => Ok(EvmConstant::Addr(td_string(dag, args[0])?)), - other => Err(IrError::Extraction(format!("termdag: unknown constant: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown constant: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected constant, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected constant, got: {other:?}" + ))), } } @@ -1492,9 +1494,13 @@ fn td_type(dag: &TermDag, id: TermId) -> Result { let len = td_i64(dag, args[1])? as usize; Ok(EvmType::ArrayT(elem, len)) } - other => Err(IrError::Extraction(format!("termdag: unknown type: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown type: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected type, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected type, got: {other:?}" + ))), } } @@ -1508,9 +1514,13 @@ fn td_basetype(dag: &TermDag, id: TermId) -> Result { "BoolT" => Ok(EvmBaseType::BoolT), "UnitT" => Ok(EvmBaseType::UnitT), "StateT" => Ok(EvmBaseType::StateT), - other => Err(IrError::Extraction(format!("termdag: unknown base type: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown base type: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected base type, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected base type, got: {other:?}" + ))), } } @@ -1525,15 +1535,27 @@ fn td_type_list(dag: &TermDag, id: TermId) -> Result, IrError> result.push(td_basetype(dag, args[0])?); cur = args[1]; } - other => return Err(IrError::Extraction(format!("termdag: expected TLCons/TLNil, got: {other}"))), + other => { + return Err(IrError::Extraction(format!( + "termdag: expected TLCons/TLNil, got: {other}" + ))) + } }, - other => return Err(IrError::Extraction(format!("termdag: expected type list, got: {other:?}"))), + other => { + return Err(IrError::Extraction(format!( + "termdag: expected type list, got: {other:?}" + ))) + } } } Ok(result) } -fn td_ctx(dag: &TermDag, id: TermId, cache: &mut HashMap) -> Result { +fn td_ctx( + dag: &TermDag, + id: TermId, + cache: &mut HashMap, +) -> Result { match dag.get(id) { Term::App(sym, args) => match sym.as_str() { "InFunction" => Ok(EvmContext::InFunction(td_string(dag, args[0])?)), @@ -1548,9 +1570,13 @@ fn td_ctx(dag: &TermDag, id: TermId, cache: &mut HashMap) -> Res let pred = termdag_convert(dag, args[1], cache)?; Ok(EvmContext::InLoop(input, pred)) } - other => Err(IrError::Extraction(format!("termdag: unknown context: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown context: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected context, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected context, got: {other:?}" + ))), } } @@ -1586,9 +1612,13 @@ fn td_binop(dag: &TermDag, id: TermId) -> Result { "OpTLoad" => Ok(EvmBinaryOp::TLoad), "OpMLoad" => Ok(EvmBinaryOp::MLoad), "OpCalldataLoad" => Ok(EvmBinaryOp::CalldataLoad), - other => Err(IrError::Extraction(format!("termdag: unknown binary op: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown binary op: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected binary op, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected binary op, got: {other:?}" + ))), } } @@ -1600,9 +1630,13 @@ fn td_unop(dag: &TermDag, id: TermId) -> Result { "OpNeg" => Ok(EvmUnaryOp::Neg), "OpSignExtend" => Ok(EvmUnaryOp::SignExtend), "OpClz" => Ok(EvmUnaryOp::Clz), - other => Err(IrError::Extraction(format!("termdag: unknown unary op: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown unary op: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected unary op, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected unary op, got: {other:?}" + ))), } } @@ -1617,9 +1651,13 @@ fn td_ternop(dag: &TermDag, id: TermId) -> Result { "OpSelect" => Ok(EvmTernaryOp::Select), "OpCalldataCopy" => Ok(EvmTernaryOp::CalldataCopy), "OpMcopy" => Ok(EvmTernaryOp::Mcopy), - other => Err(IrError::Extraction(format!("termdag: unknown ternary op: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown ternary op: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected ternary op, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected ternary op, got: {other:?}" + ))), } } @@ -1644,13 +1682,17 @@ fn td_envop(dag: &TermDag, id: TermId) -> Result { "EnvBalance" => Ok(EvmEnvOp::Balance), "EnvCodeSize" => Ok(EvmEnvOp::CodeSize), "EnvReturnDataSize" => Ok(EvmEnvOp::ReturnDataSize), - other => Err(IrError::Extraction(format!("termdag: unknown env op: {other}"))), + other => Err(IrError::Extraction(format!( + "termdag: unknown env op: {other}" + ))), }, - other => Err(IrError::Extraction(format!("termdag: expected env op, got: {other:?}"))), + other => Err(IrError::Extraction(format!( + "termdag: expected env op, got: {other:?}" + ))), } } -/// Convert a Cons/Nil list in TermDag to Vec. +/// Convert a Cons/Nil list in `TermDag` to `Vec`. fn td_list( dag: &TermDag, id: TermId, @@ -1666,9 +1708,17 @@ fn td_list( result.push(termdag_convert(dag, args[0], cache)?); cur = args[1]; } - other => return Err(IrError::Extraction(format!("termdag: expected Cons/Nil, got: {other}"))), + other => { + return Err(IrError::Extraction(format!( + "termdag: expected Cons/Nil, got: {other}" + ))) + } }, - other => return Err(IrError::Extraction(format!("termdag: expected list, got: {other:?}"))), + other => { + return Err(IrError::Extraction(format!( + "termdag: expected list, got: {other:?}" + ))) + } } } Ok(result) diff --git a/crates/ir/src/storage_hoist.rs b/crates/ir/src/storage_hoist.rs index 977c1a0..3c84f92 100644 --- a/crates/ir/src/storage_hoist.rs +++ b/crates/ir/src/storage_hoist.rs @@ -72,10 +72,7 @@ fn forward_stores_expr(expr: &RcExpr) -> RcExpr { forward_stores_expr_inner(expr, &mut cache) } -fn forward_stores_expr_inner( - expr: &RcExpr, - cache: &mut HashMap, -) -> RcExpr { +fn forward_stores_expr_inner(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { let ptr = Rc::as_ptr(expr) as usize; if let Some(cached) = cache.get(&ptr) { return Rc::clone(cached); @@ -108,10 +105,7 @@ fn forward_stores_expr_inner( /// Recurse into structural sub-bodies (If branches, `DoWhile` body, `LetBind` body). /// These get their own independent forwarding context. -fn recurse_substructures_inner( - expr: &RcExpr, - cache: &mut HashMap, -) -> RcExpr { +fn recurse_substructures_inner(expr: &RcExpr, cache: &mut HashMap) -> RcExpr { let ptr = Rc::as_ptr(expr) as usize; if let Some(cached) = cache.get(&ptr) { return Rc::clone(cached); @@ -282,7 +276,10 @@ fn replace_sloads_inline_match( replace_sloads_inline_inner(a, known, cache), replace_sloads_inline_inner(b, known, cache), )), - EvmExpr::Uop(op, a) => Rc::new(EvmExpr::Uop(*op, replace_sloads_inline_inner(a, known, cache))), + EvmExpr::Uop(op, a) => Rc::new(EvmExpr::Uop( + *op, + replace_sloads_inline_inner(a, known, cache), + )), EvmExpr::Top(op, a, b, c) => Rc::new(EvmExpr::Top( *op, replace_sloads_inline_inner(a, known, cache), @@ -293,7 +290,10 @@ fn replace_sloads_inline_match( replace_sloads_inline_inner(a, known, cache), replace_sloads_inline_inner(b, known, cache), )), - EvmExpr::Get(a, idx) => Rc::new(EvmExpr::Get(replace_sloads_inline_inner(a, known, cache), *idx)), + EvmExpr::Get(a, idx) => Rc::new(EvmExpr::Get( + replace_sloads_inline_inner(a, known, cache), + *idx, + )), EvmExpr::VarStore(name, val) => Rc::new(EvmExpr::VarStore( name.clone(), replace_sloads_inline_inner(val, known, cache), @@ -321,7 +321,10 @@ fn replace_sloads_inline_match( replace_sloads_inline_inner(state, known, cache), )) } - EvmExpr::EnvRead(op, s) => Rc::new(EvmExpr::EnvRead(*op, replace_sloads_inline_inner(s, known, cache))), + EvmExpr::EnvRead(op, s) => Rc::new(EvmExpr::EnvRead( + *op, + replace_sloads_inline_inner(s, known, cache), + )), EvmExpr::EnvRead1(op, a, s) => Rc::new(EvmExpr::EnvRead1( *op, replace_sloads_inline_inner(a, known, cache), diff --git a/crates/ir/src/to_egglog/calls.rs b/crates/ir/src/to_egglog/calls.rs index c79fa25..93e3c9b 100644 --- a/crates/ir/src/to_egglog/calls.rs +++ b/crates/ir/src/to_egglog/calls.rs @@ -1365,7 +1365,12 @@ impl AstToEgglog { self.inline_depth += 1; let t_inline = std::time::Instant::now(); let result = self.lower_code_block(body)?; - tracing::debug!(" inline depth={} prefix={} elapsed={:?}", self.inline_depth, &self.inline_prefix, t_inline.elapsed()); + tracing::debug!( + " inline depth={} prefix={} elapsed={:?}", + self.inline_depth, + &self.inline_prefix, + t_inline.elapsed() + ); self.inline_depth -= 1; self.inline_prefix = old_prefix; self.scopes.pop(); diff --git a/crates/ir/src/to_egglog/mod.rs b/crates/ir/src/to_egglog/mod.rs index 6f4c2cf..95bf5ce 100644 --- a/crates/ir/src/to_egglog/mod.rs +++ b/crates/ir/src/to_egglog/mod.rs @@ -38,7 +38,11 @@ pub(crate) fn references_any_var(expr: &RcExpr, names: &HashSet<&str>) -> bool { references_any_var_inner(expr, names, &mut visited) } -fn references_any_var_inner(expr: &RcExpr, names: &HashSet<&str>, visited: &mut HashSet) -> bool { +fn references_any_var_inner( + expr: &RcExpr, + names: &HashSet<&str>, + visited: &mut HashSet, +) -> bool { let ptr = Rc::as_ptr(expr) as usize; if !visited.insert(ptr) { return false; @@ -52,21 +56,25 @@ fn references_any_var_inner(expr: &RcExpr, names: &HashSet<&str>, visited: &mut | EvmExpr::StorageField(..) | EvmExpr::Drop(_) | EvmExpr::MemRegion(..) => false, - EvmExpr::InlineAsm(inputs, _, _) => inputs.iter().any(|inp| references_any_var_inner(inp, names, visited)), + EvmExpr::InlineAsm(inputs, _, _) => inputs + .iter() + .any(|inp| references_any_var_inner(inp, names, visited)), EvmExpr::Bop(_, a, b) | EvmExpr::Concat(a, b) | EvmExpr::DoWhile(a, b) => { - references_any_var_inner(a, names, visited) || references_any_var_inner(b, names, visited) - } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) - | EvmExpr::AllocRegion(_, a, _) => { references_any_var_inner(a, names, visited) + || references_any_var_inner(b, names, visited) } + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => references_any_var_inner(a, names, visited), EvmExpr::Top(_, a, b, c) | EvmExpr::Revert(a, b, c) | EvmExpr::ReturnOp(a, b, c) => { references_any_var_inner(a, names, visited) || references_any_var_inner(b, names, visited) || references_any_var_inner(c, names, visited) } EvmExpr::RegionStore(_, _, val, state) => { - references_any_var_inner(val, names, visited) || references_any_var_inner(state, names, visited) + references_any_var_inner(val, names, visited) + || references_any_var_inner(state, names, visited) } EvmExpr::RegionLoad(_, _, state) => references_any_var_inner(state, names, visited), EvmExpr::If(c, i, t, e) => { @@ -77,12 +85,18 @@ fn references_any_var_inner(expr: &RcExpr, names: &HashSet<&str>, visited: &mut } EvmExpr::VarStore(_, v) => references_any_var_inner(v, names, visited), EvmExpr::LetBind(_, init, body) => { - references_any_var_inner(init, names, visited) || references_any_var_inner(body, names, visited) + references_any_var_inner(init, names, visited) + || references_any_var_inner(body, names, visited) } EvmExpr::EnvRead(_, s) => references_any_var_inner(s, names, visited), - EvmExpr::EnvRead1(_, a, s) => references_any_var_inner(a, names, visited) || references_any_var_inner(s, names, visited), + EvmExpr::EnvRead1(_, a, s) => { + references_any_var_inner(a, names, visited) + || references_any_var_inner(s, names, visited) + } EvmExpr::Log(_, topics, data_offset, data_size, state) => { - topics.iter().any(|t| references_any_var_inner(t, names, visited)) + topics + .iter() + .any(|t| references_any_var_inner(t, names, visited)) || references_any_var_inner(data_offset, names, visited) || references_any_var_inner(data_size, names, visited) || references_any_var_inner(state, names, visited) @@ -90,7 +104,9 @@ fn references_any_var_inner(expr: &RcExpr, names: &HashSet<&str>, visited: &mut EvmExpr::ExtCall(a, b, c, d, e, f, g) => [a, b, c, d, e, f, g] .iter() .any(|x| references_any_var_inner(x, names, visited)), - EvmExpr::Call(_, args) => args.iter().any(|a| references_any_var_inner(a, names, visited)), + EvmExpr::Call(_, args) => args + .iter() + .any(|a| references_any_var_inner(a, names, visited)), EvmExpr::Function(_, _, _, body) => references_any_var_inner(body, names, visited), } } @@ -328,7 +344,7 @@ pub struct AstToEgglog { pub(crate) storage_array_fields: IndexMap, /// Next available region ID for symbolic memory allocation. pub(crate) next_region_id: i64, - /// Mapping from region ID to the LetBind variable name that holds the base pointer. + /// Mapping from region ID to the `LetBind` variable name that holds the base pointer. /// Used by the post-egglog resolution pass to convert RegionStore/RegionLoad to MStore/MLoad. pub(crate) region_var_map: IndexMap, /// Tracks the last composite allocation `(type_name, base_expr)` for wiring @@ -431,9 +447,9 @@ impl AstToEgglog { crate::ast_helpers::mem_region(id, size_words as i64) } - /// Allocate a fresh region ID without creating a MemRegion node. + /// Allocate a fresh region ID without creating a `MemRegion` node. /// Used for symbolic field access tracking on &dm struct instances. - pub(crate) fn fresh_region_id(&mut self) -> i64 { + pub(crate) const fn fresh_region_id(&mut self) -> i64 { let id = self.next_region_id; self.next_region_id += 1; id diff --git a/crates/ir/src/var_opt.rs b/crates/ir/src/var_opt.rs index 73b3401..f84692d 100644 --- a/crates/ir/src/var_opt.rs +++ b/crates/ir/src/var_opt.rs @@ -47,7 +47,7 @@ struct VarInfo { } impl VarInfo { - fn merge(&mut self, other: &VarInfo) { + const fn merge(&mut self, other: &Self) { self.read_count += other.read_count; self.write_count += other.write_count; self.in_loop |= other.in_loop; @@ -188,7 +188,10 @@ fn collect_allocations_inner( pub fn optimize_program(program: &mut crate::schema::EvmProgram, optimization_level: u8) { for contract in &mut program.contracts { contract.runtime = optimize_expr(&contract.runtime); - tracing::debug!(" var_opt after optimize_expr: {} DAG nodes", crate::dag_node_count(&contract.runtime)); + tracing::debug!( + " var_opt after optimize_expr: {} DAG nodes", + crate::dag_node_count(&contract.runtime) + ); if optimization_level >= 1 { // Inline: substitute args, rename locals, splice body at call site. // Include both internal and free functions. @@ -201,11 +204,17 @@ pub fn optimize_program(program: &mut crate::schema::EvmProgram, optimization_le .cloned() .collect(); inline_calls(&mut contract.runtime, &all_functions); - tracing::debug!(" var_opt after inline_calls: {} DAG nodes", crate::dag_node_count(&contract.runtime)); + tracing::debug!( + " var_opt after inline_calls: {} DAG nodes", + crate::dag_node_count(&contract.runtime) + ); } // Insert early Drops in halting branches for better dead-var-elim contract.runtime = insert_early_drops(&contract.runtime); - tracing::debug!(" var_opt after insert_early_drops: {} DAG nodes", crate::dag_node_count(&contract.runtime)); + tracing::debug!( + " var_opt after insert_early_drops: {} DAG nodes", + crate::dag_node_count(&contract.runtime) + ); contract.constructor = insert_early_drops(&contract.constructor); // NOTE: tighten_drops runs LATER in the pipeline (after store forwarding // at O0, or after egglog at O1+) because store forwarding can expose new @@ -222,7 +231,11 @@ pub fn optimize_program(program: &mut crate::schema::EvmProgram, optimization_le fn optimize_expr(expr: &RcExpr) -> RcExpr { let mut cache: HashMap = HashMap::new(); let result = optimize_expr_memo(expr, &mut cache); - tracing::debug!(" optimize_expr: cache_size={}, output_dag={}", cache.len(), crate::dag_node_count(&result)); + tracing::debug!( + " optimize_expr: cache_size={}, output_dag={}", + cache.len(), + crate::dag_node_count(&result) + ); result } @@ -324,7 +337,10 @@ fn rebuild_children_memo(expr: &RcExpr, cache: &mut HashMap) -> R Rc::new(EvmExpr::EnvRead1(*op, a, s)) } EvmExpr::Log(count, topics, data_offset, data_size, state) => { - let ts: Vec<_> = topics.iter().map(|t| optimize_expr_memo(t, cache)).collect(); + let ts: Vec<_> = topics + .iter() + .map(|t| optimize_expr_memo(t, cache)) + .collect(); let doff = optimize_expr_memo(data_offset, cache); let dsz = optimize_expr_memo(data_size, cache); let s = optimize_expr_memo(state, cache); @@ -363,8 +379,12 @@ fn rebuild_children_memo(expr: &RcExpr, cache: &mut HashMap) -> R let e2 = optimize_expr_memo(e, cache); let f2 = optimize_expr_memo(f, cache); let g2 = optimize_expr_memo(g, cache); - if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) - && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + if Rc::ptr_eq(&a2, a) + && Rc::ptr_eq(&b2, b) + && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) + && Rc::ptr_eq(&e2, e) + && Rc::ptr_eq(&f2, f) && Rc::ptr_eq(&g2, g) { return Rc::clone(expr); @@ -417,7 +437,10 @@ fn rebuild_children_memo(expr: &RcExpr, cache: &mut HashMap) -> R | EvmExpr::StorageField(..) | EvmExpr::MemRegion(..) => Rc::clone(expr), EvmExpr::InlineAsm(inputs, hex, num_outputs) => { - let new_inputs: Vec<_> = inputs.iter().map(|i| optimize_expr_memo(i, cache)).collect(); + let new_inputs: Vec<_> = inputs + .iter() + .map(|i| optimize_expr_memo(i, cache)) + .collect(); if new_inputs .iter() .zip(inputs.iter()) @@ -474,12 +497,18 @@ fn apply_letbind_opts( if info.read_count == 0 && info.write_count == 0 { if is_pure(init) { let r = Rc::clone(body); - tracing::debug!(" letbind_opt: dead-var-elim (pure) '{name}' body_dag={}", crate::dag_node_count(&r)); + tracing::debug!( + " letbind_opt: dead-var-elim (pure) '{name}' body_dag={}", + crate::dag_node_count(&r) + ); return r; } // Keep side effects let r = Rc::new(EvmExpr::Concat(Rc::clone(init), Rc::clone(body))); - tracing::debug!(" letbind_opt: dead-var-elim (side-effect) '{name}' result_dag={}", crate::dag_node_count(&r)); + tracing::debug!( + " letbind_opt: dead-var-elim (side-effect) '{name}' result_dag={}", + crate::dag_node_count(&r) + ); return r; } @@ -487,7 +516,12 @@ fn apply_letbind_opts( if info.read_count == 1 && info.write_count == 0 && !info.in_loop && is_pure(init) { let body_dag = crate::dag_node_count(body); let r = substitute_var(name, init, body); - tracing::debug!(" letbind_opt: single-use-inline '{name}' init_dag={} body_dag={} result_dag={}", crate::dag_node_count(init), body_dag, crate::dag_node_count(&r)); + tracing::debug!( + " letbind_opt: single-use-inline '{name}' init_dag={} body_dag={} result_dag={}", + crate::dag_node_count(init), + body_dag, + crate::dag_node_count(&r) + ); return r; } @@ -497,7 +531,11 @@ fn apply_letbind_opts( let body_dag = crate::dag_node_count(body); // LetBind is now dead — eliminate it if is_pure(init) { - tracing::debug!(" letbind_opt: last-store-fwd (pure) '{name}' body_dag={} result_dag={}", body_dag, crate::dag_node_count(&new_body)); + tracing::debug!( + " letbind_opt: last-store-fwd (pure) '{name}' body_dag={} result_dag={}", + body_dag, + crate::dag_node_count(&new_body) + ); return new_body; } let r = Rc::new(EvmExpr::Concat(Rc::clone(init), new_body)); @@ -510,7 +548,12 @@ fn apply_letbind_opts( if info.write_count == 0 && !info.in_loop && is_const(init) { let body_dag = crate::dag_node_count(body); let r = substitute_var(name, init, body); - tracing::debug!(" letbind_opt: const-prop '{name}' reads={} body_dag={} result_dag={}", info.read_count, body_dag, crate::dag_node_count(&r)); + tracing::debug!( + " letbind_opt: const-prop '{name}' reads={} body_dag={} result_dag={}", + info.read_count, + body_dag, + crate::dag_node_count(&r) + ); return r; } @@ -519,7 +562,7 @@ fn apply_letbind_opts( /// Analyze how a variable is used within an expression. /// -/// Uses memoization keyed by (node_ptr, in_loop) to avoid exponential DAG traversal +/// Uses memoization keyed by (`node_ptr`, `in_loop`) to avoid exponential DAG traversal /// while returning correct tree-expanded read/write counts (matching codegen's traversal). fn analyze_var(name: &str, expr: &RcExpr) -> VarInfo { let mut cache = HashMap::new(); @@ -549,8 +592,10 @@ fn analyze_var_compute( ) -> VarInfo { match expr.as_ref() { EvmExpr::Var(n) if n == name => { - let mut info = VarInfo::default(); - info.read_count = 1; + let mut info = VarInfo { + read_count: 1, + ..Default::default() + }; if in_loop { info.in_loop = true; } @@ -609,9 +654,7 @@ fn analyze_var_compute( | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => analyze_var_cached(name, a, in_loop, cache), - EvmExpr::RegionStore(_, _, val, _state) => { - analyze_var_cached(name, val, in_loop, cache) - } + EvmExpr::RegionStore(_, _, val, _state) => analyze_var_cached(name, val, in_loop, cache), EvmExpr::RegionLoad(_, _, _state) => VarInfo::default(), EvmExpr::Top(op, a, b, c) => { let mut info = analyze_var_cached(name, a, in_loop, cache); @@ -819,7 +862,10 @@ fn collect_immutable_vars_inner( collect_immutable_vars_inner(t, immutable, mutable, visited); collect_immutable_vars_inner(e, immutable, mutable, visited); } - EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) | EvmExpr::AllocRegion(_, a, _) => { + EvmExpr::Uop(_, a) + | EvmExpr::Get(a, _) + | EvmExpr::DynAlloc(a) + | EvmExpr::AllocRegion(_, a, _) => { collect_immutable_vars_inner(a, immutable, mutable, visited); } EvmExpr::RegionStore(_, _, val, state) => { @@ -1091,9 +1137,7 @@ fn references_var_inner_uncached( EvmExpr::Uop(_, a) | EvmExpr::Get(a, _) | EvmExpr::DynAlloc(a) - | EvmExpr::AllocRegion(_, a, _) => { - references_var_inner(a, name, follow_state, cache) - } + | EvmExpr::AllocRegion(_, a, _) => references_var_inner(a, name, follow_state, cache), // RegionStore: state is last arg EvmExpr::RegionStore(_, _, val, state) => { references_var_inner(val, name, follow_state, cache) @@ -1185,9 +1229,7 @@ fn references_var_inner_uncached( EvmExpr::Call(_, args) => args .iter() .any(|a| references_var_inner(a, name, follow_state, cache)), - EvmExpr::Function(_, _, _, body) => { - references_var_inner(body, name, follow_state, cache) - } + EvmExpr::Function(_, _, _, body) => references_var_inner(body, name, follow_state, cache), EvmExpr::Const(..) | EvmExpr::Arg(..) | EvmExpr::Empty(..) @@ -1610,7 +1652,11 @@ fn substitute_var_inner( .iter() .map(|i| substitute_var_memo(name, replacement, i, cache)) .collect(); - if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_inputs + .iter() + .zip(inputs.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(expr); } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) @@ -1714,7 +1760,8 @@ fn substitute_var_inner( let i2 = substitute_var_memo(name, replacement, i, cache); let t2 = substitute_var_memo(name, replacement, t, cache); let e2 = substitute_var_memo(name, replacement, e, cache); - if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) + { return Rc::clone(expr); } Rc::new(EvmExpr::If(c2, i2, t2, e2)) @@ -1785,8 +1832,12 @@ fn substitute_var_inner( let e2 = substitute_var_memo(name, replacement, e, cache); let f2 = substitute_var_memo(name, replacement, f, cache); let g2 = substitute_var_memo(name, replacement, g, cache); - if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) - && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + if Rc::ptr_eq(&a2, a) + && Rc::ptr_eq(&b2, b) + && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) + && Rc::ptr_eq(&e2, e) + && Rc::ptr_eq(&f2, f) && Rc::ptr_eq(&g2, g) { return Rc::clone(expr); @@ -1798,7 +1849,11 @@ fn substitute_var_inner( .iter() .map(|a| substitute_var_memo(name, replacement, a, cache)) .collect(); - if new_args.iter().zip(args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_args + .iter() + .zip(args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(expr); } Rc::new(EvmExpr::Call(n.clone(), new_args)) @@ -2070,7 +2125,8 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let i2 = substitute_args(i, in_ty, args); let t2 = substitute_args(t, in_ty, args); let e2 = substitute_args(e, in_ty, args); - if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) + { return Rc::clone(body); } Rc::new(EvmExpr::If(c2, i2, t2, e2)) @@ -2130,7 +2186,11 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { .iter() .map(|a| substitute_args(a, in_ty, args)) .collect(); - if new_args.iter().zip(call_args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_args + .iter() + .zip(call_args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(body); } Rc::new(EvmExpr::Call(name.clone(), new_args)) @@ -2143,7 +2203,10 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let d2 = substitute_args(data_off, in_ty, args); let s2 = substitute_args(data_sz, in_ty, args); let st2 = substitute_args(state, in_ty, args); - if topics2.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + if topics2 + .iter() + .zip(topics.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) && Rc::ptr_eq(&d2, data_off) && Rc::ptr_eq(&s2, data_sz) && Rc::ptr_eq(&st2, state) @@ -2160,8 +2223,12 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { let e2 = substitute_args(e, in_ty, args); let f2 = substitute_args(f, in_ty, args); let g2 = substitute_args(g, in_ty, args); - if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) - && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + if Rc::ptr_eq(&a2, a) + && Rc::ptr_eq(&b2, b) + && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) + && Rc::ptr_eq(&e2, e) + && Rc::ptr_eq(&f2, f) && Rc::ptr_eq(&g2, g) { return Rc::clone(body); @@ -2173,7 +2240,11 @@ fn substitute_args(body: &RcExpr, in_ty: &EvmType, args: &[RcExpr]) -> RcExpr { .iter() .map(|i| substitute_args(i, in_ty, args)) .collect(); - if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_inputs + .iter() + .zip(inputs.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(body); } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) @@ -2325,7 +2396,7 @@ fn rename_locals_rec( if defined.contains(name) { Rc::new(EvmExpr::VarStore(format!("{name}{suffix}"), v2)) } else if Rc::ptr_eq(&v2, val) { - return Rc::clone(expr); + Rc::clone(expr) } else { Rc::new(EvmExpr::VarStore(name.clone(), v2)) } @@ -2365,7 +2436,8 @@ fn rename_locals_rec( let i2 = rename_locals_rec(i, suffix, defined); let t2 = rename_locals_rec(t, suffix, defined); let e2 = rename_locals_rec(e, suffix, defined); - if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) { + if Rc::ptr_eq(&c2, c) && Rc::ptr_eq(&i2, i) && Rc::ptr_eq(&t2, t) && Rc::ptr_eq(&e2, e) + { return Rc::clone(expr); } Rc::new(EvmExpr::If(c2, i2, t2, e2)) @@ -2417,7 +2489,11 @@ fn rename_locals_rec( .iter() .map(|a| rename_locals_rec(a, suffix, defined)) .collect(); - if new_args.iter().zip(call_args.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_args + .iter() + .zip(call_args.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(expr); } Rc::new(EvmExpr::Call(name.clone(), new_args)) @@ -2430,7 +2506,10 @@ fn rename_locals_rec( let d2 = rename_locals_rec(data_off, suffix, defined); let s2 = rename_locals_rec(data_sz, suffix, defined); let st2 = rename_locals_rec(state, suffix, defined); - if topics2.iter().zip(topics.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) + if topics2 + .iter() + .zip(topics.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) && Rc::ptr_eq(&d2, data_off) && Rc::ptr_eq(&s2, data_sz) && Rc::ptr_eq(&st2, state) @@ -2447,8 +2526,12 @@ fn rename_locals_rec( let e2 = rename_locals_rec(e, suffix, defined); let f2 = rename_locals_rec(f, suffix, defined); let g2 = rename_locals_rec(g, suffix, defined); - if Rc::ptr_eq(&a2, a) && Rc::ptr_eq(&b2, b) && Rc::ptr_eq(&c2, c) - && Rc::ptr_eq(&d2, d) && Rc::ptr_eq(&e2, e) && Rc::ptr_eq(&f2, f) + if Rc::ptr_eq(&a2, a) + && Rc::ptr_eq(&b2, b) + && Rc::ptr_eq(&c2, c) + && Rc::ptr_eq(&d2, d) + && Rc::ptr_eq(&e2, e) + && Rc::ptr_eq(&f2, f) && Rc::ptr_eq(&g2, g) { return Rc::clone(expr); @@ -2460,7 +2543,11 @@ fn rename_locals_rec( .iter() .map(|i| rename_locals_rec(i, suffix, defined)) .collect(); - if new_inputs.iter().zip(inputs.iter()).all(|(n, o)| Rc::ptr_eq(n, o)) { + if new_inputs + .iter() + .zip(inputs.iter()) + .all(|(n, o)| Rc::ptr_eq(n, o)) + { return Rc::clone(expr); } Rc::new(EvmExpr::InlineAsm(new_inputs, hex.clone(), *num_outputs)) From 08771b1ec9f2d52b5b89e92f688094467a492f31 Mon Sep 17 00:00:00 2001 From: brockelmore <31553173+brockelmore@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:24:53 -0600 Subject: [PATCH 13/13] docs: update spec for &dm, @alloc, @size_of, Vec, Map, stdlib traits Add documentation for features that were implemented but missing from the spec: - Add `&dm` dynamic memory location to data locations spec - Add `@alloc` and `@size_of` runtime builtins - Add EVM environment builtins (@caller, @timestamp, etc.) - Add `[]` index operator to grammar and semantics table - Document operator overloading via std::ops traits (Add, Sub, Mul, Div, Mod, Eq, Ord, Index) - Document checked arithmetic elision by range analysis at O1+ - New Standard Library page covering all std::ops traits, storage/ memory traits, Option, Result, Vec, and Map Co-Authored-By: Claude Opus 4.6 --- book/SUMMARY.md | 1 + book/specs/builtins.md | 53 +++++- book/specs/stdlib.md | 250 ++++++++++++++++++++++++++ book/specs/syntax/locations.md | 64 +++++-- book/specs/syntax/operators.md | 72 +++++++- docs/pages/specs/builtins.md | 30 +++- docs/pages/specs/stdlib.md | 254 +++++++++++++++++++++++++++ docs/pages/specs/syntax/locations.md | 46 ++++- docs/pages/specs/syntax/operators.md | 68 ++++++- vocs.shared.ts | 1 + 10 files changed, 819 insertions(+), 20 deletions(-) create mode 100644 book/specs/stdlib.md create mode 100644 docs/pages/specs/stdlib.md diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 541c2ff..ce1cff8 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -46,6 +46,7 @@ 1. [Visibility](specs/semantics/visibility.md) 1. [Inline Assembly](specs/inline.md) 1. [Built-In](specs/builtins.md) +1. [Standard Library](specs/stdlib.md) # The Compiler diff --git a/book/specs/builtins.md b/book/specs/builtins.md index 1ddc68e..c117386 100644 --- a/book/specs/builtins.md +++ b/book/specs/builtins.md @@ -8,6 +8,55 @@ Macros contain their own syntax and semantics, however, comptime functionality and built-in assistants cover most of the use cases for macros without leaving the language's native syntax. +## Runtime Builtins + +### `@size_of` + +``` +@size_of::() -> u256 +``` + +Returns the size in bytes of type `T`. For primitive types this is the +ABI-encoded word size (32 bytes for `u256`, `address`, etc.). + +### `@alloc` + +``` +@alloc(size_bytes: u256) -> u256 +``` + +Allocates `size_bytes` of dynamic memory at runtime and returns a pointer +to the start of the region. Uses MSIZE-based pointer arithmetic to ensure +the returned region does not overlap with any other allocation. + +`@alloc` is the foundation for dynamically-sized data structures like +`Vec`. It is used in conjunction with the `&dm` data location annotation +(see [Data Locations](syntax/locations.md)). + +## EVM Environment Builtins + +These builtins expose EVM execution environment values directly. Each +returns a `u256`. + +| builtin | EVM opcode | description | +|----------------------|------------------|--------------------------------------------| +| `@caller()` | `CALLER` | address of the direct caller | +| `@callvalue()` | `CALLVALUE` | wei sent with the call | +| `@calldatasize()` | `CALLDATASIZE` | size of calldata in bytes | +| `@origin()` | `ORIGIN` | transaction origin address | +| `@gasprice()` | `GASPRICE` | gas price of the transaction | +| `@coinbase()` | `COINBASE` | block coinbase address | +| `@timestamp()` | `TIMESTAMP` | block timestamp | +| `@number()` | `NUMBER` | block number | +| `@gaslimit()` | `GASLIMIT` | block gas limit | +| `@chainid()` | `CHAINID` | chain ID | +| `@selfbalance()` | `SELFBALANCE` | balance of the current contract | +| `@basefee()` | `BASEFEE` | block base fee | +| `@gas()` | `GAS` | remaining gas | +| `@address()` | `ADDRESS` | address of the current contract | +| `@codesize()` | `CODESIZE` | size of the contract's code | +| `@returndatasize()` | `RETURNDATASIZE` | size of the return data from the last call | + ## Types ### PrimitiveType @@ -74,7 +123,9 @@ type HardFork = | Cancun; ``` -### Functions +## Comptime Builtins (Future Work) + +The following builtins are planned but not yet implemented: ``` @typeInfo diff --git a/book/specs/stdlib.md b/book/specs/stdlib.md new file mode 100644 index 0000000..d97d99b --- /dev/null +++ b/book/specs/stdlib.md @@ -0,0 +1,250 @@ +# Standard Library + +The Edge standard library provides traits, types, and data structures +that are automatically available or importable via `use std::*`. + +## Operator Traits (`std::ops`) + +These traits enable operator syntax for user-defined types. Import them +with `use std::ops::TraitName;` and implement them on your type. + +### Arithmetic + +```edge +trait Add { + fn add(self, rhs: Self) -> (Self); +} + +trait Sub { + fn sub(self, rhs: Self) -> (Self); +} + +trait Mul { + fn mul(self, rhs: Self) -> (Self); +} + +trait Div { + fn div(self, rhs: Self) -> (Self); +} + +trait Mod { + fn mod_(self, rhs: Self) -> (Self); +} +``` + +When implemented, these traits dispatch from the corresponding binary +operators (`+`, `-`, `*`, `/`, `%`). For primitive types, the compiler +provides built-in implementations with checked overflow behavior. + +### Unchecked Arithmetic + +```edge +trait UnsafeAdd { + fn unsafe_add(self, rhs: Self) -> (Self); +} + +trait UnsafeSub { + fn unsafe_sub(self, rhs: Self) -> (Self); +} + +trait UnsafeMul { + fn unsafe_mul(self, rhs: Self) -> (Self); +} +``` + +These bypass overflow and underflow checks. The compiler provides +built-in implementations for all primitive integer types. Use these +in performance-critical code where overflow is provably impossible +(e.g., internal pointer arithmetic). + +### Comparison + +```edge +trait Eq { + fn eq(self, rhs: Self) -> (bool); +} + +trait Ord { + fn lt(self, rhs: Self) -> (bool); + fn gt(self, rhs: Self) -> (bool); + fn le(self, rhs: Self) -> (bool); + fn ge(self, rhs: Self) -> (bool); +} +``` + +`Eq` dispatches from `==`. `Ord` dispatches from `<`, `>`, `<=`, `>=`. + +### Indexing + +```edge +trait Index { + fn index(self, index: Idx) -> (Output); +} +``` + +Dispatches from the `[]` operator. `Idx` is the key/index type, +`Output` is the return type. Used by `Vec` and `Map`. + +## Storage & Memory Traits (`std::ops`) + +These traits control how types are stored to and loaded from different +data locations. The compiler provides built-in implementations for +primitive types. + +### Storage + +```edge +trait Sstore { + fn sstore(self, base_slot: u256); +} + +trait Sload { + fn sload(base_slot: u256) -> Self; +} +``` + +Control how values are written to (`SSTORE`) and read from (`SLOAD`) +persistent storage. `Map` overrides `Sload` to return the slot +itself (identity), enabling nested map composition. + +### Slot Derivation + +```edge +trait UniqueSlot { + fn derive_slot(self, base_slot: u256) -> u256; +} +``` + +Derives a storage slot from a key and a base slot. Required for `Map` +keys. The compiler provides default implementations for primitive types +using `keccak256(abi.encode(key, base_slot))`, but users can implement +this trait on their own types to define custom slot derivation logic. + +### Memory + +```edge +trait Mstore { + fn mstore(self, offset: u256); +} + +trait Mload { + fn mload(offset: u256) -> Self; +} + +trait Mcopy { + fn mcopy(self, dest: u256, size: u256); +} +``` + +Control how values interact with EVM memory. Used internally by `Vec` +and other memory-backed data structures. + +## Generic Types + +### `Option` + +```edge +type Option = None | Some(T); +``` + +A sum type representing an optional value. `None` indicates absence, +`Some(T)` wraps a present value. + +### `Result` + +```edge +type Result = Ok(T) | Err(E); +``` + +A sum type for fallible operations. `Ok(T)` wraps a success value, +`Err(E)` wraps an error value. + +### `Vec` + +```edge +type Vec = { len: u256, capacity: u256 }; +``` + +A dynamically-allocated, growable array backed by `&dm` (dynamic memory). + +**Memory layout** (contiguous): +``` +[len (32 bytes), capacity (32 bytes), elem0, elem1, ...] + ^--- pointer +``` + +**Construction**: +```edge +let v: &dm Vec = Vec::new(4); // initial capacity of 4 +``` + +**Methods**: + +| method | description | +|----------------------------|---------------------------------------------| +| `Vec::new(cap) -> u256` | Allocate a new Vec with initial capacity | +| `v.len() -> u256` | Current number of elements | +| `v.capacity() -> u256` | Current allocated capacity | +| `v.push(val)` | Append element, growing if needed | +| `v.pop() -> T` | Remove and return last element (reverts if empty) | +| `v.get(index) -> T` | Read element at index (reverts if out of bounds) | +| `v.set(index, val)` | Write element at index (reverts if out of bounds) | +| `v.grow(new_cap)` | Reallocate to larger capacity | + +**Index trait**: `Vec` implements `Index`, so `v[i]` is +equivalent to `v.get(i)`. + +**Growth**: When `push` exceeds capacity, `grow` allocates a new region +via `@alloc`, copies existing data with `MCOPY`, and transparently +updates the caller's pointer via `&dm` aliasing. + +### `Map` + +```edge +type Map = (); +``` + +A storage mapping type. At runtime, a `Map` is just a `u256` representing +its base storage slot — it is a zero-storage type with no runtime overhead. + +**Trait bounds**: Keys must implement `UniqueSlot` (for slot derivation +via keccak256). Values must implement `Sload` and `Sstore`. + +**Usage**: +```edge +contract MyContract { + let balances: &s Map; + let allowances: &s Map>; + + pub fn get_balance(owner: address) -> u256 { + self.balances.get(owner) + } + + pub fn set_balance(owner: address, val: u256) { + self.balances.set(owner, val); + } + + pub fn get_allowance(owner: address, spender: address) -> u256 { + self.allowances[owner][spender] + } +} +``` + +**Methods**: + +| method | description | +|-------------------------|------------------------------------| +| `m.get(key) -> V` | Derive slot from key and SLOAD | +| `m.set(key, val)` | Derive slot from key and SSTORE | + +**Index trait**: `Map` implements `Index`, so `m[key]` is +equivalent to `m.get(key)`. + +**Nested maps**: `Map>` works because `Map` implements +`Sload` as identity — "loading" an inner Map just passes through the +derived slot without an actual `SLOAD`. This means `m[k1][k2]` performs +exactly one `SLOAD` (at the leaf), with two keccak256 slot derivations. + +**Slot derivation**: For a key `k` and base slot `s`, the storage slot +is `keccak256(abi.encode(k, s))`. This matches the Solidity mapping +layout convention. diff --git a/book/specs/syntax/locations.md b/book/specs/syntax/locations.md index c317eda..280a795 100644 --- a/book/specs/syntax/locations.md +++ b/book/specs/syntax/locations.md @@ -4,6 +4,7 @@ ::= "&s" ; ::= "&t" ; ::= "&m" ; + ::= "&dm" ; ::= "&cd" ; ::= "&rd" ; ::= "&ic" ; @@ -13,6 +14,7 @@ | | | + | | | | @@ -20,7 +22,7 @@ ``` The `` is a data location annotation indicating to which data -location a pointer's data exists. We define seven distinct annotations +location a pointer's data exists. We define eight distinct annotations for data location pointers. This is a divergence from general purpose programming languages to more accurately represent the EVM execution environment. @@ -28,6 +30,7 @@ environment. * `&s` persistent storage * `&t` transient storage * `&m` memory +* `&dm` dynamic memory * `&cd` calldata * `&rd` returndata * `&ic` internal (local) code @@ -43,6 +46,46 @@ Persistent and transient storage are part of the map category, 256 bit keys map to 256 bit values. Both may be written or read one word at a time. +#### Dynamic Memory + +The `&dm` annotation designates a pointer to **dynamically allocated +memory**. Unlike `&m` (which refers to compiler-managed fixed-offset +memory), `&dm` pointers are obtained at runtime via the `@alloc` builtin +and point to MSIZE-allocated regions. + +`&dm` enables: + +- **Runtime-sized allocations**: `@alloc(size_bytes)` returns a pointer + to a fresh memory region that does not overlap with any other allocation. +- **Transparent pointer aliasing**: When a method takes `self: &dm Self`, + mutations to the underlying pointer (e.g., during `Vec` growth and + reallocation) are propagated back to the caller transparently. +- **Pass-by-reference semantics**: Methods on `&dm` types receive and + can update the caller's pointer, enabling in-place mutation of + dynamically-sized structures. + +`&dm` is primarily used by `Vec` and other dynamically-sized standard +library types. The compiler tracks `&dm` regions symbolically for +optimization (region-based store forwarding, non-aliasing proofs). + +Example: + +```edge +type Vec = { len: u256, capacity: u256 }; + +impl Vec { + fn len(self: &dm Self) -> u256 { + self.len + } + + fn push(self: &dm Self, val: T) { + // self is a &dm pointer — mutations to self.len + // are visible to the caller + // ... + } +} +``` + #### Buffers Memory, calldata, returndata, internal code, and external code are @@ -77,12 +120,13 @@ Pointers to different data locations consist of different sizes based on the properties of that data location. In depth semantics of each data location are specified in the type system documents. -| Location | Bit Size | Reason | -| -------------------|-----------|---------------------------------------------------------------| -| persistent storage | 256 | Storage is 256 bit key value hashmap | -| transient storage | 256 | Transient storage is 256 bit key value hashmap | -| memory | 32 | Theoretical maximum memory size does not grow to 0xffffffff | -| calldata | 32 | Theoretical maximum calldata size does not grow to 0xffffffff | -| returndata | 32 | Maximum returndata size is equal to maximum memory size | -| internal code | 16 | Code size is less than 0xffff | -| external code | 176 | Contains 160 bit address and 16 bit code pointer | +| Location | Bit Size | Reason | +| -------------------|----------|---------------------------------------------------------------| +| persistent storage | 256 | Storage is 256 bit key value hashmap | +| transient storage | 256 | Transient storage is 256 bit key value hashmap | +| memory | 32 | Theoretical maximum memory size does not grow to 0xffffffff | +| dynamic memory | 32 | Theoretical maximum memory size does not approach 2³² | +| calldata | 32 | Theoretical maximum calldata size does not grow to 0xffffffff | +| returndata | 32 | Maximum returndata size is equal to maximum memory size | +| internal code | 16 | Code size is less than 0xffff | +| external code | 176 | Contains 160 bit address and 16 bit code pointer | diff --git a/book/specs/syntax/operators.md b/book/specs/syntax/operators.md index ee81a43..dece280 100644 --- a/book/specs/syntax/operators.md +++ b/book/specs/syntax/operators.md @@ -48,13 +48,20 @@ Operators are syntax sugar over built-in functions. | ; ``` -## Semantics +## Index + +``` + ::= "[" "]" ; +``` + +The index operator `[]` dispatches to the `Index` trait. Any type implementing +`Index` can be indexed with `value[key]`. -Operator overloading is disallowed. +## Semantics | operator | types | behavior | panic case | | ---------|----------|------------------------------|----------------| -| + | integers | checked addition | overflow | +| + | integers | checked addition | overflow | | - | integers | checked subtraction (binary) | underflow | | - | integers | checked negation (unary) | overflow | | * | integers | checked multiplication | overflow | @@ -66,7 +73,7 @@ Operator overloading is disallowed. | ~ | integers | bitwise NOT | - | | ^ | integers | bitwise XOR | - | | >> | integers | bitwise shift right | - | -| << | integers | bitwise shift left | - | +| << | integers | bitwise shift left | - | | == | any | equality | - | | != | any | inequality | - | | && | booleans | logical AND | - | @@ -76,3 +83,60 @@ Operator overloading is disallowed. | >= | integers | greater than or equal to | - | | < | integers | less than | - | | <= | integers | less than or equal to | - | +| [] | any | index | - | + +### Checked Arithmetic + +The `+`, `-`, and `*` operators are **checked** by default: they revert on +overflow or underflow. The compiler's range analysis pass can **elide** these +checks when it can statically prove the operation is safe (e.g., adding two +values whose combined upper bound fits in the type). This happens +automatically at optimization level O1 and above. + +For performance-critical code where overflow is known to be impossible, the +standard library provides unchecked variants via the `UnsafeAdd`, `UnsafeSub`, +and `UnsafeMul` traits. + +### Operator Overloading + +Direct operator overloading is disallowed. However, the following operators +can be customized for user-defined types by implementing the corresponding +standard library trait from `std::ops`: + +| operator | trait | method(s) | +|---------------|----------------------------|----------------------| +| `+` | `Add` | `add(self, rhs)` | +| `-` | `Sub` | `sub(self, rhs)` | +| `*` | `Mul` | `mul(self, rhs)` | +| `/` | `Div` | `div(self, rhs)` | +| `%` | `Mod` | `mod_(self, rhs)` | +| `==` | `Eq` | `eq(self, rhs)` | +| `<` `<=` `>` `>=` | `Ord` | `lt` `le` `gt` `ge` | +| `[]` | `Index` | `index(self, idx)` | + +Bitwise and logical operators (`&`, `|`, `^`, `~`, `<<`, `>>`, `&&`, `||`) +are currently primitives-only. Exponentiation (`**`) is also primitives-only. +Trait-based overloading for these operators may be added in the future. + +Example: + +```edge +use std::ops::Add; +use std::ops::Eq; + +type Wrapper = { value: u256 }; + +impl Wrapper: Add { + fn add(self, rhs: Self) -> (Self) { + Wrapper { value: self.value + rhs.value } + } +} + +impl Wrapper: Eq { + fn eq(self, rhs: Self) -> (bool) { + self.value == rhs.value + } +} + +// Now `Wrapper { value: 1 } + Wrapper { value: 2 }` works. +``` diff --git a/docs/pages/specs/builtins.md b/docs/pages/specs/builtins.md index e4e74d5..bd75e66 100644 --- a/docs/pages/specs/builtins.md +++ b/docs/pages/specs/builtins.md @@ -47,10 +47,36 @@ fn checkCaller() { } ``` -## Comptime builtins +## Runtime builtins + +### `@size_of` + +```edge +@size_of::() -> u256 +``` + +Returns the size in bytes of type `T`. For primitive types this is the +ABI-encoded word size (32 bytes for `u256`, `address`, etc.). + +### `@alloc` + +```edge +@alloc(size_bytes: u256) -> u256 +``` + +Allocates `size_bytes` of dynamic memory at runtime and returns a pointer +to the start of the region. Uses MSIZE-based pointer arithmetic to ensure +the returned region does not overlap with any other allocation. + +`@alloc` is the foundation for dynamically-sized data structures like +`Vec`. It is used in conjunction with the `&dm` data location annotation +(see [Data Locations](/specs/syntax/locations)). + +## Comptime builtins (future work) These builtins execute at compile time and are used for type introspection, -compile-time assertions, and code generation. +compile-time assertions, and code generation. They are planned but not yet +implemented. ### Types diff --git a/docs/pages/specs/stdlib.md b/docs/pages/specs/stdlib.md new file mode 100644 index 0000000..b3bd1a4 --- /dev/null +++ b/docs/pages/specs/stdlib.md @@ -0,0 +1,254 @@ +--- +title: Standard Library +--- + +# Standard Library + +The Edge standard library provides traits, types, and data structures +that are automatically available or importable via `use std::*`. + +## Operator Traits (`std::ops`) + +These traits enable operator syntax for user-defined types. Import them +with `use std::ops::TraitName;` and implement them on your type. + +### Arithmetic + +```edge +trait Add { + fn add(self, rhs: Self) -> (Self); +} + +trait Sub { + fn sub(self, rhs: Self) -> (Self); +} + +trait Mul { + fn mul(self, rhs: Self) -> (Self); +} + +trait Div { + fn div(self, rhs: Self) -> (Self); +} + +trait Mod { + fn mod_(self, rhs: Self) -> (Self); +} +``` + +When implemented, these traits dispatch from the corresponding binary +operators (`+`, `-`, `*`, `/`, `%`). For primitive types, the compiler +provides built-in implementations with checked overflow behavior. + +### Unchecked Arithmetic + +```edge +trait UnsafeAdd { + fn unsafe_add(self, rhs: Self) -> (Self); +} + +trait UnsafeSub { + fn unsafe_sub(self, rhs: Self) -> (Self); +} + +trait UnsafeMul { + fn unsafe_mul(self, rhs: Self) -> (Self); +} +``` + +These bypass overflow and underflow checks. The compiler provides +built-in implementations for all primitive integer types. Use these +in performance-critical code where overflow is provably impossible +(e.g., internal pointer arithmetic). + +### Comparison + +```edge +trait Eq { + fn eq(self, rhs: Self) -> (bool); +} + +trait Ord { + fn lt(self, rhs: Self) -> (bool); + fn gt(self, rhs: Self) -> (bool); + fn le(self, rhs: Self) -> (bool); + fn ge(self, rhs: Self) -> (bool); +} +``` + +`Eq` dispatches from `==`. `Ord` dispatches from `<`, `>`, `<=`, `>=`. + +### Indexing + +```edge +trait Index { + fn index(self, index: Idx) -> (Output); +} +``` + +Dispatches from the `[]` operator. `Idx` is the key/index type, +`Output` is the return type. Used by `Vec` and `Map`. + +## Storage & Memory Traits (`std::ops`) + +These traits control how types are stored to and loaded from different +data locations. The compiler provides built-in implementations for +primitive types. + +### Storage + +```edge +trait Sstore { + fn sstore(self, base_slot: u256); +} + +trait Sload { + fn sload(base_slot: u256) -> Self; +} +``` + +Control how values are written to (`SSTORE`) and read from (`SLOAD`) +persistent storage. `Map` overrides `Sload` to return the slot +itself (identity), enabling nested map composition. + +### Slot Derivation + +```edge +trait UniqueSlot { + fn derive_slot(self, base_slot: u256) -> u256; +} +``` + +Derives a storage slot from a key and a base slot. Required for `Map` +keys. The compiler provides default implementations for primitive types +using `keccak256(abi.encode(key, base_slot))`, but users can implement +this trait on their own types to define custom slot derivation logic. + +### Memory + +```edge +trait Mstore { + fn mstore(self, offset: u256); +} + +trait Mload { + fn mload(offset: u256) -> Self; +} + +trait Mcopy { + fn mcopy(self, dest: u256, size: u256); +} +``` + +Control how values interact with EVM memory. Used internally by `Vec` +and other memory-backed data structures. + +## Generic Types + +### `Option` + +```edge +type Option = None | Some(T); +``` + +A sum type representing an optional value. `None` indicates absence, +`Some(T)` wraps a present value. + +### `Result` + +```edge +type Result = Ok(T) | Err(E); +``` + +A sum type for fallible operations. `Ok(T)` wraps a success value, +`Err(E)` wraps an error value. + +### `Vec` + +```edge +type Vec = { len: u256, capacity: u256 }; +``` + +A dynamically-allocated, growable array backed by `&dm` (dynamic memory). + +**Memory layout** (contiguous): +``` +[len (32 bytes), capacity (32 bytes), elem0, elem1, ...] + ^--- pointer +``` + +**Construction**: +```edge +let v: &dm Vec = Vec::new(4); // initial capacity of 4 +``` + +**Methods**: + +| method | description | +|----------------------------|---------------------------------------------| +| `Vec::new(cap) -> u256` | Allocate a new Vec with initial capacity | +| `v.len() -> u256` | Current number of elements | +| `v.capacity() -> u256` | Current allocated capacity | +| `v.push(val)` | Append element, growing if needed | +| `v.pop() -> T` | Remove and return last element (reverts if empty) | +| `v.get(index) -> T` | Read element at index (reverts if out of bounds) | +| `v.set(index, val)` | Write element at index (reverts if out of bounds) | +| `v.grow(new_cap)` | Reallocate to larger capacity | + +**Index trait**: `Vec` implements `Index`, so `v[i]` is +equivalent to `v.get(i)`. + +**Growth**: When `push` exceeds capacity, `grow` allocates a new region +via `@alloc`, copies existing data with `MCOPY`, and transparently +updates the caller's pointer via `&dm` aliasing. + +### `Map` + +```edge +type Map = (); +``` + +A storage mapping type. At runtime, a `Map` is just a `u256` representing +its base storage slot — it is a zero-storage type with no runtime overhead. + +**Trait bounds**: Keys must implement `UniqueSlot` (for slot derivation +via keccak256). Values must implement `Sload` and `Sstore`. + +**Usage**: +```edge +contract MyContract { + let balances: &s Map; + let allowances: &s Map>; + + pub fn get_balance(owner: address) -> u256 { + self.balances.get(owner) + } + + pub fn set_balance(owner: address, val: u256) { + self.balances.set(owner, val); + } + + pub fn get_allowance(owner: address, spender: address) -> u256 { + self.allowances[owner][spender] + } +} +``` + +**Methods**: + +| method | description | +|-------------------------|------------------------------------| +| `m.get(key) -> V` | Derive slot from key and SLOAD | +| `m.set(key, val)` | Derive slot from key and SSTORE | + +**Index trait**: `Map` implements `Index`, so `m[key]` is +equivalent to `m.get(key)`. + +**Nested maps**: `Map>` works because `Map` implements +`Sload` as identity — "loading" an inner Map just passes through the +derived slot without an actual `SLOAD`. This means `m[k1][k2]` performs +exactly one `SLOAD` (at the leaf), with two keccak256 slot derivations. + +**Slot derivation**: For a key `k` and base slot `s`, the storage slot +is `keccak256(abi.encode(k, s))`. This matches the Solidity mapping +layout convention. diff --git a/docs/pages/specs/syntax/locations.md b/docs/pages/specs/syntax/locations.md index 196a3b8..4c9d894 100644 --- a/docs/pages/specs/syntax/locations.md +++ b/docs/pages/specs/syntax/locations.md @@ -8,6 +8,7 @@ title: Data locations ::= "&s" ; ::= "&t" ; ::= "&m" ; + ::= "&dm" ; ::= "&cd" ; ::= "&rd" ; ::= "&ic" ; @@ -17,6 +18,7 @@ title: Data locations | | | + | | | | @@ -24,13 +26,14 @@ title: Data locations ``` The `` is a pointer annotation indicating which EVM data region -a value resides in. Edge defines seven distinct location annotations. This is a +a value resides in. Edge defines eight distinct location annotations. This is a divergence from general-purpose programming languages to more accurately represent the EVM execution environment. * `&s` — persistent storage * `&t` — transient storage (EIP-1153) * `&m` — memory +* `&dm` — dynamic memory * `&cd` — calldata * `&rd` — returndata * `&ic` — internal (local) code @@ -38,7 +41,7 @@ the EVM execution environment. :::note The `&` character is heavily overloaded in the lexer. It checks for data-location -sigils first (`&s`, `&t`, `&m`, `&cd`, `&rd`, `&ic`, `&ec`), then `&=`, then +sigils first (`&s`, `&t`, `&m`, `&dm`, `&cd`, `&rd`, `&ic`, `&ec`), then `&=`, then `&&`, and finally falls back to bitwise AND. ::: @@ -52,6 +55,44 @@ Persistent and transient storage are part of the map category — 256-bit keys map to 256-bit values. Both may be written or read one word at a time. +### Dynamic memory + +The `&dm` annotation designates a pointer to **dynamically allocated +memory**. Unlike `&m` (which refers to compiler-managed fixed-offset +memory), `&dm` pointers are obtained at runtime via the `@alloc` builtin +and point to MSIZE-allocated regions. + +`&dm` enables: + +- **Runtime-sized allocations**: `@alloc(size_bytes)` returns a pointer + to a fresh memory region that does not overlap with any other allocation. +- **Transparent pointer aliasing**: When a method takes `self: &dm Self`, + mutations to the underlying pointer (e.g., during `Vec` growth and + reallocation) are propagated back to the caller transparently. +- **Pass-by-reference semantics**: Methods on `&dm` types receive and + can update the caller's pointer, enabling in-place mutation of + dynamically-sized structures. + +`&dm` is primarily used by `Vec` and other dynamically-sized standard +library types. The compiler tracks `&dm` regions symbolically for +optimization (region-based store forwarding, non-aliasing proofs). + +```edge +type Vec = { len: u256, capacity: u256 }; + +impl Vec { + fn len(self: &dm Self) -> u256 { + self.len + } + + fn push(self: &dm Self, val: T) { + // self is a &dm pointer — mutations to self.len + // are visible to the caller + // ... + } +} +``` + ### Buffers Memory, calldata, returndata, internal code, and external code are @@ -91,6 +132,7 @@ location are specified in the type system documents. | persistent storage | 256 | Storage is a 256-bit key–value hashmap | | transient storage | 256 | Transient storage is a 256-bit key–value hashmap | | memory | 32 | Theoretical maximum memory size does not approach 2³² | +| dynamic memory | 32 | Theoretical maximum memory size does not approach 2³² | | calldata | 32 | Theoretical maximum calldata size does not approach 2³² | | returndata | 32 | Maximum returndata size equals maximum memory size | | internal code | 16 | Code size is less than 0xFFFF | diff --git a/docs/pages/specs/syntax/operators.md b/docs/pages/specs/syntax/operators.md index 5ab15e6..ba977a4 100644 --- a/docs/pages/specs/syntax/operators.md +++ b/docs/pages/specs/syntax/operators.md @@ -4,7 +4,7 @@ title: Operators # Operators -Operators are syntax sugar over built-in functions. Operator overloading is disallowed. +Operators are syntax sugar over built-in functions. ## Binary operators @@ -75,6 +75,15 @@ Compound assignment operators (`+=`, `-=`, etc.) are parsed as binary operations and produce `Expr::Binary` nodes with the corresponding `BinOp` variant. +## Index + +```text + ::= "[" "]" ; +``` + +The index operator `[]` dispatches to the `Index` trait. Any type implementing +`Index` can be indexed with `value[key]`. + ## Semantics | Operator | Types | Behavior | Panic case | @@ -101,3 +110,60 @@ operations and produce `Expr::Binary` nodes with the corresponding | `>=` | integers | greater than or equal | — | | `<` | integers | less than | — | | `<=` | integers | less than or equal | — | +| `[]` | any | index | — | + +### Checked arithmetic + +The `+`, `-`, and `*` operators are **checked** by default: they revert on +overflow or underflow. The compiler's range analysis pass can **elide** these +checks when it can statically prove the operation is safe (e.g., adding two +values whose combined upper bound fits in the type). This happens +automatically at optimization level O1 and above. + +For performance-critical code where overflow is known to be impossible, the +standard library provides unchecked variants via the `UnsafeAdd`, `UnsafeSub`, +and `UnsafeMul` traits (see [Standard Library](/specs/stdlib)). + +### Operator overloading + +Direct operator overloading is disallowed. However, the following operators +can be customized for user-defined types by implementing the corresponding +standard library trait from `std::ops`: + +| Operator | Trait | Method(s) | +|----------|-------|-----------| +| `+` | `Add` | `add(self, rhs)` | +| `-` | `Sub` | `sub(self, rhs)` | +| `*` | `Mul` | `mul(self, rhs)` | +| `/` | `Div` | `div(self, rhs)` | +| `%` | `Mod` | `mod_(self, rhs)` | +| `==` | `Eq` | `eq(self, rhs)` | +| `<` `<=` `>` `>=` | `Ord` | `lt` `le` `gt` `ge` | +| `[]` | `Index` | `index(self, idx)` | + +Bitwise and logical operators (`&`, `|`, `^`, `~`, `<<`, `>>`, `&&`, `||`) +are currently primitives-only. Exponentiation (`**`) is also primitives-only. +Trait-based overloading for these operators may be added in the future. + +Example: + +```edge +use std::ops::Add; +use std::ops::Eq; + +type Wrapper = { value: u256 }; + +impl Wrapper: Add { + fn add(self, rhs: Self) -> (Self) { + Wrapper { value: self.value + rhs.value } + } +} + +impl Wrapper: Eq { + fn eq(self, rhs: Self) -> (bool) { + self.value == rhs.value + } +} + +// Now `Wrapper { value: 1 } + Wrapper { value: 2 }` works. +``` diff --git a/vocs.shared.ts b/vocs.shared.ts index 0b5cfc1..2c579b3 100644 --- a/vocs.shared.ts +++ b/vocs.shared.ts @@ -114,6 +114,7 @@ export default defineConfig({ }, { text: 'Inline Assembly', link: '/specs/inline' }, { text: 'Built-In', link: '/specs/builtins' }, + { text: 'Standard Library', link: '/specs/stdlib' }, ], }, {