diff --git a/CHANGELOG.md b/CHANGELOG.md index a25ef812..41049044 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ All notable changes to Aver are documented here. Starting with 0.10.0, minor rel - **A law whose generated name clashes with another theorem in the same proof is skipped with a clear note** instead of silently costing a neighboring law its universal credit. The clash is reported in the per-law proof report rather than failing quietly. - **wasm-gc strings count characters, not bytes** — `String.len`, `String.charAt`, `String.slice`, and `String.chars` now use Unicode character (scalar) counts and indices, matching the VM and the documentation on every multi-byte string. `String.byteLength` still counts UTF-8 bytes. `examples/data/json.av` passes `aver verify --wasm-gc` in full, including its emoji cases. +- **wasm-gc verifies tuple results containing generic constructors** — functions that return values like `(Option.None, n)` or compare expected tuples like `(Result.Ok(2), Result.Ok(0))` now keep the concrete payload types and run like they do on the VM. As a side effect, a tuple literal in a driven position (a declared return, an annotated binding, or a verify expectation) whose element doesn't fit the annotation is now rejected when the file is checked, instead of failing later at run time. Known residual: a hand-written `==` between two such tuples outside a verify block still loses the payload types on wasm-gc. +- **wasm-gc `Char.toCode` returns Unicode scalar values** — multi-byte characters such as `ż`, `…`, and `😀` now report the same code points as the VM and roundtrip through `Char.fromCode`. - **Comparing `Result` values no longer traps on wasm-gc** when one module wraps two different record types in `Result<…, String>` — `Result.Ok`/`Result.Err` now build the instantiation the type checker resolved at the site instead of guessing from the payload type. `examples/core/order_total.av` passes `aver verify --wasm-gc` in full. - **`aver audit` counts every check error in its total and exit code** — `error[verify-rhs]` (a verify case calling the function under test on the right side of `=>`) was printed but excluded from both, so a file with only such errors audited green. diff --git a/src/codegen/wasm_gc/body/from_mir/builtins.rs b/src/codegen/wasm_gc/body/from_mir/builtins.rs index 38dff4bd..6e1d30a5 100644 --- a/src/codegen/wasm_gc/body/from_mir/builtins.rs +++ b/src/codegen/wasm_gc/body/from_mir/builtins.rs @@ -1,5 +1,5 @@ //! Built-in call lowering: the custom-inline `Float` / `Int` / `Bool` -//! scalar ops, `Char.toCode`, the custom-inline `String` ops +//! scalar ops, the custom-inline `String` ops //! (`length` / `split` / `join`), the `List` / `Vector` //! / `Map` families, the fused `Option.withDefault(Vector.get, //! )` / `Option.withDefault(Vector.set, v)` / @@ -94,7 +94,7 @@ pub(crate) fn emit_mir_wasip2_effect( /// (builtins.rs): builtins that lower to a fixed inline wasm sequence /// rather than a `fn_map.builtins`-keyed helper call, so a plain /// `fn_map.builtins.get(dotted)` lookup misses them. Covers the native -/// scalar `Float` / `Int` / `Bool` ops, `Char.toCode`, and the `String` +/// scalar `Float` / `Int` / `Bool` ops and the `String` /// ops whose helper is keyed under a *different* name than the surface /// builtin — `String.length` dispatches to the /// `String.len` helper, and `String.split` / `String.join` to the @@ -215,21 +215,6 @@ pub(crate) fn emit_mir_native_scalar_builtin( arg!(0); func.instruction(&Instruction::I32Eqz); } - // `Char.toCode(s) -> Int` — first byte of the 1-char string - // (`array.get_u 0` + widen). Aver `Char` is a single-byte - // `String`. Mirror of `emit_dotted_builtin`'s arm. - "Char.toCode" if args.len() == 1 => { - let s_idx = ctx - .registry - .string_array_type_idx - .ok_or(WasmGcError::Validation( - "Char.toCode requires the String slot allocated".into(), - ))?; - arg!(0); - func.instruction(&Instruction::I32Const(0)); - func.instruction(&Instruction::ArrayGetU(s_idx)); - func.instruction(&Instruction::I64ExtendI32U); - } // Alias spelling for `String.len` (scalar count) — the helper // is keyed under `String.len`, not the surface name. // `String.byteLength` has its own helper (keyed by surface diff --git a/src/codegen/wasm_gc/body/from_mir/mod.rs b/src/codegen/wasm_gc/body/from_mir/mod.rs index b0c10baa..b560179b 100644 --- a/src/codegen/wasm_gc/body/from_mir/mod.rs +++ b/src/codegen/wasm_gc/body/from_mir/mod.rs @@ -39,7 +39,7 @@ //! - [`records`] — `RecordCreate` / `RecordUpdate`. //! - [`collections`] — `List` literals. //! - [`builtins`] — the custom-inline `Float` / `Int` / `Bool` scalar -//! ops, `Char.toCode`, the custom-inline `String` ops (`length` / +//! ops, the custom-inline `String` ops (`length` / //! `split` / `join`), the `List` / `Vector` / `Map` //! families, the //! fused `Option.withDefault(Vector.get(v, i), )` / diff --git a/src/codegen/wasm_gc/builtins/mod.rs b/src/codegen/wasm_gc/builtins/mod.rs index 549f7a94..7ec19914 100644 --- a/src/codegen/wasm_gc/builtins/mod.rs +++ b/src/codegen/wasm_gc/builtins/mod.rs @@ -116,6 +116,7 @@ pub(super) enum BuiltinName { StringEndsWith, StringFromBool, StringCharAt, + CharToCode, CharFromCode, StringChars, /// `Byte.fromHex(s) -> Result`. Parses a 2-char hex @@ -162,6 +163,7 @@ impl BuiltinName { "String.endsWith" => Some(Self::StringEndsWith), "String.fromBool" => Some(Self::StringFromBool), "String.charAt" => Some(Self::StringCharAt), + "Char.toCode" => Some(Self::CharToCode), "Char.fromCode" => Some(Self::CharFromCode), "String.chars" => Some(Self::StringChars), "Byte.fromHex" => Some(Self::ByteFromHex), @@ -191,6 +193,7 @@ impl BuiltinName { Self::StringEndsWith => "String.endsWith", Self::StringFromBool => "String.fromBool", Self::StringCharAt => "String.charAt", + Self::CharToCode => "Char.toCode", Self::CharFromCode => "Char.fromCode", Self::StringChars => "String.chars", Self::ByteFromHex => "Byte.fromHex", @@ -221,6 +224,7 @@ impl BuiltinName { Self::StringEndsWith => Ok(vec![string_ref_ty(registry)?, string_ref_ty(registry)?]), Self::StringFromBool => Ok(vec![ValType::I32]), Self::StringCharAt => Ok(vec![string_ref_ty(registry)?, ValType::I64]), + Self::CharToCode => Ok(vec![string_ref_ty(registry)?]), Self::CharFromCode => Ok(vec![ValType::I64]), Self::StringChars => Ok(vec![string_ref_ty(registry)?]), Self::ByteFromHex => Ok(vec![string_ref_ty(registry)?]), @@ -251,6 +255,7 @@ impl BuiltinName { Self::StringCompare => Ok(vec![ValType::I32]), Self::StringFromBool => Ok(vec![string_ref_ty(registry)?]), Self::StringEndsWith => Ok(vec![ValType::I32]), + Self::CharToCode => Ok(vec![ValType::I64]), Self::StringCharAt | Self::CharFromCode => { Ok(vec![option_ref_ty(registry, "Option")?]) } @@ -285,6 +290,7 @@ impl BuiltinName { Self::StringEndsWith => emit_string_ends_with(registry), Self::StringFromBool => emit_string_from_bool(registry), Self::StringCharAt => emit_string_char_at(registry), + Self::CharToCode => emit_char_to_code(registry), Self::CharFromCode => emit_char_from_code(registry), Self::StringChars => emit_string_chars(registry), Self::ByteFromHex => emit_byte_from_hex(registry), @@ -2342,6 +2348,168 @@ fn emit_string_char_at(registry: &TypeRegistry) -> Result wat_helper::compile_wat_helper(&wat) } +/// `Char.toCode(s: String) -> Int`. Aver represents a Char as a one-scalar +/// String, so decode the first UTF-8 scalar instead of returning byte zero. +/// Empty strings trap, matching the old wasm-gc hard-failure behavior for +/// invalid Char values. +fn emit_char_to_code(registry: &TypeRegistry) -> Result { + let string_idx = registry + .string_array_type_idx + .ok_or(WasmGcError::Validation( + "Char.toCode: String slot not registered".into(), + ))?; + let pre_string = wat_helper::padding_types(string_idx); + let wat = format!( + r#" + (module + {pre_string} + (type $string (array (mut i8))) + (func (export "helper") (param $s (ref null $string)) (result i64) + (local $n i32) + (local $b0 i32) + (local $b1 i32) + (local $b2 i32) + (local $b3 i32) + (local $clen i32) + (local $code i32) + + local.get $s + array.len + local.set $n + + local.get $n + i32.eqz + (if (then unreachable)) + + local.get $s + i32.const 0 + array.get_u $string + local.set $b0 + + ;; UTF-8 length from the lead byte. Treat stray continuation bytes + ;; as length 1, like String.charAt's defensive scanner. + i32.const 1 + local.set $clen + local.get $b0 + i32.const 0xC0 + i32.ge_u + (if (then i32.const 2 local.set $clen)) + local.get $b0 + i32.const 0xE0 + i32.ge_u + (if (then i32.const 3 local.set $clen)) + local.get $b0 + i32.const 0xF0 + i32.ge_u + (if (then i32.const 4 local.set $clen)) + + ;; Clamp malformed truncated tails to the bytes that exist. + local.get $clen + local.get $n + i32.gt_u + (if (then local.get $n local.set $clen)) + + local.get $clen + i32.const 1 + i32.eq + (if + (then + local.get $b0 + local.set $code) + (else + local.get $clen + i32.const 2 + i32.eq + (if + (then + local.get $s + i32.const 1 + array.get_u $string + local.set $b1 + local.get $b0 + i32.const 0x1F + i32.and + i32.const 6 + i32.shl + local.get $b1 + i32.const 0x3F + i32.and + i32.or + local.set $code) + (else + local.get $clen + i32.const 3 + i32.eq + (if + (then + local.get $s + i32.const 1 + array.get_u $string + local.set $b1 + local.get $s + i32.const 2 + array.get_u $string + local.set $b2 + local.get $b0 + i32.const 0x0F + i32.and + i32.const 12 + i32.shl + local.get $b1 + i32.const 0x3F + i32.and + i32.const 6 + i32.shl + i32.or + local.get $b2 + i32.const 0x3F + i32.and + i32.or + local.set $code) + (else + local.get $s + i32.const 1 + array.get_u $string + local.set $b1 + local.get $s + i32.const 2 + array.get_u $string + local.set $b2 + local.get $s + i32.const 3 + array.get_u $string + local.set $b3 + local.get $b0 + i32.const 0x07 + i32.and + i32.const 18 + i32.shl + local.get $b1 + i32.const 0x3F + i32.and + i32.const 12 + i32.shl + i32.or + local.get $b2 + i32.const 0x3F + i32.and + i32.const 6 + i32.shl + i32.or + local.get $b3 + i32.const 0x3F + i32.and + i32.or + local.set $code)))))) + + local.get $code + i64.extend_i32_u) + ) + "# + ); + wat_helper::compile_wat_helper(&wat) +} + /// `Char.fromCode(code: Int) -> Option`. Encodes a Unicode /// code point as UTF-8 (1–4 bytes) into a fresh `(array i8)`. Returns /// Option.None for negative values, codepoints above U+10FFFF, and the diff --git a/src/types/checker/infer/expr.rs b/src/types/checker/infer/expr.rs index 287192d6..2367885c 100644 --- a/src/types/checker/infer/expr.rs +++ b/src/types/checker/infer/expr.rs @@ -256,6 +256,33 @@ impl TypeChecker { _ => None, }, + // Tuple literals adopt expected `Tuple<...>` element-wise. This + // gives generic constructors in tuple slots (for example + // `(Option.None, n)` under `Tuple, Int>`) the same + // bidirectional context list elements already receive. + // Deliberately NOT `Expr::IndependentProduct`: `(...)!` elements + // must be function calls, and that shape check lives on the + // bottom-up path — adopting the expected type here would skip it. + Expr::Tuple(items) => match expected { + Type::Tuple(elems) if elems.len() == items.len() => { + let mut out = Vec::with_capacity(items.len()); + for (idx, (item, elem_expected)) in items.iter().zip(elems.iter()).enumerate() { + let item_ty = self.infer_type_with_expected(item, Some(elem_expected)); + if !self.compatible(&item_ty, elem_expected) { + self.error(format!( + "Tuple element {}: expected {}, got {}", + idx + 1, + elem_expected.display(), + item_ty.display() + )); + } + out.push(item_ty); + } + Some(Type::Tuple(out)) + } + _ => None, + }, + // Empty map literal `{}` adopts expected `Map`. Expr::MapLiteral(entries) if entries.is_empty() => match expected { Type::Map(_, _) => Some(expected.clone()), diff --git a/tests/typechecker_spec.rs b/tests/typechecker_spec.rs index e75ac122..c0bdfce6 100644 --- a/tests/typechecker_spec.rs +++ b/tests/typechecker_spec.rs @@ -3509,3 +3509,25 @@ fn main() -> Unit "expected no cascading 'got Invalid' errors after Iron — A4, got: {errs:?}" ); } + +#[test] +fn independent_product_rejects_non_call_elements_even_under_expected_tuple() { + // `(...)!` elements must be function calls. The expected-type path for + // tuple literals (a `Tuple<...>` return annotation drives each element) + // must not adopt independent products, or this validation is skipped. + let src = "fn pair(n: Int) -> Tuple\n ? \"Pairs.\"\n (1, n)!\n"; + let errs = errors(src); + assert!( + errs.iter() + .any(|m| m.contains("Independent product element must be a function call")), + "expected the non-call `(...)!` element to be rejected, got: {errs:?}" + ); +} + +#[test] +fn plain_tuple_literal_accepts_expected_tuple_elements() { + // Positive control for the test above: the same shape WITHOUT `!` is an + // ordinary tuple literal and must type-check via the expected-type path. + let src = "fn pair(n: Int) -> Tuple\n ? \"Pairs.\"\n (1, n)\n"; + assert_no_errors(src); +} diff --git a/tests/wasm_gc_string_scalar_semantics_regression.rs b/tests/wasm_gc_string_scalar_semantics_regression.rs index c3a627dc..a3a5f4b4 100644 --- a/tests/wasm_gc_string_scalar_semantics_regression.rs +++ b/tests/wasm_gc_string_scalar_semantics_regression.rs @@ -161,3 +161,35 @@ verify charsOf 3, ); } + +/// `Char.toCode` decodes the first Unicode scalar value of the Char string, +/// not the first UTF-8 byte. Pair it with `Char.fromCode` to pin the roundtrip. +#[test] +fn char_to_code_decodes_scalar_values() { + assert_all_pass_on_both( + r#" +fn codeOf(c: String) -> Int + ? "Unicode scalar code of c." + Char.toCode(c) + +fn roundTrip(c: String) -> String + ? "Encode the decoded code point again." + match Char.fromCode(Char.toCode(c)) + Option.Some(out) -> out + Option.None -> "" + +verify codeOf + codeOf("a") => 97 + codeOf("ż") => 380 + codeOf("…") => 8230 + codeOf("😀") => 128512 + +verify roundTrip + roundTrip("a") => "a" + roundTrip("ż") => "ż" + roundTrip("…") => "…" + roundTrip("😀") => "😀" +"#, + 8, + ); +} diff --git a/tests/wasm_gc_verify_option_case_regression.rs b/tests/wasm_gc_verify_option_case_regression.rs index 9a34d25f..c6f66c3a 100644 --- a/tests/wasm_gc_verify_option_case_regression.rs +++ b/tests/wasm_gc_verify_option_case_regression.rs @@ -282,6 +282,92 @@ verify stash ); } +/// Tuple element with a bare `Option.None` in a match arm. The declared +/// tuple return type must drive the Option payload type all the way down +/// to the element expression. +#[test] +fn tuple_option_return_with_none_arm() { + assert_all_cases_pass( + r#" +fn pack(n: Int) -> Tuple, Int> + ? "Pair an option with its source." + match n > 0 + true -> (Option.Some(n), n) + false -> (Option.None, n) + +fn firstOf(t: Tuple, Int>) -> Option + ? "First element." + match t + (a, b) -> a + +verify firstOf + firstOf(pack(3)) => Option.Some(3) + firstOf(pack(-1)) => Option.None +"#, + 2, + ); +} + +/// Tuple-shaped verify RHS with two `Result.Ok(...)` constructors. The +/// LHS tuple type supplies each `Result`'s error type on the expected side. +#[test] +fn tuple_result_expected_side_ok_constructors() { + assert_all_cases_pass( + r#" +fn both(a: Int, b: Int) -> Tuple, Result> + ? "Divide both ways." + (Int.div(a, b), Int.div(b, a)) + +verify both + both(4, 2) => (Result.Ok(2), Result.Ok(0)) +"#, + 1, + ); +} + +/// Direct tuple-return body with `Option.None` as the first element. This +/// was a checker-level false error before tuple expected-type propagation. +#[test] +fn direct_tuple_return_with_none_element() { + assert_all_cases_pass( + r#" +fn packNone(n: Int) -> Tuple, Int> + ? "Always pair none with n." + (Option.None, n) + +fn firstOf(t: Tuple, Int>) -> Option + ? "First element." + match t + (a, b) -> a + +verify firstOf + firstOf(packNone(5)) => Option.None +"#, + 1, + ); +} + +/// Control — tuple element whose payload fixes `T` bottom-up. +#[test] +fn tuple_option_some_only_control() { + assert_all_cases_pass( + r#" +fn packSome(n: Int) -> Tuple, Int> + ? "Always pair some n with n." + (Option.Some(n), n) + +fn firstOf(t: Tuple, Int>) -> Option + ? "First element." + match t + (a, b) -> a + +verify firstOf + firstOf(packSome(5)) => Option.Some(5) +"#, + 1, + ); +} + /// Control — `Result.Err` verify cases plus `!=` against `Option.None` /// and None-on-left equality in fn bodies. All green before the /// second-wave fixes; pinned so the generalized verify-RHS propagation