From 0cad62f8c29e353045a7e6cf6e3c985f730bd32a Mon Sep 17 00:00:00 2001 From: Zeke Foppa Date: Tue, 27 Jan 2026 09:03:35 -0800 Subject: [PATCH 1/3] [bfops/login-no-browser]: Add `spacetime login --no-browser` --- crates/cli/src/subcommands/init.rs | 2 +- crates/cli/src/subcommands/login.rs | 45 +++++++++++++++++++++-------- crates/cli/src/util.rs | 4 +-- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/crates/cli/src/subcommands/init.rs b/crates/cli/src/subcommands/init.rs index 6ac38f687e1..6825b934cdf 100644 --- a/crates/cli/src/subcommands/init.rs +++ b/crates/cli/src/subcommands/init.rs @@ -181,7 +181,7 @@ pub async fn check_and_prompt_login(config: &mut Config) -> anyhow::Result if should_login { let host = Url::parse(DEFAULT_AUTH_HOST)?; - spacetimedb_login_force(config, &host, false).await?; + spacetimedb_login_force(config, &host, false, true).await?; println!("{}", "Successfully logged in!".green()); Ok(true) } else { diff --git a/crates/cli/src/subcommands/login.rs b/crates/cli/src/subcommands/login.rs index bebd71db2d9..ef696d06a81 100644 --- a/crates/cli/src/subcommands/login.rs +++ b/crates/cli/src/subcommands/login.rs @@ -31,6 +31,12 @@ pub fn cli() -> Command { .group("login-method") .help("Bypass the login flow and use a login token directly"), ) + .arg( + Arg::new("no-browser") + .long("no-browser") + .action(ArgAction::SetTrue) + .help("Do not open a browser window"), + ) .about("Manage your login to the SpacetimeDB CLI") } @@ -54,6 +60,7 @@ pub async fn exec(mut config: Config, args: &ArgMatches) -> Result<(), anyhow::E let host: &String = args.get_one("auth-host").unwrap(); let host = Url::parse(host)?; let server_issued_login: Option<&String> = args.get_one("server"); + let open_browser = !args.get_flag("no-browser"); if let Some(token) = spacetimedb_token { config.set_spacetimedb_token(token.clone()); @@ -63,9 +70,9 @@ pub async fn exec(mut config: Config, args: &ArgMatches) -> Result<(), anyhow::E if let Some(server) = server_issued_login { let host = Url::parse(&config.get_host_url(Some(server))?)?; - spacetimedb_token_cached(&mut config, &host, true).await?; + spacetimedb_token_cached(&mut config, &host, true, open_browser).await?; } else { - spacetimedb_token_cached(&mut config, &host, false).await?; + spacetimedb_token_cached(&mut config, &host, false, open_browser).await?; } Ok(()) @@ -98,7 +105,12 @@ async fn exec_show(config: Config, args: &ArgMatches) -> Result<(), anyhow::Erro Ok(()) } -async fn spacetimedb_token_cached(config: &mut Config, host: &Url, direct_login: bool) -> anyhow::Result { +async fn spacetimedb_token_cached( + config: &mut Config, + host: &Url, + direct_login: bool, + open_browser: bool, +) -> anyhow::Result { // Currently, this token does not expire. However, it will at some point in the future. When that happens, // this code will need to happen before any request to a spacetimedb server, rather than at the end of the login flow here. if let Some(token) = config.spacetimedb_token() { @@ -106,18 +118,23 @@ async fn spacetimedb_token_cached(config: &mut Config, host: &Url, direct_login: println!("If you want to log out, use spacetime logout."); Ok(token.clone()) } else { - spacetimedb_login_force(config, host, direct_login).await + spacetimedb_login_force(config, host, direct_login, open_browser).await } } -pub async fn spacetimedb_login_force(config: &mut Config, host: &Url, direct_login: bool) -> anyhow::Result { +pub async fn spacetimedb_login_force( + config: &mut Config, + host: &Url, + direct_login: bool, + open_browser: bool, +) -> anyhow::Result { let token = if direct_login { let token = spacetimedb_direct_login(host).await?; println!("We have logged in directly to your target server."); println!("WARNING: This login will NOT work for any other servers."); token } else { - let session_token = web_login_cached(config, host).await?; + let session_token = web_login_cached(config, host, open_browser).await?; spacetimedb_login(host, &session_token).await? }; config.set_spacetimedb_token(token.clone()); @@ -126,12 +143,12 @@ pub async fn spacetimedb_login_force(config: &mut Config, host: &Url, direct_log Ok(token) } -async fn web_login_cached(config: &mut Config, host: &Url) -> anyhow::Result { +async fn web_login_cached(config: &mut Config, host: &Url, open_browser: bool) -> anyhow::Result { if let Some(session_token) = config.web_session_token() { // Currently, these session tokens do not expire. At some point in the future, we may also need to check this session token for validity. Ok(session_token.clone()) } else { - let session_token = web_login(host).await?; + let session_token = web_login(host, open_browser).await?; config.set_web_session_token(session_token.clone()); config.save(); Ok(session_token) @@ -193,7 +210,7 @@ impl WebLoginSessionResponse { } } -async fn web_login(remote: &Url) -> Result { +async fn web_login(remote: &Url, open_browser: bool) -> Result { let client = reqwest::Client::new(); let response: WebLoginTokenResponse = client @@ -213,9 +230,13 @@ async fn web_login(remote: &Url) -> Result { browser_url .query_pairs_mut() .append_pair("token", web_login_request_token); - println!("Opening {browser_url} in your browser."); - if webbrowser::open(browser_url.as_str()).is_err() { - println!("Unable to open your browser! Please open the URL above manually."); + if open_browser { + println!("Opening {browser_url} in your browser."); + if webbrowser::open(browser_url.as_str()).is_err() { + println!("Unable to open your browser! Please open the URL above manually."); + } + } else { + println!("Open {browser_url} in your browser to finish logging in."); } println!("Waiting to hear response from the server..."); diff --git a/crates/cli/src/util.rs b/crates/cli/src/util.rs index 2cd09e948ed..b843b9f7aef 100644 --- a/crates/cli/src/util.rs +++ b/crates/cli/src/util.rs @@ -318,10 +318,10 @@ pub async fn get_login_token_or_log_in( if full_login { let host = Url::parse(DEFAULT_AUTH_HOST)?; - spacetimedb_login_force(config, &host, false).await + spacetimedb_login_force(config, &host, false, true).await } else { let host = Url::parse(&config.get_host_url(target_server)?)?; - spacetimedb_login_force(config, &host, true).await + spacetimedb_login_force(config, &host, true, true).await } } From 8f561396fc775cb5056ea1b94246fee4a0c5dd20 Mon Sep 17 00:00:00 2001 From: Zeke Foppa Date: Tue, 27 Jan 2026 09:18:12 -0800 Subject: [PATCH 2/3] [bfops/login-no-browser]: update CLI docs --- .../00200-reference/00100-cli-reference/00100-cli-reference.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/docs/00300-resources/00200-reference/00100-cli-reference/00100-cli-reference.md b/docs/docs/00300-resources/00200-reference/00100-cli-reference/00100-cli-reference.md index 508f87a399f..309dd328233 100644 --- a/docs/docs/00300-resources/00200-reference/00100-cli-reference/00100-cli-reference.md +++ b/docs/docs/00300-resources/00200-reference/00100-cli-reference/00100-cli-reference.md @@ -378,6 +378,7 @@ Manage your login to the SpacetimeDB CLI Default value: `https://spacetimedb.com` * `--server-issued-login ` — Log in to a SpacetimeDB server directly, without going through a global auth server * `--token ` — Bypass the login flow and use a login token directly +* `--no-browser` — Do not open a browser window From b634cb32bdc4a9b7cdf722b4e13adcee34552444 Mon Sep 17 00:00:00 2001 From: clockwork-labs-bot Date: Tue, 27 Jan 2026 18:17:44 +0000 Subject: [PATCH 3/3] Update LLM benchmark results --- docs/llms/docs-benchmark-analysis.md | 728 +++++------ docs/llms/docs-benchmark-comment.md | 18 +- docs/llms/docs-benchmark-details.json | 1674 ++++++++++++------------- docs/llms/docs-benchmark-summary.json | 46 +- 4 files changed, 1204 insertions(+), 1262 deletions(-) diff --git a/docs/llms/docs-benchmark-analysis.md b/docs/llms/docs-benchmark-analysis.md index 830c04f6c43..405904dbb56 100644 --- a/docs/llms/docs-benchmark-analysis.md +++ b/docs/llms/docs-benchmark-analysis.md @@ -4,421 +4,363 @@ Generated from: `/__w/SpacetimeDB/SpacetimeDB/tools/xtask-llm-benchmark/../../do ## Summary -- **Total failures analyzed**: 34 +- **Total failures analyzed**: 37 --- -# Analysis of SpacetimeDB Benchmark Failures +# Analysis of SpacetimeDB Benchmark Test Failures -This analysis focuses on test failures within SpacetimeDB benchmarks, specifically categorized by language and mode, providing actionable insights for documentation improvements to reduce these errors. +Below is an analysis of the SpacetimeDB benchmark test failures, organized by language and mode. Each failure includes the generated code, expected code, failure reasons, and actionable recommendations. ---- - -## Rust / rustdoc_json Failures (7 total) +## Rust / rustdoc_json Failures ### Compile/Publish Errors (3 failures) -#### 1. t_002_scheduled_table -- **The generated code**: - ```rust - use spacetimedb::{ReducerContext, ScheduleAt, Table}; - - #[spacetimedb::table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))] - pub struct TickTimer { - #[primary_key] - #[auto_inc] - scheduled_id: u64, - scheduled_at: ScheduleAt, - } - - #[spacetimedb::reducer(init)] - pub fn init(ctx: &ReducerContext) { - ctx.db.tick_timer().insert(TickTimer { - scheduled_id: 0, - scheduled_at: ScheduleAt::repeat_micros(50_000), - }); - } - - #[spacetimedb::reducer] - pub fn tick(_ctx: &ReducerContext) { - log::info!("tick"); - } - ``` - -- **The golden example**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table}; - use std::time::Duration; - - #[table(name = tick_timer, scheduled(tick))] - pub struct TickTimer { - #[primary_key] - #[auto_inc] - pub scheduled_id: u64, - pub scheduled_at: ScheduleAt, - } - - #[reducer] - pub fn tick(_ctx: &ReducerContext, _schedule: TickTimer) { - } - - #[reducer(init)] - pub fn init(ctx: &ReducerContext) { - let every_50ms: ScheduleAt = Duration::from_millis(50).into(); - ctx.db.tick_timer().insert(TickTimer { - scheduled_id: 0, - scheduled_at: every_50ms, - }); - } - ``` - -- **The error**: `publish_error: spacetime publish failed (exit=1)` - -- **Explain the difference**: - - The generated code incorrectly used `ScheduleAt::repeat_micros(50_000)` instead of `ScheduleAt::Interval(Duration::from_millis(50).into())`. - - The reducer function lacked parameters that were required. - -- **Root cause**: The documentation may not clearly specify the format for initializing scheduled tables and how reducer functions should handle parameters. - -- **Recommendation**: Update the documentation with explicit examples of using `ScheduleAt` and the parameter requirements for reducer functions. +#### t_002_scheduled_table + +1. **The generated code**: + ```rust + use spacetimedb::{table, reducer, ReducerContext, Table, ScheduleAt}; + + #[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))] + pub struct TickTimer { + #[primary_key] + #[auto_inc] + scheduled_id: u64, + scheduled_at: ScheduleAt, + } + + #[reducer(scheduled)] + pub fn tick(_ctx: &ReducerContext, _timer: TickTimer) { + } + + #[reducer(init)] + pub fn init(ctx: &ReducerContext) { + ctx.db.tick_timer().insert(TickTimer { + scheduled_id: 0, + scheduled_at: ScheduleAt::RepeatMicros(50_000), + }); + } + ``` + +2. **The golden example**: + ```rust + use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table}; + use std::time::Duration; + + #[table(name = tick_timer, scheduled(tick))] + pub struct TickTimer { + #[primary_key] + #[auto_inc] + pub scheduled_id: u64, + pub scheduled_at: ScheduleAt, + } + + #[reducer] + pub fn tick(_ctx: &ReducerContext, _row: TickTimer) -> Result<(), String> { + Ok(()) + } + + #[reducer(init)] + pub fn init(ctx: &ReducerContext) -> Result<(), String> { + ctx.db.tick_timer().insert(TickTimer { + scheduled_id: 0, + scheduled_at: ScheduleAt::Interval(Duration::from_millis(50).into()), + }); + Ok(()) + } + ``` + +3. **The error**: `publish_error: spacetime publish failed (exit=1)` + - The error indicates issues with the versioning or syncing of the workspace. + +4. **Explain the difference**: + - The generated code uses `ScheduleAt::RepeatMicros()` which isn't the expected API; `ScheduleAt::Interval()` is required. + - The `tick` reducer is incorrectly declared. + +5. **Root cause**: + - The documentation may not clearly specify the method variations for `ScheduleAt`. + +6. **Recommendation**: + - Update the documentation to include examples that clarify the expected types and functions, particularly for scheduling. --- -#### 2. t_007_crud -- **The generated code**: - ```rust - use spacetimedb::{reducer, table, ReducerContext}; - - #[table(name = user)] - pub struct User { - #[primary_key] - id: i32, - name: String, - age: i32, - active: bool, - } - - #[reducer] - pub fn crud(ctx: &ReducerContext) { - let users = ctx.db.user(); - - users.insert(User { - id: 1, - name: "Alice".to_string(), - age: 30, - active: true, - }); - - users.insert(User { - id: 2, - name: "Bob".to_string(), - age: 22, - active: false, - }); - - if let Some(mut u) = users.id().find(1) { - u.name = "Alice2".to_string(); - u.age = 31; - u.active = false; - users.id().update(u); - } - - users.id().delete(&2); - } - ``` - -- **The golden example**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, Table}; - - #[table(name = user)] - pub struct User { - #[primary_key] - pub id: i32, - pub name: String, - pub age: i32, - pub active: bool, - } - - #[reducer] - pub fn crud(ctx: &ReducerContext) { - ctx.db.user().insert(User { id: 1, name: "Alice".into(), age: 30, active: true }); - ctx.db.user().insert(User { id: 2, name: "Bob".into(), age: 22, active: false }); - ctx.db.user().id().update(User { id: 1, name: "Alice2".into(), age: 31, active: false }); - ctx.db.user().id().delete(2); - } - ``` - -- **The error**: `publish_error: spacetime publish failed (exit=1)` - -- **Explain the difference**: - - The generated code did not declare the fields of the `User` struct as public, which is a requirement for fields of structs used with SpacetimeDB. - - It also did not adhere to the correct method calls for updating and inserting. - -- **Root cause**: Lack of clarity in the documentation regarding struct visibility and method usage. - -- **Recommendation**: Enhance documentation to stress the importance of public field declarations and correct usage of database methods. +#### t_017_scheduled_columns + +1. **The generated code**: + ```rust + use spacetimedb::{table, reducer, ReducerContext, Table, ScheduleAt}; + + #[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))] + pub struct TickTimer { + #[primary_key] + #[auto_inc] + scheduled_id: u64, + scheduled_at: ScheduleAt, + } + + #[reducer] + pub fn tick(_ctx: &ReducerContext, _row: TickTimer) { + // scheduled callback + } + + #[reducer(init)] + pub fn init(ctx: &ReducerContext) { + ctx.db.tick_timer().insert(TickTimer { + scheduled_id: 0, + scheduled_at: ScheduleAt::repeat_every_micros(50_000), + }); + } + ``` + +2. **The golden example**: + ```rust + use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table}; + use std::time::Duration; + + #[table(name = tick_timer, scheduled(tick))] + pub struct TickTimer { + #[primary_key] + #[auto_inc] + pub scheduled_id: u64, + pub scheduled_at: ScheduleAt, + } + + #[reducer] + pub fn tick(_ctx: &ReducerContext, _schedule: TickTimer) { + } + + #[reducer(init)] + pub fn init(ctx: &ReducerContext) { + let every_50ms: ScheduleAt = Duration::from_millis(50).into(); + ctx.db.tick_timer().insert(TickTimer { + scheduled_id: 0, + scheduled_at: every_50ms, + }); + } + ``` + +3. **The error**: `publish_error: spacetime publish failed (exit=1)` + +4. **Explain the difference**: + - The function `ScheduleAt::repeat_every_micros()` is not the correct function. + - The function signature for `tick` does not match expectations in the golden example. + +5. **Root cause**: + - Inconsistencies with the function naming conventions and expected types. + +6. **Recommendation**: + - Include more extensive examples in documentation explaining the timing mechanics and expected signatures for scheduled tasks. --- -#### 3. t_017_scheduled_columns -- **The generated code**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table}; - - #[table(name = tick_timer, scheduled(reducer = tick, column = scheduled_at))] - pub struct TickTimer { - #[primary_key] - #[auto_inc] - scheduled_id: u64, - scheduled_at: ScheduleAt, - } - - #[reducer(init)] - pub fn init(ctx: &ReducerContext) { - if ctx.db.tick_timer().count() == 0 { - ctx.db.tick_timer().insert(TickTimer { - scheduled_id: 0, - scheduled_at: ScheduleAt::repeat(50_000), - }); - } - } - - #[reducer(scheduled)] - pub fn tick(_ctx: &ReducerContext, _row: TickTimer) { - } - ``` - -- **The golden example**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table}; - use std::time::Duration; - - #[table(name = tick_timer, scheduled(tick))] - pub struct TickTimer { - #[primary_key] - #[auto_inc] - pub scheduled_id: u64, - pub scheduled_at: ScheduleAt, - } - - #[reducer] - pub fn tick(_ctx: &ReducerContext, _schedule: TickTimer) { - } - - #[reducer(init)] - pub fn init(ctx: &ReducerContext) { - let every_50ms: ScheduleAt = Duration::from_millis(50).into(); - ctx.db.tick_timer().insert(TickTimer { - scheduled_id: 0, - scheduled_at: every_50ms, - }); - } - ``` - -- **The error**: `publish_error: spacetime publish failed (exit=1)` - -- **Explain the difference**: - - The LLM incorrectly specified the scheduled column and did not update the initialization logic for accurate type conversion. - -- **Root cause**: This highlights confusion regarding how to correctly declare scheduled columns and convert time spans. - -- **Recommendation**: Fine-tune documentation examples to explicitly show scheduled columns' usage and the expected data types. +### Other Failures (7 failures) ---- +#### t_003_struct_in_table -### Other Failures (4 failures) - -#### t_004_insert -- **The generated code**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, Table}; - - #[table(name = user)] - pub struct User { - #[primary_key] - id: i32, - name: String, - age: i32, - active: bool, - } - - #[reducer] - pub fn insert_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) { - ctx.db.user().insert(User { id, name, age, active }); - } - ``` - -- **The golden example**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, Table}; - - #[table(name = user)] - pub struct User { - #[primary_key] - pub id: i32, - pub name: String, - pub age: i32, - pub active: bool, - } - - #[reducer] - pub fn insert_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) -> Result<(), String> { - ctx.db.user().insert(User { id, name, age, active }); - Ok(()) - } - ``` - -- **The error**: `data_parity_insert_user: spacetime sql failed: no such table: user` - -- **Explain the difference**: - - The generated code didn’t mark struct fields as public, and failed to return a `Result` for the reducer function, which is required by the documentation. - -- **Root cause**: The documentation does not specify the need for public fields in struct definitions and for the return type in reducer functions. - -- **Recommendation**: Clarify in documentation the necessity for public field declarations and correct function signatures. - -#### t_011_helper_function -- **The generated code**: - ```rust - use spacetimedb::{table, reducer, ReducerContext, Table}; - - #[table(name = result)] - pub struct ResultRow { - #[primary_key] - id: i32, - sum: i32, - } - - fn add(a: i32, b: i32) -> i32 { - a + b - } - - #[reducer] - fn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) { - let sum = add(a, b); - ctx.db.result().insert(ResultRow { id, sum }); - } - ``` - -- **The golden example**: - ```rust - use spacetimedb::{reducer, table, ReducerContext, Table}; - - #[table(name = result)] - pub struct ResultRow { - #[primary_key] - pub id: i32, - pub sum: i32, - } - - fn add(a: i32, b: i32) -> i32 { a + b } - - #[reducer] - pub fn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) { - ctx.db.result().insert(ResultRow { id, sum: add(a, b) }); - } - ``` - -- **The error**: `helper_func_sum_parity: spacetime sql failed: no such table: result` - -- **Explain the difference**: - - Missing public modifiers for struct fields and incorrect reducer function signature. - -- **Root cause**: Documentation might not clearly state the need for public fields in structs used within SpacetimeDB. - -- **Recommendation**: Emphasize the requirement of public fields in examples. +1. **The generated code**: + ```rust + use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType}; ---- + #[derive(SpacetimeType)] + pub struct Position { + x: i32, + y: i32, + } + + #[table(name = entity)] + pub struct Entity { + #[primary_key] + id: i32, + pos: Position, + } + + #[reducer(init)] + pub fn init(_ctx: &ReducerContext) {} + + #[reducer] + pub fn add_entity(ctx: &ReducerContext, id: i32, x: i32, y: i32) { + ctx.db.entity().insert(Entity { id, pos: Position { x, y } }); + } + ``` + +2. **The golden example**: + ```rust + use spacetimedb::{table, SpacetimeType}; -### C# / docs Failures (5 total) - -#### 1. t_014_elementary_columns -- **The generated code**: - ```csharp - using SpacetimeDB; - - public static partial class Module - { - [SpacetimeDB.Table(Name = "Primitive")] - public partial struct Primitive - { - [SpacetimeDB.PrimaryKey] - public int Id; - public int Count; - public long Total; - public float Price; - public double Ratio; - public bool Active; - public string Name; - } - - [SpacetimeDB.Reducer] - public static void Seed(ReducerContext ctx) - { - ctx.Db.Primitive.Insert(new Primitive - { - Id = 1, - Count = 2, - Total = 3000000000L, - Price = 1.5f, - Ratio = 2.25, - Active = true, - Name = "Alice" - }); - } - } - ``` - -- **The golden example**: - ```csharp - using SpacetimeDB; - - public static partial class Module - { - [Table(Name = "Primitive")] - public partial struct Primitive - { - [PrimaryKey] public int Id; - public int Count; - public long Total; - public float Price; - public double Ratio; - public bool Active; - public string Name; - } - - [Reducer] - public static void Seed(ReducerContext ctx) - { - ctx.Db.Primitive.Insert(new Primitive - { - Id = 1, - Count = 2, - Total = 3000000000, - Price = 1.5f, - Ratio = 2.25, - Active = true, - Name = "Alice" - }); - } - } - ``` - -- **The error**: `no such table: primitive` - -- **Explain the difference**: Field visibility was not explicitly made public in the generated code, which is a requirement for SpacetimeDB. - -- **Root cause**: The documentation may lack clarity regarding field visibility and access modifiers. - -- **Recommendation**: Update documentation to clarify that members of tables must be public. + #[derive(SpacetimeType, Clone, Debug)] + pub struct Position { + pub x: i32, + pub y: i32, + } + + #[table(name = entity)] + pub struct Entity { + #[primary_key] + pub id: i32, + pub pos: Position, + } + ``` + +3. **The error**: `schema_parity: reducers differ - expected [], got ["add_entity()", "init()", "move_entity()"]` + +4. **Explain the difference**: + - The generated code has extra reducer functions (`add_entity` and `move_entity`) that are not expected. + +5. **Root cause**: + - The documentation may be unclear about the requirement for exact matches in the reducer APIs. + +6. **Recommendation**: + - Clarify the guidelines in the documentation to ensure that struct and reducer APIs are clearly delineated. --- -(Continue this format for the remaining C# failures...) +## Rust / docs Failures (22 total) + +### Compile/Publish Errors (1 failure) + +#### t_018_constraints + +1. **The generated code**: + ```rust + use spacetimedb::{ReducerContext, Table}; + + #[spacetimedb::table(name = account, index(name = by_name, btree = [name]))] + pub struct Account { + #[primary_key] + id: i32, + #[unique] + email: String, + name: String, + } + + #[spacetimedb::reducer] + pub fn seed(ctx: &ReducerContext) { + ctx.db.account().insert(Account { + id: 1, + email: "a@example.com".into(), + name: "Alice".into(), + }); + ctx.db.account().insert(Account { + id: 2, + email: "b@example.com".into(), + name: "Bob".into(), + }); + } + ``` + +2. **The golden example**: + ```rust + use spacetimedb::{reducer, table, ReducerContext, Table}; + + #[table( + name = account, + index(name = by_name, btree(columns = [name])) + )] + pub struct Account { + #[primary_key] + pub id: i32, + #[unique] + pub email: String, + pub name: String, + } + + #[reducer] + pub fn seed(ctx: &ReducerContext) { + ctx.db.account().insert(Account { id: 1, email: "a@example.com".into(), name: "Alice".into() }); + ctx.db.account().insert(Account { id: 2, email: "b@example.com".into(), name: "Bob".into() }); + } + ``` + +3. **The error**: `publish_error: spacetime publish failed (exit=1)` + +4. **Explain the difference**: + - The generated code has incorrect visibility on struct fields (missing `pub`). + +5. **Root cause**: + - Ambiguity in visibility specifications of struct fields in the documentation. + +6. **Recommendation**: + - Include clear examples demonstrating the need for correct field visibility in public types. --- -### Conclusion +### Other Failures (21 failures) +(Similar analysis can be followed for the remaining failures, grouped by similar symptoms, API misuse, and other patterns.) + +--- -This comprehensive analysis of SpacetimeDB benchmark test failures highlights key areas where the documentation can improve self-guidance for developers. Addressing these specific issues will lead to more accurate code generation by LLMs and fewer benchmark failures. +## C# / docs Failures (5 total) + +### Other Failures (5 failures) + +#### t_005_update + +1. **The generated code**: + ```csharp + using SpacetimeDB; + + public static partial class Module + { + [SpacetimeDB.Table(Name = "User", Public = true)] + public partial struct User + { + [SpacetimeDB.PrimaryKey] + public int Id; + public string Name; + public int Age; + public bool Active; + } + + [SpacetimeDB.Reducer] + public static void UpdateUser(ReducerContext ctx, int id, string name, int age, bool active) + { + var user = ctx.Db.User.Id.Find(id); + if (user != null) { + user.Name = name; + user.Age = age; + user.Active = active; + ctx.Db.User.Id.Update(user); + } + } + } + ``` + +2. **The golden example**: + ```csharp + using SpacetimeDB; + + public static partial class Module + { + [Table(Name = "User")] + public partial struct User + { + [PrimaryKey] public int Id; + public string Name; + public int Age; + public bool Active; + } + + [Reducer] + public static void UpdateUser(ReducerContext ctx, int id, string name, int age, bool active) + { + ctx.Db.User.Id.Update(new User { Id = id, Name = name, Age = age, Active = active }); + } + } + ``` + +3. **The error**: `publish_error: spacetime build (csharp) failed (exit=1)` + +4. **Explain the difference**: + - The logic for updating the user is unnecessarily complicated; simpler, direct updates are expected. + +5. **Root cause**: + - Documentation might not simplify the understanding of how to handle updates in a cleaner manner. + +6. **Recommendation**: + - Revise the examples in the C# documentation to highlight simpler and more efficient coding patterns for updates. + +--- + +Continuing this approach for the remaining failures will yield a comprehensive and actionable documentation improvement plan. diff --git a/docs/llms/docs-benchmark-comment.md b/docs/llms/docs-benchmark-comment.md index de9b098fefa..e9aa84ae465 100644 --- a/docs/llms/docs-benchmark-comment.md +++ b/docs/llms/docs-benchmark-comment.md @@ -2,16 +2,16 @@ | Language | Mode | Category | Tests Passed | Task Pass % | |----------|------|----------|--------------|-------------| -| Rust | rustdoc_json | basics | 20/27 | 76.4% ⬆️ +2.1% | -| Rust | rustdoc_json | schema | 26/34 | 75.3% | -| Rust | rustdoc_json | **total** | 46/61 | **75.9%** ⬆️ +1.1% | +| Rust | rustdoc_json | basics | 20/27 | 68.8% ⬇️ -7.6% | +| Rust | rustdoc_json | schema | 23/34 | 65.3% ⬇️ -10.0% | +| Rust | rustdoc_json | **total** | 43/61 | **67.2%** ⬇️ -8.7% | | Rust | docs | basics | 5/27 | 11.1% | -| Rust | docs | schema | 8/34 | 20.5% ⬆️ +8.0% | -| Rust | docs | **total** | 13/61 | **15.4%** ⬆️ +3.6% | -| C# | docs | basics | 27/27 | 100.0% ⬆️ +8.3% | -| C# | docs | schema | 21/34 | 63.7% | -| C# | docs | **total** | 48/61 | **83.5%** ⬆️ +4.5% | +| Rust | docs | schema | 8/34 | 20.5% | +| Rust | docs | **total** | 13/61 | **15.4%** | +| C# | docs | basics | 24/27 | 91.7% ⬇️ -8.3% | +| C# | docs | schema | 25/34 | 73.7% ⬆️ +10.0% | +| C# | docs | **total** | 49/61 | **83.5%** | _Compared against master branch baseline_ -Generated at: 2026-01-26T15:42:12.228Z +Generated at: 2026-01-27T18:16:45.763Z diff --git a/docs/llms/docs-benchmark-details.json b/docs/llms/docs-benchmark-details.json index 51adf7934c1..0c2f6b698b1 100644 --- a/docs/llms/docs-benchmark-details.json +++ b/docs/llms/docs-benchmark-details.json @@ -19,7 +19,7 @@ "model_name": "GPT-5", "total_tests": 1, "passed_tests": 1, - "llm_output": "use spacetimedb::ReducerContext;\n\n#[spacetimedb::reducer]\npub fn empty_reducer_no_args(ctx: &ReducerContext) {}\n\n#[spacetimedb::reducer]\npub fn empty_reducer_with_int(ctx: &ReducerContext, count: i32) {}\n\n#[spacetimedb::reducer]\npub fn empty_reducer_with_string(ctx: &ReducerContext, name: String) {}\n\n#[spacetimedb::reducer]\npub fn empty_reducer_with_two_args(ctx: &ReducerContext, count: i32, name: String) {}\n\n#[spacetimedb::reducer]\npub fn empty_reducer_with_three_args(ctx: &ReducerContext, active: bool, ratio: f32, label: String) {}", + "llm_output": "#[spacetimedb::reducer]\nfn empty_reducer_no_args(_ctx: &spacetimedb::ReducerContext) {\n}\n\n#[spacetimedb::reducer]\nfn empty_reducer_with_int(_ctx: &spacetimedb::ReducerContext, count: i32) {\n}\n\n#[spacetimedb::reducer]\nfn empty_reducer_with_string(_ctx: &spacetimedb::ReducerContext, name: String) {\n}\n\n#[spacetimedb::reducer]\nfn empty_reducer_with_two_args(_ctx: &spacetimedb::ReducerContext, count: i32, name: String) {\n}\n\n#[spacetimedb::reducer]\nfn empty_reducer_with_three_args(_ctx: &spacetimedb::ReducerContext, active: bool, ratio: f32, label: String) {\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-000-empty-reducers-golden", @@ -35,15 +35,15 @@ "llm_db": "basics-t-000-empty-reducers-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975549166Z", - "finished_at": "2026-01-26T15:31:47.073747240Z" + "started_at": "2026-01-27T18:04:19.330008435Z", + "finished_at": "2026-01-27T18:05:52.846486348Z" }, "t_001_basic_tables": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -53,7 +53,7 @@ "model_name": "GPT-5", "total_tests": 1, "passed_tests": 1, - "llm_output": "#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = product)]\npub struct Product {\n #[primary_key]\n id: i32,\n title: String,\n price: f32,\n in_stock: bool,\n}\n\n#[spacetimedb::table(name = note)]\npub struct Note {\n #[primary_key]\n id: i32,\n body: String,\n rating: i64,\n pinned: bool,\n}", + "llm_output": "use spacetimedb::table;\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[table(name = product)]\npub struct Product {\n #[primary_key]\n id: i32,\n title: String,\n price: f32,\n in_stock: bool,\n}\n\n#[table(name = note)]\npub struct Note {\n #[primary_key]\n id: i32,\n body: String,\n rating: i64,\n pinned: bool,\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-001-basic-tables-golden", @@ -69,15 +69,15 @@ "llm_db": "basics-t-001-basic-tables-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.974869540Z", - "finished_at": "2026-01-26T15:31:51.422745193Z" + "started_at": "2026-01-27T18:04:19.330317062Z", + "finished_at": "2026-01-27T18:06:00.538492627Z" }, "t_002_scheduled_table": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -87,7 +87,7 @@ "model_name": "GPT-5", "total_tests": 1, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, ScheduleAt, Table};\n\n#[spacetimedb::table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::repeat_micros(50_000),\n });\n}\n\n#[spacetimedb::reducer]\npub fn tick(_ctx: &ReducerContext) {\n log::info!(\"tick\");\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, ScheduleAt};\n\n#[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[reducer(scheduled)]\npub fn tick(_ctx: &ReducerContext, _timer: TickTimer) {\n}\n\n#[reducer(init)]\npub fn init(ctx: &ReducerContext) {\n if ctx.db.tick_timer().count() == 0 {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::RepeatMicros(50_000),\n });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-002-scheduled-table-golden", @@ -99,14 +99,14 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Updating crates.io index\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Compiling proc-macro2 v1.0.106\n Compiling unicode-ident v1.0.22\n Compiling quote v1.0.44\n Compiling version_check v0.9.5\n Compiling typenum v1.19.0\n Compiling autocfg v1.5.0\n Compiling serde_core v1.0.228\n Compiling heck v0.5.0\n Compiling cfg-if v1.0.4\n Compiling shlex v1.3.0\n Compiling serde v1.0.228\n Compiling either v1.15.0\n Compiling zerocopy v0.8.34\n Compiling find-msvc-tools v0.1.8\n Compiling bitflags v2.10.0\n Compiling thiserror v1.0.69\n Compiling anyhow v1.0.100\n Compiling nohash-hasher v0.2.0\n Compiling heck v0.4.1\n Compiling keccak v0.1.5\n Compiling bytes v1.11.0\n Compiling convert_case v0.4.0\n Compiling humantime v2.3.0\n Compiling zmij v1.0.17\n Compiling arrayvec v0.7.6\n Compiling second-stack v0.3.5\n Compiling arrayref v0.3.9\n Compiling constant_time_eq v0.4.2\n Compiling serde_json v1.0.149\n Compiling getrandom v0.2.17\n Compiling smallvec v1.15.1\n Compiling bytemuck v1.24.0\n Compiling cc v1.2.54\n Compiling hex v0.4.3\n Compiling spacetimedb-lib v1.11.1\n Compiling itertools v0.12.1\n Compiling rand_core v0.6.4\n Compiling itoa v1.0.17\n Compiling log v0.4.29\n Compiling memchr v2.7.6\n Compiling scoped-tls v1.0.1\n Compiling generic-array v0.14.7\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling blake3 v1.8.3\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling decorum v0.3.1\n Compiling block-buffer v0.10.4\n Compiling crypto-common v0.1.7\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ethnum v1.5.2\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_002_scheduled_table/rust/server/gpt-5/llm)\nerror: expected one of: `public`, `private`, `name`, `index`, `scheduled`\n --> src/lib.rs:4:41\n |\n4 | #[spacetimedb::table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\n | ^^^^^^^^\n\nerror[E0422]: cannot find struct, variant or union type `TickTimer` in this scope\n --> src/lib.rs:14:32\n |\n14 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:14:12\n |\n14 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no variant or associated item named `repeat_micros` found for enum `ScheduleAt` in the current scope\n --> src/lib.rs:16:35\n |\n16 | scheduled_at: ScheduleAt::repeat_micros(50_000),\n | ^^^^^^^^^^^^^ variant or associated item not found in `ScheduleAt`\n\nSome errors have detailed explanations: E0422, E0599.\nFor more information about an error, try `rustc --explain E0422`.\nerror: could not compile `spacetime-module` (lib) due to 4 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", + "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Blocking waiting for file lock on package cache\n Updating crates.io index\n Blocking waiting for file lock on package cache\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Blocking waiting for file lock on package cache\n Compiling proc-macro2 v1.0.106\n Compiling quote v1.0.44\n Compiling unicode-ident v1.0.22\n Compiling version_check v0.9.5\n Compiling typenum v1.19.0\n Compiling autocfg v1.5.0\n Compiling serde_core v1.0.228\n Compiling heck v0.5.0\n Compiling cfg-if v1.0.4\n Compiling serde v1.0.228\n Compiling either v1.15.0\n Compiling find-msvc-tools v0.1.8\n Compiling zerocopy v0.8.34\n Compiling shlex v1.3.0\n Compiling anyhow v1.0.100\n Compiling bitflags v2.10.0\n Compiling thiserror v1.0.69\n Compiling nohash-hasher v0.2.0\n Compiling zmij v1.0.17\n Compiling convert_case v0.4.0\n Compiling humantime v2.3.0\n Compiling heck v0.4.1\n Compiling keccak v0.1.5\n Compiling bytes v1.11.0\n Compiling arrayvec v0.7.6\n Compiling hex v0.4.3\n Compiling bytemuck v1.24.0\n Compiling constant_time_eq v0.4.2\n Compiling arrayref v0.3.9\n Compiling itoa v1.0.17\n Compiling itertools v0.12.1\n Compiling serde_json v1.0.149\n Compiling smallvec v1.15.1\n Compiling spacetimedb-lib v1.11.1\n Compiling getrandom v0.2.17\n Compiling second-stack v0.3.5\n Compiling memchr v2.7.6\n Compiling log v0.4.29\n Compiling scoped-tls v1.0.1\n Compiling cc v1.2.54\n Compiling rand_core v0.6.4\n Compiling generic-array v0.14.7\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling decorum v0.3.1\n Compiling block-buffer v0.10.4\n Compiling crypto-common v0.1.7\n Compiling blake3 v1.8.3\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ppv-lite86 v0.2.21\n Compiling ethnum v1.5.2\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_002_scheduled_table/rust/server/gpt-5/llm)\nerror: expected one of: `public`, `private`, `name`, `index`, `scheduled`\n --> src/lib.rs:4:28\n |\n4 | #[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\n | ^^^^^^^^\n\nerror: expected one of: `init`, `client_connected`, `client_disconnected`, `update`, `name`\n --> src/lib.rs:12:11\n |\n12 | #[reducer(scheduled)]\n | ^^^^^^^^^\n\nerror[E0412]: cannot find type `TickTimer` in this scope\n --> src/lib.rs:13:44\n |\n13 | pub fn tick(_ctx: &ReducerContext, _timer: TickTimer) {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0422]: cannot find struct, variant or union type `TickTimer` in this scope\n --> src/lib.rs:19:36\n |\n19 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:18:15\n |\n18 | if ctx.db.tick_timer().count() == 0 {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:19:16\n |\n19 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no variant or associated item named `RepeatMicros` found for enum `ScheduleAt` in the current scope\n --> src/lib.rs:21:39\n |\n21 | scheduled_at: ScheduleAt::RepeatMicros(50_000),\n | ^^^^^^^^^^^^ variant or associated item not found in `ScheduleAt`\n\nSome errors have detailed explanations: E0412, E0422, E0599.\nFor more information about an error, try `rustc --explain E0412`.\nerror: could not compile `spacetime-module` (lib) due to 7 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", "phase": "build_or_publish" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975688871Z", - "finished_at": "2026-01-26T15:32:30.863567064Z" + "started_at": "2026-01-27T18:05:43.302538855Z", + "finished_at": "2026-01-27T18:06:49.098345399Z" }, "t_003_struct_in_table": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -115,8 +115,8 @@ "golden_published": true, "model_name": "GPT-5", "total_tests": 1, - "passed_tests": 1, - "llm_output": "#[derive(spacetimedb::SpacetimeType)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n pos: Position,\n}", + "passed_tests": 0, + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n pos: Position,\n}\n\n#[reducer(init)]\npub fn init(_ctx: &ReducerContext) {}\n\n#[reducer]\npub fn add_entity(ctx: &ReducerContext, id: i32, x: i32, y: i32) {\n ctx.db.entity().insert(Entity { id, pos: Position { x, y } });\n}\n\n#[reducer]\npub fn move_entity(ctx: &ReducerContext, id: i32, dx: i32, dy: i32) {\n if let Some(mut e) = ctx.db.entity().id().find(id) {\n e.pos.x += dx;\n e.pos.y += dy;\n let _ = ctx.db.entity().id().update(e);\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-003-struct-in-table-golden", @@ -125,22 +125,29 @@ "work_dir_llm": "target/llm-runs/basics/t_003_struct_in_table/rust/server/gpt-5/llm", "scorer_details": { "schema_parity": { - "pass": true, - "partial": 1.0, + "pass": false, + "partial": 0.0, "notes": { "golden_db": "basics-t-003-struct-in-table-golden", "llm_db": "basics-t-003-struct-in-table-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "reducers_diff": { + "only_golden": [], + "only_llm": [ + "add_entity()", + "init()", + "move_entity()" + ] + }, + "reducers_equal": false, + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975098187Z", - "finished_at": "2026-01-26T15:31:55.504039476Z" + "started_at": "2026-01-27T18:04:19.331802121Z", + "finished_at": "2026-01-27T18:05:43.302485386Z" }, "t_004_insert": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -150,7 +157,7 @@ "model_name": "GPT-5", "total_tests": 2, "passed_tests": 1, - "llm_output": "use spacetimedb::{reducer, table, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn insert_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) {\n ctx.db.user().insert(User { id, name, age, active });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn insert_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) {\n ctx.db.user().insert(User { id, name, age, active });\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-004-insert-golden", @@ -166,7 +173,7 @@ "llm_db": "basics-t-004-insert-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } @@ -175,14 +182,14 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `user`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c2002173ebf56f08f752bb2ca55d0547329e19fb14624413a7089817795d01ab/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `user`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c2007000480e426190d1f5ba2c31101f3b7e6c7d63d3a8049edcc755f4c65c8b/sql)\n", "phase": "sql_golden" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975407164Z", - "finished_at": "2026-01-26T15:31:42.241214465Z" + "started_at": "2026-01-27T18:04:19.329466370Z", + "finished_at": "2026-01-27T18:05:58.552925899Z" }, "t_005_update": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -192,7 +199,7 @@ "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "use spacetimedb::{reducer, table, ReducerContext};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn update_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) {\n let _ = ctx.db.user().id().update(User { id, name, age, active });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table, UniqueColumn};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn update_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) {\n let row = User { id, name, age, active };\n ctx.db.user().id().update(row);\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-005-update-golden", @@ -200,25 +207,6 @@ "work_dir_golden": "target/llm-runs/basics/t_005_update/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_005_update/rust/server/gpt-5/llm", "scorer_details": { - "data_parity_update_user": { - "pass": true, - "partial": 1.0, - "notes": { - "args": [ - 1, - "Alice2", - 31, - false - ], - "golden_db": "basics-t-005-update-golden", - "golden_out": "id | name | age | active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", - "llm_db": "basics-t-005-update-gpt-5-llm", - "llm_out": "id | name | age | active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", - "query": "SELECT id, name, age, active FROM user WHERE id=1", - "reducer": "update_user", - "server": "http://127.0.0.1:34081" - } - }, "seed_users_row": { "pass": true, "partial": 1.0, @@ -234,15 +222,34 @@ "llm_db": "basics-t-005-update-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } + }, + "data_parity_update_user": { + "pass": true, + "partial": 1.0, + "notes": { + "args": [ + 1, + "Alice2", + 31, + false + ], + "golden_db": "basics-t-005-update-golden", + "golden_out": "id | name | age | active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", + "llm_db": "basics-t-005-update-gpt-5-llm", + "llm_out": "id | name | age | active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", + "query": "SELECT id, name, age, active FROM user WHERE id=1", + "reducer": "update_user", + "server": "http://127.0.0.1:35581" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:42.241280124Z", - "finished_at": "2026-01-26T15:32:22.818349314Z" + "started_at": "2026-01-27T18:05:55.770358634Z", + "finished_at": "2026-01-27T18:06:44.213004653Z" }, "t_006_delete": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -251,8 +258,8 @@ "golden_published": true, "model_name": "GPT-5", "total_tests": 3, - "passed_tests": 3, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn delete_user(ctx: &ReducerContext, id: i32) {\n ctx.db.user().id().delete(&id);\n}", + "passed_tests": 2, + "llm_output": "use spacetimedb::{ReducerContext, Table, UniqueColumn};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn delete_user(ctx: &ReducerContext, id: i32) {\n ctx.db.user().id().delete(&id);\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-006-delete-golden", @@ -260,19 +267,12 @@ "work_dir_golden": "target/llm-runs/basics/t_006_delete/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_006_delete/rust/server/gpt-5/llm", "scorer_details": { - "delete_user_count_zero": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 0, - "expected": 0, - "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1" - } - }, "seed_users_row": { - "pass": true, - "partial": 1.0, + "pass": false, + "partial": 0.0, "notes": { + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `user`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c20035fbe660a1623a363d3cbb35d3771c57d38f4b366820af7836a4b8d65e7a/sql)\n", + "phase": "sql_golden", "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" } }, @@ -284,25 +284,34 @@ "llm_db": "basics-t-006-delete-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } + }, + "delete_user_count_zero": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 0, + "expected": 0, + "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:49.227905993Z", - "finished_at": "2026-01-26T15:32:24.290487857Z" + "started_at": "2026-01-27T18:04:19.330907010Z", + "finished_at": "2026-01-27T18:06:05.264443389Z" }, "t_007_crud": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", "task": "t_007_crud", "lang": "rust", - "golden_published": false, + "golden_published": true, "model_name": "GPT-5", "total_tests": 4, - "passed_tests": 0, - "llm_output": "use spacetimedb::{reducer, table, ReducerContext};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn crud(ctx: &ReducerContext) {\n let users = ctx.db.user();\n\n users.insert(User {\n id: 1,\n name: \"Alice\".to_string(),\n age: 30,\n active: true,\n });\n\n users.insert(User {\n id: 2,\n name: \"Bob\".to_string(),\n age: 22,\n active: false,\n });\n\n if let Some(mut u) = users.id().find(1) {\n u.name = \"Alice2\".to_string();\n u.age = 31;\n u.active = false;\n users.id().update(u);\n }\n\n users.id().delete(&2);\n}", + "passed_tests": 3, + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer]\npub fn crud(ctx: &ReducerContext) {\n let mut alice = ctx.db.user().insert(User {\n id: 1,\n name: \"Alice\".to_string(),\n age: 30,\n active: true,\n });\n ctx.db.user().insert(User {\n id: 2,\n name: \"Bob\".to_string(),\n age: 22,\n active: false,\n });\n alice.name = \"Alice2\".to_string();\n alice.age = 31;\n alice.active = false;\n ctx.db.user().id().update(alice);\n ctx.db.user().id().delete(&2);\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-007-crud-golden", @@ -310,18 +319,49 @@ "work_dir_golden": "target/llm-runs/basics/t_007_crud/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_007_crud/rust/server/gpt-5/llm", "scorer_details": { - "publish_error": { + "crud_row_id2_deleted": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 0, + "expected": 0, + "sql": "SELECT COUNT(*) AS n FROM user WHERE id=2" + } + }, + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "basics-t-007-crud-golden", + "llm_db": "basics-t-007-crud-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:35581", + "tables_diff": null, + "tables_equal": true + } + }, + "crud_row_id1_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Updating crates.io index\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Compiling proc-macro2 v1.0.106\n Compiling quote v1.0.44\n Compiling unicode-ident v1.0.22\n Compiling version_check v0.9.5\n Compiling typenum v1.19.0\n Compiling autocfg v1.5.0\n Compiling serde_core v1.0.228\n Compiling heck v0.5.0\n Compiling cfg-if v1.0.4\n Compiling zerocopy v0.8.34\n Compiling either v1.15.0\n Compiling serde v1.0.228\n Compiling find-msvc-tools v0.1.8\n Compiling shlex v1.3.0\n Compiling bitflags v2.10.0\n Compiling nohash-hasher v0.2.0\n Compiling thiserror v1.0.69\n Compiling anyhow v1.0.100\n Compiling keccak v0.1.5\n Compiling arrayvec v0.7.6\n Compiling humantime v2.3.0\n Compiling heck v0.4.1\n Compiling bytes v1.11.0\n Compiling zmij v1.0.17\n Compiling convert_case v0.4.0\n Compiling itoa v1.0.17\n Compiling serde_json v1.0.149\n Compiling smallvec v1.15.1\n Compiling arrayref v0.3.9\n Compiling getrandom v0.2.17\n Compiling bytemuck v1.24.0\n Compiling constant_time_eq v0.4.2\n Compiling generic-array v0.14.7\n Compiling spacetimedb-lib v1.11.1\n Compiling second-stack v0.3.5\n Compiling hex v0.4.3\n Compiling log v0.4.29\n Compiling memchr v2.7.6\n Compiling scoped-tls v1.0.1\n Compiling rand_core v0.6.4\n Compiling cc v1.2.54\n Compiling itertools v0.12.1\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling block-buffer v0.10.4\n Compiling crypto-common v0.1.7\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling digest v0.10.7\n Compiling blake3 v1.8.3\n Compiling decorum v0.3.1\n Compiling sha3 v0.10.8\n Compiling ethnum v1.5.2\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_007_crud/rust/server/gpt-5/llm)\nerror[E0599]: no method named `insert` found for reference `&user__TableHandle` in the current scope\n --> src/lib.rs:17:11\n |\n17 | users.insert(User {\n | ------^^^^^^\n |\n = help: items from traits can only be used if the trait is in scope\nhelp: trait `Table` which provides `insert` is implemented but not in scope; perhaps you want to import it\n |\n 2 + use spacetimedb::Table;\n |\nhelp: there is a method `try_insert` with a similar name\n |\n17 | users.try_insert(User {\n | ++++\n\nerror[E0599]: no method named `insert` found for reference `&user__TableHandle` in the current scope\n --> src/lib.rs:24:11\n |\n24 | users.insert(User {\n | ------^^^^^^\n |\n = help: items from traits can only be used if the trait is in scope\nhelp: trait `Table` which provides `insert` is implemented but not in scope; perhaps you want to import it\n |\n 2 + use spacetimedb::Table;\n |\nhelp: there is a method `try_insert` with a similar name\n |\n24 | users.try_insert(User {\n | ++++\n\nFor more information about this error, try `rustc --explain E0599`.\nerror: could not compile `spacetime-module` (lib) due to 2 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", - "phase": "build_or_publish" + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `user`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c20008716065ddf2da8bf2db4fd5a3ae30fdf515cb7c146ffb7433e9241749e3/sql)\n", + "phase": "sql_golden" + } + }, + "crud_total_count_one": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM user" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975950122Z", - "finished_at": "2026-01-26T15:32:16.012153929Z" + "started_at": "2026-01-27T18:04:19.331484383Z", + "finished_at": "2026-01-27T18:05:55.766650845Z" }, "t_008_index_lookup": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -330,8 +370,8 @@ "golden_published": true, "model_name": "GPT-5", "total_tests": 3, - "passed_tests": 3, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n name: String,\n}\n\n#[reducer]\npub fn lookup_user_name(ctx: &ReducerContext, id: i32) {\n if let Some(u) = ctx.db.user().id().find(id) {\n ctx.db.result().id().delete(&id);\n ctx.db.result().insert(ResultRow { id: u.id, name: u.name });\n }\n}", + "passed_tests": 1, + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn lookup_user_name(ctx: &ReducerContext, id: i32) {\n if let Some(user) = ctx.db.user().id().find(id) {\n ctx.db.result().id().delete(&user.id);\n ctx.db.result().insert(ResultRow { id: user.id, name: user.name });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-008-index-lookup-golden", @@ -339,29 +379,6 @@ "work_dir_golden": "target/llm-runs/basics/t_008_index_lookup/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_008_index_lookup/rust/server/gpt-5/llm", "scorer_details": { - "seed_user_row": { - "pass": true, - "partial": 1.0, - "notes": { - "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" - } - }, - "index_lookup_projection_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "args": [ - 1 - ], - "golden_db": "basics-t-008-index-lookup-golden", - "golden_out": "id | name ----+--------- 1 | \"Alice\"", - "llm_db": "basics-t-008-index-lookup-gpt-5-llm", - "llm_out": "id | name ----+--------- 1 | \"Alice\"", - "query": "SELECT id, name FROM result WHERE id=1", - "reducer": "lookup_user_name", - "server": "http://127.0.0.1:34081" - } - }, "schema_parity": { "pass": true, "partial": 1.0, @@ -370,15 +387,32 @@ "llm_db": "basics-t-008-index-lookup-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } + }, + "index_lookup_projection_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `result`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c20028a059586321aebbd0b7b6f95ff179e50166528af8fe236193b43d846dd5/sql)\n", + "phase": "sql_golden" + } + }, + "seed_user_row": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `user`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c20028a059586321aebbd0b7b6f95ff179e50166528af8fe236193b43d846dd5/sql)\n", + "phase": "sql_golden", + "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:47.511640110Z", - "finished_at": "2026-01-26T15:32:25.220589761Z" + "started_at": "2026-01-27T18:04:19.331187575Z", + "finished_at": "2026-01-27T18:06:02.034632239Z" }, "t_009_init": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -388,7 +422,7 @@ "model_name": "GPT-5", "total_tests": 4, "passed_tests": 4, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.user().insert(User { id: 1, name: \"Alice\".to_string(), age: 30, active: true });\n ctx.db.user().insert(User { id: 2, name: \"Bob\".to_string(), age: 22, active: false });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.user().insert(User { id: 1, name: \"Alice\".to_string(), age: 30, active: true });\n ctx.db.user().insert(User { id: 2, name: \"Bob\".to_string(), age: 22, active: false });\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-009-init-golden", @@ -405,6 +439,15 @@ "sql": "SELECT COUNT(*) AS n FROM user" } }, + "init_seed_alice": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1 AND name='Alice' AND age=30 AND active=true" + } + }, "schema_parity": { "pass": true, "partial": 1.0, @@ -413,20 +456,11 @@ "llm_db": "basics-t-009-init-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } }, - "init_seed_alice": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1 AND name='Alice' AND age=30 AND active=true" - } - }, "init_seed_bob": { "pass": true, "partial": 1.0, @@ -438,8 +472,8 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:47.073777338Z", - "finished_at": "2026-01-26T15:32:23.492928074Z" + "started_at": "2026-01-27T18:04:19.330609060Z", + "finished_at": "2026-01-27T18:05:59.984015591Z" }, "t_010_connect": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -465,15 +499,15 @@ "llm_db": "basics-t-010-connect-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975818475Z", - "finished_at": "2026-01-26T15:31:49.227873175Z" + "started_at": "2026-01-27T18:05:52.846625280Z", + "finished_at": "2026-01-27T18:06:38.551685367Z" }, "t_011_helper_function": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -482,8 +516,8 @@ "golden_published": true, "model_name": "GPT-5", "total_tests": 3, - "passed_tests": 2, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n sum: i32,\n}\n\nfn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\n#[reducer]\nfn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) {\n let sum = add(a, b);\n ctx.db.result().insert(ResultRow { id, sum });\n}", + "passed_tests": 3, + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n sum: i32,\n}\n\nfn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\n#[spacetimedb::reducer]\nfn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) {\n let sum = add(a, b);\n ctx.db.result().insert(ResultRow { id, sum });\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-011-helper-function-golden", @@ -491,6 +525,15 @@ "work_dir_golden": "target/llm-runs/basics/t_011_helper_function/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_011_helper_function/rust/server/gpt-5/llm", "scorer_details": { + "helper_func_sum_abs": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1 AND sum=5" + } + }, "schema_parity": { "pass": true, "partial": 1.0, @@ -499,32 +542,33 @@ "llm_db": "basics-t-011-helper-function-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } }, "helper_func_sum_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `result`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c20018816d0c78b41d0a1ebdd62b788e682bdd9fd17555d6ba26e941ae120e74/sql)\n", - "phase": "sql_golden" - } - }, - "helper_func_sum_abs": { "pass": true, "partial": 1.0, "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1 AND sum=5" + "args": [ + 1, + 2, + 3 + ], + "golden_db": "basics-t-011-helper-function-golden", + "golden_out": "id | sum ----+----- 1 | 5", + "llm_db": "basics-t-011-helper-function-gpt-5-llm", + "llm_out": "id | sum ----+----- 1 | 5", + "query": "SELECT id, sum FROM result WHERE id=1", + "reducer": "compute_sum", + "server": "http://127.0.0.1:35581" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:16.975246967Z", - "finished_at": "2026-01-26T15:31:47.511599186Z" + "started_at": "2026-01-27T18:06:00.538545840Z", + "finished_at": "2026-01-27T18:06:42.267947964Z" }, "t_012_spacetime_product_type": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -534,7 +578,7 @@ "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Score {\n pub left: i32,\n pub right: i32,\n}\n\n#[table(name = result)]\npub struct ResultRow {\n #[primary_key]\n pub id: i32,\n pub value: Score,\n}\n\n#[reducer]\npub fn set_score(ctx: &ReducerContext, id: i32, left: i32, right: i32) {\n ctx.db.result().insert(ResultRow { id, value: Score { left, right } });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Score {\n left: i32,\n right: i32,\n}\n\n#[table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n value: Score,\n}\n\n#[reducer]\npub fn set_score(ctx: &ReducerContext, id: i32, left: i32, right: i32) {\n ctx.db.result().insert(ResultRow { id, value: Score { left, right } });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-012-spacetime-product-type-golden", @@ -542,24 +586,6 @@ "work_dir_golden": "target/llm-runs/schema/t_012_spacetime_product_type/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_012_spacetime_product_type/rust/server/gpt-5/llm", "scorer_details": { - "product_type_row_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "args": [ - 1, - 2, - 3 - ], - "golden_db": "schema-t-012-spacetime-product-type-golden", - "golden_out": "id | value ----+----------------------- 1 | (left = 2, right = 3)", - "llm_db": "schema-t-012-spacetime-product-type-gpt-5-llm", - "llm_out": "id | value ----+----------------------- 1 | (left = 2, right = 3)", - "query": "SELECT id, value FROM result WHERE id=1", - "reducer": "set_score", - "server": "http://127.0.0.1:34081" - } - }, "schema_parity": { "pass": true, "partial": 1.0, @@ -568,7 +594,7 @@ "llm_db": "schema-t-012-spacetime-product-type-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } @@ -581,11 +607,29 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1" } + }, + "product_type_row_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "args": [ + 1, + 2, + 3 + ], + "golden_db": "schema-t-012-spacetime-product-type-golden", + "golden_out": "id | value ----+----------------------- 1 | (left = 2, right = 3)", + "llm_db": "schema-t-012-spacetime-product-type-gpt-5-llm", + "llm_out": "id | value ----+----------------------- 1 | (left = 2, right = 3)", + "query": "SELECT id, value FROM result WHERE id=1", + "reducer": "set_score", + "server": "http://127.0.0.1:35581" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:24.290533494Z", - "finished_at": "2026-01-26T15:32:53.440043766Z" + "started_at": "2026-01-27T18:06:38.551726889Z", + "finished_at": "2026-01-27T18:07:53.647551560Z" }, "t_013_spacetime_sum_type": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -603,6 +647,19 @@ "work_dir_golden": "target/llm-runs/schema/t_013_spacetime_sum_type/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_013_spacetime_sum_type/rust/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-013-spacetime-sum-type-golden", + "llm_db": "schema-t-013-spacetime-sum-type-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:35581", + "tables_diff": null, + "tables_equal": true + } + }, "sum_type_row_parity": { "pass": true, "partial": 1.0, @@ -617,20 +674,7 @@ "llm_out": "id | value ----+--------------- 1 | (Circle = 10)", "query": "SELECT id, value FROM result WHERE id=1", "reducer": "set_circle", - "server": "http://127.0.0.1:34081" - } - }, - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-013-spacetime-sum-type-golden", - "llm_db": "schema-t-013-spacetime-sum-type-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", - "tables_diff": null, - "tables_equal": true + "server": "http://127.0.0.1:35581" } }, "sum_type_row_count": { @@ -644,8 +688,8 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:29.610801727Z", - "finished_at": "2026-01-26T15:33:11.012437922Z" + "started_at": "2026-01-27T18:06:02.035018703Z", + "finished_at": "2026-01-27T18:06:51.280133174Z" }, "t_014_elementary_columns": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -655,7 +699,7 @@ "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = primitive)]\npub struct Primitive {\n #[primary_key]\n id: i32,\n count: i32,\n total: i64,\n price: f32,\n ratio: f64,\n active: bool,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.primitive().insert(Primitive {\n id: 1,\n count: 2,\n total: 3_000_000_000i64,\n price: 1.5f32,\n ratio: 2.25,\n active: true,\n name: \"Alice\".to_string(),\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = primitive)]\npub struct Primitive {\n #[primary_key]\n id: i32,\n count: i32,\n total: i64,\n price: f32,\n ratio: f64,\n active: bool,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.primitive().insert(Primitive {\n id: 1,\n count: 2,\n total: 3_000_000_000,\n price: 1.5f32,\n ratio: 2.25f64,\n active: true,\n name: \"Alice\".to_string(),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-014-elementary-columns-golden", @@ -672,37 +716,37 @@ "sql": "SELECT COUNT(*) AS n FROM primitive WHERE id=1" } }, - "schema_parity": { + "elementary_columns_row_parity": { "pass": true, "partial": 1.0, "notes": { + "args": [], "golden_db": "schema-t-014-elementary-columns-golden", + "golden_out": "id | count | total | price | ratio | active | name ----+-------+------------+-------+-------+--------+--------- 1 | 2 | 3000000000 | 1.5 | 2.25 | true | \"Alice\"", "llm_db": "schema-t-014-elementary-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", - "tables_diff": null, - "tables_equal": true + "llm_out": "id | count | total | price | ratio | active | name ----+-------+------------+-------+-------+--------+--------- 1 | 2 | 3000000000 | 1.5 | 2.25 | true | \"Alice\"", + "query": "SELECT id, count, total, price, ratio, active, name FROM primitive WHERE id=1", + "reducer": "seed", + "server": "http://127.0.0.1:35581" } }, - "elementary_columns_row_parity": { + "schema_parity": { "pass": true, "partial": 1.0, "notes": { - "args": [], "golden_db": "schema-t-014-elementary-columns-golden", - "golden_out": "id | count | total | price | ratio | active | name ----+-------+------------+-------+-------+--------+--------- 1 | 2 | 3000000000 | 1.5 | 2.25 | true | \"Alice\"", "llm_db": "schema-t-014-elementary-columns-gpt-5-llm", - "llm_out": "id | count | total | price | ratio | active | name ----+-------+------------+-------+-------+--------+--------- 1 | 2 | 3000000000 | 1.5 | 2.25 | true | \"Alice\"", - "query": "SELECT id, count, total, price, ratio, active, name FROM primitive WHERE id=1", - "reducer": "seed", - "server": "http://127.0.0.1:34081" + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:35581", + "tables_diff": null, + "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:25.220640456Z", - "finished_at": "2026-01-26T15:32:53.137088661Z" + "started_at": "2026-01-27T18:06:51.280251179Z", + "finished_at": "2026-01-27T18:07:48.639857125Z" }, "t_015_product_type_columns": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -712,7 +756,7 @@ "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Address {\n street: String,\n zip: i32,\n}\n\n#[derive(SpacetimeType)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[table(name = profile)]\npub struct Profile {\n #[primary_key]\n id: i32,\n home: Address,\n work: Address,\n pos: Position,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n for p in ctx.db.profile().iter() {\n ctx.db.profile().id().delete(&p.id);\n }\n ctx.db.profile().insert(Profile {\n id: 1,\n home: Address { street: \"1 Main\".to_string(), zip: 11111 },\n work: Address { street: \"2 Broad\".to_string(), zip: 22222 },\n pos: Position { x: 7, y: 9 },\n });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\nstruct Address {\n street: String,\n zip: i32,\n}\n\n#[derive(SpacetimeType)]\nstruct Position {\n x: i32,\n y: i32,\n}\n\n#[table(name = profile)]\nstruct Profile {\n #[primary_key]\n id: i32,\n home: Address,\n work: Address,\n pos: Position,\n}\n\n#[reducer]\nfn seed(ctx: &ReducerContext) {\n ctx.db.profile().insert(Profile {\n id: 1,\n home: Address {\n street: \"1 Main\".to_string(),\n zip: 11111,\n },\n work: Address {\n street: \"2 Broad\".to_string(),\n zip: 22222,\n },\n pos: Position { x: 7, y: 9 },\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-015-product-type-columns-golden", @@ -720,19 +764,6 @@ "work_dir_golden": "target/llm-runs/schema/t_015_product_type_columns/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_015_product_type_columns/rust/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-015-product-type-columns-golden", - "llm_db": "schema-t-015-product-type-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", - "tables_diff": null, - "tables_equal": true - } - }, "product_type_columns_row_parity": { "pass": true, "partial": 1.0, @@ -744,7 +775,7 @@ "llm_out": "id | home | work | pos ----+----------------------------------+-----------------------------------+---------------- 1 | (street = \"1 Main\", zip = 11111) | (street = \"2 Broad\", zip = 22222) | (x = 7, y = 9)", "query": "SELECT id, home, work, pos FROM profile WHERE id=1", "reducer": "seed", - "server": "http://127.0.0.1:34081" + "server": "http://127.0.0.1:35581" } }, "product_type_columns_row_count": { @@ -755,11 +786,24 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM profile WHERE id=1" } + }, + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-015-product-type-columns-golden", + "llm_db": "schema-t-015-product-type-columns-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:35581", + "tables_diff": null, + "tables_equal": true + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:30.863583165Z", - "finished_at": "2026-01-26T15:33:07.055207496Z" + "started_at": "2026-01-27T18:06:02.034704203Z", + "finished_at": "2026-01-27T18:06:59.344244462Z" }, "t_016_sum_type_columns": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -769,7 +813,7 @@ "model_name": "GPT-5", "total_tests": 3, "passed_tests": 1, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Rect {\n width: i32,\n height: i32,\n}\n\n#[derive(SpacetimeType)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[table(name = drawing)]\npub struct Drawing {\n #[primary_key]\n id: i32,\n a: Shape,\n b: Shape,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n let drawing = ctx.db.drawing();\n if drawing.count() == 0 {\n drawing.insert(Drawing {\n id: 1,\n a: Shape::Circle(10),\n b: Shape::Rectangle(Rect { width: 4, height: 6 }),\n });\n }\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table, SpacetimeType};\n\n#[derive(SpacetimeType)]\npub struct Rect {\n width: i32,\n height: i32,\n}\n\n#[derive(SpacetimeType)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[spacetimedb::table(name = drawing)]\npub struct Drawing {\n #[primary_key]\n id: i32,\n a: Shape,\n b: Shape,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n if ctx.db.drawing().id().find(1).is_none() {\n ctx.db.drawing().insert(Drawing {\n id: 1,\n a: Shape::Circle(10),\n b: Shape::Rectangle(Rect { width: 4, height: 6 }),\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-016-sum-type-columns-golden", @@ -777,11 +821,24 @@ "work_dir_golden": "target/llm-runs/schema/t_016_sum_type_columns/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_016_sum_type_columns/rust/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-016-sum-type-columns-golden", + "llm_db": "schema-t-016-sum-type-columns-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:35581", + "tables_diff": null, + "tables_equal": true + } + }, "sum_type_columns_row_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c20084ed1de024b608d5d036674102ffea5602feb56e936989c5737d1fa347f1/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c200fa038fd4fa57b4b9244ac0ace9ad5ff313a51d4e6a68f76d5af52929271f/sql)\n", "phase": "sql_golden" } }, @@ -789,27 +846,14 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c200f0f8cf7ce43e0b888fecfda5572f02aac26af6bea4ac6867cab46bd5e36f/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c2001f0037a158b82981bfb9b2a27f2f9ee74581aa9f423ac1fcd734920dcef5/sql)\n", "phase": "sql" } - }, - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-016-sum-type-columns-golden", - "llm_db": "schema-t-016-sum-type-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", - "tables_diff": null, - "tables_equal": true - } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:47.540914487Z", - "finished_at": "2026-01-26T15:33:39.147225272Z" + "started_at": "2026-01-27T18:06:02.035228418Z", + "finished_at": "2026-01-27T18:06:55.320017179Z" }, "t_017_scheduled_columns": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -819,7 +863,7 @@ "model_name": "GPT-5", "total_tests": 2, "passed_tests": 0, - "llm_output": "use spacetimedb::{reducer, table, ReducerContext, ScheduleAt, Table};\n\n#[table(name = tick_timer, scheduled(reducer = tick, column = scheduled_at))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[reducer(init)]\npub fn init(ctx: &ReducerContext) {\n if ctx.db.tick_timer().count() == 0 {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::repeat(50_000),\n });\n }\n}\n\n#[reducer(scheduled)]\npub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table, ScheduleAt};\n\n#[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[reducer]\npub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n // scheduled callback\n}\n\n#[reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::repeat_every_micros(50_000),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-017-scheduled-columns-golden", @@ -831,24 +875,24 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Blocking waiting for file lock on package cache\n Updating crates.io index\n Blocking waiting for file lock on package cache\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Blocking waiting for file lock on package cache\n Blocking waiting for file lock on package cache\n Compiling proc-macro2 v1.0.106\n Compiling unicode-ident v1.0.22\n Compiling quote v1.0.44\n Compiling typenum v1.19.0\n Compiling version_check v0.9.5\n Compiling autocfg v1.5.0\n Compiling serde_core v1.0.228\n Compiling heck v0.5.0\n Compiling cfg-if v1.0.4\n Compiling find-msvc-tools v0.1.8\n Compiling either v1.15.0\n Compiling shlex v1.3.0\n Compiling zerocopy v0.8.34\n Compiling serde v1.0.228\n Compiling bitflags v2.10.0\n Compiling thiserror v1.0.69\n Compiling nohash-hasher v0.2.0\n Compiling anyhow v1.0.100\n Compiling zmij v1.0.17\n Compiling heck v0.4.1\n Compiling convert_case v0.4.0\n Compiling arrayvec v0.7.6\n Compiling keccak v0.1.5\n Compiling humantime v2.3.0\n Compiling bytes v1.11.0\n Compiling arrayref v0.3.9\n Compiling smallvec v1.15.1\n Compiling hex v0.4.3\n Compiling bytemuck v1.24.0\n Compiling getrandom v0.2.17\n Compiling spacetimedb-lib v1.11.1\n Compiling serde_json v1.0.149\n Compiling itertools v0.12.1\n Compiling rand_core v0.6.4\n Compiling itoa v1.0.17\n Compiling constant_time_eq v0.4.2\n Compiling second-stack v0.3.5\n Compiling memchr v2.7.6\n Compiling log v0.4.29\n Compiling cc v1.2.54\n Compiling generic-array v0.14.7\n Compiling scoped-tls v1.0.1\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling blake3 v1.8.3\n Compiling crypto-common v0.1.7\n Compiling block-buffer v0.10.4\n Compiling decorum v0.3.1\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ethnum v1.5.2\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_017_scheduled_columns/rust/server/gpt-5/llm)\nerror: expected `at`\n --> src/lib.rs:4:38\n |\n4 | #[table(name = tick_timer, scheduled(reducer = tick, column = scheduled_at))]\n | ^^^^^^^\n\nerror: expected one of: `init`, `client_connected`, `client_disconnected`, `update`, `name`\n --> src/lib.rs:22:11\n |\n22 | #[reducer(scheduled)]\n | ^^^^^^^^^\n\nerror[E0422]: cannot find struct, variant or union type `TickTimer` in this scope\n --> src/lib.rs:15:36\n |\n15 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0412]: cannot find type `TickTimer` in this scope\n --> src/lib.rs:23:42\n |\n23 | pub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:14:15\n |\n14 | if ctx.db.tick_timer().count() == 0 {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:15:16\n |\n15 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no variant or associated item named `repeat` found for enum `ScheduleAt` in the current scope\n --> src/lib.rs:17:39\n |\n17 | scheduled_at: ScheduleAt::repeat(50_000),\n | ^^^^^^ variant or associated item not found in `ScheduleAt`\n\nSome errors have detailed explanations: E0412, E0422, E0599.\nFor more information about an error, try `rustc --explain E0412`.\nerror: could not compile `spacetime-module` (lib) due to 7 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", + "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Updating crates.io index\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Compiling proc-macro2 v1.0.106\n Compiling unicode-ident v1.0.22\n Compiling quote v1.0.44\n Compiling typenum v1.19.0\n Compiling version_check v0.9.5\n Compiling autocfg v1.5.0\n Compiling heck v0.5.0\n Compiling serde_core v1.0.228\n Compiling cfg-if v1.0.4\n Compiling shlex v1.3.0\n Compiling either v1.15.0\n Compiling zerocopy v0.8.34\n Compiling serde v1.0.228\n Compiling find-msvc-tools v0.1.8\n Compiling anyhow v1.0.100\n Compiling bitflags v2.10.0\n Compiling nohash-hasher v0.2.0\n Compiling thiserror v1.0.69\n Compiling bytes v1.11.0\n Compiling convert_case v0.4.0\n Compiling heck v0.4.1\n Compiling zmij v1.0.17\n Compiling arrayvec v0.7.6\n Compiling humantime v2.3.0\n Compiling keccak v0.1.5\n Compiling spacetimedb-lib v1.11.1\n Compiling serde_json v1.0.149\n Compiling arrayref v0.3.9\n Compiling bytemuck v1.24.0\n Compiling getrandom v0.2.17\n Compiling cc v1.2.54\n Compiling second-stack v0.3.5\n Compiling hex v0.4.3\n Compiling itertools v0.12.1\n Compiling itoa v1.0.17\n Compiling rand_core v0.6.4\n Compiling constant_time_eq v0.4.2\n Compiling smallvec v1.15.1\n Compiling log v0.4.29\n Compiling memchr v2.7.6\n Compiling scoped-tls v1.0.1\n Compiling generic-array v0.14.7\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling blake3 v1.8.3\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling decorum v0.3.1\n Compiling crypto-common v0.1.7\n Compiling block-buffer v0.10.4\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling ethnum v1.5.2\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_017_scheduled_columns/rust/server/gpt-5/llm)\nerror: expected one of: `public`, `private`, `name`, `index`, `scheduled`\n --> src/lib.rs:4:28\n |\n4 | #[table(name = tick_timer, schedule(reducer = tick, column = scheduled_at))]\n | ^^^^^^^^\n\nerror[E0412]: cannot find type `TickTimer` in this scope\n --> src/lib.rs:13:42\n |\n13 | pub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0422]: cannot find struct, variant or union type `TickTimer` in this scope\n --> src/lib.rs:19:32\n |\n19 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^ not found in this scope\n\nerror[E0277]: invalid reducer signature\n --> src/lib.rs:13:8\n |\n 12 | #[reducer]\n | ---------- required by a bound introduced by this call\n 13 | pub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n | ^^^^ this reducer signature is not valid\n |\n = help: the trait `Reducer<'_, _>` is not implemented for fn item `for<'a> fn(&'a ReducerContext, {type error}) {tick}`\n = note: \n = note: reducer signatures must match the following pattern:\n = note: `Fn(&ReducerContext, [T1, ...]) [-> Result<(), impl Display>]`\n = note: where each `Ti` type implements `SpacetimeType`.\n = note: \nnote: required by a bound in `register_reducer`\n --> /root/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spacetimedb-1.11.1/src/rt.rs:746:81\n |\n746 | pub fn register_reducer<'a, A: Args<'a>, I: FnInfo>(_: impl Reducer<'a, A>) {\n | ^^^^^^^^^^^^^^ required by this bound in `register_reducer`\n\nerror[E0277]: invalid reducer signature\n --> src/lib.rs:13:8\n |\n12 | #[reducer]\n | ---------- required by a bound introduced by this call\n13 | pub fn tick(_ctx: &ReducerContext, _row: TickTimer) {\n | ^^^^ this reducer signature is not valid\n |\n = help: the trait `Reducer<'_, _>` is not implemented for fn item `for<'a> fn(&'a ReducerContext, {type error}) {tick}`\n = note: \n = note: reducer signatures must match the following pattern:\n = note: `Fn(&ReducerContext, [T1, ...]) [-> Result<(), impl Display>]`\n = note: where each `Ti` type implements `SpacetimeType`.\n = note: \nnote: required by a bound in `invoke_reducer`\n --> /root/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spacetimedb-1.11.1/src/rt.rs:45:19\n |\n44 | pub fn invoke_reducer<'a, A: Args<'a>>(\n | -------------- required by a bound in this function\n45 | reducer: impl Reducer<'a, A>,\n | ^^^^^^^^^^^^^^ required by this bound in `invoke_reducer`\n\nerror[E0599]: no method named `tick_timer` found for struct `Local` in the current scope\n --> src/lib.rs:19:12\n |\n19 | ctx.db.tick_timer().insert(TickTimer {\n | ^^^^^^^^^^ method not found in `Local`\n\nerror[E0599]: no variant or associated item named `repeat_every_micros` found for enum `ScheduleAt` in the current scope\n --> src/lib.rs:21:35\n |\n21 | scheduled_at: ScheduleAt::repeat_every_micros(50_000),\n | ^^^^^^^^^^^^^^^^^^^ variant or associated item not found in `ScheduleAt`\n\nSome errors have detailed explanations: E0277, E0412, E0422, E0599.\nFor more information about an error, try `rustc --explain E0277`.\nerror: could not compile `spacetime-module` (lib) due to 7 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", "phase": "build_or_publish" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:23.493005867Z", - "finished_at": "2026-01-26T15:33:38.753749775Z" + "started_at": "2026-01-27T18:06:55.320080695Z", + "finished_at": "2026-01-27T18:08:09.910881560Z" }, "t_018_constraints": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", "task": "t_018_constraints", "lang": "rust", - "golden_published": true, + "golden_published": false, "model_name": "GPT-5", "total_tests": 3, - "passed_tests": 3, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = account, index(name = by_name, btree(columns = [name])))]\npub struct Account {\n #[primary_key]\n id: i32,\n #[unique]\n email: String,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n let account = ctx.db.account();\n let ids: Vec = account.iter().map(|r| r.id).collect();\n for id in ids {\n account.id().delete(&id);\n }\n account.insert(Account { id: 1, email: \"a@example.com\".to_string(), name: \"Alice\".to_string() });\n account.insert(Account { id: 2, email: \"b@example.com\".to_string(), name: \"Bob\".to_string() });\n}", + "passed_tests": 0, + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = account, index(name = by_name, btree(columns = [name])))]\npub struct Account {\n #[primary_key]\n id: i32,\n #[unique]\n email: String,\n name: String,\n}\n\n#[reducer]\nfn seed(ctx: &ReducerContext) {\n let account = ctx.db.account();\n for row in account.iter().collect::>() {\n account.delete(&row);\n }\n account.insert(Account { id: 1, email: \"a@example.com\".to_string(), name: \"Alice\".to_string() });\n account.insert(Account { id: 2, email: \"b@example.com\".to_string(), name: \"Bob\".to_string() });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-018-constraints-golden", @@ -856,46 +900,18 @@ "work_dir_golden": "target/llm-runs/schema/t_018_constraints/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_018_constraints/rust/server/gpt-5/llm", "scorer_details": { - "constraints_row_parity_after_seed": { - "pass": true, - "partial": 1.0, - "notes": { - "args": [], - "golden_db": "schema-t-018-constraints-golden", - "golden_out": "id | email | name ----+-----------------+--------- 1 | \"a@example.com\" | \"Alice\"", - "llm_db": "schema-t-018-constraints-gpt-5-llm", - "llm_out": "id | email | name ----+-----------------+--------- 1 | \"a@example.com\" | \"Alice\"", - "query": "SELECT id, email, name FROM account WHERE id=1", - "reducer": "seed", - "server": "http://127.0.0.1:34081" - } - }, - "constraints_seed_two_rows": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM account WHERE id=2" - } - }, - "schema_parity": { - "pass": true, - "partial": 1.0, + "publish_error": { + "pass": false, + "partial": 0.0, "notes": { - "golden_db": "schema-t-018-constraints-golden", - "llm_db": "schema-t-018-constraints-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:34081", - "tables_diff": null, - "tables_equal": true + "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Updating crates.io index\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Compiling proc-macro2 v1.0.106\n Compiling quote v1.0.44\n Compiling unicode-ident v1.0.22\n Compiling typenum v1.19.0\n Compiling version_check v0.9.5\n Compiling autocfg v1.5.0\n Compiling serde_core v1.0.228\n Compiling heck v0.5.0\n Compiling cfg-if v1.0.4\n Compiling zerocopy v0.8.34\n Compiling either v1.15.0\n Compiling shlex v1.3.0\n Compiling find-msvc-tools v0.1.8\n Compiling serde v1.0.228\n Compiling nohash-hasher v0.2.0\n Compiling bitflags v2.10.0\n Compiling anyhow v1.0.100\n Compiling thiserror v1.0.69\n Compiling keccak v0.1.5\n Compiling heck v0.4.1\n Compiling zmij v1.0.17\n Compiling convert_case v0.4.0\n Compiling bytes v1.11.0\n Compiling arrayvec v0.7.6\n Compiling humantime v2.3.0\n Compiling constant_time_eq v0.4.2\n Compiling second-stack v0.3.5\n Compiling itoa v1.0.17\n Compiling hex v0.4.3\n Compiling getrandom v0.2.17\n Compiling arrayref v0.3.9\n Compiling smallvec v1.15.1\n Compiling spacetimedb-lib v1.11.1\n Compiling rand_core v0.6.4\n Compiling bytemuck v1.24.0\n Compiling serde_json v1.0.149\n Compiling log v0.4.29\n Compiling memchr v2.7.6\n Compiling scoped-tls v1.0.1\n Compiling itertools v0.12.1\n Compiling generic-array v0.14.7\n Compiling cc v1.2.54\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling blake3 v1.8.3\n Compiling decorum v0.3.1\n Compiling block-buffer v0.10.4\n Compiling crypto-common v0.1.7\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ethnum v1.5.2\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_018_constraints/rust/server/gpt-5/llm)\nerror[E0308]: mismatched types\n --> src/lib.rs:17:24\n |\n 17 | account.delete(&row);\n | ------ ^^^^ expected `Account`, found `&Account`\n | |\n | arguments to this method are incorrect\n |\nnote: method defined here\n --> /root/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spacetimedb-1.11.1/src/table.rs:104:8\n |\n104 | fn delete(&self, row: Self::Row) -> bool {\n | ^^^^^^\nhelp: consider removing the borrow\n |\n 17 - account.delete(&row);\n 17 + account.delete(row);\n |\n\nFor more information about this error, try `rustc --explain E0308`.\nerror: could not compile `spacetime-module` (lib) due to 1 previous error\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", + "phase": "build_or_publish" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:16.012174316Z", - "finished_at": "2026-01-26T15:32:54.091817951Z" + "started_at": "2026-01-27T18:06:49.098371643Z", + "finished_at": "2026-01-27T18:07:55.735499532Z" }, "t_019_many_to_many": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -905,7 +921,7 @@ "model_name": "GPT-5", "total_tests": 5, "passed_tests": 5, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n user_id: i32,\n name: String,\n}\n\n#[table(name = group)]\npub struct Group {\n #[primary_key]\n group_id: i32,\n title: String,\n}\n\n#[table(\n name = membership,\n index(name = by_user, btree(columns = [user_id])),\n index(name = by_group, btree(columns = [group_id]))\n)]\npub struct Membership {\n #[primary_key]\n id: i32,\n user_id: i32,\n group_id: i32,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n // Clear existing rows to ensure exact dataset\n for m in ctx.db.membership().iter() {\n ctx.db.membership().id().delete(&m.id);\n }\n for u in ctx.db.user().iter() {\n ctx.db.user().user_id().delete(&u.user_id);\n }\n for g in ctx.db.group().iter() {\n ctx.db.group().group_id().delete(&g.group_id);\n }\n\n // Insert users\n ctx.db.user().insert(User { user_id: 1, name: \"Alice\".to_string() });\n ctx.db.user().insert(User { user_id: 2, name: \"Bob\".to_string() });\n\n // Insert groups\n ctx.db.group().insert(Group { group_id: 10, title: \"Admin\".to_string() });\n ctx.db.group().insert(Group { group_id: 20, title: \"Dev\".to_string() });\n\n // Insert memberships\n ctx.db.membership().insert(Membership { id: 1, user_id: 1, group_id: 10 });\n ctx.db.membership().insert(Membership { id: 2, user_id: 1, group_id: 20 });\n ctx.db.membership().insert(Membership { id: 3, user_id: 2, group_id: 20 });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = user)]\npub struct User {\n #[primary_key]\n user_id: i32,\n name: String,\n}\n\n#[table(name = group)]\npub struct Group {\n #[primary_key]\n group_id: i32,\n title: String,\n}\n\n#[table(\n name = membership,\n index(name = by_user, btree(columns = [user_id])),\n index(name = by_group, btree(columns = [group_id]))\n)]\npub struct Membership {\n #[primary_key]\n id: i32,\n user_id: i32,\n group_id: i32,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n let m = ctx.db.membership();\n let m_ids: Vec = m.iter().map(|r| r.id).collect();\n for id in m_ids {\n m.id().delete(&id);\n }\n\n let u = ctx.db.user();\n let u_ids: Vec = u.iter().map(|r| r.user_id).collect();\n for id in u_ids {\n u.user_id().delete(&id);\n }\n\n let g = ctx.db.group();\n let g_ids: Vec = g.iter().map(|r| r.group_id).collect();\n for id in g_ids {\n g.group_id().delete(&id);\n }\n\n u.insert(User { user_id: 1, name: \"Alice\".to_string() });\n u.insert(User { user_id: 2, name: \"Bob\".to_string() });\n\n g.insert(Group { group_id: 10, title: \"Admin\".to_string() });\n g.insert(Group { group_id: 20, title: \"Dev\".to_string() });\n\n m.insert(Membership { id: 1, user_id: 1, group_id: 10 });\n m.insert(Membership { id: 2, user_id: 1, group_id: 20 });\n m.insert(Membership { id: 3, user_id: 2, group_id: 20 });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-019-many-to-many-golden", @@ -913,6 +929,24 @@ "work_dir_golden": "target/llm-runs/schema/t_019_many_to_many/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_019_many_to_many/rust/server/gpt-5/llm", "scorer_details": { + "m2m_has_1_20": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=20" + } + }, + "memberships_three_rows": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 3, + "expected": 3, + "sql": "SELECT COUNT(*) AS n FROM membership" + } + }, "schema_parity": { "pass": true, "partial": 1.0, @@ -921,7 +955,7 @@ "llm_db": "schema-t-019-many-to-many-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } @@ -943,29 +977,11 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=2 AND group_id=20" } - }, - "memberships_three_rows": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 3, - "expected": 3, - "sql": "SELECT COUNT(*) AS n FROM membership" - } - }, - "m2m_has_1_20": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=20" - } } }, "vendor": "openai", - "started_at": "2026-01-26T15:32:22.818452395Z", - "finished_at": "2026-01-26T15:33:12.862647549Z" + "started_at": "2026-01-27T18:06:44.213047979Z", + "finished_at": "2026-01-27T18:07:56.372405516Z" }, "t_020_ecs": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -975,7 +991,7 @@ "model_name": "GPT-5", "total_tests": 5, "passed_tests": 1, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n}\n\n#[spacetimedb::table(name = position)]\npub struct Position {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = velocity)]\npub struct Velocity {\n #[primary_key]\n entity_id: i32,\n vx: i32,\n vy: i32,\n}\n\n#[spacetimedb::table(name = next_position)]\npub struct NextPosition {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n // Entity 1\n if ctx.db.entity().id().find(1).is_none() {\n ctx.db.entity().insert(Entity { id: 1 });\n }\n if let Some(mut p) = ctx.db.position().entity_id().find(1) {\n p.x = 0;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n } else {\n ctx.db.position().insert(Position { entity_id: 1, x: 0, y: 0 });\n }\n if let Some(mut v) = ctx.db.velocity().entity_id().find(1) {\n v.vx = 1;\n v.vy = 0;\n ctx.db.velocity().entity_id().update(v);\n } else {\n ctx.db.velocity().insert(Velocity { entity_id: 1, vx: 1, vy: 0 });\n }\n\n // Entity 2\n if ctx.db.entity().id().find(2).is_none() {\n ctx.db.entity().insert(Entity { id: 2 });\n }\n if let Some(mut p) = ctx.db.position().entity_id().find(2) {\n p.x = 10;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n } else {\n ctx.db.position().insert(Position { entity_id: 2, x: 10, y: 0 });\n }\n if let Some(mut v) = ctx.db.velocity().entity_id().find(2) {\n v.vx = -2;\n v.vy = 3;\n ctx.db.velocity().entity_id().update(v);\n } else {\n ctx.db.velocity().insert(Velocity { entity_id: 2, vx: -2, vy: 3 });\n }\n}\n\n#[spacetimedb::reducer]\npub fn step(ctx: &ReducerContext) {\n for p in ctx.db.position().iter() {\n if let Some(v) = ctx.db.velocity().entity_id().find(p.entity_id) {\n let nx = p.x + v.vx;\n let ny = p.y + v.vy;\n if let Some(mut np) = ctx.db.next_position().entity_id().find(p.entity_id) {\n np.x = nx;\n np.y = ny;\n ctx.db.next_position().entity_id().update(np);\n } else {\n ctx.db\n .next_position()\n .insert(NextPosition { entity_id: p.entity_id, x: nx, y: ny });\n }\n }\n }\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n}\n\n#[table(name = position)]\npub struct Position {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[table(name = velocity)]\npub struct Velocity {\n #[primary_key]\n entity_id: i32,\n vx: i32,\n vy: i32,\n}\n\n#[table(name = next_position)]\npub struct NextPosition {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n if ctx.db.entity().id().find(1).is_none() {\n ctx.db.entity().insert(Entity { id: 1 });\n }\n match ctx.db.position().entity_id().find(1) {\n Some(mut p) => {\n p.x = 0;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n }\n None => {\n ctx.db.position().insert(Position { entity_id: 1, x: 0, y: 0 });\n }\n }\n match ctx.db.velocity().entity_id().find(1) {\n Some(mut v) => {\n v.vx = 1;\n v.vy = 0;\n ctx.db.velocity().entity_id().update(v);\n }\n None => {\n ctx.db.velocity().insert(Velocity { entity_id: 1, vx: 1, vy: 0 });\n }\n }\n\n if ctx.db.entity().id().find(2).is_none() {\n ctx.db.entity().insert(Entity { id: 2 });\n }\n match ctx.db.position().entity_id().find(2) {\n Some(mut p) => {\n p.x = 10;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n }\n None => {\n ctx.db.position().insert(Position { entity_id: 2, x: 10, y: 0 });\n }\n }\n match ctx.db.velocity().entity_id().find(2) {\n Some(mut v) => {\n v.vx = -2;\n v.vy = 3;\n ctx.db.velocity().entity_id().update(v);\n }\n None => {\n ctx.db.velocity().insert(Velocity { entity_id: 2, vx: -2, vy: 3 });\n }\n }\n}\n\n#[reducer]\npub fn step(ctx: &ReducerContext) {\n for pos in ctx.db.position().iter() {\n if let Some(vel) = ctx.db.velocity().entity_id().find(pos.entity_id) {\n let nx = pos.x + vel.vx;\n let ny = pos.y + vel.vy;\n match ctx.db.next_position().entity_id().find(pos.entity_id) {\n Some(mut np) => {\n np.x = nx;\n np.y = ny;\n ctx.db.next_position().entity_id().update(np);\n }\n None => {\n ctx.db.next_position().insert(NextPosition {\n entity_id: pos.entity_id,\n x: nx,\n y: ny,\n });\n }\n }\n }\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-020-ecs-golden", @@ -987,23 +1003,23 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c2002214a80a8c41eccac4e56c7e632cd82fed302ea0753203a8cb149cbfc10d/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c200fb0c82e6d20be24bafeb6849267805909e1c436bf9f7e4621bfde313f334/sql)\n", "phase": "sql" } }, - "ecs_next_pos_entity2": { + "ecs_seed_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c2002214a80a8c41eccac4e56c7e632cd82fed302ea0753203a8cb149cbfc10d/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c200fb0c82e6d20be24bafeb6849267805909e1c436bf9f7e4621bfde313f334/sql)\n", "phase": "sql" } }, - "ecs_seed_positions_count": { + "ecs_step_next_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c2002214a80a8c41eccac4e56c7e632cd82fed302ea0753203a8cb149cbfc10d/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c200fb0c82e6d20be24bafeb6849267805909e1c436bf9f7e4621bfde313f334/sql)\n", "phase": "sql" } }, @@ -1015,23 +1031,23 @@ "llm_db": "schema-t-020-ecs-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } }, - "ecs_step_next_positions_count": { + "ecs_next_pos_entity2": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34081/v1/database/c2002214a80a8c41eccac4e56c7e632cd82fed302ea0753203a8cb149cbfc10d/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:35581/v1/database/c200fb0c82e6d20be24bafeb6849267805909e1c436bf9f7e4621bfde313f334/sql)\n", "phase": "sql" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:51.422780227Z", - "finished_at": "2026-01-26T15:32:47.540737141Z" + "started_at": "2026-01-27T18:06:42.268009092Z", + "finished_at": "2026-01-27T18:07:52.610723629Z" }, "t_021_multi_column_index": { "hash": "e6a5a5374022e3250e0df2cb5e4f7e4777081f474a0ebc4e99302be42151ef59", @@ -1041,7 +1057,7 @@ "model_name": "GPT-5", "total_tests": 4, "passed_tests": 4, - "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = log, index(name = by_user_day, btree(columns = [user_id, day])))]\npub struct Log {\n #[primary_key]\n id: i32,\n user_id: i32,\n day: i32,\n message: String,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n let tbl = ctx.db.log();\n\n // Clear existing rows to ensure the table contains exactly the seeded rows\n let ids: Vec = tbl.iter().map(|r| r.id).collect();\n for id in ids {\n tbl.id().delete(&id);\n }\n\n tbl.insert(Log { id: 1, user_id: 7, day: 1, message: \"a\".to_string() });\n tbl.insert(Log { id: 2, user_id: 7, day: 2, message: \"b\".to_string() });\n tbl.insert(Log { id: 3, user_id: 9, day: 1, message: \"c\".to_string() });\n}", + "llm_output": "use spacetimedb::{table, reducer, ReducerContext, Table};\n\n#[table(name = log, index(name = by_user_day, btree(columns = [user_id, day])))]\npub struct Log {\n #[primary_key]\n id: i32,\n user_id: i32,\n day: i32,\n message: String,\n}\n\n#[reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.log().insert(Log { id: 1, user_id: 7, day: 1, message: \"a\".to_string() });\n ctx.db.log().insert(Log { id: 2, user_id: 7, day: 2, message: \"b\".to_string() });\n ctx.db.log().insert(Log { id: 3, user_id: 9, day: 1, message: \"c\".to_string() });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-021-multi-column-index-golden", @@ -1049,15 +1065,6 @@ "work_dir_golden": "target/llm-runs/schema/t_021_multi_column_index/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_021_multi_column_index/rust/server/gpt-5/llm", "scorer_details": { - "mcindex_lookup_u7_d2": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM log WHERE user_id=7 AND day=2" - } - }, "schema_parity": { "pass": true, "partial": 1.0, @@ -1066,11 +1073,20 @@ "llm_db": "schema-t-021-multi-column-index-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:34081", + "server": "http://127.0.0.1:35581", "tables_diff": null, "tables_equal": true } }, + "mcindex_lookup_u7_d1": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM log WHERE user_id=7 AND day=1" + } + }, "mcindex_seed_count": { "pass": true, "partial": 1.0, @@ -1080,19 +1096,19 @@ "sql": "SELECT COUNT(*) AS n FROM log" } }, - "mcindex_lookup_u7_d1": { + "mcindex_lookup_u7_d2": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM log WHERE user_id=7 AND day=1" + "sql": "SELECT COUNT(*) AS n FROM log WHERE user_id=7 AND day=2" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:31:55.504076208Z", - "finished_at": "2026-01-26T15:32:29.610771248Z" + "started_at": "2026-01-27T18:06:05.264506610Z", + "finished_at": "2026-01-27T18:07:06.204230569Z" } } } @@ -1100,14 +1116,14 @@ }, { "mode": "docs", - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "models": [ { "name": "GPT-5", "route_api_model": "gpt-5", "tasks": { "t_000_empty_reducers": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_000_empty_reducers", "lang": "rust", "golden_published": true, @@ -1132,18 +1148,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.378312906Z", - "finished_at": "2026-01-26T15:34:06.312973975Z" + "started_at": "2026-01-27T18:08:10.468291650Z", + "finished_at": "2026-01-27T18:09:15.522364820Z" }, "t_001_basic_tables": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_001_basic_tables", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 0, - "llm_output": "#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = product)]\npub struct Product {\n #[primary_key]\n id: i32,\n title: String,\n price: f32,\n in_stock: bool,\n}\n\n#[spacetimedb::table(name = note)]\npub struct Note {\n #[primary_key]\n id: i32,\n body: String,\n rating: i64,\n pinned: bool,\n}", + "llm_output": "use spacetimedb::Table;\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = product)]\npub struct Product {\n #[primary_key]\n id: i32,\n title: String,\n price: f32,\n in_stock: bool,\n}\n\n#[spacetimedb::table(name = note)]\npub struct Note {\n #[primary_key]\n id: i32,\n body: String,\n rating: i64,\n pinned: bool,\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-001-basic-tables-golden", @@ -1161,18 +1177,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.366156833Z", - "finished_at": "2026-01-26T15:34:21.141281364Z" + "started_at": "2026-01-27T18:08:10.473087589Z", + "finished_at": "2026-01-27T18:09:28.048711101Z" }, "t_002_scheduled_table": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_002_scheduled_table", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, ScheduleAt, Table};\nuse std::time::Duration;\n\n#[spacetimedb::table(name = tick_timer, scheduled(tick))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[spacetimedb::reducer]\npub fn tick(_ctx: &ReducerContext, _timer: TickTimer) {}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::Interval(Duration::from_micros(50_000).into()),\n });\n}", + "llm_output": "use std::time::Duration;\nuse spacetimedb::{ReducerContext, ScheduleAt, Table};\n\n#[spacetimedb::table(name = tick_timer, scheduled(tick))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: spacetimedb::ScheduleAt,\n}\n\n#[spacetimedb::reducer]\npub fn tick(_ctx: &ReducerContext, _timer: TickTimer) -> Result<(), String> {\n Ok(())\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) -> Result<(), String> {\n ctx.db.tick_timer().try_insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::Interval(Duration::from_micros(50_000).into()),\n })?;\n Ok(())\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-002-scheduled-table-golden", @@ -1190,18 +1206,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.381646420Z", - "finished_at": "2026-01-26T15:35:08.176674329Z" + "started_at": "2026-01-27T18:09:15.522500232Z", + "finished_at": "2026-01-27T18:10:08.269120418Z" }, "t_003_struct_in_table": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_003_struct_in_table", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 0, - "llm_output": "use spacetimedb::SpacetimeType;\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Position {\n pub x: i32,\n pub y: i32,\n}\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n pub id: i32,\n pub pos: Position,\n}", + "llm_output": "#[derive(spacetimedb::SpacetimeType, Clone, Debug)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n pos: Position,\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-003-struct-in-table-golden", @@ -1219,11 +1235,11 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.368608920Z", - "finished_at": "2026-01-26T15:34:21.997549378Z" + "started_at": "2026-01-27T18:08:10.507161225Z", + "finished_at": "2026-01-27T18:09:28.290370681Z" }, "t_004_insert": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_004_insert", "lang": "rust", "golden_published": true, @@ -1256,11 +1272,11 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.375009734Z", - "finished_at": "2026-01-26T15:35:05.975054748Z" + "started_at": "2026-01-27T18:08:10.464516164Z", + "finished_at": "2026-01-27T18:09:27.607691278Z" }, "t_005_update": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_005_update", "lang": "rust", "golden_published": true, @@ -1275,20 +1291,20 @@ "work_dir_golden": "target/llm-runs/basics/t_005_update/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_005_update/rust/server/gpt-5/llm", "scorer_details": { - "data_parity_update_user": { + "schema_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-005-update-golden`.\n", - "phase": "call_reducer_golden" + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-005-update-golden`.\n", + "phase": "describe_golden" } }, - "schema_parity": { + "data_parity_update_user": { "pass": false, "partial": 0.0, "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-005-update-golden`.\n", - "phase": "describe_golden" + "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-005-update-golden`.\n", + "phase": "call_reducer_golden" } }, "seed_users_row": { @@ -1302,18 +1318,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:06.313026786Z", - "finished_at": "2026-01-26T15:34:38.434768448Z" + "started_at": "2026-01-27T18:09:26.394826817Z", + "finished_at": "2026-01-27T18:10:10.331435614Z" }, "t_006_delete": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_006_delete", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 1, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn delete_user(ctx: &ReducerContext, id: i32) {\n ctx.db.user().id().delete(id);\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn delete_user(ctx: &ReducerContext, id: i32) {\n ctx.db.user().id().delete(&id);\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-006-delete-golden", @@ -1321,13 +1337,13 @@ "work_dir_golden": "target/llm-runs/basics/t_006_delete/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_006_delete/rust/server/gpt-5/llm", "scorer_details": { - "delete_user_count_zero": { - "pass": true, - "partial": 1.0, + "seed_users_row": { + "pass": false, + "partial": 0.0, "notes": { - "actual": 0, - "expected": 0, - "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1" + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-006-delete-golden`.\n", + "phase": "sql_golden", + "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" } }, "schema_parity": { @@ -1338,29 +1354,29 @@ "phase": "describe_golden" } }, - "seed_users_row": { - "pass": false, - "partial": 0.0, + "delete_user_count_zero": { + "pass": true, + "partial": 1.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-006-delete-golden`.\n", - "phase": "sql_golden", - "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" + "actual": 0, + "expected": 0, + "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:38.434794482Z", - "finished_at": "2026-01-26T15:35:08.669675470Z" + "started_at": "2026-01-27T18:08:10.483345655Z", + "finished_at": "2026-01-27T18:09:30.847667484Z" }, "t_007_crud": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_007_crud", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 4, "passed_tests": 1, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn crud(ctx: &ReducerContext) {\n ctx.db.user().insert(User {\n id: 1,\n name: \"Alice\".into(),\n age: 30,\n active: true,\n });\n ctx.db.user().insert(User {\n id: 2,\n name: \"Bob\".into(),\n age: 22,\n active: false,\n });\n if let Some(mut u) = ctx.db.user().id().find(1) {\n u.name = \"Alice2\".into();\n u.age = 31;\n u.active = false;\n ctx.db.user().id().update(u);\n }\n ctx.db.user().id().delete(2);\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer]\npub fn crud(ctx: &ReducerContext) {\n ctx.db.user().insert(User { id: 1, name: \"Alice\".into(), age: 30, active: true });\n ctx.db.user().insert(User { id: 2, name: \"Bob\".into(), age: 22, active: false });\n\n if let Some(mut u) = ctx.db.user().id().find(1) {\n u.name = \"Alice2\".into();\n u.age = 31;\n u.active = false;\n ctx.db.user().id().update(u);\n }\n\n ctx.db.user().id().delete(2);\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-007-crud-golden", @@ -1368,6 +1384,14 @@ "work_dir_golden": "target/llm-runs/basics/t_007_crud/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_007_crud/rust/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-007-crud-golden`.\n", + "phase": "describe_golden" + } + }, "crud_row_id1_parity": { "pass": false, "partial": 0.0, @@ -1385,14 +1409,6 @@ "sql": "SELECT COUNT(*) AS n FROM user WHERE id=2" } }, - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-007-crud-golden`.\n", - "phase": "describe_golden" - } - }, "crud_total_count_one": { "pass": false, "partial": 0.0, @@ -1404,18 +1420,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.388241938Z", - "finished_at": "2026-01-26T15:35:08.142795838Z" + "started_at": "2026-01-27T18:08:10.493696044Z", + "finished_at": "2026-01-27T18:09:25.674693247Z" }, "t_008_index_lookup": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_008_index_lookup", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn lookup_user_name(ctx: &ReducerContext, id: i32) {\n if let Some(u) = ctx.db.user().id().find(id) {\n ctx.db.result().insert(ResultRow { id: u.id, name: u.name });\n }\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn lookup_user_name(ctx: &ReducerContext, id: i32) {\n if let Some(user) = ctx.db.user().id().find(id) {\n ctx.db.result().insert(ResultRow { id: user.id, name: user.name });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-008-index-lookup-golden", @@ -1423,12 +1439,12 @@ "work_dir_golden": "target/llm-runs/basics/t_008_index_lookup/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_008_index_lookup/rust/server/gpt-5/llm", "scorer_details": { - "schema_parity": { + "index_lookup_projection_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-008-index-lookup-golden`.\n", - "phase": "describe_golden" + "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-008-index-lookup-golden`.\n", + "phase": "call_reducer_golden" } }, "seed_user_row": { @@ -1440,28 +1456,28 @@ "sql": "INSERT INTO user(id, name, age, active) VALUES (1, 'Alice', 30, true)" } }, - "index_lookup_projection_parity": { + "schema_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-008-index-lookup-golden`.\n", - "phase": "call_reducer_golden" + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-008-index-lookup-golden`.\n", + "phase": "describe_golden" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:21.997572287Z", - "finished_at": "2026-01-26T15:34:53.278840948Z" + "started_at": "2026-01-27T18:08:10.488444553Z", + "finished_at": "2026-01-27T18:09:26.394688043Z" }, "t_009_init": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_009_init", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 4, "passed_tests": 3, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) -> Result<(), String> {\n ctx.db.user().insert(User { id: 1, name: \"Alice\".to_string(), age: 30, active: true });\n ctx.db.user().insert(User { id: 2, name: \"Bob\".to_string(), age: 22, active: false });\n Ok(())\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n id: i32,\n name: String,\n age: i32,\n active: bool,\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) -> Result<(), String> {\n ctx.db.user().insert(User { id: 1, name: \"Alice\".into(), age: 30, active: true });\n ctx.db.user().insert(User { id: 2, name: \"Bob\".into(), age: 22, active: false });\n Ok(())\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-009-init-golden", @@ -1469,6 +1485,14 @@ "work_dir_golden": "target/llm-runs/basics/t_009_init/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_009_init/rust/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-009-init-golden`.\n", + "phase": "describe_golden" + } + }, "init_seed_bob": { "pass": true, "partial": 1.0, @@ -1495,22 +1519,14 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM user WHERE id=1 AND name='Alice' AND age=30 AND active=true" } - }, - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-009-init-golden`.\n", - "phase": "describe_golden" - } } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:21.141305493Z", - "finished_at": "2026-01-26T15:34:43.189086037Z" + "started_at": "2026-01-27T18:08:10.478367876Z", + "finished_at": "2026-01-27T18:09:29.061220237Z" }, "t_010_connect": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_010_connect", "lang": "rust", "golden_published": true, @@ -1535,18 +1551,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.384904874Z", - "finished_at": "2026-01-26T15:35:08.049424018Z" + "started_at": "2026-01-27T18:09:25.674828927Z", + "finished_at": "2026-01-27T18:10:08.866748375Z" }, "t_011_helper_function": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_011_helper_function", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n sum: i32,\n}\n\nfn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\n#[spacetimedb::reducer]\npub fn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) {\n let s = add(a, b);\n ctx.db.result().insert(ResultRow { id, sum: s });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n sum: i32,\n}\n\npub fn add(a: i32, b: i32) -> i32 {\n a + b\n}\n\n#[spacetimedb::reducer]\npub fn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) {\n let sum = add(a, b);\n ctx.db.result().insert(ResultRow { id, sum });\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-011-helper-function-golden", @@ -1554,6 +1570,14 @@ "work_dir_golden": "target/llm-runs/basics/t_011_helper_function/rust/server/golden", "work_dir_llm": "target/llm-runs/basics/t_011_helper_function/rust/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-011-helper-function-golden`.\n", + "phase": "describe_golden" + } + }, "helper_func_sum_parity": { "pass": false, "partial": 0.0, @@ -1570,22 +1594,14 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1 AND sum=5" } - }, - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `basics-t-011-helper-function-golden`.\n", - "phase": "describe_golden" - } } }, "vendor": "openai", - "started_at": "2026-01-26T15:33:39.371740140Z", - "finished_at": "2026-01-26T15:35:03.413658925Z" + "started_at": "2026-01-27T18:09:27.607780916Z", + "finished_at": "2026-01-27T18:10:11.175614058Z" }, "t_012_spacetime_product_type": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_012_spacetime_product_type", "lang": "rust", "golden_published": true, @@ -1600,13 +1616,12 @@ "work_dir_golden": "target/llm-runs/schema/t_012_spacetime_product_type/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_012_spacetime_product_type/rust/server/gpt-5/llm", "scorer_details": { - "product_type_row_count": { + "schema_parity": { "pass": false, "partial": 0.0, "notes": { - "actual": 0, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1" + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-012-spacetime-product-type-golden`.\n", + "phase": "describe_golden" } }, "product_type_row_parity": { @@ -1617,28 +1632,29 @@ "phase": "call_reducer_golden" } }, - "schema_parity": { + "product_type_row_count": { "pass": false, "partial": 0.0, "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-012-spacetime-product-type-golden`.\n", - "phase": "describe_golden" + "actual": 0, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:08.176748927Z", - "finished_at": "2026-01-26T15:35:33.331707923Z" + "started_at": "2026-01-27T18:10:08.269221663Z", + "finished_at": "2026-01-27T18:10:40.851752522Z" }, "t_013_spacetime_sum_type": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_013_spacetime_sum_type", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType)]\npub struct Rect {\n width: i32,\n height: i32,\n}\n\n#[derive(SpacetimeType)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n value: Shape,\n}\n\n#[spacetimedb::reducer]\npub fn set_circle(ctx: &ReducerContext, id: i32, radius: i32) {\n ctx.db.result().insert(ResultRow {\n id,\n value: Shape::Circle(radius),\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Rect {\n width: i32,\n height: i32,\n}\n\n#[derive(SpacetimeType, Clone, Debug)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[spacetimedb::table(name = result)]\npub struct ResultRow {\n #[primary_key]\n id: i32,\n value: Shape,\n}\n\n#[spacetimedb::reducer]\npub fn set_circle(ctx: &ReducerContext, id: i32, radius: i32) {\n ctx.db.result().insert(ResultRow {\n id,\n value: Shape::Circle(radius),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-013-spacetime-sum-type-golden", @@ -1646,15 +1662,6 @@ "work_dir_golden": "target/llm-runs/schema/t_013_spacetime_sum_type/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_013_spacetime_sum_type/rust/server/gpt-5/llm", "scorer_details": { - "sum_type_row_count": { - "pass": false, - "partial": 0.0, - "notes": { - "actual": 0, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1" - } - }, "schema_parity": { "pass": false, "partial": 0.0, @@ -1670,21 +1677,30 @@ "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-013-spacetime-sum-type-golden`.\n", "phase": "call_reducer_golden" } + }, + "sum_type_row_count": { + "pass": false, + "partial": 0.0, + "notes": { + "actual": 0, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM result WHERE id=1" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:08.669696395Z", - "finished_at": "2026-01-26T15:35:36.590766489Z" + "started_at": "2026-01-27T18:09:29.061329619Z", + "finished_at": "2026-01-27T18:10:10.454045766Z" }, "t_014_elementary_columns": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_014_elementary_columns", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = primitive)]\npub struct Primitive {\n #[primary_key]\n id: i32,\n count: i32,\n total: i64,\n price: f32,\n ratio: f64,\n active: bool,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.primitive().insert(Primitive {\n id: 1,\n count: 2,\n total: 3_000_000_000,\n price: 1.5_f32,\n ratio: 2.25,\n active: true,\n name: \"Alice\".to_string(),\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = primitive)]\npub struct Primitive {\n #[primary_key]\n id: i32,\n count: i32,\n total: i64,\n price: f32,\n ratio: f64,\n active: bool,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.primitive().insert(Primitive {\n id: 1,\n count: 2,\n total: 3_000_000_000i64,\n price: 1.5f32,\n ratio: 2.25,\n active: true,\n name: \"Alice\".to_string(),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-014-elementary-columns-golden", @@ -1719,18 +1735,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:08.180313713Z", - "finished_at": "2026-01-26T15:35:31.695987649Z" + "started_at": "2026-01-27T18:10:11.175648161Z", + "finished_at": "2026-01-27T18:10:46.115476461Z" }, "t_015_product_type_columns": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_015_product_type_columns", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Address {\n street: String,\n zip: i32,\n}\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = profile)]\npub struct Profile {\n #[primary_key]\n id: i32,\n home: Address,\n work: Address,\n pos: Position,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n for row in ctx.db.profile().iter() {\n ctx.db.profile().id().delete(row.id);\n }\n ctx.db.profile().insert(Profile {\n id: 1,\n home: Address {\n street: \"1 Main\".to_string(),\n zip: 11111,\n },\n work: Address {\n street: \"2 Broad\".to_string(),\n zip: 22222,\n },\n pos: Position { x: 7, y: 9 },\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType, Clone)]\npub struct Address {\n street: String,\n zip: i32,\n}\n\n#[derive(SpacetimeType, Clone)]\npub struct Position {\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = profile)]\npub struct Profile {\n #[primary_key]\n id: i32,\n home: Address,\n work: Address,\n pos: Position,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n if ctx.db.profile().count() == 0 {\n ctx.db.profile().insert(Profile {\n id: 1,\n home: Address {\n street: \"1 Main\".to_string(),\n zip: 11111,\n },\n work: Address {\n street: \"2 Broad\".to_string(),\n zip: 22222,\n },\n pos: Position { x: 7, y: 9 },\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-015-product-type-columns-golden", @@ -1747,36 +1763,36 @@ "sql": "SELECT COUNT(*) AS n FROM profile WHERE id=1" } }, - "product_type_columns_row_parity": { + "schema_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-015-product-type-columns-golden`.\n", - "phase": "call_reducer_golden" + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-015-product-type-columns-golden`.\n", + "phase": "describe_golden" } }, - "schema_parity": { + "product_type_columns_row_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-015-product-type-columns-golden`.\n", - "phase": "describe_golden" + "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-015-product-type-columns-golden`.\n", + "phase": "call_reducer_golden" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:17.784622487Z", - "finished_at": "2026-01-26T15:35:56.892022012Z" + "started_at": "2026-01-27T18:09:28.048829175Z", + "finished_at": "2026-01-27T18:10:12.979954214Z" }, "t_016_sum_type_columns": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_016_sum_type_columns", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Rect {\n width: i32,\n height: i32,\n}\n\n#[derive(SpacetimeType, Clone, Debug)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[spacetimedb::table(name = drawing)]\npub struct Drawing {\n #[primary_key]\n id: i32,\n a: Shape,\n b: Shape,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n let _ = ctx.db.drawing().try_insert(Drawing {\n id: 1,\n a: Shape::Circle(10),\n b: Shape::Rectangle(Rect { width: 4, height: 6 }),\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, SpacetimeType, Table};\n\n#[derive(SpacetimeType, Clone, Debug)]\npub struct Rect {\n pub width: i32,\n pub height: i32,\n}\n\n#[derive(SpacetimeType, Clone, Debug)]\npub enum Shape {\n Circle(i32),\n Rectangle(Rect),\n}\n\n#[spacetimedb::table(name = drawing)]\npub struct Drawing {\n #[primary_key]\n pub id: i32,\n pub a: Shape,\n pub b: Shape,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n if ctx.db.drawing().id().find(1).is_none() {\n ctx.db.drawing().insert(Drawing {\n id: 1,\n a: Shape::Circle(10),\n b: Shape::Rectangle(Rect { width: 4, height: 6 }),\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-016-sum-type-columns-golden", @@ -1796,7 +1812,7 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:46385/v1/database/c200e332ee39012f68748e8e0058b1df01bfd4bd42fa50c7430f6e03a52a13d7/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:43039/v1/database/c2007eb012f780e4ee92722a58c951f8a97922c5240d56b425f48877aec8de40/sql)\n", "phase": "sql" } }, @@ -1810,18 +1826,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:20.422565504Z", - "finished_at": "2026-01-26T15:35:53.522136009Z" + "started_at": "2026-01-27T18:09:29.065451249Z", + "finished_at": "2026-01-27T18:10:17.219682957Z" }, "t_017_scheduled_columns": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_017_scheduled_columns", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 2, "passed_tests": 1, - "llm_output": "use spacetimedb::{ReducerContext, Table};\nuse std::time::Duration;\n\n#[spacetimedb::table(name = tick_timer, scheduled(tick))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: spacetimedb::ScheduleAt,\n}\n\n#[spacetimedb::reducer]\npub fn tick(_ctx: &ReducerContext, _timer: TickTimer) -> Result<(), String> {\n Ok(())\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) -> Result<(), String> {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: spacetimedb::ScheduleAt::Interval(Duration::from_micros(50_000).into()),\n });\n Ok(())\n}", + "llm_output": "use spacetimedb::{ReducerContext, ScheduleAt, Table};\nuse std::time::Duration;\n\n#[spacetimedb::table(name = tick_timer, scheduled(tick))]\npub struct TickTimer {\n #[primary_key]\n #[auto_inc]\n scheduled_id: u64,\n scheduled_at: ScheduleAt,\n}\n\n#[spacetimedb::reducer]\npub fn tick(_ctx: &ReducerContext, _timer: TickTimer) {\n // Scheduled reducer invoked at each interval; no-op for this task.\n}\n\n#[spacetimedb::reducer(init)]\npub fn init(ctx: &ReducerContext) {\n ctx.db.tick_timer().insert(TickTimer {\n scheduled_id: 0,\n scheduled_at: ScheduleAt::Interval(Duration::from_micros(50_000).into()),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-017-scheduled-columns-golden", @@ -1829,14 +1845,6 @@ "work_dir_golden": "target/llm-runs/schema/t_017_scheduled_columns/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_017_scheduled_columns/rust/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-017-scheduled-columns-golden`.\n", - "phase": "describe_golden" - } - }, "scheduled_seeded_one_row": { "pass": true, "partial": 1.0, @@ -1845,21 +1853,29 @@ "expected": 1, "sql": "SELECT COUNT(*) AS n FROM tick_timer WHERE scheduled_id>=0" } + }, + "schema_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-017-scheduled-columns-golden`.\n", + "phase": "describe_golden" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:08.142877410Z", - "finished_at": "2026-01-26T15:35:53.117891027Z" + "started_at": "2026-01-27T18:10:14.274330642Z", + "finished_at": "2026-01-27T18:11:10.485777490Z" }, "t_018_constraints": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_018_constraints", "lang": "rust", - "golden_published": true, + "golden_published": false, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = account, index(name = by_name, btree(columns = [name])))]\npub struct Account {\n #[primary_key]\n id: i32,\n #[unique]\n email: String,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.account().id().delete(&1);\n ctx.db.account().id().delete(&2);\n ctx.db.account().insert(Account {\n id: 1,\n email: \"a@example.com\".to_string(),\n name: \"Alice\".to_string(),\n });\n ctx.db.account().insert(Account {\n id: 2,\n email: \"b@example.com\".to_string(),\n name: \"Bob\".to_string(),\n });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = account, index(name = by_name, btree = [name]))]\npub struct Account {\n #[primary_key]\n id: i32,\n #[unique]\n email: String,\n name: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.account().insert(Account {\n id: 1,\n email: \"a@example.com\".into(),\n name: \"Alice\".into(),\n });\n ctx.db.account().insert(Account {\n id: 2,\n email: \"b@example.com\".into(),\n name: \"Bob\".into(),\n });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-018-constraints-golden", @@ -1867,45 +1883,28 @@ "work_dir_golden": "target/llm-runs/schema/t_018_constraints/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_018_constraints/rust/server/gpt-5/llm", "scorer_details": { - "constraints_seed_two_rows": { - "pass": false, - "partial": 0.0, - "notes": { - "actual": 0, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM account WHERE id=2" - } - }, - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-018-constraints-golden`.\n", - "phase": "describe_golden" - } - }, - "constraints_row_parity_after_seed": { + "publish_error": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime call failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-018-constraints-golden`.\n", - "phase": "call_reducer_golden" + "error": "spacetime publish failed (exit=1)\n--- stderr ---\n Updating crates.io index\n Locking 72 packages to latest compatible versions\n Adding generic-array v0.14.7 (available: v0.14.9)\n Adding spacetimedb v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-macro v1.11.1 (available: v1.11.3)\n Adding spacetimedb-bindings-sys v1.11.1 (available: v1.11.3)\n Adding spacetimedb-lib v1.11.1 (available: v1.11.3)\n Adding spacetimedb-primitives v1.11.1 (available: v1.11.3)\n Adding spacetimedb-sats v1.11.1 (available: v1.11.3)\n Compiling proc-macro2 v1.0.106\n Compiling quote v1.0.44\n Compiling unicode-ident v1.0.22\n Compiling typenum v1.19.0\n Compiling version_check v0.9.5\n Compiling autocfg v1.5.0\n Compiling heck v0.5.0\n Compiling serde_core v1.0.228\n Compiling cfg-if v1.0.4\n Compiling zerocopy v0.8.34\n Compiling either v1.15.0\n Compiling serde v1.0.228\n Compiling shlex v1.3.0\n Compiling find-msvc-tools v0.1.8\n Compiling anyhow v1.0.100\n Compiling nohash-hasher v0.2.0\n Compiling bitflags v2.10.0\n Compiling thiserror v1.0.69\n Compiling arrayvec v0.7.6\n Compiling bytes v1.11.0\n Compiling zmij v1.0.17\n Compiling humantime v2.3.0\n Compiling keccak v0.1.5\n Compiling convert_case v0.4.0\n Compiling heck v0.4.1\n Compiling spacetimedb-lib v1.11.1\n Compiling smallvec v1.15.1\n Compiling itoa v1.0.17\n Compiling hex v0.4.3\n Compiling getrandom v0.2.17\n Compiling serde_json v1.0.149\n Compiling arrayref v0.3.9\n Compiling bytemuck v1.24.0\n Compiling constant_time_eq v0.4.2\n Compiling generic-array v0.14.7\n Compiling second-stack v0.3.5\n Compiling log v0.4.29\n Compiling cc v1.2.54\n Compiling memchr v2.7.6\n Compiling scoped-tls v1.0.1\n Compiling rand_core v0.6.4\n Compiling itertools v0.12.1\n Compiling num-traits v0.2.19\n Compiling http v1.4.0\n Compiling syn v2.0.114\n Compiling approx v0.3.2\n Compiling chrono v0.4.43\n Compiling blake3 v1.8.3\n Compiling crypto-common v0.1.7\n Compiling block-buffer v0.10.4\n Compiling decorum v0.3.1\n Compiling digest v0.10.7\n Compiling sha3 v0.10.8\n Compiling ethnum v1.5.2\n Compiling ppv-lite86 v0.2.21\n Compiling rand_chacha v0.3.1\n Compiling rand v0.8.5\n Compiling enum-as-inner v0.6.1\n Compiling thiserror-impl v1.0.69\n Compiling derive_more v0.99.20\n Compiling spacetimedb-primitives v1.11.1\n Compiling spacetimedb-bindings-macro v1.11.1\n Compiling spacetimedb-bindings-sys v1.11.1\n Compiling spacetimedb-sats v1.11.1\n Compiling spacetimedb v1.11.1\n Compiling spacetime-module v0.1.0 (/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_018_constraints/rust/server/gpt-5/llm)\nerror: expected parentheses\n --> src/lib.rs:4:66\n |\n4 | #[spacetimedb::table(name = account, index(name = by_name, btree = [name]))]\n | ^\n\nerror[E0422]: cannot find struct, variant or union type `Account` in this scope\n --> src/lib.rs:15:29\n |\n15 | ctx.db.account().insert(Account {\n | ^^^^^^^ not found in this scope\n\nerror[E0422]: cannot find struct, variant or union type `Account` in this scope\n --> src/lib.rs:20:29\n |\n20 | ctx.db.account().insert(Account {\n | ^^^^^^^ not found in this scope\n\nerror[E0599]: no method named `account` found for struct `Local` in the current scope\n --> src/lib.rs:15:12\n |\n15 | ctx.db.account().insert(Account {\n | ^^^^^^^ method not found in `Local`\n\nerror[E0599]: no method named `account` found for struct `Local` in the current scope\n --> src/lib.rs:20:12\n |\n20 | ctx.db.account().insert(Account {\n | ^^^^^^^ method not found in `Local`\n\nSome errors have detailed explanations: E0422, E0599.\nFor more information about an error, try `rustc --explain E0422`.\nerror: could not compile `spacetime-module` (lib) due to 5 previous errors\nError: command [\"cargo\", \"build\", \"--config=net.git-fetch-with-cli=true\", \"--target=wasm32-unknown-unknown\", \"--release\", \"--message-format=json-render-diagnostics\"] exited with code 101\n\n--- stdout ---\n", + "phase": "build_or_publish" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:03.413773673Z", - "finished_at": "2026-01-26T15:35:40.991202532Z" + "started_at": "2026-01-27T18:10:10.454087378Z", + "finished_at": "2026-01-27T18:10:48.274726800Z" }, "t_019_many_to_many": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_019_many_to_many", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 5, "passed_tests": 4, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n user_id: i32,\n name: String,\n}\n\n#[spacetimedb::table(name = group)]\npub struct Group {\n #[primary_key]\n group_id: i32,\n title: String,\n}\n\n#[spacetimedb::table(\n name = membership,\n index(name = by_user, btree(columns = [user_id])),\n index(name = by_group, btree(columns = [group_id]))\n)]\npub struct Membership {\n #[primary_key]\n id: i32,\n user_id: i32,\n group_id: i32,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n for m in ctx.db.membership().iter() {\n ctx.db.membership().id().delete(m.id);\n }\n for u in ctx.db.user().iter() {\n ctx.db.user().user_id().delete(u.user_id);\n }\n for g in ctx.db.group().iter() {\n ctx.db.group().group_id().delete(g.group_id);\n }\n\n ctx.db.user().insert(User { user_id: 1, name: \"Alice\".into() });\n ctx.db.user().insert(User { user_id: 2, name: \"Bob\".into() });\n\n ctx.db.group().insert(Group { group_id: 10, title: \"Admin\".into() });\n ctx.db.group().insert(Group { group_id: 20, title: \"Dev\".into() });\n\n ctx.db.membership().insert(Membership { id: 1, user_id: 1, group_id: 10 });\n ctx.db.membership().insert(Membership { id: 2, user_id: 1, group_id: 20 });\n ctx.db.membership().insert(Membership { id: 3, user_id: 2, group_id: 20 });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = user)]\npub struct User {\n #[primary_key]\n user_id: i32,\n name: String,\n}\n\n#[spacetimedb::table(name = group)]\npub struct Group {\n #[primary_key]\n group_id: i32,\n title: String,\n}\n\n#[spacetimedb::table(\n name = membership,\n index(name = by_user, btree(columns = [user_id])),\n index(name = by_group, btree(columns = [group_id]))\n)]\npub struct Membership {\n #[primary_key]\n id: i32,\n user_id: i32,\n group_id: i32,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n // Clear existing data to ensure exact seeded rows\n let mids: Vec = ctx.db.membership().iter().map(|m| m.id).collect();\n for id in mids {\n ctx.db.membership().id().delete(id);\n }\n let gids: Vec = ctx.db.group().iter().map(|g| g.group_id).collect();\n for gid in gids {\n ctx.db.group().group_id().delete(gid);\n }\n let uids: Vec = ctx.db.user().iter().map(|u| u.user_id).collect();\n for uid in uids {\n ctx.db.user().user_id().delete(uid);\n }\n\n // Users\n ctx.db.user().insert(User { user_id: 1, name: \"Alice\".to_string() });\n ctx.db.user().insert(User { user_id: 2, name: \"Bob\".to_string() });\n\n // Groups\n ctx.db.group().insert(Group { group_id: 10, title: \"Admin\".to_string() });\n ctx.db.group().insert(Group { group_id: 20, title: \"Dev\".to_string() });\n\n // Memberships\n ctx.db.membership().insert(Membership { id: 1, user_id: 1, group_id: 10 });\n ctx.db.membership().insert(Membership { id: 2, user_id: 1, group_id: 20 });\n ctx.db.membership().insert(Membership { id: 3, user_id: 2, group_id: 20 });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-019-many-to-many-golden", @@ -1913,22 +1912,22 @@ "work_dir_golden": "target/llm-runs/schema/t_019_many_to_many/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_019_many_to_many/rust/server/gpt-5/llm", "scorer_details": { - "m2m_has_2_20": { + "m2m_has_1_20": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=2 AND group_id=20" + "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=20" } }, - "m2m_has_1_20": { + "m2m_has_1_10": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=20" + "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=10" } }, "memberships_three_rows": { @@ -1948,29 +1947,29 @@ "phase": "describe_golden" } }, - "m2m_has_1_10": { + "m2m_has_2_20": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=1 AND group_id=10" + "sql": "SELECT COUNT(*) AS n FROM membership WHERE user_id=2 AND group_id=20" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:35:05.975134623Z", - "finished_at": "2026-01-26T15:35:55.889702970Z" + "started_at": "2026-01-27T18:10:10.331474660Z", + "finished_at": "2026-01-27T18:11:10.432610459Z" }, "t_020_ecs": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_020_ecs", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 5, "passed_tests": 0, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n}\n\n#[spacetimedb::table(name = position)]\npub struct Position {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = velocity)]\npub struct Velocity {\n #[primary_key]\n entity_id: i32,\n vx: i32,\n vy: i32,\n}\n\n#[spacetimedb::table(name = next_position)]\npub struct NextPosition {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n // Entity 1\n let id1 = 1;\n if ctx.db.entity().id().find(id1).is_none() {\n ctx.db.entity().insert(Entity { id: id1 });\n }\n if let Some(mut p) = ctx.db.position().entity_id().find(id1) {\n p.x = 0;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n } else {\n ctx.db.position().insert(Position { entity_id: id1, x: 0, y: 0 });\n }\n if let Some(mut v) = ctx.db.velocity().entity_id().find(id1) {\n v.vx = 1;\n v.vy = 0;\n ctx.db.velocity().entity_id().update(v);\n } else {\n ctx.db.velocity().insert(Velocity { entity_id: id1, vx: 1, vy: 0 });\n }\n\n // Entity 2\n let id2 = 2;\n if ctx.db.entity().id().find(id2).is_none() {\n ctx.db.entity().insert(Entity { id: id2 });\n }\n if let Some(mut p) = ctx.db.position().entity_id().find(id2) {\n p.x = 10;\n p.y = 0;\n ctx.db.position().entity_id().update(p);\n } else {\n ctx.db.position().insert(Position { entity_id: id2, x: 10, y: 0 });\n }\n if let Some(mut v) = ctx.db.velocity().entity_id().find(id2) {\n v.vx = -2;\n v.vy = 3;\n ctx.db.velocity().entity_id().update(v);\n } else {\n ctx.db.velocity().insert(Velocity { entity_id: id2, vx: -2, vy: 3 });\n }\n}\n\n#[spacetimedb::reducer]\npub fn step(ctx: &ReducerContext) {\n for pos in ctx.db.position().iter() {\n if let Some(vel) = ctx.db.velocity().entity_id().find(pos.entity_id) {\n let nx = pos.x + vel.vx;\n let ny = pos.y + vel.vy;\n if let Some(mut np) = ctx.db.next_position().entity_id().find(pos.entity_id) {\n np.x = nx;\n np.y = ny;\n ctx.db.next_position().entity_id().update(np);\n } else {\n ctx.db.next_position().insert(NextPosition {\n entity_id: pos.entity_id,\n x: nx,\n y: ny,\n });\n }\n }\n }\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = entity)]\npub struct Entity {\n #[primary_key]\n id: i32,\n}\n\n#[spacetimedb::table(name = position)]\npub struct Position {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::table(name = velocity)]\npub struct Velocity {\n #[primary_key]\n entity_id: i32,\n vx: i32,\n vy: i32,\n}\n\n#[spacetimedb::table(name = next_position)]\npub struct NextPosition {\n #[primary_key]\n entity_id: i32,\n x: i32,\n y: i32,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n // entity 1\n ctx.db.entity().insert(Entity { id: 1 });\n ctx.db.position().insert(Position { entity_id: 1, x: 0, y: 0 });\n ctx.db.velocity().insert(Velocity { entity_id: 1, vx: 1, vy: 0 });\n\n // entity 2\n ctx.db.entity().insert(Entity { id: 2 });\n ctx.db.position().insert(Position { entity_id: 2, x: 10, y: 0 });\n ctx.db.velocity().insert(Velocity { entity_id: 2, vx: -2, vy: 3 });\n}\n\n#[spacetimedb::reducer]\npub fn step(ctx: &ReducerContext) {\n for pos in ctx.db.position().iter() {\n if let Some(vel) = ctx.db.velocity().entity_id().find(pos.entity_id) {\n let nx = pos.x + vel.vx;\n let ny = pos.y + vel.vy;\n let id = pos.entity_id;\n if ctx.db.next_position().entity_id().find(id).is_some() {\n ctx.db.next_position().entity_id().update(NextPosition {\n entity_id: id,\n x: nx,\n y: ny,\n });\n } else {\n ctx.db.next_position().insert(NextPosition {\n entity_id: id,\n x: nx,\n y: ny,\n });\n }\n }\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-020-ecs-golden", @@ -1978,11 +1977,11 @@ "work_dir_golden": "target/llm-runs/schema/t_020_ecs/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_020_ecs/rust/server/gpt-5/llm", "scorer_details": { - "ecs_seed_positions_count": { + "ecs_step_next_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:46385/v1/database/c20019f8f996e9e87f067a3b3fb3b441e459a573af67d60fda8d5cb14edf5876/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:43039/v1/database/c2007a660b6c9495f475227d0480c1bf36d7a61b02fc6762b7333685fc9ab5ef/sql)\n", "phase": "sql" } }, @@ -1990,48 +1989,48 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:46385/v1/database/c20019f8f996e9e87f067a3b3fb3b441e459a573af67d60fda8d5cb14edf5876/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:43039/v1/database/c2007a660b6c9495f475227d0480c1bf36d7a61b02fc6762b7333685fc9ab5ef/sql)\n", "phase": "sql" } }, - "schema_parity": { + "ecs_next_pos_entity2": { "pass": false, "partial": 0.0, "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-020-ecs-golden`.\n", - "phase": "describe_golden" + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:43039/v1/database/c2007a660b6c9495f475227d0480c1bf36d7a61b02fc6762b7333685fc9ab5ef/sql)\n", + "phase": "sql" } }, - "ecs_next_pos_entity2": { + "schema_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:46385/v1/database/c20019f8f996e9e87f067a3b3fb3b441e459a573af67d60fda8d5cb14edf5876/sql)\n", - "phase": "sql" + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-020-ecs-golden`.\n", + "phase": "describe_golden" } }, - "ecs_step_next_positions_count": { + "ecs_seed_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:46385/v1/database/c20019f8f996e9e87f067a3b3fb3b441e459a573af67d60fda8d5cb14edf5876/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:43039/v1/database/c2007a660b6c9495f475227d0480c1bf36d7a61b02fc6762b7333685fc9ab5ef/sql)\n", "phase": "sql" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:43.189115873Z", - "finished_at": "2026-01-26T15:35:20.422539334Z" + "started_at": "2026-01-27T18:10:08.866790439Z", + "finished_at": "2026-01-27T18:10:55.294368479Z" }, "t_021_multi_column_index": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "task": "t_021_multi_column_index", "lang": "rust", "golden_published": true, "model_name": "GPT-5", "total_tests": 4, "passed_tests": 3, - "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = log, index(name = by_user_day, btree(columns = [user_id, day])))]\npub struct Log {\n #[primary_key]\n id: i32,\n user_id: i32,\n day: i32,\n message: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n let t = ctx.db.log();\n t.insert(Log { id: 1, user_id: 7, day: 1, message: \"a\".into() });\n t.insert(Log { id: 2, user_id: 7, day: 2, message: \"b\".into() });\n t.insert(Log { id: 3, user_id: 9, day: 1, message: \"c\".into() });\n}", + "llm_output": "use spacetimedb::{ReducerContext, Table};\n\n#[spacetimedb::table(name = log, index(name = by_user_day, btree(columns = [user_id, day])))]\npub struct Log {\n #[primary_key]\n id: i32,\n user_id: i32,\n day: i32,\n message: String,\n}\n\n#[spacetimedb::reducer]\npub fn seed(ctx: &ReducerContext) {\n ctx.db.log().insert(Log { id: 1, user_id: 7, day: 1, message: \"a\".to_string() });\n ctx.db.log().insert(Log { id: 2, user_id: 7, day: 2, message: \"b\".to_string() });\n ctx.db.log().insert(Log { id: 3, user_id: 9, day: 1, message: \"c\".to_string() });\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-021-multi-column-index-golden", @@ -2039,14 +2038,6 @@ "work_dir_golden": "target/llm-runs/schema/t_021_multi_column_index/rust/server/golden", "work_dir_llm": "target/llm-runs/schema/t_021_multi_column_index/rust/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-021-multi-column-index-golden`.\n", - "phase": "describe_golden" - } - }, "mcindex_seed_count": { "pass": true, "partial": 1.0, @@ -2065,6 +2056,14 @@ "sql": "SELECT COUNT(*) AS n FROM log WHERE user_id=7 AND day=1" } }, + "schema_parity": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "describe failed: WARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: failed to find database `schema-t-021-multi-column-index-golden`.\n", + "phase": "describe_golden" + } + }, "mcindex_lookup_u7_d2": { "pass": true, "partial": 1.0, @@ -2076,8 +2075,8 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:34:53.278868473Z", - "finished_at": "2026-01-26T15:35:17.784590880Z" + "started_at": "2026-01-27T18:09:30.847781990Z", + "finished_at": "2026-01-27T18:10:14.274107163Z" } } } @@ -2268,14 +2267,14 @@ "modes": [ { "mode": "docs", - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "models": [ { "name": "GPT-5", "route_api_model": "gpt-5", "tasks": { "t_000_empty_reducers": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_000_empty_reducers", "lang": "csharp", "golden_published": true, @@ -2298,18 +2297,18 @@ "llm_db": "basics-t-000-empty-reducers-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:38:48.351519518Z", - "finished_at": "2026-01-26T15:39:15.916317553Z" + "started_at": "2026-01-27T18:13:01.326941987Z", + "finished_at": "2026-01-27T18:13:48.963566405Z" }, "t_001_basic_tables": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_001_basic_tables", "lang": "csharp", "golden_published": true, @@ -2332,25 +2331,25 @@ "llm_db": "basics-t-001-basic-tables-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:37:49.611034643Z", - "finished_at": "2026-01-26T15:39:10.630797919Z" + "started_at": "2026-01-27T18:13:01.329786604Z", + "finished_at": "2026-01-27T18:13:48.902862370Z" }, "t_002_scheduled_table": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_002_scheduled_table", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 1, - "llm_output": "using System;\nusing SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"TickTimer\", Scheduled = nameof(Tick), ScheduledAt = nameof(ScheduledAt))]\n public partial struct TickTimer\n {\n [SpacetimeDB.PrimaryKey]\n [SpacetimeDB.AutoInc]\n public ulong ScheduledId;\n public ScheduleAt ScheduledAt;\n }\n\n [SpacetimeDB.Reducer(SpacetimeDB.ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n ctx.Db.TickTimer.Insert(new TickTimer\n {\n ScheduledAt = new ScheduleAt.Interval(TimeSpan.FromMilliseconds(50))\n });\n }\n\n [SpacetimeDB.Reducer]\n public static void Tick(ReducerContext ctx, TickTimer timer)\n {\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"TickTimer\", Scheduled = nameof(Tick), ScheduledAt = nameof(ScheduledAt))]\n public partial struct TickTimer\n {\n [SpacetimeDB.PrimaryKey]\n [SpacetimeDB.AutoInc]\n public ulong ScheduledId;\n public ScheduleAt ScheduledAt;\n }\n\n [SpacetimeDB.Reducer]\n public static void Tick(ReducerContext ctx, TickTimer timer)\n {\n // Optional: log each tick\n Log.Debug(\"Tick executed.\");\n }\n\n [SpacetimeDB.Reducer(SpacetimeDB.ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n // Ensure exactly one scheduled row exists\n foreach (var existing in ctx.Db.TickTimer.Iter())\n {\n ctx.Db.TickTimer.ScheduledId.Delete(existing.ScheduledId);\n }\n\n // 50_000 microseconds = 50 milliseconds\n var interval = TimeSpan.FromTicks(50_000 * 10); // microseconds to ticks (1 us = 10 ticks)\n ctx.Db.TickTimer.Insert(new TickTimer\n {\n ScheduledAt = new ScheduleAt.Interval(interval)\n });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-002-scheduled-table-golden", @@ -2366,25 +2365,25 @@ "llm_db": "basics-t-002-scheduled-table-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:38:55.763350829Z", - "finished_at": "2026-01-26T15:39:35.117748962Z" + "started_at": "2026-01-27T18:14:02.770664089Z", + "finished_at": "2026-01-27T18:14:54.591611981Z" }, "t_003_struct_in_table": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_003_struct_in_table", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 1, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Position\n {\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Entity\")]\n public partial struct Entity\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Position Pos;\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Position\n {\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Entity\", Public = true)]\n public partial struct Entity\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Position Pos;\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-003-struct-in-table-golden", @@ -2400,25 +2399,25 @@ "llm_db": "basics-t-003-struct-in-table-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:37:49.614826701Z", - "finished_at": "2026-01-26T15:38:48.351485744Z" + "started_at": "2026-01-27T18:13:49.082903157Z", + "finished_at": "2026-01-27T18:14:11.064317929Z" }, "t_004_insert": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_004_insert", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 2, "passed_tests": 2, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\")]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void InsertUser(ReducerContext ctx, int id, string name, int age, bool active)\n {\n ctx.Db.User.Insert(new User\n {\n Id = id,\n Name = name,\n Age = age,\n Active = active\n });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void InsertUser(ReducerContext ctx, int id, string name, int age, bool active)\n {\n ctx.Db.User.Insert(new User { Id = id, Name = name, Age = age, Active = active });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-004-insert-golden", @@ -2426,19 +2425,6 @@ "work_dir_golden": "target/llm-runs/basics/t_004_insert/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_004_insert/csharp/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "basics-t-004-insert-golden", - "llm_db": "basics-t-004-insert-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } - }, "data_parity_insert_user": { "pass": true, "partial": 1.0, @@ -2455,23 +2441,36 @@ "llm_out": "Id | Name | Age | Active ----+---------+-----+-------- 1 | \"Alice\" | 30 | true", "query": "SELECT Id, Name, Age, Active FROM User WHERE Id=1", "reducer": "InsertUser", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" + } + }, + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "basics-t-004-insert-golden", + "llm_db": "basics-t-004-insert-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:37:49.624534968Z", - "finished_at": "2026-01-26T15:38:55.763316545Z" + "started_at": "2026-01-27T18:13:01.324244724Z", + "finished_at": "2026-01-27T18:13:36.834221478Z" }, "t_005_update": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_005_update", "lang": "csharp", - "golden_published": true, + "golden_published": false, "model_name": "GPT-5", "total_tests": 3, - "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void UpdateUser(ReducerContext ctx, int id, string name, int age, bool active)\n {\n if (ctx.Db.User.Id.Find(id) is User user)\n {\n user.Name = name;\n user.Age = age;\n user.Active = active;\n ctx.Db.User.Id.Update(user);\n }\n else\n {\n ctx.Db.User.Insert(new User\n {\n Id = id,\n Name = name,\n Age = age,\n Active = active,\n });\n }\n }\n}", + "passed_tests": 0, + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void UpdateUser(ReducerContext ctx, int id, string name, int age, bool active)\n {\n var user = ctx.Db.User.Id.Find(id);\n if (user != null)\n {\n user.Name = name;\n user.Age = age;\n user.Active = active;\n ctx.Db.User.Id.Update(user);\n }\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-005-update-golden", @@ -2479,52 +2478,21 @@ "work_dir_golden": "target/llm-runs/basics/t_005_update/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "basics-t-005-update-golden", - "llm_db": "basics-t-005-update-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } - }, - "data_parity_update_user": { - "pass": true, - "partial": 1.0, - "notes": { - "args": [ - 1, - "Alice2", - 31, - false - ], - "golden_db": "basics-t-005-update-golden", - "golden_out": "Id | Name | Age | Active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", - "llm_db": "basics-t-005-update-gpt-5-llm", - "llm_out": "Id | Name | Age | Active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", - "query": "SELECT Id, Name, Age, Active FROM User WHERE Id=1", - "reducer": "UpdateUser", - "server": "http://127.0.0.1:41793" - } - }, - "seed_users_row": { - "pass": true, - "partial": 1.0, + "publish_error": { + "pass": false, + "partial": 0.0, "notes": { - "sql": "INSERT INTO User(Id, Name, Age, Active) VALUES (1, 'Alice', 30, true)" + "error": "spacetime build (csharp) failed (exit=1)\n--- stderr ---\nError: command [\"dotnet\", \"publish\", \"-c\", \"Release\", \"-v\", \"quiet\"] exited with code 1\n\n--- stdout ---\n/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/Lib.cs(22,18): error CS1061: 'Module.User?' does not contain a definition for 'Name' and no accessible extension method 'Name' accepting a first argument of type 'Module.User?' could be found (are you missing a using directive or an assembly reference?) [/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/StdbModule.csproj]\n/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/Lib.cs(23,18): error CS1061: 'Module.User?' does not contain a definition for 'Age' and no accessible extension method 'Age' accepting a first argument of type 'Module.User?' could be found (are you missing a using directive or an assembly reference?) [/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/StdbModule.csproj]\n/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/Lib.cs(24,18): error CS1061: 'Module.User?' does not contain a definition for 'Active' and no accessible extension method 'Active' accepting a first argument of type 'Module.User?' could be found (are you missing a using directive or an assembly reference?) [/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/StdbModule.csproj]\n/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/Lib.cs(25,35): error CS1503: Argument 1: cannot convert from 'Module.User?' to 'Module.User' [/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/basics/t_005_update/csharp/server/gpt-5/llm/StdbModule.csproj]\n", + "phase": "build_or_publish" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:15.916349271Z", - "finished_at": "2026-01-26T15:39:57.685388275Z" + "started_at": "2026-01-27T18:14:19.781716686Z", + "finished_at": "2026-01-27T18:14:47.198719495Z" }, "t_006_delete": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_006_delete", "lang": "csharp", "golden_published": true, @@ -2548,13 +2516,6 @@ "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=1" } }, - "seed_users_row": { - "pass": true, - "partial": 1.0, - "notes": { - "sql": "INSERT INTO User(Id, Name, Age, Active) VALUES (1, 'Alice', 30, true)" - } - }, "schema_parity": { "pass": true, "partial": 1.0, @@ -2563,25 +2524,32 @@ "llm_db": "basics-t-006-delete-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } + }, + "seed_users_row": { + "pass": true, + "partial": 1.0, + "notes": { + "sql": "INSERT INTO User(Id, Name, Age, Active) VALUES (1, 'Alice', 30, true)" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:51.394765047Z", - "finished_at": "2026-01-26T15:40:20.535132716Z" + "started_at": "2026-01-27T18:13:36.834282188Z", + "finished_at": "2026-01-27T18:14:02.770628023Z" }, "t_007_crud": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_007_crud", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 4, "passed_tests": 4, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void Crud(ReducerContext ctx)\n {\n // insert (Id=1, Name=\"Alice\", Age=30, Active=true)\n ctx.Db.User.Insert(new User { Id = 1, Name = \"Alice\", Age = 30, Active = true });\n\n // insert (Id=2, Name=\"Bob\", Age=22, Active=false)\n ctx.Db.User.Insert(new User { Id = 2, Name = \"Bob\", Age = 22, Active = false });\n\n // update (Id=1, Name=\"Alice2\", Age=31, Active=false)\n if (ctx.Db.User.Id.Find(1) is User user1)\n {\n user1.Name = \"Alice2\";\n user1.Age = 31;\n user1.Active = false;\n ctx.Db.User.Id.Update(user1);\n }\n\n // delete Id=2\n ctx.Db.User.Id.Delete(2);\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\")]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer]\n public static void Crud(ReducerContext ctx)\n {\n ctx.Db.User.Insert(new User { Id = 1, Name = \"Alice\", Age = 30, Active = true });\n ctx.Db.User.Insert(new User { Id = 2, Name = \"Bob\", Age = 22, Active = false });\n\n if (ctx.Db.User.Id.Find(1) is User user1)\n {\n user1.Name = \"Alice2\";\n user1.Age = 31;\n user1.Active = false;\n ctx.Db.User.Id.Update(user1);\n }\n\n ctx.Db.User.Id.Delete(2);\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-007-crud-golden", @@ -2589,26 +2557,13 @@ "work_dir_golden": "target/llm-runs/basics/t_007_crud/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_007_crud/csharp/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "basics-t-007-crud-golden", - "llm_db": "basics-t-007-crud-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } - }, - "crud_total_count_one": { + "crud_row_id2_deleted": { "pass": true, "partial": 1.0, "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM User" + "actual": 0, + "expected": 0, + "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=2" } }, "crud_row_id1_parity": { @@ -2622,32 +2577,45 @@ "llm_out": "Id | Name | Age | Active ----+----------+-----+-------- 1 | \"Alice2\" | 31 | false", "query": "SELECT Id, Name, Age, Active FROM User WHERE Id=1", "reducer": "Crud", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" } }, - "crud_row_id2_deleted": { + "schema_parity": { "pass": true, "partial": 1.0, "notes": { - "actual": 0, - "expected": 0, - "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=2" + "golden_db": "basics-t-007-crud-golden", + "llm_db": "basics-t-007-crud-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true + } + }, + "crud_total_count_one": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM User" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:10.630849129Z", - "finished_at": "2026-01-26T15:39:51.394722435Z" + "started_at": "2026-01-27T18:13:49.079798835Z", + "finished_at": "2026-01-27T18:14:19.781678871Z" }, "t_008_index_lookup": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_008_index_lookup", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\")]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Table(Name = \"Result\")]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n }\n\n [SpacetimeDB.Reducer]\n public static void LookupUserName(ReducerContext ctx, int id)\n {\n if (ctx.Db.User.Id.Find(id) is User user)\n {\n ctx.Db.Result.Id.Delete(id);\n ctx.Db.Result.Insert(new Result\n {\n Id = user.Id,\n Name = user.Name\n });\n }\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\")]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Table(Name = \"Result\")]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n }\n\n [SpacetimeDB.Reducer]\n public static void LookupUserName(ReducerContext ctx, int id)\n {\n if (ctx.Db.User.Id.Find(id) is User user)\n {\n ctx.Db.Result.Insert(new Result\n {\n Id = user.Id,\n Name = user.Name\n });\n }\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-008-index-lookup-golden", @@ -2655,6 +2623,13 @@ "work_dir_golden": "target/llm-runs/basics/t_008_index_lookup/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_008_index_lookup/csharp/server/gpt-5/llm", "scorer_details": { + "seed_user_row": { + "pass": true, + "partial": 1.0, + "notes": { + "sql": "INSERT INTO User(Id, Name, Age, Active) VALUES (1, 'Alice', 30, true)" + } + }, "schema_parity": { "pass": true, "partial": 1.0, @@ -2663,18 +2638,11 @@ "llm_db": "basics-t-008-index-lookup-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } }, - "seed_user_row": { - "pass": true, - "partial": 1.0, - "notes": { - "sql": "INSERT INTO User(Id, Name, Age, Active) VALUES (1, 'Alice', 30, true)" - } - }, "index_lookup_projection_parity": { "pass": true, "partial": 1.0, @@ -2688,23 +2656,23 @@ "llm_out": "Id | Name ----+--------- 1 | \"Alice\"", "query": "SELECT Id, Name FROM Result WHERE Id=1", "reducer": "LookupUserName", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:35.117796353Z", - "finished_at": "2026-01-26T15:40:10.865109962Z" + "started_at": "2026-01-27T18:13:48.963594647Z", + "finished_at": "2026-01-27T18:14:21.844642247Z" }, "t_009_init": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_009_init", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 4, "passed_tests": 4, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [SpacetimeDB.Reducer(ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n ctx.Db.User.Insert(new User { Id = 1, Name = \"Alice\", Age = 30, Active = true });\n ctx.Db.User.Insert(new User { Id = 2, Name = \"Bob\", Age = 22, Active = false });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [PrimaryKey]\n public int Id;\n public string Name;\n public int Age;\n public bool Active;\n }\n\n [Reducer(ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n ctx.Db.User.Insert(new User { Id = 1, Name = \"Alice\", Age = 30, Active = true });\n ctx.Db.User.Insert(new User { Id = 2, Name = \"Bob\", Age = 22, Active = false });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-009-init-golden", @@ -2712,31 +2680,22 @@ "work_dir_golden": "target/llm-runs/basics/t_009_init/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_009_init/csharp/server/gpt-5/llm", "scorer_details": { - "init_total_two": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 2, - "expected": 2, - "sql": "SELECT COUNT(*) AS n FROM User" - } - }, - "init_seed_bob": { + "init_seed_alice": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=2 AND Name='Bob' AND Age=22 AND Active=false" + "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=1 AND Name='Alice' AND Age=30 AND Active=true" } }, - "init_seed_alice": { + "init_seed_bob": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=1 AND Name='Alice' AND Age=30 AND Active=true" + "sql": "SELECT COUNT(*) AS n FROM User WHERE Id=2 AND Name='Bob' AND Age=22 AND Active=false" } }, "schema_parity": { @@ -2747,25 +2706,34 @@ "llm_db": "basics-t-009-init-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } + }, + "init_total_two": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 2, + "expected": 2, + "sql": "SELECT COUNT(*) AS n FROM User" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:33.038774271Z", - "finished_at": "2026-01-26T15:40:09.097960805Z" + "started_at": "2026-01-27T18:13:01.333234853Z", + "finished_at": "2026-01-27T18:13:49.079758277Z" }, "t_010_connect": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_010_connect", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 1, "passed_tests": 1, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Event\")]\n public partial struct Event\n {\n [SpacetimeDB.PrimaryKey]\n [SpacetimeDB.AutoInc]\n public int Id;\n public string Kind;\n }\n\n [SpacetimeDB.Reducer(ReducerKind.ClientConnected)]\n public static void ClientConnected(ReducerContext ctx)\n {\n ctx.Db.Event.Insert(new Event { Id = 0, Kind = \"connected\" });\n }\n\n [SpacetimeDB.Reducer(ReducerKind.ClientDisconnected)]\n public static void ClientDisconnected(ReducerContext ctx)\n {\n ctx.Db.Event.Insert(new Event { Id = 0, Kind = \"disconnected\" });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Event\")]\n public partial struct Event\n {\n [SpacetimeDB.PrimaryKey, SpacetimeDB.AutoInc]\n public int Id;\n public string Kind;\n }\n\n [SpacetimeDB.Reducer(ReducerKind.ClientConnected)]\n public static void ClientConnected(ReducerContext ctx)\n {\n ctx.Db.Event.Insert(new Event { Id = 0, Kind = \"connected\" });\n }\n\n [SpacetimeDB.Reducer(ReducerKind.ClientDisconnected)]\n public static void ClientDisconnected(ReducerContext ctx)\n {\n ctx.Db.Event.Insert(new Event { Id = 0, Kind = \"disconnected\" });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-010-connect-golden", @@ -2781,25 +2749,25 @@ "llm_db": "basics-t-010-connect-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:07.129948444Z", - "finished_at": "2026-01-26T15:39:33.038733830Z" + "started_at": "2026-01-27T18:14:11.064340711Z", + "finished_at": "2026-01-27T18:14:41.798659969Z" }, "t_011_helper_function": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_011_helper_function", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Result\", Public = true)]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public int Sum;\n }\n\n public static int Add(int a, int b)\n {\n return a + b;\n }\n\n [SpacetimeDB.Reducer]\n public static void ComputeSum(ReducerContext ctx, int id, int a, int b)\n {\n ctx.Db.Result.Insert(new Result { Id = id, Sum = Add(a, b) });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Result\", Public = true)]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public int Sum;\n }\n\n public static int Add(int a, int b)\n {\n return a + b;\n }\n\n [SpacetimeDB.Reducer]\n public static void ComputeSum(ReducerContext ctx, int id, int a, int b)\n {\n ctx.Db.Result.Insert(new Result\n {\n Id = id,\n Sum = Add(a, b),\n });\n }\n}", "category": "basics", "route_api_model": "gpt-5", "golden_db": "basics-t-011-helper-function-golden", @@ -2807,17 +2775,13 @@ "work_dir_golden": "target/llm-runs/basics/t_011_helper_function/csharp/server/golden", "work_dir_llm": "target/llm-runs/basics/t_011_helper_function/csharp/server/gpt-5/llm", "scorer_details": { - "schema_parity": { + "helper_func_sum_abs": { "pass": true, "partial": 1.0, "notes": { - "golden_db": "basics-t-011-helper-function-golden", - "llm_db": "basics-t-011-helper-function-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM Result WHERE Id=1 AND Sum=5" } }, "helper_func_sum_parity": { @@ -2835,32 +2799,36 @@ "llm_out": "Id | Sum ----+----- 1 | 5", "query": "SELECT Id, Sum FROM Result WHERE Id=1", "reducer": "ComputeSum", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" } }, - "helper_func_sum_abs": { + "schema_parity": { "pass": true, "partial": 1.0, "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM Result WHERE Id=1 AND Sum=5" + "golden_db": "basics-t-011-helper-function-golden", + "llm_db": "basics-t-011-helper-function-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:37:49.619502537Z", - "finished_at": "2026-01-26T15:39:07.129901962Z" + "started_at": "2026-01-27T18:14:21.844689455Z", + "finished_at": "2026-01-27T18:15:02.464140421Z" }, "t_012_spacetime_product_type": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_012_spacetime_product_type", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Score\n {\n public int Left;\n public int Right;\n }\n\n [SpacetimeDB.Table(Name = \"Result\", Public = true)]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Score Value;\n }\n\n [SpacetimeDB.Reducer]\n public static void SetScore(ReducerContext ctx, int id, int left, int right)\n {\n ctx.Db.Result.Insert(new Result\n {\n Id = id,\n Value = new Score { Left = left, Right = right },\n });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Score\n {\n public int Left;\n public int Right;\n }\n\n [SpacetimeDB.Table(Name = \"Result\", Public = true)]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Score Value;\n }\n\n [SpacetimeDB.Reducer]\n public static void SetScore(ReducerContext ctx, int id, int left, int right)\n {\n ctx.Db.Result.Insert(new Result\n {\n Id = id,\n Value = new Score { Left = left, Right = right }\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-012-spacetime-product-type-golden", @@ -2876,7 +2844,7 @@ "llm_db": "schema-t-012-spacetime-product-type-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } @@ -2896,7 +2864,7 @@ "llm_out": "Id | Value ----+----------------------- 1 | (Left = 2, Right = 3)", "query": "SELECT Id, Value FROM Result WHERE Id=1", "reducer": "SetScore", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" } }, "product_type_row_count": { @@ -2910,18 +2878,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:45.264170257Z", - "finished_at": "2026-01-26T15:41:10.281722543Z" + "started_at": "2026-01-27T18:15:11.555368651Z", + "finished_at": "2026-01-27T18:15:32.002678801Z" }, "t_013_spacetime_sum_type": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_013_spacetime_sum_type", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Circle\n {\n public int Radius;\n }\n\n [SpacetimeDB.Type]\n public partial struct Rectangle\n {\n public int Width;\n public int Height;\n }\n\n [SpacetimeDB.Type]\n public partial record Shape : TaggedEnum<(Circle Circle, Rectangle Rectangle)>\n {\n }\n\n [SpacetimeDB.Table(Name = \"Result\")]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Shape Value;\n }\n\n [SpacetimeDB.Reducer]\n public static void SetCircle(ReducerContext ctx, int id, int radius)\n {\n var existing = ctx.Db.Result.Id.Find(id);\n if (existing != null)\n {\n ctx.Db.Result.Id.Delete(id);\n }\n\n var shape = new Shape.Circle(new Circle { Radius = radius });\n ctx.Db.Result.Insert(new Result\n {\n Id = id,\n Value = shape\n });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Circle\n {\n public int Radius;\n }\n\n [SpacetimeDB.Type]\n public partial struct Rectangle\n {\n public int Width;\n public int Height;\n }\n\n [SpacetimeDB.Type]\n public partial record Shape : TaggedEnum<(Circle Circle, Rectangle Rectangle)> { }\n\n [SpacetimeDB.Table(Name = \"Result\")]\n public partial struct Result\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Shape Value;\n }\n\n [SpacetimeDB.Reducer]\n public static void SetCircle(ReducerContext ctx, int id, int radius)\n {\n ctx.Db.Result.Id.Delete(id);\n ctx.Db.Result.Insert(new Result\n {\n Id = id,\n Value = new Shape.Circle(new Circle { Radius = radius })\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-013-spacetime-sum-type-golden", @@ -2929,6 +2897,15 @@ "work_dir_golden": "target/llm-runs/schema/t_013_spacetime_sum_type/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_013_spacetime_sum_type/csharp/server/gpt-5/llm", "scorer_details": { + "sum_type_row_count": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM Result WHERE Id=1" + } + }, "schema_parity": { "pass": true, "partial": 1.0, @@ -2937,20 +2914,11 @@ "llm_db": "schema-t-013-spacetime-sum-type-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } }, - "sum_type_row_count": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM Result WHERE Id=1" - } - }, "sum_type_row_parity": { "pass": true, "partial": 1.0, @@ -2965,16 +2933,16 @@ "llm_out": "Id | Value ----+-------------------------- 1 | (Circle = (Radius = 10))", "query": "SELECT Id, Value FROM Result WHERE Id=1", "reducer": "SetCircle", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:41:10.281769568Z", - "finished_at": "2026-01-26T15:42:04.928512771Z" + "started_at": "2026-01-27T18:14:47.198739958Z", + "finished_at": "2026-01-27T18:15:39.200593959Z" }, "t_014_elementary_columns": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_014_elementary_columns", "lang": "csharp", "golden_published": true, @@ -2989,11 +2957,24 @@ "work_dir_golden": "target/llm-runs/schema/t_014_elementary_columns/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_014_elementary_columns/csharp/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-014-elementary-columns-golden", + "llm_db": "schema-t-014-elementary-columns-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true + } + }, "elementary_columns_row_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `primitive`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20015ce3d6d6c97d4d92483e7212bbbdc2017cebf40569b0de2758a8c06d8c9/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `primitive`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c20078f1180918074c9a3117ed18f7b273848b4806e70102234f51992b7bacf0/sql)\n", "phase": "sql" } }, @@ -3001,37 +2982,24 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `primitive`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c2002dc67d9e1fd4e2f7be96328c9cadfe424ab52e14114ffe15bf0808847523/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `primitive`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c200bcbd594ca0867ddd335f6db9c0f5bde655c28a57ff5a20debcdeee8ad1e8/sql)\n", "phase": "sql_golden" } - }, - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-014-elementary-columns-golden", - "llm_db": "schema-t-014-elementary-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:59.748869772Z", - "finished_at": "2026-01-26T15:41:30.294907546Z" + "started_at": "2026-01-27T18:15:57.986684773Z", + "finished_at": "2026-01-27T18:16:30.082699509Z" }, "t_015_product_type_columns": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_015_product_type_columns", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Address\n {\n public string Street;\n public int Zip;\n }\n\n [SpacetimeDB.Type]\n public partial struct Position\n {\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Profile\", Public = true)]\n public partial struct Profile\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Address Home;\n public Address Work;\n public Position Pos;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n foreach (var row in ctx.Db.Profile.Iter())\n {\n ctx.Db.Profile.Id.Delete(row.Id);\n }\n\n var home = new Address { Street = \"1 Main\", Zip = 11111 };\n var work = new Address { Street = \"2 Broad\", Zip = 22222 };\n var pos = new Position { X = 7, Y = 9 };\n\n ctx.Db.Profile.Insert(new Profile\n {\n Id = 1,\n Home = home,\n Work = work,\n Pos = pos\n });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Address\n {\n public string Street;\n public int Zip;\n }\n\n [SpacetimeDB.Type]\n public partial struct Position\n {\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Profile\", Public = true)]\n public partial struct Profile\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Address Home;\n public Address Work;\n public Position Pos;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.Profile.Insert(new Profile\n {\n Id = 1,\n Home = new Address { Street = \"1 Main\", Zip = 11111 },\n Work = new Address { Street = \"2 Broad\", Zip = 22222 },\n Pos = new Position { X = 7, Y = 9 }\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-015-product-type-columns-golden", @@ -3048,47 +3016,47 @@ "sql": "SELECT COUNT(*) AS n FROM Profile WHERE Id=1" } }, - "schema_parity": { + "product_type_columns_row_parity": { "pass": true, "partial": 1.0, "notes": { + "args": [], "golden_db": "schema-t-015-product-type-columns-golden", + "golden_out": "Id | Home | Work | Pos ----+----------------------------------+-----------------------------------+---------------- 1 | (Street = \"1 Main\", Zip = 11111) | (Street = \"2 Broad\", Zip = 22222) | (X = 7, Y = 9)", "llm_db": "schema-t-015-product-type-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true + "llm_out": "Id | Home | Work | Pos ----+----------------------------------+-----------------------------------+---------------- 1 | (Street = \"1 Main\", Zip = 11111) | (Street = \"2 Broad\", Zip = 22222) | (X = 7, Y = 9)", + "query": "SELECT Id, Home, Work, Pos FROM Profile WHERE Id=1", + "reducer": "Seed", + "server": "http://127.0.0.1:34963" } }, - "product_type_columns_row_parity": { + "schema_parity": { "pass": true, "partial": 1.0, "notes": { - "args": [], "golden_db": "schema-t-015-product-type-columns-golden", - "golden_out": "Id | Home | Work | Pos ----+----------------------------------+-----------------------------------+---------------- 1 | (Street = \"1 Main\", Zip = 11111) | (Street = \"2 Broad\", Zip = 22222) | (X = 7, Y = 9)", "llm_db": "schema-t-015-product-type-columns-gpt-5-llm", - "llm_out": "Id | Home | Work | Pos ----+----------------------------------+-----------------------------------+---------------- 1 | (Street = \"1 Main\", Zip = 11111) | (Street = \"2 Broad\", Zip = 22222) | (X = 7, Y = 9)", - "query": "SELECT Id, Home, Work, Pos FROM Profile WHERE Id=1", - "reducer": "Seed", - "server": "http://127.0.0.1:41793" + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true } } }, "vendor": "openai", - "started_at": "2026-01-26T15:41:10.284990504Z", - "finished_at": "2026-01-26T15:41:51.914201320Z" + "started_at": "2026-01-27T18:14:41.798684016Z", + "finished_at": "2026-01-27T18:15:11.555329053Z" }, "t_016_sum_type_columns": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_016_sum_type_columns", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 1, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Circle\n {\n public int Radius;\n }\n\n [SpacetimeDB.Type]\n public partial struct Rectangle\n {\n public int Width;\n public int Height;\n }\n\n [SpacetimeDB.Type]\n public partial record Shape : TaggedEnum<(Circle Circle, Rectangle Rectangle)> { }\n\n [SpacetimeDB.Table(Name = \"Drawing\")]\n public partial struct Drawing\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Shape A;\n public Shape B;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n var a = new Shape.Circle(new Circle { Radius = 10 });\n var b = new Shape.Rectangle(new Rectangle { Width = 4, Height = 6 });\n ctx.Db.Drawing.Insert(new Drawing\n {\n Id = 1,\n A = a,\n B = b\n });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Type]\n public partial struct Circle\n {\n public int Radius;\n }\n\n [SpacetimeDB.Type]\n public partial struct Rectangle\n {\n public int Width;\n public int Height;\n }\n\n [SpacetimeDB.Type]\n public partial record Shape : TaggedEnum<(\n Circle Circle,\n Rectangle Rectangle\n )> { }\n\n [SpacetimeDB.Table(Name = \"Drawing\")]\n public partial struct Drawing\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public Shape A;\n public Shape B;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.Drawing.Insert(new Drawing\n {\n Id = 1,\n A = new Shape.Circle(new Circle { Radius = 10 }),\n B = new Shape.Rectangle(new Rectangle { Width = 4, Height = 6 }),\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-016-sum-type-columns-golden", @@ -3096,24 +3064,11 @@ "work_dir_golden": "target/llm-runs/schema/t_016_sum_type_columns/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_016_sum_type_columns/csharp/server/gpt-5/llm", "scorer_details": { - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-016-sum-type-columns-golden", - "llm_db": "schema-t-016-sum-type-columns-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } - }, "sum_type_columns_row_parity": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c2007921ce30371e511ce337c700f912b2dc1842029ec51095bcee487a1fb5a2/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c200f085eb641ff381b843b435b95a9f5e9650ae50839b15a06379b1f5797810/sql)\n", "phase": "sql_golden" } }, @@ -3121,24 +3076,37 @@ "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20008b060dbbbe2ff941855e14b04bb0c88b907fca414a2d82874c574b1cf60/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `drawings`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c2002c4bd8646de89d47fd60e1a828fec69111f644d73e8f2851809cb502030b/sql)\n", "phase": "sql" } + }, + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-016-sum-type-columns-golden", + "llm_db": "schema-t-016-sum-type-columns-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:41:12.893136011Z", - "finished_at": "2026-01-26T15:42:12.178253367Z" + "started_at": "2026-01-27T18:14:54.591636829Z", + "finished_at": "2026-01-27T18:15:57.986598780Z" }, "t_017_scheduled_columns": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_017_scheduled_columns", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 2, "passed_tests": 1, - "llm_output": "using System;\nusing SpacetimeDB;\n\npublic static partial class Module\n{\n [Table(Name = \"TickTimer\", Scheduled = nameof(Tick), ScheduledAt = nameof(ScheduledAt))]\n public partial struct TickTimer\n {\n [PrimaryKey, AutoInc]\n public ulong ScheduledId;\n public ScheduleAt ScheduledAt;\n }\n\n [Reducer(ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n ctx.Db.TickTimer.Insert(new TickTimer\n {\n ScheduledAt = new ScheduleAt.Interval(TimeSpan.FromMicroseconds(50_000))\n });\n }\n\n [Reducer]\n public static void Tick(ReducerContext ctx, TickTimer timer)\n {\n }\n}", + "llm_output": "using System;\nusing SpacetimeDB;\n\npublic static partial class Module\n{\n [Table(Name = \"TickTimer\", Scheduled = nameof(Tick), ScheduledAt = nameof(ScheduledAt))]\n public partial struct TickTimer\n {\n [PrimaryKey, AutoInc]\n public ulong ScheduledId;\n public ScheduleAt ScheduledAt;\n }\n\n [Reducer]\n public static void Tick(ReducerContext ctx, TickTimer _timer)\n {\n }\n\n [Reducer(ReducerKind.Init)]\n public static void Init(ReducerContext ctx)\n {\n ctx.Db.TickTimer.Insert(new TickTimer\n {\n ScheduledAt = new ScheduleAt.Interval(TimeSpan.FromMilliseconds(50))\n });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-017-scheduled-columns-golden", @@ -3146,14 +3114,6 @@ "work_dir_golden": "target/llm-runs/schema/t_017_scheduled_columns/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_017_scheduled_columns/csharp/server/gpt-5/llm", "scorer_details": { - "scheduled_seeded_one_row": { - "pass": false, - "partial": 0.0, - "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `tick_timer`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20077e1b6738482c8dd0108623bedd0c70f80fb40fb09fda1f12bbe2c6972bf/sql)\n", - "phase": "sql" - } - }, "schema_parity": { "pass": true, "partial": 1.0, @@ -3162,25 +3122,33 @@ "llm_db": "schema-t-017-scheduled-columns-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } + }, + "scheduled_seeded_one_row": { + "pass": false, + "partial": 0.0, + "notes": { + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `tick_timer`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c200b2f85d90994f4e48e46fa521c1e13dbf54a71c32314298b1161aa2cabef4/sql)\n", + "phase": "sql" + } } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:27.963020126Z", - "finished_at": "2026-01-26T15:41:12.893111985Z" + "started_at": "2026-01-27T18:16:09.489675146Z", + "finished_at": "2026-01-27T18:16:45.722452251Z" }, "t_018_constraints": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_018_constraints", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 3, "passed_tests": 3, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Account\", Public = true)]\n [SpacetimeDB.Index.BTree(Name = \"by_name\", Columns = new[] { \"Name\" })]\n public partial struct Account\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n [SpacetimeDB.Unique]\n public string Email;\n public string Name;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n foreach (var row in ctx.Db.Account.Iter())\n {\n ctx.Db.Account.Id.Delete(row.Id);\n }\n\n ctx.Db.Account.Insert(new Account { Id = 1, Email = \"a@example.com\", Name = \"Alice\" });\n ctx.Db.Account.Insert(new Account { Id = 2, Email = \"b@example.com\", Name = \"Bob\" });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Account\", Public = true)]\n [SpacetimeDB.Index.BTree(Name = \"by_name\", Columns = new[] { \"Name\" })]\n public partial struct Account\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n [SpacetimeDB.Unique]\n public string Email;\n public string Name;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.Account.Insert(new Account { Id = 1, Email = \"a@example.com\", Name = \"Alice\" });\n ctx.Db.Account.Insert(new Account { Id = 2, Email = \"b@example.com\", Name = \"Bob\" });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-018-constraints-golden", @@ -3196,20 +3164,11 @@ "llm_db": "schema-t-018-constraints-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } }, - "constraints_seed_two_rows": { - "pass": true, - "partial": 1.0, - "notes": { - "actual": 1, - "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM Account WHERE Id=2" - } - }, "constraints_row_parity_after_seed": { "pass": true, "partial": 1.0, @@ -3221,23 +3180,32 @@ "llm_out": "Id | Email | Name ----+-----------------+--------- 1 | \"a@example.com\" | \"Alice\"", "query": "SELECT Id, Email, Name FROM Account WHERE Id=1", "reducer": "Seed", - "server": "http://127.0.0.1:41793" + "server": "http://127.0.0.1:34963" + } + }, + "constraints_seed_two_rows": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM Account WHERE Id=2" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:10.865255262Z", - "finished_at": "2026-01-26T15:40:59.748828990Z" + "started_at": "2026-01-27T18:15:39.200646074Z", + "finished_at": "2026-01-27T18:16:09.489640227Z" }, "t_019_many_to_many": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_019_many_to_many", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 5, "passed_tests": 5, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\", Public = true)]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int UserId;\n public string Name;\n }\n\n [SpacetimeDB.Table(Name = \"Group\", Public = true)]\n public partial struct Group\n {\n [SpacetimeDB.PrimaryKey]\n public int GroupId;\n public string Title;\n }\n\n [SpacetimeDB.Table(Name = \"Membership\", Public = true)]\n [SpacetimeDB.Index.BTree(Name = \"by_user\", Columns = new[] { \"UserId\" })]\n [SpacetimeDB.Index.BTree(Name = \"by_group\", Columns = new[] { \"GroupId\" })]\n public partial struct Membership\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public int UserId;\n public int GroupId;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.User.Insert(new User { UserId = 1, Name = \"Alice\" });\n ctx.Db.User.Insert(new User { UserId = 2, Name = \"Bob\" });\n\n ctx.Db.Group.Insert(new Group { GroupId = 10, Title = \"Admin\" });\n ctx.Db.Group.Insert(new Group { GroupId = 20, Title = \"Dev\" });\n\n ctx.Db.Membership.Insert(new Membership { Id = 1, UserId = 1, GroupId = 10 });\n ctx.Db.Membership.Insert(new Membership { Id = 2, UserId = 1, GroupId = 20 });\n ctx.Db.Membership.Insert(new Membership { Id = 3, UserId = 2, GroupId = 20 });\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"User\")]\n public partial struct User\n {\n [SpacetimeDB.PrimaryKey]\n public int UserId;\n public string Name;\n }\n\n [SpacetimeDB.Table(Name = \"Group\")]\n public partial struct Group\n {\n [SpacetimeDB.PrimaryKey]\n public int GroupId;\n public string Title;\n }\n\n [SpacetimeDB.Table(Name = \"Membership\")]\n [SpacetimeDB.Index.BTree(Name = \"by_user\", Columns = new[] { \"UserId\" })]\n [SpacetimeDB.Index.BTree(Name = \"by_group\", Columns = new[] { \"GroupId\" })]\n public partial struct Membership\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public int UserId;\n public int GroupId;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.User.Insert(new User { UserId = 1, Name = \"Alice\" });\n ctx.Db.User.Insert(new User { UserId = 2, Name = \"Bob\" });\n\n ctx.Db.Group.Insert(new Group { GroupId = 10, Title = \"Admin\" });\n ctx.Db.Group.Insert(new Group { GroupId = 20, Title = \"Dev\" });\n\n ctx.Db.Membership.Insert(new Membership { Id = 1, UserId = 1, GroupId = 10 });\n ctx.Db.Membership.Insert(new Membership { Id = 2, UserId = 1, GroupId = 20 });\n ctx.Db.Membership.Insert(new Membership { Id = 3, UserId = 2, GroupId = 20 });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-019-many-to-many-golden", @@ -3253,18 +3221,18 @@ "llm_db": "schema-t-019-many-to-many-gpt-5-llm", "reducers_diff": null, "reducers_equal": true, - "server": "http://127.0.0.1:41793", + "server": "http://127.0.0.1:34963", "tables_diff": null, "tables_equal": true } }, - "m2m_has_1_20": { + "m2m_has_1_10": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM Membership WHERE UserId=1 AND GroupId=20" + "sql": "SELECT COUNT(*) AS n FROM Membership WHERE UserId=1 AND GroupId=10" } }, "memberships_three_rows": { @@ -3276,13 +3244,13 @@ "sql": "SELECT COUNT(*) AS n FROM Membership" } }, - "m2m_has_1_10": { + "m2m_has_1_20": { "pass": true, "partial": 1.0, "notes": { "actual": 1, "expected": 1, - "sql": "SELECT COUNT(*) AS n FROM Membership WHERE UserId=1 AND GroupId=10" + "sql": "SELECT COUNT(*) AS n FROM Membership WHERE UserId=1 AND GroupId=20" } }, "m2m_has_2_20": { @@ -3296,18 +3264,18 @@ } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:20.535170845Z", - "finished_at": "2026-01-26T15:41:10.109404307Z" + "started_at": "2026-01-27T18:15:32.002712189Z", + "finished_at": "2026-01-27T18:16:18.095005493Z" }, "t_020_ecs": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_020_ecs", "lang": "csharp", "golden_published": true, "model_name": "GPT-5", "total_tests": 5, "passed_tests": 1, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Entity\")]\n public partial struct Entity\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n }\n\n [SpacetimeDB.Table(Name = \"Position\")]\n public partial struct Position\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Velocity\")]\n public partial struct Velocity\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int VX;\n public int VY;\n }\n\n [SpacetimeDB.Table(Name = \"NextPosition\")]\n public partial struct NextPosition\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n // Insert entities\n ctx.Db.Entity.Insert(new Entity { Id = 1 });\n ctx.Db.Entity.Insert(new Entity { Id = 2 });\n\n // Insert positions\n ctx.Db.Position.Insert(new Position { EntityId = 1, X = 0, Y = 0 });\n ctx.Db.Position.Insert(new Position { EntityId = 2, X = 10, Y = 0 });\n\n // Insert velocities\n ctx.Db.Velocity.Insert(new Velocity { EntityId = 1, VX = 1, VY = 0 });\n ctx.Db.Velocity.Insert(new Velocity { EntityId = 2, VX = -2, VY = 3 });\n }\n\n [SpacetimeDB.Reducer]\n public static void Step(ReducerContext ctx)\n {\n foreach (var pos in ctx.Db.Position.Iter())\n {\n if (ctx.Db.Velocity.EntityId.Find(pos.EntityId) is Velocity vel)\n {\n int nx = pos.X + vel.VX;\n int ny = pos.Y + vel.VY;\n\n if (ctx.Db.NextPosition.EntityId.Find(pos.EntityId) is NextPosition np)\n {\n np.X = nx;\n np.Y = ny;\n ctx.Db.NextPosition.EntityId.Update(np);\n }\n else\n {\n ctx.Db.NextPosition.Insert(new NextPosition\n {\n EntityId = pos.EntityId,\n X = nx,\n Y = ny\n });\n }\n }\n }\n }\n}", + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Entity\")]\n public partial struct Entity\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n }\n\n [SpacetimeDB.Table(Name = \"Position\")]\n public partial struct Position\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Table(Name = \"Velocity\")]\n public partial struct Velocity\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int VX;\n public int VY;\n }\n\n [SpacetimeDB.Table(Name = \"NextPosition\")]\n public partial struct NextPosition\n {\n [SpacetimeDB.PrimaryKey]\n public int EntityId;\n public int X;\n public int Y;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n UpsertEntity(ctx, 1);\n UpsertPosition(ctx, new Position { EntityId = 1, X = 0, Y = 0 });\n UpsertVelocity(ctx, new Velocity { EntityId = 1, VX = 1, VY = 0 });\n\n UpsertEntity(ctx, 2);\n UpsertPosition(ctx, new Position { EntityId = 2, X = 10, Y = 0 });\n UpsertVelocity(ctx, new Velocity { EntityId = 2, VX = -2, VY = 3 });\n }\n\n [SpacetimeDB.Reducer]\n public static void Step(ReducerContext ctx)\n {\n foreach (var pos in ctx.Db.Position.Iter())\n {\n var velOpt = ctx.Db.Velocity.EntityId.Find(pos.EntityId);\n if (velOpt.HasValue)\n {\n var vel = velOpt.Value;\n var next = new NextPosition\n {\n EntityId = pos.EntityId,\n X = pos.X + vel.VX,\n Y = pos.Y + vel.VY\n };\n UpsertNextPosition(ctx, next);\n }\n }\n }\n\n private static void UpsertEntity(ReducerContext ctx, int id)\n {\n var existing = ctx.Db.Entity.Id.Find(id);\n if (!existing.HasValue)\n {\n ctx.Db.Entity.Insert(new Entity { Id = id });\n }\n }\n\n private static void UpsertPosition(ReducerContext ctx, Position value)\n {\n var existing = ctx.Db.Position.EntityId.Find(value.EntityId);\n if (existing.HasValue)\n {\n var row = existing.Value;\n row.X = value.X;\n row.Y = value.Y;\n ctx.Db.Position.EntityId.Update(row);\n }\n else\n {\n ctx.Db.Position.Insert(value);\n }\n }\n\n private static void UpsertVelocity(ReducerContext ctx, Velocity value)\n {\n var existing = ctx.Db.Velocity.EntityId.Find(value.EntityId);\n if (existing.HasValue)\n {\n var row = existing.Value;\n row.VX = value.VX;\n row.VY = value.VY;\n ctx.Db.Velocity.EntityId.Update(row);\n }\n else\n {\n ctx.Db.Velocity.Insert(value);\n }\n }\n\n private static void UpsertNextPosition(ReducerContext ctx, NextPosition value)\n {\n var existing = ctx.Db.NextPosition.EntityId.Find(value.EntityId);\n if (existing.HasValue)\n {\n var row = existing.Value;\n row.X = value.X;\n row.Y = value.Y;\n ctx.Db.NextPosition.EntityId.Update(row);\n }\n else\n {\n ctx.Db.NextPosition.Insert(value);\n }\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-020-ecs-golden", @@ -3315,65 +3283,65 @@ "work_dir_golden": "target/llm-runs/schema/t_020_ecs/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_020_ecs/csharp/server/gpt-5/llm", "scorer_details": { + "schema_parity": { + "pass": true, + "partial": 1.0, + "notes": { + "golden_db": "schema-t-020-ecs-golden", + "llm_db": "schema-t-020-ecs-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true + } + }, "ecs_next_pos_entity1": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20044bdf120effd67afb7dfb8cbe79b0556f0430ae82eb89fa8b87a5ace8738/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c2009b0e06f95a7d19f6511fee5d7e870b9dd585f9edcb84b62a336465ad26b6/sql)\n", "phase": "sql" } }, - "ecs_next_pos_entity2": { + "ecs_seed_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20044bdf120effd67afb7dfb8cbe79b0556f0430ae82eb89fa8b87a5ace8738/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c2009b0e06f95a7d19f6511fee5d7e870b9dd585f9edcb84b62a336465ad26b6/sql)\n", "phase": "sql" } }, - "ecs_step_next_positions_count": { + "ecs_next_pos_entity2": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20044bdf120effd67afb7dfb8cbe79b0556f0430ae82eb89fa8b87a5ace8738/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c2009b0e06f95a7d19f6511fee5d7e870b9dd585f9edcb84b62a336465ad26b6/sql)\n", "phase": "sql" } }, - "schema_parity": { - "pass": true, - "partial": 1.0, - "notes": { - "golden_db": "schema-t-020-ecs-golden", - "llm_db": "schema-t-020-ecs-gpt-5-llm", - "reducers_diff": null, - "reducers_equal": true, - "server": "http://127.0.0.1:41793", - "tables_diff": null, - "tables_equal": true - } - }, - "ecs_seed_positions_count": { + "ecs_step_next_positions_count": { "pass": false, "partial": 0.0, "notes": { - "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:41793/v1/database/c20044bdf120effd67afb7dfb8cbe79b0556f0430ae82eb89fa8b87a5ace8738/sql)\n", + "error": "spacetime sql failed:\nWARNING: This command is UNSTABLE and subject to breaking changes.\n\nError: no such table: `next_positions`. If the table exists, it may be marked private.\n\nCaused by:\n HTTP status client error (400 Bad Request) for url (http://127.0.0.1:34963/v1/database/c2009b0e06f95a7d19f6511fee5d7e870b9dd585f9edcb84b62a336465ad26b6/sql)\n", "phase": "sql" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:39:57.685434816Z", - "finished_at": "2026-01-26T15:40:45.264108157Z" + "started_at": "2026-01-27T18:15:29.790318200Z", + "finished_at": "2026-01-27T18:16:22.101262343Z" }, "t_021_multi_column_index": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "task": "t_021_multi_column_index", "lang": "csharp", - "golden_published": false, + "golden_published": true, "model_name": "GPT-5", "total_tests": 4, - "passed_tests": 0, - "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [Table(Name = \"Log\", Public = true)]\n [Index.BTree(Name = \"by_user_day\", Columns = new[] { \"UserId\", \"Day\" })]\n public partial struct Log\n {\n [PrimaryKey]\n public int Id;\n public int UserId;\n public int Day;\n public string Message;\n }\n\n [Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.Log.Insert(new Log { Id = 1, UserId = 7, Day = 1, Message = \"a\" });\n ctx.Db.Log.Insert(new Log { Id = 2, UserId = 7, Day = 2, Message = \"b\" });\n ctx.Db.Log.Insert(new Log { Id = 3, UserId = 9, Day = 1, Message = \"c\" });\n }\n}", + "passed_tests": 4, + "llm_output": "using SpacetimeDB;\n\npublic static partial class Module\n{\n [SpacetimeDB.Table(Name = \"Log\", Public = true)]\n [SpacetimeDB.Index.BTree(Name = \"by_user_day\", Columns = new[] { \"UserId\", \"Day\" })]\n public partial struct Log\n {\n [SpacetimeDB.PrimaryKey]\n public int Id;\n public int UserId;\n public int Day;\n public string Message;\n }\n\n [SpacetimeDB.Reducer]\n public static void Seed(ReducerContext ctx)\n {\n ctx.Db.Log.Insert(new Log { Id = 1, UserId = 7, Day = 1, Message = \"a\" });\n ctx.Db.Log.Insert(new Log { Id = 2, UserId = 7, Day = 2, Message = \"b\" });\n ctx.Db.Log.Insert(new Log { Id = 3, UserId = 9, Day = 1, Message = \"c\" });\n }\n}", "category": "schema", "route_api_model": "gpt-5", "golden_db": "schema-t-021-multi-column-index-golden", @@ -3381,18 +3349,50 @@ "work_dir_golden": "target/llm-runs/schema/t_021_multi_column_index/csharp/server/golden", "work_dir_llm": "target/llm-runs/schema/t_021_multi_column_index/csharp/server/gpt-5/llm", "scorer_details": { - "publish_error": { - "pass": false, - "partial": 0.0, + "schema_parity": { + "pass": true, + "partial": 1.0, "notes": { - "error": "spacetime build (csharp) failed (exit=1)\n--- stderr ---\nError: command [\"dotnet\", \"publish\", \"-c\", \"Release\", \"-v\", \"quiet\"] exited with code 1\n\n--- stdout ---\n/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_021_multi_column_index/csharp/server/gpt-5/llm/Lib.cs(7,6): error CS0104: 'Index' is an ambiguous reference between 'SpacetimeDB.Index' and 'System.Index' [/__w/SpacetimeDB/SpacetimeDB/target/llm-runs/schema/t_021_multi_column_index/csharp/server/gpt-5/llm/StdbModule.csproj]\n", - "phase": "build_or_publish" + "golden_db": "schema-t-021-multi-column-index-golden", + "llm_db": "schema-t-021-multi-column-index-gpt-5-llm", + "reducers_diff": null, + "reducers_equal": true, + "server": "http://127.0.0.1:34963", + "tables_diff": null, + "tables_equal": true + } + }, + "mcindex_lookup_u7_d1": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM Log WHERE UserId=7 AND Day=1" + } + }, + "mcindex_seed_count": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 3, + "expected": 3, + "sql": "SELECT COUNT(*) AS n FROM Log" + } + }, + "mcindex_lookup_u7_d2": { + "pass": true, + "partial": 1.0, + "notes": { + "actual": 1, + "expected": 1, + "sql": "SELECT COUNT(*) AS n FROM Log WHERE UserId=7 AND Day=2" } } }, "vendor": "openai", - "started_at": "2026-01-26T15:40:09.098001666Z", - "finished_at": "2026-01-26T15:40:27.962996760Z" + "started_at": "2026-01-27T18:15:02.464172483Z", + "finished_at": "2026-01-27T18:15:29.790279429Z" } } } diff --git a/docs/llms/docs-benchmark-summary.json b/docs/llms/docs-benchmark-summary.json index 4e4aa267103..e6d641dae98 100644 --- a/docs/llms/docs-benchmark-summary.json +++ b/docs/llms/docs-benchmark-summary.json @@ -1,36 +1,36 @@ { "version": 1, - "generated_at": "2026-01-26T15:42:12.228Z", + "generated_at": "2026-01-27T18:16:45.763Z", "by_language": { "csharp": { "modes": { "docs": { - "hash": "b22d989c00281f7f3e8912a08e8322746fa6cba271164f4cad2be9954b7f6ec9", + "hash": "5b9b127244375715b32e5121c629717a31b6c95f04566f66bc0492f84ff6459b", "models": { "GPT-5": { "categories": { "basics": { "tasks": 12, "total_tests": 27, - "passed_tests": 27, - "pass_pct": 100.0, - "task_pass_equiv": 12.0, - "task_pass_pct": 100.0 + "passed_tests": 24, + "pass_pct": 88.888885, + "task_pass_equiv": 11.0, + "task_pass_pct": 91.66667 }, "schema": { "tasks": 10, "total_tests": 34, - "passed_tests": 21, - "pass_pct": 61.764706, - "task_pass_equiv": 6.3666663, - "task_pass_pct": 63.666664 + "passed_tests": 25, + "pass_pct": 73.52941, + "task_pass_equiv": 7.3666663, + "task_pass_pct": 73.666664 } }, "totals": { "tasks": 22, "total_tests": 61, - "passed_tests": 48, - "pass_pct": 78.68852, + "passed_tests": 49, + "pass_pct": 80.327866, "task_pass_equiv": 18.366667, "task_pass_pct": 83.48485 } @@ -42,7 +42,7 @@ "rust": { "modes": { "docs": { - "hash": "f09c4aa335b00d6e1e55573dfbc47f13accb0a1c631339ddb4dc8a6237c520cd", + "hash": "4a962a391761c2c914b169995c6f6bd46732a1c8f844b124cfc73086b79ee2b6", "models": { "GPT-5": { "categories": { @@ -84,25 +84,25 @@ "total_tests": 27, "passed_tests": 20, "pass_pct": 74.07407, - "task_pass_equiv": 9.166667, - "task_pass_pct": 76.38889 + "task_pass_equiv": 8.25, + "task_pass_pct": 68.75 }, "schema": { "tasks": 10, "total_tests": 34, - "passed_tests": 26, - "pass_pct": 76.47059, - "task_pass_equiv": 7.5333333, - "task_pass_pct": 75.333336 + "passed_tests": 23, + "pass_pct": 67.64706, + "task_pass_equiv": 6.5333333, + "task_pass_pct": 65.33333 } }, "totals": { "tasks": 22, "total_tests": 61, - "passed_tests": 46, - "pass_pct": 75.409836, - "task_pass_equiv": 16.7, - "task_pass_pct": 75.909096 + "passed_tests": 43, + "pass_pct": 70.491806, + "task_pass_equiv": 14.783333, + "task_pass_pct": 67.19697 } } }