From 1b306c94a64ecb4c1e0e2ed0ea2806028d2699b3 Mon Sep 17 00:00:00 2001
From: shijiashuai <shijiashuai@bgi.com>
Date: Fri, 22 May 2026 10:29:25 +0800
Subject: [PATCH 1/3] refactor: deepen inference model loading

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 openspec/specs/api/spec.md           |   8 +-
 openspec/specs/architecture/spec.md  |   4 +
 openspec/specs/product/spec.md       |   4 +
 openspec/specs/testing/spec.md       |  31 ++++
 src/core/GPUContext.ts               |  51 +++----
 src/engine/InferenceEngine.ts        | 129 +++++++++-------
 src/engine/ModelCompiler.ts          |  78 ++++++++++
 src/operators/AddOperator.ts         |  19 +--
 src/operators/convValidation.ts      |   5 +-
 src/operators/tensorContracts.ts     |  48 ++++++
 tests/core/GPUContext.test.ts        |  35 +++++
 tests/engine/InferenceEngine.test.ts | 211 ++++++++++++++++++++++++---
 tests/operators/AddOperator.test.ts  |  10 ++
 13 files changed, 512 insertions(+), 121 deletions(-)
 create mode 100644 src/engine/ModelCompiler.ts
 create mode 100644 src/operators/tensorContracts.ts

diff --git a/openspec/specs/api/spec.md b/openspec/specs/api/spec.md
index 357b8a0..24e6f5a 100644
--- a/openspec/specs/api/spec.md
+++ b/openspec/specs/api/spec.md
@@ -568,7 +568,7 @@ const output = await add.forward([inputA, inputB]);
 | Input | Shape | Description |
 |-------|-------|-------------|
 | inputA | any | First input tensor |
-| inputB | any | Second input tensor (must match inputA shape exactly) |
+| inputB | any | Second input tensor (must match inputA shape and layout exactly) |
 
 ### Output
 
@@ -669,6 +669,12 @@ Load a model definition.
 **Parameters:**
 - `modelDef`: Model definition with layers and weights
 
+**Throws:**
+- `Error`: If the model has no layers
+- `Error`: If a layer type is unknown
+- `Error`: If a layer input cannot be resolved from `input`, prior layers, or weights
+- `Error`: If a layer name duplicates an earlier layer, `input`, or a weight name
+
 #### tensorFromArray
 
 ```typescript
diff --git a/openspec/specs/architecture/spec.md b/openspec/specs/architecture/spec.md
index 2f0fd75..78d966e 100644
--- a/openspec/specs/architecture/spec.md
+++ b/openspec/specs/architecture/spec.md
@@ -99,6 +99,10 @@ The system SHALL provide high-level inference orchestration.
 - **WHEN** model is loaded
 - **THEN** operators are mapped by type name for dynamic dispatch
 
+#### Scenario: Model graph compilation
+- **WHEN** model is loaded
+- **THEN** layer names, operator types, and tensor references are validated before inference begins
+
 #### Scenario: Intermediate cleanup
 - **WHEN** inference completes
 - **THEN** intermediate tensors are destroyed to free GPU memory
diff --git a/openspec/specs/product/spec.md b/openspec/specs/product/spec.md
index d68d3e8..bcb8f97 100644
--- a/openspec/specs/product/spec.md
+++ b/openspec/specs/product/spec.md
@@ -227,6 +227,10 @@ The system SHALL provide model loading and end-to-end inference.
 - **WHEN** loading model definition with layers and weights
 - **THEN** weights are allocated as GPU tensors
 
+#### Scenario: Reject invalid graph definitions
+- **WHEN** model definition contains duplicate layer names, unknown operators, or unresolved tensor references
+- **THEN** loading fails before inference starts
+
 #### Scenario: Run inference
 - **WHEN** calling infer() with input tensor
 - **THEN** output tensor is returned with correct shape
diff --git a/openspec/specs/testing/spec.md b/openspec/specs/testing/spec.md
index 02cd102..960dd55 100644
--- a/openspec/specs/testing/spec.md
+++ b/openspec/specs/testing/spec.md
@@ -191,6 +191,27 @@ Feature: Dense Operator
 
 ---
 
+## Feature: Add Operator
+
+```gherkin
+Feature: Add Operator
+  As a deep learning developer
+  I want to add residual tensors element-wise
+  So that I can express skip connections safely
+```
+
+### Scenario: Add basic execution
+
+- **WHEN** I execute Add with two tensors of the same shape and layout
+- **THEN** the output shape should equal the input shape
+
+### Scenario: Add rejects layout mismatch
+
+- **WHEN** I execute Add with tensors that share the same shape but use different layouts
+- **THEN** it should throw an error "same layout"
+
+---
+
 ## Feature: Flatten Operator
 
 ```gherkin
@@ -254,6 +275,16 @@ Feature: Inference Engine
 - **WHEN** I load a model with layers and weights into initialized InferenceEngine
 - **THEN** the weights should be allocated as GPU tensors
 
+### Scenario: Reject invalid model graph at load time
+
+- **WHEN** I load a model with duplicate layer names, unknown operator types, or missing tensor references
+- **THEN** `loadModel()` should throw before inference starts
+
+### Scenario: Preserve previous model on failed reload
+
+- **WHEN** I load a valid model and then attempt to load an invalid replacement model
+- **THEN** the previously loaded model should remain executable
+
 ### Scenario: Run inference
 
 - **WHEN** I run inference on loaded model with correct input tensor shape
diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts
index 5c6ace0..622547e 100644
--- a/src/core/GPUContext.ts
+++ b/src/core/GPUContext.ts
@@ -1,4 +1,4 @@
-import { WebGPUNotSupportedError, DeviceInitializationError, ShaderCompilationError } from './errors';
+import { WebGPUNotSupportedError, DeviceInitializationError } from './errors';
 
 export interface GPUContextConfig {
   powerPreference?: 'low-power' | 'high-performance';
@@ -12,42 +12,32 @@ export class GPUContext {
   private adapter: GPUAdapter | null = null;
   private device: GPUDevice | null = null;
   private _isInitialized = false;
-  private pendingResourceCleanup = new Set<Promise<void>>();
-
-  private trackCleanup(cleanup: Promise<void>): void {
-    this.pendingResourceCleanup.add(cleanup);
-    void cleanup.finally(() => {
-      this.pendingResourceCleanup.delete(cleanup);
-    });
-  }
+  private deferredBuffers = new Set<GPUBuffer>();
 
   deferDestroy(buffer: GPUBuffer | null | undefined): void {
     if (!buffer) return;
 
-    const cleanup = this.waitForSubmittedWork()
-      .then(() => {
-        buffer.destroy();
-      })
-      .catch(() => {
-        try {
-          buffer.destroy();
-        } catch {
-          // Ignore cleanup failures after device loss/destroy.
-        }
-      });
-
-    this.trackCleanup(cleanup);
+    this.deferredBuffers.add(buffer);
   }
 
   async flushDeferredDestroys(): Promise<void> {
-    await Promise.allSettled([...this.pendingResourceCleanup]);
+    for (const buffer of this.deferredBuffers) {
+      buffer.destroy();
+    }
+    this.deferredBuffers.clear();
   }
 
   async waitForSubmittedWork(): Promise<void> {
-    // Yield to the event loop to allow pending GPU work to complete.
-    // The deprecated onSubmittedWorkDone() was removed from the WebGPU spec.
-    // In practice, yielding briefly is sufficient for most testing scenarios.
-    return new Promise(resolve => setTimeout(resolve, 0));
+    const queue = this.getDevice().queue as GPUQueue & {
+      onSubmittedWorkDone?: () => Promise<void>;
+    };
+
+    if (typeof queue.onSubmittedWorkDone === 'function') {
+      await queue.onSubmittedWorkDone();
+      return;
+    }
+
+    await new Promise(resolve => setTimeout(resolve, 0));
   }
 
   async sync(): Promise<void> {
@@ -213,15 +203,12 @@ export class GPUContext {
    * Release all GPU resources.
    */
   destroy(): void {
-    for (const cleanup of this.pendingResourceCleanup) {
-      void cleanup.catch(() => {});
-    }
-    this.pendingResourceCleanup.clear();
-
     if (this.device) {
       this.device.destroy();
       this.device = null;
     }
+
+    this.deferredBuffers.clear();
     this.adapter = null;
     this._isInitialized = false;
   }
diff --git a/src/engine/InferenceEngine.ts b/src/engine/InferenceEngine.ts
index 0d2cd56..c8d8a95 100644
--- a/src/engine/InferenceEngine.ts
+++ b/src/engine/InferenceEngine.ts
@@ -11,16 +11,19 @@ import { DenseOperator } from '../operators/DenseOperator';
 import { AddOperator } from '../operators/AddOperator';
 import { BatchNorm2dOperator } from '../operators/BatchNorm2dOperator';
 import { ModelDefinition } from './ModelLoader';
+import { CompiledModel, ModelCompiler } from './ModelCompiler';
 
 export class InferenceEngine {
   private context: GPUContext;
   private operators: Map<string, Operator>;
   private weights: Map<string, Tensor> = new Map();
-  private modelDef: ModelDefinition | null = null;
+  private compiledModel: CompiledModel | null = null;
+  private readonly compiler: ModelCompiler;
 
   constructor() {
     this.context = new GPUContext();
     this.operators = new Map();
+    this.compiler = new ModelCompiler();
   }
 
   async initialize(): Promise<void> {
@@ -39,20 +42,14 @@ export class InferenceEngine {
   }
 
   async loadModel(modelDef: ModelDefinition): Promise<void> {
-    this.modelDef = modelDef;
+    const compiledModel = this.compiler.compile(modelDef, this.operators.keys());
+    const nextWeights = this.materializeWeights(modelDef);
 
     for (const tensor of this.weights.values()) {
       tensor.destroy();
     }
-    this.weights.clear();
-
-    for (const [name, weightDef] of Object.entries(modelDef.weights)) {
-      if (!weightDef.shape || weightDef.shape.length === 0) {
-        throw new Error(`Weight "${name}" is missing shape metadata`);
-      }
-      const tensor = Tensor.fromArray(this.context, weightDef.data, weightDef.shape);
-      this.weights.set(name, tensor);
-    }
+    this.weights = nextWeights;
+    this.compiledModel = compiledModel;
   }
 
   tensorFromArray(
@@ -63,63 +60,92 @@ export class InferenceEngine {
     return Tensor.fromArray(this.context, data, shape, options);
   }
 
-  async infer(input: Tensor): Promise<Tensor> {
-    if (!this.modelDef) {
-      throw new Error('Model not loaded');
-    }
-    if (!input.usesContext(this.context)) {
-      throw new Error('Input tensor must be created from the same GPUContext as the inference engine');
-    }
+  private materializeWeights(modelDef: ModelDefinition): Map<string, Tensor> {
+    const nextWeights = new Map<string, Tensor>();
 
-    const activations = new Map<string, Tensor>();
-    activations.set('input', input);
-
-    // Execute layers in order
-    for (const layer of this.modelDef.layers) {
-      const operator = this.operators.get(layer.type);
-      if (!operator) {
-        throw new Error(`Unknown operator type: ${layer.type}`);
-      }
-
-      // Get inputs
-      const inputs: Tensor[] = [];
-      for (const inputName of layer.inputs) {
-        const tensor = activations.get(inputName) ?? this.weights.get(inputName);
-        if (!tensor) {
-          throw new Error(`Missing input: ${inputName}`);
+    try {
+      for (const [name, weightDef] of Object.entries(modelDef.weights)) {
+        if (!weightDef.shape || weightDef.shape.length === 0) {
+          throw new Error(`Weight "${name}" is missing shape metadata`);
         }
-        inputs.push(tensor);
+        const tensor = Tensor.fromArray(this.context, weightDef.data, weightDef.shape);
+        nextWeights.set(name, tensor);
       }
 
-      // Execute
-      const output = await operator.forward(inputs, layer.params);
-      activations.set(layer.name, output);
-    }
-
-    // Return final output
-    const lastLayer = this.modelDef.layers[this.modelDef.layers.length - 1];
-    const finalOutput = activations.get(lastLayer.name);
-    if (!finalOutput) {
-      throw new Error(`Final output not found for layer: ${lastLayer.name}`);
+      return nextWeights;
+    } catch (error) {
+      for (const tensor of nextWeights.values()) {
+        tensor.destroy();
+      }
+      throw error;
     }
+  }
 
-    // Ensure queued GPU work sees all intermediate activations before releasing them.
+  private async cleanupActivations(
+    activations: Map<string, Tensor>,
+    retainedBuffer: GPUBuffer | null
+  ): Promise<void> {
     await this.context.sync();
 
-    // Destroy intermediate activations to free GPU memory.
-    // If the final output is a view (e.g. flatten/reshape), keep any tensor sharing its buffer alive.
     for (const [name, tensor] of activations.entries()) {
       if (
         name !== 'input' &&
-        name !== lastLayer.name &&
         !this.weights.has(name) &&
-        tensor.buffer !== finalOutput.buffer
+        tensor.buffer !== retainedBuffer
       ) {
         tensor.destroy();
       }
     }
+  }
+
+  /**
+   * Runs the loaded model on `input`, returns the last layer's output and cleans up activations.
+   * @throws Error If no model is loaded, `input` comes from another GPUContext, or a layer cannot run.
+   */
+  async infer(input: Tensor): Promise<Tensor> {
+    const model = this.compiledModel;
+    if (!model) {
+      throw new Error('Model not loaded');
+    }
+    if (!input.usesContext(this.context)) {
+      throw new Error('Input tensor must be created from the same GPUContext as the inference engine');
+    }
 
-    return finalOutput;
+    const activations = new Map<string, Tensor>([['input', input]]);
+    let retainedBuffer: GPUBuffer | null = null;
+    let failed = false;
+    try {
+      for (const layer of model.layers) {
+        const operator = this.operators.get(layer.type);
+        if (!operator) {
+          throw new Error(`Unknown operator type: ${layer.type}`);
+        }
+        const inputs = layer.inputs.map((source) => {
+          const tensor = (source.kind === 'weight' ? this.weights : activations).get(source.name);
+          if (!tensor) {
+            throw new Error(`Missing input: ${source.name}`);
+          }
+          return tensor;
+        });
+        activations.set(layer.name, await operator.forward(inputs, layer.params));
+      }
+      const finalOutput = activations.get(model.outputName);
+      if (!finalOutput) {
+        throw new Error(`Final output not found for layer: ${model.outputName}`);
+      }
+      retainedBuffer = finalOutput.buffer;
+      return finalOutput;
+    } catch (error) {
+      failed = true;
+      throw error;
+    } finally {
+      try {
+        await this.cleanupActivations(activations, retainedBuffer);
+      } catch (cleanupError) {
+        // A cleanup failure must not replace the inference error the caller needs to see.
+        if (!failed) throw cleanupError;
+      }
+    }
   }
 
   destroy(): void {
@@ -127,6 +153,7 @@ export class InferenceEngine {
       tensor.destroy();
     }
     this.weights.clear();
+    this.compiledModel = null;
     
     for (const operator of this.operators.values()) {
       operator.destroy();
diff --git a/src/engine/ModelCompiler.ts b/src/engine/ModelCompiler.ts
new file mode 100644
index 0000000..8749bfb
--- /dev/null
+++ b/src/engine/ModelCompiler.ts
@@ -0,0 +1,78 @@
+import { ModelLoadError } from '../core/errors';
+import { ModelDefinition, LayerDefinition } from './ModelLoader';
+
+export type TensorSourceKind = 'input' | 'weight' | 'activation';
+
+export interface TensorSource {
+  kind: TensorSourceKind;
+  name: string;
+}
+
+export interface CompiledLayer {
+  name: string;
+  type: LayerDefinition['type'];
+  params: LayerDefinition['params'];
+  inputs: TensorSource[];
+}
+
+export interface CompiledModel {
+  name: string;
+  layers: CompiledLayer[];
+  outputName: string;
+}
+
+/**
+ * Classifies a tensor reference by where its value comes from.
+ * The reserved name `input` wins over a weight with the same key; a name that is
+ * neither `input` nor an own key of `weights` is treated as an activation.
+ */
+function toTensorSource(name: string, weights: ModelDefinition['weights']): TensorSource {
+  const isInput = name === 'input';
+  const isWeight = !isInput && Object.hasOwn(weights, name);
+  const kind: TensorSourceKind = isInput ? 'input' : isWeight ? 'weight' : 'activation';
+  return { kind, name };
+}
+
+/** Validates a model definition and converts it into a CompiledModel. */
+export class ModelCompiler {
+  /**
+   * Checks layers in declaration order and classifies every input reference.
+   * @param modelDef - Model definition whose layers and weight keys are validated.
+   * @param operatorTypes - Operator type names accepted as valid layer types.
+   * @returns Compiled layers; the last layer's name is reported as `outputName`.
+   * @throws ModelLoadError If `layers` is empty or not an array, a layer type is not
+   *   in `operatorTypes`, a layer name is already taken (`input`, a weight key, or an
+   *   earlier layer), or an input names a tensor that is not available yet.
+   */
+  compile(modelDef: ModelDefinition, operatorTypes: Iterable<string>): CompiledModel {
+    if (!Array.isArray(modelDef.layers) || modelDef.layers.length === 0) {
+      throw new ModelLoadError('Model must contain at least one layer');
+    }
+
+    const knownOperatorTypes = new Set(operatorTypes);
+    // Grows as layers are accepted, so forward references and self-references are rejected.
+    const availableNames = new Set<string>(['input', ...Object.keys(modelDef.weights)]);
+    const compiledLayers: CompiledLayer[] = [];
+    let outputName = '';
+
+    for (const layer of modelDef.layers) {
+      if (!knownOperatorTypes.has(layer.type)) {
+        throw new ModelLoadError(`Unknown operator type: ${layer.type}`);
+      }
+      if (availableNames.has(layer.name)) {
+        throw new ModelLoadError(`Duplicate layer name: ${layer.name}`);
+      }
+      const inputs = layer.inputs.map((inputName) => {
+        if (!availableNames.has(inputName)) {
+          throw new ModelLoadError(`Missing input: ${inputName} for layer: ${layer.name}`);
+        }
+        return toTensorSource(inputName, modelDef.weights);
+      });
+      compiledLayers.push({ name: layer.name, type: layer.type, params: layer.params, inputs });
+      availableNames.add(layer.name);
+      outputName = layer.name;
+    }
+
+    return { name: modelDef.name, layers: compiledLayers, outputName };
+  }
+}
diff --git a/src/operators/AddOperator.ts b/src/operators/AddOperator.ts
index 6d09817..7bf2fdb 100644
--- a/src/operators/AddOperator.ts
+++ b/src/operators/AddOperator.ts
@@ -3,6 +3,7 @@ import { GPUContext } from '../core/GPUContext';
 import { Tensor, TensorShape } from '../core/Tensor';
 import { DEFAULT_WORKGROUP_SIZE } from './constants';
 import { OperatorExecutionError } from '../core/errors';
+import { assertSameShapeAndLayout } from './tensorContracts';
 
 // AddParams uses the base OperatorParams - no additional parameters needed for element-wise addition
 
@@ -78,23 +79,7 @@ export class AddOperator extends Operator {
     }
 
     const [inputA, inputB] = inputs;
-
-    // Validate shapes match
-    if (inputA.shape.length !== inputB.shape.length) {
-      throw new OperatorExecutionError(
-        'Add',
-        `inputs must have same rank: got ${inputA.shape.length} and ${inputB.shape.length}`
-      );
-    }
-
-    for (let i = 0; i < inputA.shape.length; i++) {
-      if (inputA.shape[i] !== inputB.shape[i]) {
-        throw new OperatorExecutionError(
-          'Add',
-          `inputs must have same shape: got [${inputA.shape.join(', ')}] and [${inputB.shape.join(', ')}]`
-        );
-      }
-    }
+    assertSameShapeAndLayout('Add', inputA, inputB, 'inputs', 'inputs');
 
     const outputShape = this.computeOutputShape(inputA.shape);
     const output = new Tensor(this.context, outputShape, { layout: inputA.layout });
diff --git a/src/operators/convValidation.ts b/src/operators/convValidation.ts
index 2f47c05..9974ae7 100644
--- a/src/operators/convValidation.ts
+++ b/src/operators/convValidation.ts
@@ -1,5 +1,6 @@
 import { Tensor, TensorShape } from '../core/Tensor';
 import { OperatorExecutionError } from '../core/errors';
+import { assertTensorLayout } from './tensorContracts';
 
 export type ConvShape = {
   batchSize: number;
@@ -26,9 +27,7 @@ export function validateNonNegativePair(name: string, pair: [number, number]): v
 }
 
 export function validateNchwInput(name: string, input: Tensor): void {
-  if (input.layout !== 'NCHW') {
-    throw new OperatorExecutionError(name, 'currently supports NCHW layout only');
-  }
+  assertTensorLayout(name, input, 'NCHW', 'input', 'currently supports NCHW layout only');
   if (input.shape.length !== 4) {
     throw new OperatorExecutionError(name, 'expects a 4D input tensor');
   }
diff --git a/src/operators/tensorContracts.ts b/src/operators/tensorContracts.ts
new file mode 100644
index 0000000..c91e0f1
--- /dev/null
+++ b/src/operators/tensorContracts.ts
@@ -0,0 +1,48 @@
+import { DataLayout, Tensor } from '../core/Tensor';
+import { OperatorExecutionError } from '../core/errors';
+
+/**
+ * Throws OperatorExecutionError unless `tensor.layout` is `expectedLayout`; `details` overrides the default message.
+ */
+export function assertTensorLayout(
+  operatorName: string,
+  tensor: Tensor,
+  expectedLayout: DataLayout,
+  label = 'input',
+  details?: string
+): void {
+  if (tensor.layout === expectedLayout) return;
+  const fallback = `${label} must use ${expectedLayout} layout, got ${tensor.layout}`;
+  throw new OperatorExecutionError(operatorName, details ?? fallback);
+}
+
+/**
+ * Requires two tensors to agree on rank, then on every dimension, then on layout.
+ * `firstLabel` and `secondLabel` only affect the wording of the error message.
+ *
+ * @throws OperatorExecutionError On the first check that fails.
+ */
+export function assertSameShapeAndLayout(
+  operatorName: string,
+  first: Tensor,
+  second: Tensor,
+  firstLabel = 'inputA',
+  secondLabel = 'inputB'
+): void {
+  const fail = (reason: string): never => {
+    throw new OperatorExecutionError(operatorName, `${firstLabel} and ${secondLabel} ${reason}`);
+  };
+
+  const shapeA = first.shape;
+  const shapeB = second.shape;
+
+  if (shapeA.length !== shapeB.length) {
+    fail(`must have same rank: got ${shapeA.length} and ${shapeB.length}`);
+  }
+  if (shapeA.some((dim, axis) => dim !== shapeB[axis])) {
+    fail(`must have same shape: got [${shapeA.join(', ')}] and [${shapeB.join(', ')}]`);
+  }
+  if (first.layout !== second.layout) {
+    fail(`must have same layout: got ${first.layout} and ${second.layout}`);
+  }
+}
diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts
index 275c2fa..d01c06d 100644
--- a/tests/core/GPUContext.test.ts
+++ b/tests/core/GPUContext.test.ts
@@ -48,6 +48,7 @@ describe('GPUContext', () => {
   });
 
   afterEach(() => {
+    vi.useRealTimers();
     setNavigator(originalNavigator);
   });
 
@@ -174,5 +175,39 @@ describe('GPUContext', () => {
       context.submit([mockCommandBuffer]);
       expect(mockDevice.queue.submit).toHaveBeenCalledWith([mockCommandBuffer]);
     });
+
+    it('should wait for queue completion during sync', async () => {
+      const { mockGPU, mockDevice } = createMockGPU();
+      setNavigator({ gpu: mockGPU });
+
+      const context = new GPUContext();
+      await context.initialize();
+
+      await context.sync();
+
+      expect(mockDevice.queue.onSubmittedWorkDone).toHaveBeenCalledTimes(1);
+    });
+
+    it('should not destroy deferred buffers before sync flushes them', async () => {
+      vi.useFakeTimers();
+      const { mockGPU } = createMockGPU();
+      setNavigator({ gpu: mockGPU });
+
+      const context = new GPUContext();
+      await context.initialize();
+
+      const buffer = {
+        destroy: vi.fn()
+      } as unknown as GPUBuffer;
+
+      context.deferDestroy(buffer);
+      await vi.runAllTimersAsync();
+
+      expect(buffer.destroy).not.toHaveBeenCalled();
+
+      await context.sync();
+
+      expect(buffer.destroy).toHaveBeenCalledTimes(1);
+    });
   });
 });
diff --git a/tests/engine/InferenceEngine.test.ts b/tests/engine/InferenceEngine.test.ts
index cf965f9..918c40d 100644
--- a/tests/engine/InferenceEngine.test.ts
+++ b/tests/engine/InferenceEngine.test.ts
@@ -100,7 +100,14 @@ describe('InferenceEngine', () => {
 
     const invalidModel = {
       name: 'invalid',
-      layers: [],
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
       weights: {
         conv_weight: {
           data: new Float32Array([1, 2, 3, 4])
@@ -158,7 +165,7 @@ describe('InferenceEngine', () => {
 
     const engine = new InferenceEngine();
     await engine.initialize();
-    await engine.loadModel({
+    await expect(engine.loadModel({
       name: 'bad-model',
       layers: [
         {
@@ -169,12 +176,7 @@ describe('InferenceEngine', () => {
         }
       ],
       weights: {}
-    });
-
-    const input = Tensor.zeros(engine['context'], [1, 3]);
-    await expect(engine.infer(input)).rejects.toThrow('Unknown operator type');
-
-    input.destroy();
+    })).rejects.toThrow('Unknown operator type');
   });
 
   it('throws error for missing input tensor', async () => {
@@ -183,7 +185,7 @@ describe('InferenceEngine', () => {
 
     const engine = new InferenceEngine();
     await engine.initialize();
-    await engine.loadModel({
+    await expect(engine.loadModel({
       name: 'bad-model',
       layers: [
         {
@@ -194,12 +196,48 @@ describe('InferenceEngine', () => {
         }
       ],
       weights: {}
-    });
+    })).rejects.toThrow('Missing input');
+  });
 
-    const input = Tensor.zeros(engine['context'], [1, 3]);
-    await expect(engine.infer(input)).rejects.toThrow('Missing input');
+  it('rejects empty model graphs at load time', async () => {
+    const { mockGPU } = createMockGPU();
+    setNavigator({ gpu: mockGPU });
 
-    input.destroy();
+    const engine = new InferenceEngine();
+    await engine.initialize();
+
+    await expect(engine.loadModel({
+      name: 'empty-model',
+      layers: [],
+      weights: {}
+    })).rejects.toThrow('at least one layer');
+  });
+
+  it('rejects duplicate layer names at load time', async () => {
+    const { mockGPU } = createMockGPU();
+    setNavigator({ gpu: mockGPU });
+
+    const engine = new InferenceEngine();
+    await engine.initialize();
+
+    await expect(engine.loadModel({
+      name: 'duplicate-layer-model',
+      layers: [
+        {
+          name: 'hidden',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        },
+        {
+          name: 'hidden',
+          type: 'softmax',
+          inputs: ['hidden'],
+          params: { axis: -1 }
+        }
+      ],
+      weights: {}
+    })).rejects.toThrow('Duplicate layer name');
   });
 
   it('executes single layer model correctly', async () => {
@@ -264,6 +302,47 @@ describe('InferenceEngine', () => {
     output.destroy();
   });
 
+  it('cleans intermediate activations when a later layer fails', async () => {
+    const { mockGPU } = createMockGPU();
+    setNavigator({ gpu: mockGPU });
+
+    const engine = new InferenceEngine();
+    await engine.initialize();
+    await engine.loadModel({
+      name: 'failing-multi-layer',
+      layers: [
+        {
+          name: 'relu1',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        },
+        {
+          name: 'output',
+          type: 'softmax',
+          inputs: ['relu1'],
+          params: { axis: -1 }
+        }
+      ],
+      weights: {}
+    });
+
+    const intermediate = Tensor.zeros(engine['context'], [1, 3]);
+    const destroySpy = vi.spyOn(intermediate, 'destroy');
+    const relu = engine['operators'].get('relu');
+    const softmax = engine['operators'].get('softmax');
+
+    vi.spyOn(relu!, 'forward').mockResolvedValue(intermediate);
+    vi.spyOn(softmax!, 'forward').mockRejectedValue(new Error('softmax boom'));
+
+    const input = Tensor.zeros(engine['context'], [1, 3]);
+
+    await expect(engine.infer(input)).rejects.toThrow('softmax boom');
+    expect(destroySpy).toHaveBeenCalledTimes(1);
+
+    input.destroy();
+  });
+
   it('executes add layers through the inference engine', async () => {
     const { mockGPU } = createMockGPU();
     setNavigator({ gpu: mockGPU });
@@ -345,7 +424,14 @@ describe('InferenceEngine', () => {
     const weightData = new Float32Array([1, 2, 3, 4, 5, 6]);
     await engine.loadModel({
       name: 'with-weights',
-      layers: [],
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
       weights: {
         my_weight: {
           data: weightData,
@@ -370,7 +456,14 @@ describe('InferenceEngine', () => {
 
     await engine.loadModel({
       name: 'model1',
-      layers: [],
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
       weights: {
         weight1: {
           data: new Float32Array([1, 2, 3]),
@@ -383,7 +476,14 @@ describe('InferenceEngine', () => {
 
     await engine.loadModel({
       name: 'model2',
-      layers: [],
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
       weights: {
         weight2: {
           data: new Float32Array([4, 5]),
@@ -396,6 +496,51 @@ describe('InferenceEngine', () => {
     expect(engine['weights'].has('weight2')).toBe(true);
   });
 
+  it('keeps previous model when replacement load fails', async () => {
+    const { mockGPU } = createMockGPU();
+    setNavigator({ gpu: mockGPU });
+
+    const engine = new InferenceEngine();
+    await engine.initialize();
+    await engine.loadModel({
+      name: 'stable-model',
+      layers: [
+        {
+          name: 'output',
+          type: 'flatten',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
+      weights: {}
+    });
+
+    await expect(engine.loadModel({
+      name: 'broken-model',
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
+      weights: {
+        broken_weight: {
+          data: new Float32Array([1, 2, 3])
+        }
+      }
+    } as unknown as ModelDefinition)).rejects.toThrow('missing shape metadata');
+
+    const input = Tensor.zeros(engine['context'], [1, 1, 2, 2]);
+    const output = await engine.infer(input);
+
+    expect(output.shape).toEqual([1, 4]);
+
+    input.destroy();
+    output.destroy();
+  });
+
   it('destroys all resources on destroy', async () => {
     const { mockGPU, mockDevice } = createMockGPU();
     setNavigator({ gpu: mockGPU });
@@ -404,7 +549,14 @@ describe('InferenceEngine', () => {
     await engine.initialize();
     await engine.loadModel({
       name: 'test',
-      layers: [],
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
       weights: {
         w: {
           data: new Float32Array([1, 2, 3]),
@@ -420,6 +572,31 @@ describe('InferenceEngine', () => {
     expect(engine['operators'].size).toBe(0);
   });
 
+  it('unloads compiled model on destroy', async () => {
+    const { mockGPU } = createMockGPU();
+    setNavigator({ gpu: mockGPU });
+
+    const engine = new InferenceEngine();
+    await engine.initialize();
+    await engine.loadModel({
+      name: 'test',
+      layers: [
+        {
+          name: 'output',
+          type: 'relu',
+          inputs: ['input'],
+          params: {}
+        }
+      ],
+      weights: {}
+    });
+
+    const input = Tensor.zeros(engine['context'], [1, 3]);
+    engine.destroy();
+
+    await expect(engine.infer(input)).rejects.toThrow('Model not loaded');
+  });
+
   it('creates tensors via tensorFromArray', async () => {
     const { mockGPU } = createMockGPU();
     setNavigator({ gpu: mockGPU });
diff --git a/tests/operators/AddOperator.test.ts b/tests/operators/AddOperator.test.ts
index a6ff5f3..3ac0809 100644
--- a/tests/operators/AddOperator.test.ts
+++ b/tests/operators/AddOperator.test.ts
@@ -36,6 +36,16 @@ describe('AddOperator', () => {
       await expect(add.forward([inputA, inputB])).rejects.toThrow(/same shape/);
     });
 
+    it('should throw error for different layouts', async () => {
+      const context = createMockContext();
+      const add = new AddOperator(context);
+
+      const inputA = Tensor.fromArray(context, new Float32Array(4).fill(1), [1, 1, 2, 2], { layout: 'NCHW' });
+      const inputB = Tensor.fromArray(context, new Float32Array(4).fill(1), [1, 1, 2, 2], { layout: 'NHWC' });
+
+      await expect(add.forward([inputA, inputB])).rejects.toThrow(/same layout/);
+    });
+
     it('should throw error for wrong number of inputs', async () => {
       const context = createMockContext();
       const add = new AddOperator(context);

From f0fd347e811de4a6447ede0f77e9a6ebf7ed51f2 Mon Sep 17 00:00:00 2001
From: shijiashuai <shijiashuai@bgi.com>
Date: Fri, 22 May 2026 10:39:16 +0800
Subject: [PATCH 2/3] fix: destroy deferred GPU buffers on teardown

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/core/GPUContext.ts        |  7 +++++--
 tests/core/GPUContext.test.ts | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts
index 622547e..5c95e24 100644
--- a/src/core/GPUContext.ts
+++ b/src/core/GPUContext.ts
@@ -203,12 +203,15 @@ export class GPUContext {
    * Release all GPU resources.
    */
   destroy(): void {
+    for (const buffer of this.deferredBuffers) {
+      buffer.destroy();
+    }
+    this.deferredBuffers.clear();
+
     if (this.device) {
       this.device.destroy();
       this.device = null;
     }
-
-    this.deferredBuffers.clear();
     this.adapter = null;
     this._isInitialized = false;
   }
diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts
index d01c06d..6733536 100644
--- a/tests/core/GPUContext.test.ts
+++ b/tests/core/GPUContext.test.ts
@@ -209,5 +209,22 @@ describe('GPUContext', () => {
 
       expect(buffer.destroy).toHaveBeenCalledTimes(1);
     });
+
+    it('should destroy deferred buffers during teardown', async () => {
+      const { mockGPU } = createMockGPU();
+      setNavigator({ gpu: mockGPU });
+
+      const context = new GPUContext();
+      await context.initialize();
+
+      const buffer = {
+        destroy: vi.fn()
+      } as unknown as GPUBuffer;
+
+      context.deferDestroy(buffer);
+      context.destroy();
+
+      expect(buffer.destroy).toHaveBeenCalledTimes(1);
+    });
   });
 });

From c7ca039afb480efdab77f20e1b35459daea22427 Mon Sep 17 00:00:00 2001
From: shijiashuai <shijiashuai@bgi.com>
Date: Fri, 22 May 2026 10:51:49 +0800
Subject: [PATCH 3/3] fix: preserve deferred cleanup semantics

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/core/GPUContext.ts        | 34 +++++++++++++++++++++++++++++-----
 tests/core/GPUContext.test.ts | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/src/core/GPUContext.ts b/src/core/GPUContext.ts
index 5c95e24..6891bc8 100644
--- a/src/core/GPUContext.ts
+++ b/src/core/GPUContext.ts
@@ -20,13 +20,17 @@ export class GPUContext {
     this.deferredBuffers.add(buffer);
   }
 
-  async flushDeferredDestroys(): Promise<void> {
+  private destroyDeferredBuffers(): void {
     for (const buffer of this.deferredBuffers) {
       buffer.destroy();
     }
     this.deferredBuffers.clear();
   }
 
+  async flushDeferredDestroys(): Promise<void> {
+    this.destroyDeferredBuffers();
+  }
+
   async waitForSubmittedWork(): Promise<void> {
     const queue = this.getDevice().queue as GPUQueue & {
       onSubmittedWorkDone?: () => Promise<void>;
@@ -41,8 +45,18 @@ export class GPUContext {
   }
 
   async sync(): Promise<void> {
-    await this.waitForSubmittedWork();
+    // Box the rejection so that a falsy reason is still rethrown below.
+    let waitFailure: { reason: unknown } | undefined;
+    try {
+      await this.waitForSubmittedWork();
+    } catch (error) {
+      waitFailure = { reason: error };
+    }
     await this.flushDeferredDestroys();
+    // The flush above runs even when the wait rejected; rethrow afterwards.
+    if (waitFailure !== undefined) {
+      throw waitFailure.reason;
+    }
   }
 
   /**
@@ -203,10 +217,20 @@ export class GPUContext {
    * Release all GPU resources.
    */
   destroy(): void {
-    for (const buffer of this.deferredBuffers) {
-      buffer.destroy();
+    const queue = this.device?.queue as (GPUQueue & {
+      onSubmittedWorkDone?: () => Promise<void>;
+    }) | undefined;
+
+    if (this.deferredBuffers.size > 0) {
+      if (typeof queue?.onSubmittedWorkDone === 'function') {
+        void queue.onSubmittedWorkDone().then(
+          () => this.destroyDeferredBuffers(),
+          () => this.destroyDeferredBuffers()
+        );
+      } else {
+        this.destroyDeferredBuffers();
+      }
     }
-    this.deferredBuffers.clear();
 
     if (this.device) {
       this.device.destroy();
diff --git a/tests/core/GPUContext.test.ts b/tests/core/GPUContext.test.ts
index 6733536..53ba9d4 100644
--- a/tests/core/GPUContext.test.ts
+++ b/tests/core/GPUContext.test.ts
@@ -211,11 +211,20 @@ describe('GPUContext', () => {
     });
 
     it('should destroy deferred buffers during teardown', async () => {
+      let settleQueue: (() => void) | undefined;
       const { mockGPU } = createMockGPU();
       setNavigator({ gpu: mockGPU });
 
       const context = new GPUContext();
       await context.initialize();
+      const mockDevice = context.getDevice() as unknown as {
+        queue: { onSubmittedWorkDone: ReturnType<typeof vi.fn> };
+      };
+      mockDevice.queue.onSubmittedWorkDone.mockImplementation(
+        () => new Promise<void>((resolve) => {
+          settleQueue = resolve;
+        })
+      );
 
       const buffer = {
         destroy: vi.fn()
@@ -224,6 +233,32 @@ describe('GPUContext', () => {
       context.deferDestroy(buffer);
       context.destroy();
 
+      expect(buffer.destroy).not.toHaveBeenCalled();
+
+      settleQueue?.();
+      await Promise.resolve();
+
+      expect(buffer.destroy).toHaveBeenCalledTimes(1);
+    });
+
+    it('should flush deferred buffers even if queue completion rejects', async () => {
+      // Arrange: an initialized context whose queue rejects the next completion wait.
+      setNavigator({ gpu: createMockGPU().mockGPU });
+      const context = new GPUContext();
+      await context.initialize();
+
+      type MockedQueue = { onSubmittedWorkDone: ReturnType<typeof vi.fn> };
+      const { queue } = context.getDevice() as unknown as { queue: MockedQueue };
+      queue.onSubmittedWorkDone.mockRejectedValueOnce(new Error('queue lost'));
+
+      // Minimal stand-in for a GPUBuffer: only destroy() is observed.
+      const buffer = { destroy: vi.fn() } as unknown as GPUBuffer;
+      context.deferDestroy(buffer);
+
+      // Act: sync() must surface the rejection ...
+      await expect(context.sync()).rejects.toThrow('queue lost');
+
+      // ... and still have destroyed the deferred buffer exactly once.
       expect(buffer.destroy).toHaveBeenCalledTimes(1);
     });
   });