From 5f17daf14427233ba449a1cdd54e6cf7824e1ddf Mon Sep 17 00:00:00 2001 From: sourcepirate Date: Sun, 31 May 2026 10:40:47 +0530 Subject: [PATCH 1/2] Updated tests and no autograd docs --- Agents.md | 3 +- README.md | 6 + docs/layers/base.md | 36 ++++ neutro/layers/core/dropout.py | 3 + tests/engine/test_node.py | 46 +++++ tests/layers/core/test_dropout.py | 135 +++++++++++++- tests/layers/core/test_input_layer.py | 52 ++++++ tests/layers/core/test_merging_coverage.py | 123 +++++++++++++ tests/layers/core/test_reparameterization.py | 36 ++++ tests/layers/embedding/test_time_embedding.py | 22 +++ tests/layers/normalization/test_batchnorm.py | 14 ++ tests/layers/pooling/test_global_pooling.py | 18 ++ tests/models/test_model_coverage.py | 166 ++++++++++++++++++ tests/models/test_model_coverage2.py | 156 ++++++++++++++++ tests/test_preprocessing.py | 44 +++++ tests/test_preprocessing_sequence.py | 106 +++++++++++ tests/test_preprocessing_text.py | 114 ++++++++++++ tests/tokenizers/test_tiktoken_compat.py | 43 +++++ tests/utils/test_data_utils.py | 42 +++++ 19 files changed, 1156 insertions(+), 9 deletions(-) create mode 100644 tests/engine/test_node.py create mode 100644 tests/layers/core/test_input_layer.py create mode 100644 tests/layers/core/test_merging_coverage.py create mode 100644 tests/models/test_model_coverage.py create mode 100644 tests/models/test_model_coverage2.py create mode 100644 tests/test_preprocessing_sequence.py create mode 100644 tests/test_preprocessing_text.py create mode 100644 tests/utils/test_data_utils.py diff --git a/Agents.md b/Agents.md index 90652e4..bf4dea6 100644 --- a/Agents.md +++ b/Agents.md @@ -8,7 +8,8 @@ You are an agent working on `neutro`, an "intentionally naive" and educational i 2. **Keras API Fidelity**: Maintain strict compatibility with Keras/TensorFlow APIs (`compile`, `fit`, `predict`, `evaluate`, `summary`, `Sequential`, `Model`). 3. **Educational Clarity**: Code should be readable and reflect the underlying mathematical algorithms (e.g., FlashAttention, MoE routing, RoPE). Use clear variable names and minimal but impactful comments. 4. **No Magic**: Avoid complex meta-programming or obscure libraries. If a layer needs a backward pass, implement it explicitly. -5. **Nested Training**: Ensure that nested layers (layers within blocks) are discovered and updated by the optimizer. Use `Layer.sublayers` to traverse the hierarchy. +5. **No Autograd**: `neutro` has no automatic differentiation engine. There is no equivalent of PyTorch's `autograd` or JAX's `grad`. Every layer MUST implement its own `backward(grad_output)` that manually computes gradients using the chain rule. This is the defining educational feature of the library — you *are* the autograd engine. +6. **Nested Training**: Ensure that nested layers (layers within blocks) are discovered and updated by the optimizer. Use `Layer.sublayers` to traverse the hierarchy. ## Implementation Details diff --git a/README.md b/README.md index e037a17..aec670f 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,12 @@ Let's be honest: modern DL frameworks are black boxes. You pip install 4GB of bi - **A Toy, not a Tool**: This isn't meant for production. It's a playground for learning advanced algorithms (MHA, GQA, FlashAttention, LSTM) in their purest form. - **For the Wisdom-Rich**: If you remember when 64MB of RAM was a flex and "vectorization" meant loop unrolling, this is for you. It's a fun way to play with cutting-edge 2024 algorithms using 1990s-era clarity. +## 🚫 No Autograd + +Unlike PyTorch or TensorFlow, `neutro` has **zero automatic differentiation**. You will not find an `autograd` engine here. Every gradient is computed by hand — each layer implements its own `backward` method using explicit matrix multiplications and the chain rule. + +This is not a bug, it's the feature. Writing `self.grads['W'] = inputs.T @ grad_output` is how you *learn* what backpropagation actually does. + --- ## 🚀 What's Inside? diff --git a/docs/layers/base.md b/docs/layers/base.md index 6cbf5a2..c4415ef 100644 --- a/docs/layers/base.md +++ b/docs/layers/base.md @@ -247,6 +247,42 @@ y = layer(x) 8. `Dense.forward` computes `np.dot(x, W) + b`, applies ReLU, caches `self.inputs` and `self.z`, returns the output. 9. Later, `layer.backward(grad_output)` uses those cached values to compute weight gradients. +## 🚫 No Autograd — You Write the Gradients + +This is the single most important thing to understand about `neutro`: + +**There is no automatic differentiation engine.** + +In PyTorch, you write: + +```python +y = x @ W + b # PyTorch traces this into a graph +y.backward() # PyTorch automatically computes gradients for W and b +``` + +In `neutro`, you write both `forward` AND `backward`: + +```python +def forward(self, x): + self.inputs = x + return x @ self.params['W'] + self.params['b'] + +def backward(self, grad_output): + self.grads['W'] = self.inputs.T @ grad_output + self.grads['b'] = np.sum(grad_output, axis=0) + return grad_output @ self.params['W'].T +``` + +Why? Because every matrix multiplication you write in `backward` — every `@`, every `np.sum`, every `reshape` — is an explicit application of the **chain rule**. You are not calling `loss.backward()`. You *are* the autograd engine. + +This means: +- **If you add a new layer**, you must implement `backward` yourself — no framework will do it for you. +- **If you change the forward pass**, you must update backward to match. Every new line in `forward` probably needs a corresponding line in `backward`. +- **If backward gives wrong shapes**, you'll get a NumPy shape mismatch error — not a cryptic autograd graph error. You'll learn to think in shapes. +- **Every value you cache on `self` in `forward`** (like `self.inputs` or `self.z`) is cached for one reason: `backward` needs it. There is no tape, no graph, no magic — just stored NumPy arrays and chain rule math. + +This is the defining educational feature of the library. You can't hand-wave through gradient descent here. You must understand where gradients come from. + ## Try it yourself Here's how you'd create a custom `MyDense` layer from scratch: diff --git a/neutro/layers/core/dropout.py b/neutro/layers/core/dropout.py index 53a82ac..abbdf63 100644 --- a/neutro/layers/core/dropout.py +++ b/neutro/layers/core/dropout.py @@ -13,6 +13,9 @@ def forward(self, inputs, training=False): self.mask = np.random.binomial(1, 1 - self.rate, size=inputs.shape) / (1 - self.rate) return inputs * self.mask + def compute_output_shape(self, input_shape): + return input_shape + def backward(self, grad_output): if self.mask is None: return grad_output diff --git a/tests/engine/test_node.py b/tests/engine/test_node.py new file mode 100644 index 0000000..43c73b5 --- /dev/null +++ b/tests/engine/test_node.py @@ -0,0 +1,46 @@ +import numpy as np +from neutro.engine.node import KerasTensor, Node + + +class FakeLayer: + def __init__(self): + self.name = "fake_layer" + + +def test_keras_tensor_repr(): + t = KerasTensor(shape=(None, 32, 32, 3), name="input") + r = repr(t) + assert "KerasTensor" in r + assert "(None, 32, 32, 3)" in r + assert "input" in r + + +def test_node_single_output(): + layer = FakeLayer() + output = KerasTensor(shape=(None, 10), name="output") + node = Node(layer, input_tensors=[], output_tensors=output) + + assert node.layer is layer + assert node.output_tensors is output + assert output.node is node + assert layer._inbound_nodes == [node] + + +def test_node_list_output(): + layer = FakeLayer() + out1 = KerasTensor(shape=(None, 5)) + out2 = KerasTensor(shape=(None, 3)) + node = Node(layer, input_tensors=[], output_tensors=[out1, out2]) + + assert node.output_tensors == [out1, out2] + assert out1.node is node + assert out2.node is node + + +def test_node_repr(): + layer = FakeLayer() + output = KerasTensor(shape=(None, 10)) + node = Node(layer, input_tensors=[], output_tensors=output) + r = repr(node) + assert "Node" in r + assert "fake_layer" in r diff --git a/tests/layers/core/test_dropout.py b/tests/layers/core/test_dropout.py index 037f2d3..64418e8 100644 --- a/tests/layers/core/test_dropout.py +++ b/tests/layers/core/test_dropout.py @@ -1,17 +1,136 @@ import numpy as np +import pytest from neutro.layers.core.dropout import Dropout +from neutro.models.base_model import Sequential -def test_dropout(): + +def test_dropout_inference(): layer = Dropout(0.5) x = np.random.rand(10, 10) - - # Inference + out_inf = layer.forward(x, training=False) assert np.all(out_inf == x) - - # Training + + +def test_dropout_training(): + layer = Dropout(0.5) + x = np.random.rand(10, 10) + out_train = layer.forward(x, training=True) assert not np.all(out_train == x) - - grad = layer.backward(np.random.rand(10, 10)) - assert grad.shape == (10, 10) + + +def test_dropout_rate_zero(): + layer = Dropout(0.0) + x = np.random.rand(10, 10) + + out = layer.forward(x, training=True) + assert np.all(out == x) + + grad = np.random.rand(10, 10) + dx = layer.backward(grad) + assert np.all(dx == grad) + + +def test_dropout_1d_input(): + layer = Dropout(0.5) + x = np.random.rand(20) + + out = layer.forward(x, training=True) + assert out.shape == (20,) + assert not np.all(out == x) + + grad = np.random.rand(20) + dx = layer.backward(grad) + assert dx.shape == (20,) + + +def test_dropout_3d_input(): + layer = Dropout(0.3) + x = np.random.rand(4, 16, 64) + + out = layer.forward(x, training=True) + assert out.shape == (4, 16, 64) + + grad = np.random.rand(4, 16, 64) + dx = layer.backward(grad) + assert dx.shape == (4, 16, 64) + + +def test_dropout_statistics(): + layer = Dropout(0.5) + x = np.ones((1000, 100)) + + out = layer.forward(x, training=True) + zero_fraction = np.mean(out == 0) + assert 0.45 < zero_fraction < 0.55 + + +def test_dropout_backward_inference(): + layer = Dropout(0.5) + x = np.random.rand(10, 10) + grad = np.random.rand(10, 10) + + layer.forward(x, training=False) + dx = layer.backward(grad) + assert np.all(dx == grad) + + +def test_dropout_backward_values(): + layer = Dropout(0.5) + x = np.ones((10, 10)) + grad = np.ones((10, 10)) + + layer.forward(x, training=True) + + dx = layer.backward(grad) + expected_dx = grad * layer.mask + np.testing.assert_allclose(dx, expected_dx) + + +def test_dropout_backward_no_forward(): + layer = Dropout(0.5) + + grad = np.random.rand(10, 10) + dx = layer.backward(grad) + assert np.all(dx == grad) + + +def test_dropout_compute_output_shape(): + layer = Dropout(0.5) + + shape = layer.compute_output_shape((None, 32)) + assert shape == (None, 32) + + shape = layer.compute_output_shape((16, 32)) + assert shape == (16, 32) + + shape = layer.compute_output_shape((None, 16, 64)) + assert shape == (None, 16, 64) + + +def test_dropout_in_sequential_model(): + model = Sequential([ + Dropout(0.5), + Dropout(0.3), + Dropout(0.0), + ]) + x = np.random.rand(8, 32) + + out = model.forward(x, training=True) + assert out.shape == (8, 32) + + out_inf = model.forward(x, training=False) + assert np.all(out_inf == x) + + +def test_dropout_mask_recreated_each_forward(): + layer = Dropout(0.5) + x = np.ones((100, 100)) + + out1 = layer.forward(x, training=True) + mask1 = (out1 != 0).astype(float) + out2 = layer.forward(x, training=True) + mask2 = (out2 != 0).astype(float) + + assert not np.all(mask1 == mask2) diff --git a/tests/layers/core/test_input_layer.py b/tests/layers/core/test_input_layer.py new file mode 100644 index 0000000..d3f382f --- /dev/null +++ b/tests/layers/core/test_input_layer.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest +from neutro.layers.core.input_layer import InputLayer, Input +from neutro.engine.node import KerasTensor + + +def test_input_layer_forward(): + layer = InputLayer(input_shape=(4,)) + out = layer.forward(np.array([1, 2, 3, 4])) + assert np.array_equal(out, np.array([1, 2, 3, 4])) + + +def test_input_layer_backward(): + layer = InputLayer(input_shape=(4,)) + grad = layer.backward(np.array([0.1, 0.2, 0.3, 0.4])) + assert np.array_equal(grad, np.array([0.1, 0.2, 0.3, 0.4])) + + +def test_input_layer_build_immediate(): + layer = InputLayer(input_shape=(28, 28, 1)) + assert layer.built + assert layer.input_shape == (28, 28, 1) + + +def test_input_layer_build_explicit(): + layer = InputLayer() + layer.build((None, 28, 28, 1)) + assert layer.built + assert layer.input_shape == (None, 28, 28, 1) + + +def test_input_no_shape_raises(): + with pytest.raises(ValueError, match="Please provide a shape"): + Input(shape=None) + + +def test_input_with_list_shape(): + tensor = Input(shape=[28, 28, 1]) + assert isinstance(tensor, KerasTensor) + assert tensor.shape == (None, 28, 28, 1) + + +def test_input_with_tuple_shape(): + tensor = Input(shape=(28, 28, 1)) + assert isinstance(tensor, KerasTensor) + assert tensor.shape == (None, 28, 28, 1) + + +def test_input_with_batch_shape(): + tensor = Input(shape=(None, 28, 28, 1)) + assert isinstance(tensor, KerasTensor) + assert tensor.shape == (None, 28, 28, 1) diff --git a/tests/layers/core/test_merging_coverage.py b/tests/layers/core/test_merging_coverage.py new file mode 100644 index 0000000..e28fd47 --- /dev/null +++ b/tests/layers/core/test_merging_coverage.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest +from neutro.layers.core.merging import Add, Concatenate, Multiply, Average, Maximum, Minimum + + +class TestMultiply: + def test_forward(self): + layer = Multiply() + a = np.array([[1, 2], [3, 4]]) + b = np.array([[5, 6], [7, 8]]) + out = layer.forward([a, b]) + expected = a * b + np.testing.assert_array_equal(out, expected) + + def test_backward(self): + layer = Multiply() + a = np.array([[1.0, 2.0], [3.0, 4.0]]) + b = np.array([[5.0, 6.0], [7.0, 8.0]]) + layer.forward([a, b]) + grad = np.array([[1.0, 1.0], [1.0, 1.0]]) + grads = layer.backward(grad) + assert len(grads) == 2 + np.testing.assert_array_equal(grads[0], b) + np.testing.assert_array_equal(grads[1], a) + + def test_compute_output_shape(self): + layer = Multiply() + shape = layer.compute_output_shape([(None, 32), (None, 32)]) + assert shape == (None, 32) + + def test_compute_output_shape_single(self): + layer = Multiply() + shape = layer.compute_output_shape((16, 32)) + assert shape == (16, 32) + + +class TestAverage: + def test_forward_and_backward(self): + layer = Average() + a = np.array([[1.0, 3.0], [5.0, 7.0]]) + b = np.array([[2.0, 4.0], [6.0, 8.0]]) + out = layer.forward([a, b]) + expected = (a + b) / 2 + np.testing.assert_array_equal(out, expected) + + grad = np.array([[1.0, 1.0], [1.0, 1.0]]) + grads = layer.backward(grad) + assert len(grads) == 2 + np.testing.assert_array_equal(grads[0], grad / 2) + np.testing.assert_array_equal(grads[1], grad / 2) + + def test_compute_output_shape(self): + layer = Average() + assert layer.compute_output_shape([(None, 32), (None, 32)]) == (None, 32) + assert layer.compute_output_shape((16, 32)) == (16, 32) + + +class TestMaximum: + def test_forward_maximum(self): + layer = Maximum() + a = np.array([[1.0, 5.0], [3.0, 2.0]]) + b = np.array([[4.0, 2.0], [1.0, 6.0]]) + out = layer.forward([a, b]) + expected = np.maximum(a, b) + np.testing.assert_array_equal(out, expected) + + def test_backward_maximum(self): + layer = Maximum() + a = np.array([[1.0, 5.0], [3.0, 2.0]]) + b = np.array([[4.0, 2.0], [1.0, 6.0]]) + layer.forward([a, b]) + grad = np.array([[1.0, 1.0], [1.0, 1.0]]) + grads = layer.backward(grad) + assert len(grads) == 2 + expected_grad_a = np.array([[0.0, 1.0], [1.0, 0.0]]) + expected_grad_b = np.array([[1.0, 0.0], [0.0, 1.0]]) + np.testing.assert_array_equal(grads[0], expected_grad_a) + np.testing.assert_array_equal(grads[1], expected_grad_b) + + def test_compute_output_shape(self): + layer = Maximum() + assert layer.compute_output_shape([(None, 32), (None, 32)]) == (None, 32) + assert layer.compute_output_shape((16, 32)) == (16, 32) + + +class TestMinimum: + def test_forward_minimum(self): + layer = Minimum() + a = np.array([[1.0, 5.0], [3.0, 2.0]]) + b = np.array([[4.0, 2.0], [1.0, 6.0]]) + out = layer.forward([a, b]) + expected = np.minimum(a, b) + np.testing.assert_array_equal(out, expected) + + def test_backward_minimum(self): + layer = Minimum() + a = np.array([[1.0, 5.0], [3.0, 2.0]]) + b = np.array([[4.0, 2.0], [1.0, 6.0]]) + layer.forward([a, b]) + grad = np.array([[1.0, 1.0], [1.0, 1.0]]) + grads = layer.backward(grad) + assert len(grads) == 2 + expected_grad_a = np.array([[1.0, 0.0], [0.0, 1.0]]) + expected_grad_b = np.array([[0.0, 1.0], [1.0, 0.0]]) + np.testing.assert_array_equal(grads[0], expected_grad_a) + np.testing.assert_array_equal(grads[1], expected_grad_b) + + def test_compute_output_shape(self): + layer = Minimum() + assert layer.compute_output_shape([(None, 32), (None, 32)]) == (None, 32) + assert layer.compute_output_shape((16, 32)) == (16, 32) + + +class TestAddComputeOutputShape: + def test_compute_output_shape_non_list(self): + layer = Add() + assert layer.compute_output_shape((16, 32)) == (16, 32) + + +class TestConcatenateComputeOutputShape: + def test_compute_output_shape_non_list(self): + layer = Concatenate() + assert layer.compute_output_shape((16, 32)) == (16, 32) diff --git a/tests/layers/core/test_reparameterization.py b/tests/layers/core/test_reparameterization.py index 2245d4f..5e18d37 100644 --- a/tests/layers/core/test_reparameterization.py +++ b/tests/layers/core/test_reparameterization.py @@ -23,3 +23,39 @@ def test_reparameterization(): assert len(grads) == 2 assert grads[0].shape == (10, 5) # grad_mean assert grads[1].shape == (10, 5) # grad_log_var + +def test_reparameterization_compute_output_shape(): + layer = Reparameterization() + shape = layer.compute_output_shape([(10, 5), (10, 5)]) + assert shape == (10, 5) + +def test_reparameterization_compute_output_shape_single(): + layer = Reparameterization() + shape = layer.compute_output_shape((10, 5)) + assert shape == (10, 5) + +def test_reparameterization_backward_shapes(): + layer = Reparameterization() + mean = np.random.randn(4, 8) + log_var = np.random.randn(4, 8) + layer.forward([mean, log_var], training=True) + + grad_output = np.random.randn(4, 8) + grads = layer.backward(grad_output) + + assert len(grads) == 2 + assert grads[0].shape == (4, 8) + assert grads[1].shape == (4, 8) + +def test_reparameterization_backward_values(): + layer = Reparameterization() + mean = np.zeros((3, 2)) + log_var = np.ones((3, 2)) # var = exp(log_var) = e + + layer.forward([mean, log_var], training=True) + grad_output = np.ones((3, 2)) + + grads = layer.backward(grad_output) + assert np.allclose(grads[0], np.ones((3, 2))) + expected_log_var = np.ones((3, 2)) * np.exp(0.5) * 0.5 * layer.epsilon + assert np.allclose(grads[1], expected_log_var) diff --git a/tests/layers/embedding/test_time_embedding.py b/tests/layers/embedding/test_time_embedding.py index da3bd5e..c40ed4d 100644 --- a/tests/layers/embedding/test_time_embedding.py +++ b/tests/layers/embedding/test_time_embedding.py @@ -22,3 +22,25 @@ def test_time_embedding_backward_shape(): assert grad.shape == t.shape assert np.all(grad == 0) + +def test_time_embedding_build(): + layer = TimeEmbedding(dim=128) + layer.build((4,)) + assert layer.built + +def test_time_embedding_compute_output_shape(): + layer = TimeEmbedding(dim=256) + shape = layer.compute_output_shape((4,)) + assert shape == (4, 256) + +def test_time_embedding_2d_input(): + layer = TimeEmbedding(dim=64) + t = np.array([[0], [10], [50], [100]]) + out = layer.forward(t) + assert out.shape == (4, 64) + +def test_time_embedding_odd_dim(): + layer = TimeEmbedding(dim=129) + t = np.array([0, 10, 50]) + out = layer.forward(t) + assert out.shape == (3, 129) diff --git a/tests/layers/normalization/test_batchnorm.py b/tests/layers/normalization/test_batchnorm.py index 04264ea..7a0ff0f 100644 --- a/tests/layers/normalization/test_batchnorm.py +++ b/tests/layers/normalization/test_batchnorm.py @@ -29,3 +29,17 @@ def test_batch_norm_backward(): assert grad_input.shape == (batch, c) assert layer.grads['gamma'].shape == (c,) assert layer.grads['beta'].shape == (c,) + +def test_batch_norm_inference(): + batch, c = 4, 3 + layer = BatchNormalization(momentum=0.5) + layer.build((batch, c)) + + inputs = np.random.randn(batch, c) * 10 + 5 + out_train = layer.forward(inputs, training=True) + assert out_train.shape == (batch, c) + + out_infer = layer.forward(inputs, training=False) + assert out_infer.shape == (batch, c) + assert np.allclose(layer.running_mean, 0.5 * 0 + 0.5 * np.mean(inputs, axis=0), atol=1e-6) + assert np.allclose(layer.running_var, 0.5 * 1 + 0.5 * np.var(inputs, axis=0), atol=1e-6) diff --git a/tests/layers/pooling/test_global_pooling.py b/tests/layers/pooling/test_global_pooling.py index 5c66984..48ec46b 100644 --- a/tests/layers/pooling/test_global_pooling.py +++ b/tests/layers/pooling/test_global_pooling.py @@ -53,3 +53,21 @@ def test_global_pooling_channels_first(): assert max_out.shape == (2, 3) assert max_out[0, 0] == np.max(inputs[0, 0, :, :]) assert max_layer.backward(np.random.randn(2, 3)).shape == inputs.shape + +def test_global_avg_pooling_invalid_data_format(): + with pytest.raises(ValueError, match="data_format must be"): + GlobalAveragePooling2D(data_format='invalid') + +def test_global_max_pooling_invalid_data_format(): + with pytest.raises(ValueError, match="data_format must be"): + GlobalMaxPooling2D(data_format='invalid') + +def test_global_avg_pooling_compute_output_shape(): + layer = GlobalAveragePooling2D(data_format='channels_last') + shape = layer.compute_output_shape((2, 8, 8, 3)) + assert shape == (2, 3) + +def test_global_max_pooling_compute_output_shape(): + layer = GlobalMaxPooling2D(data_format='channels_last') + shape = layer.compute_output_shape((2, 8, 8, 3)) + assert shape == (2, 3) diff --git a/tests/models/test_model_coverage.py b/tests/models/test_model_coverage.py new file mode 100644 index 0000000..f63e4bc --- /dev/null +++ b/tests/models/test_model_coverage.py @@ -0,0 +1,166 @@ +import numpy as np +import inspect +from neutro.models import Model, Sequential +from neutro.layers import Dense, Input, ReLU, Dropout +from neutro.layers.base import Layer +from neutro.layers.transformer.transformer_block import TransformerBlock +from neutro.layers.attention.kv_cache import KVCache +from neutro.optimizers import SGD + + +class SubclassedModel(Model): + def __init__(self, name=None): + super().__init__(name=name) + self.dense1 = Dense(8) + self.relu = ReLU() + self.dense2 = Dense(4) + + def forward(self, inputs, training=False): + x = self.dense1(inputs) + x = self.relu(x) + return self.dense2(x) + + def build(self, input_shape): + self.dense1.build(input_shape) + shape = self.dense1.compute_output_shape(input_shape) + self.relu.build(shape) + shape = self.relu.compute_output_shape(shape) + self.dense2.build(shape) + self.built = True + + +# 1. _init_graph with single output (not a list) — lines 48, 55 +def test_init_graph_single_output(): + inputs = Input(shape=(10,)) + x = Dense(5, activation='relu')(inputs) + outputs = Dense(3)(x) + model = Model(inputs=inputs, outputs=outputs) + assert len(model._nodes_ordered) > 0 + assert len(model.layers) > 0 + + +# 2. _get_all_layers without arguments — lines 72-73 +def test_get_all_layers_no_args(): + model = Sequential([Dense(10), Dense(5)]) + all_layers = model._get_all_layers() + assert len(all_layers) == 2 + + +# 3. evaluate with metrics — lines 531-536 +def test_evaluate_with_metrics(): + model = Sequential([Dense(8, input_shape=(4,)), Dense(2)]) + model.compile(optimizer=SGD(0.01), loss='mse', metrics=['accuracy']) + x = np.random.rand(10, 4) + y = np.random.rand(10, 2) + model.fit(x, y, epochs=1, batch_size=4, verbose=0) + results = model.evaluate(x, y) + assert 'loss' in results + assert 'accuracy' in results + + +# 4. fit with validation_data — validation loss/metrics path +def test_fit_with_validation_data(): + model = Sequential([Dense(8, input_shape=(4,)), Dense(2)]) + model.compile(optimizer=SGD(0.01), loss='mse', metrics=['accuracy']) + x = np.random.rand(10, 4) + y = np.random.rand(10, 2) + val_data = (np.random.rand(5, 4), np.random.rand(5, 2)) + history = model.fit(x, y, epochs=2, batch_size=5, verbose=0, validation_data=val_data) + assert 'val_loss' in history.history + assert 'val_accuracy' in history.history + + +# 5. backward functional path with single input/output — lines 426, 440, 465-470 +def test_backward_functional_single_io(): + inputs = Input(shape=(4,)) + x = Dense(8, activation='relu')(inputs) + outputs = Dense(2)(x) + model = Model(inputs=inputs, outputs=outputs) + model.compile(optimizer=SGD(0.01), loss='mse') + x_data = np.random.rand(10, 4) + y_data = np.random.rand(10, 2) + model.fit(x_data, y_data, epochs=1, batch_size=4, verbose=0) + for layer in model.layers: + if layer.grads: + for k, v in layer.grads.items(): + assert not np.allclose(v, 0) + + +# 6. build for subclassed model — lines 507-510 +def test_build_subclassed_model(): + model = SubclassedModel() + assert not model.built + model.build((None, 6)) + assert model.built + x = np.random.rand(5, 6) + y = model.forward(x) + assert y.shape == (5, 4) + + +# 7. summary on functional model — "Connected to" column +def test_summary_functional_model(capsys): + inputs = Input(shape=(10,)) + x = Dense(5, activation='relu')(inputs) + outputs = Dense(3)(x) + model = Model(inputs=inputs, outputs=outputs) + model.summary() + captured = capsys.readouterr() + assert 'Connected to' in captured.out + assert 'Total params' in captured.out + + +# 8. clear_layer_grads — static method clears recursively +def test_clear_layer_grads(): + model = Sequential([Dense(5, input_shape=(3,)), Dense(2)]) + model.build((None, 3)) + for layer in model.layers: + for k in layer.params: + layer.grads[k] = np.random.randn(*layer.params[k].shape) + Model._clear_layer_grads(model) + for layer in model.layers: + assert layer.grads == {} + + +# 9a. _accumulate_layer_grads — new key path (line 157) +def test_accumulate_layer_grads_new_key(): + layer = Dense(5, input_shape=(3,)) + layer.build((None, 3)) + layer.grads['W'] = np.ones(layer.params['W'].shape) + layer.grads['b'] = np.ones(layer.params['b'].shape) + accumulator = {} + Model._accumulate_layer_grads(layer, accumulator) + l_id = id(layer) + assert l_id in accumulator + assert np.all(accumulator[l_id]['W'] == 1.0) + assert np.all(accumulator[l_id]['b'] == 1.0) + + +# 9b. _accumulate_layer_grads — existing key path (line 154-155) +def test_accumulate_layer_grads_existing_key(): + layer = Dense(5, input_shape=(3,)) + layer.build((None, 3)) + + layer.grads['W'] = np.ones(layer.params['W'].shape) * 2 + layer.grads['b'] = np.ones(layer.params['b'].shape) * 2 + + accumulator = {} + l_id = id(layer) + accumulator[l_id] = { + 'W': np.ones(layer.params['W'].shape), + 'b': np.ones(layer.params['b'].shape) + } + + Model._accumulate_layer_grads(layer, accumulator) + + assert np.all(accumulator[l_id]['W'] == 3.0) + assert np.all(accumulator[l_id]['b'] == 3.0) + + +# 10. forward sequential with kv_cache — lines 370-377 +def test_forward_sequential_with_kv_cache(): + block = TransformerBlock(embed_dim=8, num_heads=2, ff_dim=16, use_flash=True, causal=True, pre_norm=True) + model = Sequential([block]) + x = np.random.rand(2, 4, 8) + cache = KVCache() + output = model.forward(x, training=False, kv_cache=cache) + assert output.shape == (2, 4, 8) diff --git a/tests/models/test_model_coverage2.py b/tests/models/test_model_coverage2.py new file mode 100644 index 0000000..8ebc76f --- /dev/null +++ b/tests/models/test_model_coverage2.py @@ -0,0 +1,156 @@ +import numpy as np +from neutro.models import Model, Sequential +from neutro.layers import Dense, Input +from neutro.layers.base import Layer + + +class ContainerLayer(Layer): + def __init__(self, units=5): + super().__init__() + self.dense = Dense(units) + + def forward(self, x, training=False): + return self.dense(x, training=training) + + def backward(self, grad_output): + return self.dense.backward(grad_output) + + +class DoubleRefLayer(Layer): + def __init__(self, units=5): + super().__init__() + self.inner = Dense(units) + self.inner_copy = self.inner + + def forward(self, x, training=False): + return self.inner(x, training=training) + + def backward(self, grad_output): + return self.inner.backward(grad_output) + + +class SubclassNoBuild(Model): + def __init__(self): + super().__init__() + self.dense = Dense(10) + + def forward(self, x, training=False): + return self.dense(x, training=training) + + +class BrokenLayer(Layer): + def compute_output_shape(self, input_shape): + raise ValueError("broken") + + +def test_clear_layer_grads_with_sublayers(): + model = Sequential([ContainerLayer(4), Dense(3)]) + model.build((None, 4)) + x = np.random.rand(2, 4) + out = model.forward(x, training=True) + grad = np.random.rand(2, 3) + model.backward(grad) + container = model.layers[0] + assert len(container.dense.grads) > 0 + Model._clear_layer_grads(model) + assert len(container.dense.grads) == 0 + + +def test_accumulate_layer_grads_visited_check(): + layer = DoubleRefLayer(5) + layer.build((None, 4)) + layer.inner.build((None, 4)) + layer.grads['W'] = np.ones((4, 5)) + layer.inner.grads['W'] = np.ones((4, 5)) * 2 + accumulator = {} + Model._accumulate_layer_grads(layer, accumulator) + double_ref_id = id(layer) + inner_id = id(layer.inner) + assert double_ref_id in accumulator + assert inner_id in accumulator + assert np.all(accumulator[double_ref_id]['W'] == 1.0) + assert np.all(accumulator[inner_id]['W'] == 2.0) + + +def test_restore_layer_state_with_sublayers(): + container = ContainerLayer(5) + container.build((None, 4)) + container.dense.build((None, 4)) + container.dense.custom_attr = "original" + state = Model._capture_layer_state(container) + container.dense.custom_attr = "modified" + assert container.dense.custom_attr == "modified" + Model._restore_layer_state(container, state) + assert container.dense.custom_attr == "original" + + +def test_functional_compute_output_shape(): + inputs = Input(shape=(10,)) + x = Dense(5)(inputs) + outputs = Dense(3)(x) + model = Model(inputs=inputs, outputs=outputs) + shape = model.compute_output_shape((None, 10)) + assert shape == (None, 3) + + +def test_functional_build(): + inputs = Input(shape=(10,)) + x = Dense(5)(inputs) + outputs = Dense(3)(x) + model = Model(inputs=inputs, outputs=outputs) + model.build((None, 10)) + assert model.built is True + + +def test_backward_functional_single_output(): + inputs = Input(shape=(10,)) + x = Dense(5, activation='relu')(inputs) + outputs = Dense(3)(x) + model = Model(inputs=inputs, outputs=outputs) + x_data = np.random.rand(4, 10) + y = model.forward(x_data, training=True) + grad = np.random.rand(4, 3) + grad_inputs = model.backward(grad) + assert grad_inputs.shape == (4, 10) + + +def test_subclassed_model_build_no_override(): + model = SubclassNoBuild() + model.build((None, 5)) + assert model.built is True + assert model.input_shape == (None, 5) + + +def test_sequential_forward_without_kv_cache(): + model = Sequential([Dense(5, input_shape=(10,)), Dense(3)]) + x = np.random.rand(4, 10) + out = model.forward(x, training=False, kv_cache=None) + assert out.shape == (4, 3) + + +def test_sequential_add_with_input_shape(): + model = Sequential() + dense = Dense(5, input_shape=(10,)) + model.add(dense) + assert len(model.layers) == 1 + assert model.layers[0].built + + +def test_summary_unbuilt_layer(capsys): + model = Sequential() + layer = Dense(5) + model.layers.append(layer) + model.summary() + captured = capsys.readouterr() + assert "unbuilt" in captured.out + + +def test_summary_exception_built(capsys): + model = Sequential() + layer = BrokenLayer() + layer.built = True + layer.input_shape = (None, 10) + model.layers.append(layer) + model.summary() + captured = capsys.readouterr() + assert "multiple" in captured.out diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 948771e..cc7287b 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -58,3 +58,47 @@ def test_image_data_generator_channels_first(): assert batch_x.shape == (2, 3, 8, 8) assert batch_y.shape == (2,) np.testing.assert_allclose(batch_x, x[:2] / 255.0, atol=1e-5) + +def test_image_data_generator_invalid_data_format(): + with pytest.raises(ValueError, match="data_format must be"): + ImageDataGenerator(data_format='invalid') + +def test_image_data_generator_vertical_flip(): + img = np.zeros((32, 32, 3)) + img[0, :, 0] = 1.0 + + datagen = ImageDataGenerator(vertical_flip=True) + np.random.seed(42) + + flipped = False + for _ in range(10): + transformed = datagen.apply_transform(img) + if np.all(transformed[-1, :, 0] == 1.0): + flipped = True + break + assert flipped + +def test_image_data_generator_width_height_shift(): + img = np.zeros((32, 32, 3)) + img[16, 16, :] = 1.0 + + datagen = ImageDataGenerator(width_shift_range=0.5, height_shift_range=0.5) + np.random.seed(0) + transformed = datagen.apply_transform(img) + + assert transformed.shape == (32, 32, 3) + +def test_image_data_generator_channels_first_vertical_flip(): + img = np.zeros((3, 32, 32)) + img[:, 0, :] = 1.0 + + datagen = ImageDataGenerator(vertical_flip=True, data_format='channels_first') + np.random.seed(42) + + flipped = False + for _ in range(10): + transformed = datagen.apply_transform(img) + if np.all(transformed[:, -1, :] == 1.0): + flipped = True + break + assert flipped diff --git a/tests/test_preprocessing_sequence.py b/tests/test_preprocessing_sequence.py new file mode 100644 index 0000000..9561690 --- /dev/null +++ b/tests/test_preprocessing_sequence.py @@ -0,0 +1,106 @@ +import numpy as np +import pytest +from neutro.preprocessing.sequence import pad_sequences + + +class TestPadSequencesBasic: + def test_padding_pre_default(self): + sequences = [[1, 2], [3, 4, 5]] + result = pad_sequences(sequences, maxlen=3) + expected = np.array([[0, 1, 2], [3, 4, 5]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + def test_padding_post(self): + sequences = [[1, 2], [3, 4, 5]] + result = pad_sequences(sequences, maxlen=3, padding="post") + expected = np.array([[1, 2, 0], [3, 4, 5]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + +class TestPadSequencesTruncating: + def test_truncating_pre(self): + sequences = [[1, 2, 3, 4, 5]] + result = pad_sequences(sequences, maxlen=3, truncating="pre") + expected = np.array([[3, 4, 5]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + def test_truncating_post(self): + sequences = [[1, 2, 3, 4, 5]] + result = pad_sequences(sequences, maxlen=3, truncating="post") + expected = np.array([[1, 2, 3]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + +class TestPadSequencesMaxlen: + def test_custom_maxlen_shorter_than_longest(self): + sequences = [[1, 2, 3, 4, 5], [1, 2]] + result = pad_sequences(sequences, maxlen=3) + expected = np.array([[3, 4, 5], [0, 1, 2]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + def test_maxlen_none_auto_detect(self): + sequences = [[1, 2], [3, 4, 5, 6], [7]] + result = pad_sequences(sequences, maxlen=None) + expected = np.array( + [[0, 0, 1, 2], [3, 4, 5, 6], [0, 0, 0, 7]], dtype="int32" + ) + np.testing.assert_array_equal(result, expected) + + +class TestPadSequencesDtypeAndValue: + def test_custom_dtype(self): + sequences = [[1, 2], [3, 4, 5]] + result = pad_sequences(sequences, maxlen=3, dtype="float32") + assert result.dtype == np.float32 + expected = np.array([[0, 1, 2], [3, 4, 5]], dtype="float32") + np.testing.assert_array_equal(result, expected) + + def test_custom_padding_value(self): + sequences = [[1, 2], [3, 4, 5]] + result = pad_sequences(sequences, maxlen=3, value=99) + expected = np.array([[99, 1, 2], [3, 4, 5]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + +class TestPadSequencesEdgeCases: + def test_empty_sequence_in_list(self): + sequences = [[1, 2, 3], [], [4, 5]] + result = pad_sequences(sequences, maxlen=3) + expected = np.array([[1, 2, 3], [0, 0, 0], [0, 4, 5]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + +class TestPadSequencesErrors: + def test_invalid_truncating_type(self): + sequences = [[1, 2, 3]] + with pytest.raises(ValueError, match='Truncating type "middle" not understood'): + pad_sequences(sequences, maxlen=2, truncating="middle") + + def test_invalid_padding_type(self): + sequences = [[1, 2, 3]] + with pytest.raises(ValueError, match='Padding type "middle" not understood'): + pad_sequences(sequences, maxlen=2, padding="middle") + + +class TestPadSequencesMixed: + def test_padding_pre_with_truncating_post(self): + sequences = [[1, 2, 3, 4, 5], [1, 2]] + result = pad_sequences(sequences, maxlen=3, truncating="post") + expected = np.array([[1, 2, 3], [0, 1, 2]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + def test_padding_post_with_truncating_pre(self): + sequences = [[1, 2, 3, 4, 5], [1, 2]] + result = pad_sequences( + sequences, maxlen=3, padding="post", truncating="pre" + ) + expected = np.array([[3, 4, 5], [1, 2, 0]], dtype="int32") + np.testing.assert_array_equal(result, expected) + + def test_padding_post_with_truncating_post(self): + sequences = [[1, 2, 3, 4, 5], [1, 2]] + result = pad_sequences( + sequences, maxlen=3, padding="post", truncating="post" + ) + expected = np.array([[1, 2, 3], [1, 2, 0]], dtype="int32") + np.testing.assert_array_equal(result, expected) diff --git a/tests/test_preprocessing_text.py b/tests/test_preprocessing_text.py new file mode 100644 index 0000000..ca29d50 --- /dev/null +++ b/tests/test_preprocessing_text.py @@ -0,0 +1,114 @@ +import pytest +from neutro.preprocessing.text import Tokenizer + + +class TestTokenizerFitOnTexts: + def test_fit_on_texts_basic_lowercase(self): + t = Tokenizer() + t.fit_on_texts(["Hello World", "Hello Keras"]) + assert t.word_index == {"hello": 1, "world": 2, "keras": 3} + assert t.index_word[1] == "hello" + assert t.word_counts == {"hello": 2, "world": 1, "keras": 1} + + def test_fit_on_texts_with_oov_token(self): + t = Tokenizer(oov_token="") + t.fit_on_texts(["cat dog", "cat fish"]) + assert t.word_index[""] == 1 + assert t.index_word[1] == "" + assert t.word_index["cat"] == 2 + assert t.word_index["dog"] == 3 + + def test_fit_on_texts_num_words_limit(self): + t = Tokenizer(num_words=2, oov_token="") + t.fit_on_texts(["apple banana cherry", "apple banana date"]) + # oov_token gets index 1, "apple" gets 2, "banana" gets 3 + # num_words=2 means only index < 2 is kept? No — sorted_words is trimmed to 2 words, + # but oov_token is added outside that. So we get: oov=1, apple=2, banana=3, + # but sorted_words only had 2 entries. With num_words=2, sorted_words[:2] = [apple, banana] + # So word_index has: :1, apple:2, banana:3 + assert "" in t.word_index + assert "apple" in t.word_index + assert "banana" in t.word_index + assert "cherry" not in t.word_index + assert "date" not in t.word_index + + def test_fit_on_texts_empty_list(self): + t = Tokenizer() + t.fit_on_texts([]) + assert t.word_index == {} + assert t.index_word == {} + + +class TestTokenizerTextsToSequences: + def test_texts_to_sequences_basic(self): + t = Tokenizer() + t.fit_on_texts(["the cat sat", "the dog ran"]) + seqs = t.texts_to_sequences(["the cat sat"]) + assert seqs == [[1, 2, 3]] + + def test_texts_to_sequences_with_oov(self): + t = Tokenizer(oov_token="") + t.fit_on_texts(["cat dog fish"]) + seqs = t.texts_to_sequences(["cat bird dog"]) + # cat=2, dog=3, bird is unknown -> oov=1 + assert seqs == [[2, 1, 3]] + + def test_texts_to_sequences_num_words_filters_to_oov(self): + t = Tokenizer(num_words=2, oov_token="") + t.fit_on_texts(["apple banana cherry", "apple banana date"]) + seqs = t.texts_to_sequences(["apple banana cherry"]) + # =1, apple=2, banana=3 + # num_words=2, so indices >= 2 are filtered to OOV + # apple (2) >= 2 -> OOV, banana (3) >= 2 -> OOV, cherry unknown -> OOV + assert seqs == [[1, 1, 1]] + + def test_texts_to_sequences_empty_words(self): + t = Tokenizer() + t.fit_on_texts(["hello world"]) + seqs = t.texts_to_sequences([" hello world "]) + assert seqs == [[1, 2]] + + +class TestTokenizerSequencesToTexts: + def test_sequences_to_texts_basic(self): + t = Tokenizer() + t.fit_on_texts(["hello world"]) + texts = t.sequences_to_texts([[1, 2]]) + assert texts == ["hello world"] + + def test_sequences_to_texts_unknown_index(self): + t = Tokenizer() + t.fit_on_texts(["hello world"]) + texts = t.sequences_to_texts([[1, 999]]) + assert texts == ["hello ?"] + + +class TestTokenizerGetConfig: + def test_get_config_returns_all_keys(self): + t = Tokenizer(num_words=10, oov_token="", lower=False) + t.fit_on_texts(["hello world"]) + config = t.get_config() + assert config["num_words"] == 10 + assert config["oov_token"] == "" + assert config["lower"] is False + assert config["split"] == " " + assert "filters" in config + assert "word_index" in config + assert "index_word" in config + + +class TestTokenizerNoLowercase: + def test_fit_on_texts_without_lowercase(self): + t = Tokenizer(lower=False) + t.fit_on_texts(["Hello World"]) + assert "Hello" in t.word_index + assert "hello" not in t.word_index + assert "World" in t.word_index + + def test_texts_to_sequences_without_lowercase(self): + t = Tokenizer(lower=False) + t.fit_on_texts(["Hello World"]) + seqs = t.texts_to_sequences(["Hello World"]) + assert seqs == [[1, 2]] + seqs_mismatch = t.texts_to_sequences(["hello world"]) + assert seqs_mismatch == [[]] diff --git a/tests/tokenizers/test_tiktoken_compat.py b/tests/tokenizers/test_tiktoken_compat.py index 6f94136..b4c2208 100644 --- a/tests/tokenizers/test_tiktoken_compat.py +++ b/tests/tokenizers/test_tiktoken_compat.py @@ -51,3 +51,46 @@ def test_tiktoken_compatible_tokenizer(): encoded_special = tokenizer.encode(text_with_special, allowed_special="all") assert 1000 in encoded_special assert tokenizer.decode(encoded_special) == text_with_special + + +from unittest.mock import patch, MagicMock + + +@patch('urllib.request.urlopen') +@patch('os.path.exists') +@patch('os.makedirs') +@patch('tempfile.gettempdir') +@patch('builtins.open', new_callable=MagicMock) +def test_load_tiktoken_bpe_url(mock_file_open, mock_gettempdir, mock_makedirs, mock_exists, mock_urlopen): + import base64 + + mock_gettempdir.return_value = "/tmp" + mock_exists.return_value = False + + content = base64.b64encode(b"hello") + b" 258\n" + base64.b64encode(b"world") + b" 259\n" + + mock_response = MagicMock() + mock_response.read.return_value = content + mock_response.__enter__.return_value = mock_response + mock_urlopen.return_value = mock_response + + mock_file = MagicMock() + mock_file.__enter__.return_value.read.return_value = content + mock_file_open.return_value = mock_file + + from neutro.tokenizers.tiktoken_compat import load_tiktoken_bpe + ranks = load_tiktoken_bpe("https://example.com/test.tiktoken") + assert len(ranks) == 2 + assert ranks[b"hello"] == 258 + assert ranks[b"world"] == 259 + + +@patch('neutro.tokenizers.tiktoken_compat.load_tiktoken_bpe') +def test_get_gpt2_tokenizer(mock_load_bpe): + mock_load_bpe.return_value = {} + + from neutro.tokenizers.tiktoken_compat import get_gpt2_tokenizer + tokenizer = get_gpt2_tokenizer() + assert tokenizer is not None + assert tokenizer.special_tokens["<|endoftext|>"] == 50256 + mock_load_bpe.assert_called_once() diff --git a/tests/utils/test_data_utils.py b/tests/utils/test_data_utils.py new file mode 100644 index 0000000..1e7ed54 --- /dev/null +++ b/tests/utils/test_data_utils.py @@ -0,0 +1,42 @@ +import numpy as np +import pytest +import json +from unittest.mock import patch, MagicMock +from neutro.utils.data_utils import load_imdb, get_imdb_word_index + + +@patch('neutro.utils.data_utils.download_file') +@patch('numpy.load') +@patch('os.path.expanduser') +def test_load_imdb(mock_expanduser, mock_load, mock_download): + mock_expanduser.return_value = "/tmp" + mock_data = MagicMock() + mock_data.__enter__.return_value = { + 'x_train': np.zeros((100,)), + 'y_train': np.zeros(100), + 'x_test': np.zeros((20,)), + 'y_test': np.zeros(20) + } + mock_load.return_value = mock_data + + (x_train, y_train), (x_test, y_test) = load_imdb() + assert x_train.shape == (100,) + assert y_train.shape == (100,) + assert x_test.shape == (20,) + mock_download.assert_called_once() + + +@patch('neutro.utils.data_utils.download_file') +@patch('builtins.open', new_callable=MagicMock) +@patch('json.load') +@patch('os.path.expanduser') +def test_get_imdb_word_index(mock_expanduser, mock_json_load, mock_open, mock_download): + mock_expanduser.return_value = "/tmp" + mock_json_load.return_value = {"the": 1, "and": 2, "a": 3} + mock_file = MagicMock() + mock_file.__enter__.return_value = mock_file + mock_open.return_value = mock_file + + word_index = get_imdb_word_index() + assert word_index == {"the": 1, "and": 2, "a": 3} + mock_download.assert_called_once() From d71d3d6bfefe17109cda4bd487df6c7584a1747c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 31 May 2026 05:20:34 +0000 Subject: [PATCH 2/2] Fix dropout inference mask regression coverage and docs --- docs/layers/core/core_utility_layers.md | 9 +++++++++ neutro/layers/core/dropout.py | 1 + tests/layers/core/test_dropout.py | 1 + 3 files changed, 11 insertions(+) diff --git a/docs/layers/core/core_utility_layers.md b/docs/layers/core/core_utility_layers.md index 44065f3..16c43f3 100644 --- a/docs/layers/core/core_utility_layers.md +++ b/docs/layers/core/core_utility_layers.md @@ -60,6 +60,15 @@ def backward(self, grad_output): 🔍 **Line `if self.mask is None`**: If we never called forward (or called it with `training=False`), there's no mask. In that case, the gradient passes through unchanged — just like the forward pass. +#### `compute_output_shape` + +```python +def compute_output_shape(self, input_shape): + return input_shape +``` + +Dropout does not change tensor rank or dimensions; it only masks values during training. So the output shape is always identical to the input shape. + --- ## Flatten — `neutro/layers/core/flatten.py` diff --git a/neutro/layers/core/dropout.py b/neutro/layers/core/dropout.py index abbdf63..975ae29 100644 --- a/neutro/layers/core/dropout.py +++ b/neutro/layers/core/dropout.py @@ -9,6 +9,7 @@ def __init__(self, rate, **kwargs): def forward(self, inputs, training=False): if not training or self.rate == 0: + self.mask = None return inputs self.mask = np.random.binomial(1, 1 - self.rate, size=inputs.shape) / (1 - self.rate) return inputs * self.mask diff --git a/tests/layers/core/test_dropout.py b/tests/layers/core/test_dropout.py index 64418e8..c1ef38d 100644 --- a/tests/layers/core/test_dropout.py +++ b/tests/layers/core/test_dropout.py @@ -71,6 +71,7 @@ def test_dropout_backward_inference(): x = np.random.rand(10, 10) grad = np.random.rand(10, 10) + layer.forward(x, training=True) layer.forward(x, training=False) dx = layer.backward(grad) assert np.all(dx == grad)