⚡️ Speed up function _print_matrix by 9%
#13
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 9% (0.09x) speedup for `_print_matrix` in `spacy/cli/debug_model.py`
⏱️ Runtime: 95.4 microseconds → 87.4 microseconds (best of 119 runs)
📝 Explanation and details
The optimized code achieves a 9% speedup through three key micro-optimizations that reduce Python interpreter overhead:
What was optimized:
- `value.shape` and `value.ndim` are stored in local variables instead of being accessed repeatedly
Why these optimizations work:
- `value.shape` and `value.ndim` multiple times; caching reduces these expensive lookups
- `range()` function creates an iterator object with additional method calls. A simple while loop with integer comparison is faster for small iteration counts
- `+` concatenation
Performance characteristics:
The optimizations are most effective for:
Real-world impact:
This function appears to be used for debugging/diagnostic purposes in spaCy's model inspection tools. While the absolute time savings are small (microseconds), the optimizations are particularly valuable when called repeatedly during model analysis or in debugging workflows where this function might be invoked many times.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import types
# imports
import pytest
from spacy.cli.debug_model import _print_matrix
# --- Helper Classes for Testing ---
class FakeMatrix:
    """A minimal mock object to simulate a matrix-like object with shape, ndim, and __getitem__.

    Attributes are derived from a nested-list payload:

    * ``shape`` -- tuple of list lengths found by following the first element
      of each nesting level.
    * ``ndim`` -- number of entries in ``shape``.

    NOTE(review): only the constructor was visible in the extracted source;
    ``_calc_ndim``, ``_calc_shape`` and ``__getitem__`` are reconstructed so
    the class is usable -- confirm against the original test module.
    """

    def __init__(self, data):
        # data: nested list representing the matrix
        self._data = data
        self.ndim = self._calc_ndim(data)
        self.shape = self._calc_shape(data)

    def __getitem__(self, idx):
        """Delegate integer and slice indexing to the underlying nested list."""
        return self._data[idx]

    def _calc_ndim(self, data):
        """Return the nesting depth of ``data`` (the length of its shape)."""
        return len(self._calc_shape(data))

    def _calc_shape(self, data):
        """Return the list lengths along the first-element path as a tuple."""
        dims = []
        while isinstance(data, list):
            dims.append(len(data))
            if not data:
                # An empty list has no first element to descend into.
                break
            data = data[0]
        return tuple(dims)
# --- Unit Tests ---
# -------------------------------
# 1. Basic Test Cases
# -------------------------------
def test_none_input():
    """Should return None if input is None."""
    codeflash_output = _print_matrix(None)  # 488ns -> 449ns (8.69% faster)
def test_bool_input_true():
    """Should return True if input is True."""
    codeflash_output = _print_matrix(True)  # 454ns -> 494ns (8.10% slower)
def test_bool_input_false():
    """Should return False if input is False."""
    codeflash_output = _print_matrix(False)  # 437ns -> 455ns (3.96% slower)
def test_1d_matrix_basic():
    """Should handle 1D matrix and show shape and first 5 elements."""
    mat = FakeMatrix([1, 2, 3, 4, 5, 6, 7])
    codeflash_output = _print_matrix(mat)  # 3.77μs -> 3.04μs (24.0% faster)
    result = codeflash_output
def test_2d_matrix_basic():
    """Should handle 2D matrix and show shape and first 5 elements of first row."""
    mat = FakeMatrix([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]])
    codeflash_output = _print_matrix(mat)  # 3.06μs -> 2.58μs (18.3% faster)
    result = codeflash_output
def test_3d_matrix_basic():
    """Should handle 3D matrix and show shape and first 5 elements of first row of first matrix."""
    mat = FakeMatrix([
        [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
        [[13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24]],
    ])
    codeflash_output = _print_matrix(mat)  # 2.82μs -> 2.62μs (7.64% faster)
    result = codeflash_output
# -------------------------------
# 2. Edge Test Cases
# -------------------------------
def test_empty_matrix():
    """Should handle empty matrix gracefully."""
    mat = FakeMatrix([])
    codeflash_output = _print_matrix(mat)  # 2.41μs -> 2.05μs (17.3% faster)
    result = codeflash_output
def test_matrix_with_less_than_5_elements():
    """Should handle matrix with fewer than 5 elements."""
    mat = FakeMatrix([10, 20])
    codeflash_output = _print_matrix(mat)  # 2.57μs -> 2.31μs (10.9% faster)
    result = codeflash_output
def test_matrix_with_exactly_5_elements():
    """Should handle matrix with exactly 5 elements."""
    mat = FakeMatrix([1, 2, 3, 4, 5])
    codeflash_output = _print_matrix(mat)  # 2.50μs -> 2.13μs (17.5% faster)
    result = codeflash_output
def test_matrix_with_non_numeric_elements():
    """Should handle matrix with string elements."""
    mat = FakeMatrix(["a", "b", "c", "d", "e", "f"])
    codeflash_output = _print_matrix(mat)  # 2.83μs -> 2.58μs (10.1% faster)
    result = codeflash_output
def test_matrix_with_nested_empty_lists():
    """Should handle matrix with nested empty lists."""
    mat = FakeMatrix([[], [], []])
    codeflash_output = _print_matrix(mat)  # 2.77μs -> 2.23μs (24.1% faster)
    result = codeflash_output
def test_matrix_with_one_element():
    """Should handle matrix with only one element."""
    mat = FakeMatrix([42])
    codeflash_output = _print_matrix(mat)  # 2.44μs -> 1.97μs (23.9% faster)
    result = codeflash_output
def test_matrix_with_high_ndim():
    """Should handle high-dimensional matrix (4D)."""
    mat = FakeMatrix([[[[1, 2, 3, 4, 5, 6]]]])
    codeflash_output = _print_matrix(mat)  # 2.95μs -> 2.64μs (11.9% faster)
    result = codeflash_output
def test_matrix_with_tuple_elements():
    """Should handle matrix where elements are tuples."""
    mat = FakeMatrix([(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)])
    codeflash_output = _print_matrix(mat)  # 3.59μs -> 3.30μs (8.75% faster)
    result = codeflash_output
# -------------------------------
# 3. Large Scale Test Cases
# -------------------------------
def test_large_1d_matrix():
    """Should handle a large 1D matrix (length 1000) and only show first 5 elements."""
    mat = FakeMatrix(list(range(1000)))
    codeflash_output = _print_matrix(mat)  # 2.54μs -> 2.46μs (3.54% faster)
    result = codeflash_output
def test_large_2d_matrix():
    """Should handle a large 2D matrix (1000 x 10) and only show first 5 elements of first row."""
    mat = FakeMatrix([list(range(10)) for _ in range(1000)])
    codeflash_output = _print_matrix(mat)  # 3.01μs -> 2.85μs (5.46% faster)
    result = codeflash_output
def test_large_3d_matrix():
    """Should handle a large 3D matrix (10 x 10 x 100) and only show first 5 elements of first row of first matrix."""
    mat = FakeMatrix([[list(range(100)) for _ in range(10)] for _ in range(10)])
    codeflash_output = _print_matrix(mat)  # 2.90μs -> 2.83μs (2.65% faster)
    result = codeflash_output
def test_large_matrix_with_non_int_elements():
    """Should handle large matrix with non-integer (float) elements."""
    mat = FakeMatrix([float(i) for i in range(1000)])
    codeflash_output = _print_matrix(mat)  # 3.86μs -> 3.61μs (6.96% faster)
    result = codeflash_output
def test_large_matrix_with_strings():
    """Should handle large matrix with string elements."""
    mat = FakeMatrix([f"item{i}" for i in range(1000)])
    codeflash_output = _print_matrix(mat)  # 3.02μs -> 2.87μs (5.51% faster)
    result = codeflash_output
# -------------------------------
# 4. Negative and Robustness Test Cases
# -------------------------------
def test_input_is_integer():
    """Should raise AttributeError if input is not matrix-like (no shape/ndim)."""
    with pytest.raises(AttributeError):
        _print_matrix(123)  # 1.41μs -> 1.18μs (19.6% faster)
def test_input_is_string():
    """Should raise AttributeError if input is not matrix-like (no shape/ndim)."""
    with pytest.raises(AttributeError):
        _print_matrix("not a matrix")  # 1.21μs -> 1.15μs (5.84% faster)
def test_input_is_list():
    """Should raise AttributeError if input is a plain list."""
    with pytest.raises(AttributeError):
        _print_matrix([1, 2, 3])  # 1.28μs -> 1.14μs (11.5% faster)
def test_input_is_dict():
    """Should raise AttributeError if input is a dict."""
    with pytest.raises(AttributeError):
        _print_matrix({'a': 1, 'b': 2})  # 1.24μs -> 1.30μs (4.98% slower)
#------------------------------------------------
import pytest
from spacy.cli.debug_model import _print_matrix
# Helper class to simulate numpy-like arrays for testing (since numpy is not allowed)
class FakeArray:
    """Minimal array stand-in exposing ``data``, ``ndim``, ``shape`` and indexing.

    ``ndim`` and ``shape`` are derived from nested lists by following the
    first element of each level; any payload that is not a list is treated as
    zero-dimensional with an empty shape.
    """

    def __init__(self, data):
        self.data = data
        self.ndim = self._get_ndim(data)
        self.shape = self._get_shape(data)

    def __getitem__(self, idx):
        # Integer and slice indices are both forwarded unchanged to the payload.
        return self.data[idx]

    def _get_ndim(self, data):
        """Return the nesting depth of ``data``; 0 when it is not a list."""
        if not isinstance(data, list):
            return 0
        elif data and isinstance(data[0], list):
            return 1 + self._get_ndim(data[0])
        else:
            return 1

    def _get_shape(self, data):
        """Return the list lengths along the first-element path as a tuple."""
        if not isinstance(data, list):
            return ()
        shape = []
        while isinstance(data, list):
            shape.append(len(data))
            if data:
                data = data[0]
            else:
                # An empty list has no first element to descend into.
                break
        return tuple(shape)

    # For compatibility with sample_matrix[0:5]
    def __len__(self):
        return len(self.data)

    def __str__(self):
        return str(self.data)

    def __repr__(self):
        return repr(self.data)
# ------------------------
# Basic Test Cases
# ------------------------
def test_none_input():
    # Should return None if input is None
    codeflash_output = _print_matrix(None)  # 444ns -> 437ns (1.60% faster)
def test_bool_input_true():
    # Should return True if input is True
    codeflash_output = _print_matrix(True)  # 460ns -> 489ns (5.93% slower)
def test_bool_input_false():
    # Should return False if input is False
    codeflash_output = _print_matrix(False)  # 426ns -> 434ns (1.84% slower)
def test_1d_array():
    # Test with a simple 1D array
    arr = FakeArray([1, 2, 3, 4, 5, 6])
    expected = str(arr.shape) + " - sample: " + str([1, 2, 3, 4, 5])
    codeflash_output = _print_matrix(arr)  # 2.77μs -> 2.52μs (9.97% faster)
def test_2d_array():
    # Test with a simple 2D array
    arr = FakeArray([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]])
    expected = str(arr.shape) + " - sample: " + str([1, 2, 3, 4, 5])
    codeflash_output = _print_matrix(arr)  # 2.50μs -> 2.24μs (11.9% faster)
def test_3d_array():
    # Test with a simple 3D array
    arr = FakeArray([
        [
            [1, 2, 3, 4, 5, 6],
            [7, 8, 9, 10, 11, 12],
        ],
        [
            [13, 14, 15, 16, 17, 18],
            [19, 20, 21, 22, 23, 24],
        ],
    ])
    expected = str(arr.shape) + " - sample: " + str([1, 2, 3, 4, 5])
    codeflash_output = _print_matrix(arr)  # 2.40μs -> 2.25μs (6.65% faster)
# ------------------------
# Edge Test Cases
# ------------------------
def test_empty_1d_array():
    # Test with empty 1D array
    arr = FakeArray([])
    expected = str(arr.shape) + " - sample: " + str([])
    codeflash_output = _print_matrix(arr)  # 1.97μs -> 1.73μs (13.7% faster)
def test_empty_2d_array():
    # Test with empty 2D array
    arr = FakeArray([[]])
    expected = str(arr.shape) + " - sample: " + str([])
    codeflash_output = _print_matrix(arr)  # 2.29μs -> 1.98μs (15.9% faster)
def test_short_1d_array():
    # Test with 1D array shorter than 5 elements
    arr = FakeArray([42, 43])
    expected = str(arr.shape) + " - sample: " + str([42, 43])
    codeflash_output = _print_matrix(arr)  # 2.04μs -> 1.86μs (9.50% faster)
def test_short_2d_array():
    # Test with 2D array whose first row is shorter than 5 elements
    arr = FakeArray([[1, 2]])
    expected = str(arr.shape) + " - sample: " + str([1, 2])
    codeflash_output = _print_matrix(arr)  # 2.24μs -> 2.03μs (10.2% faster)
def test_irregular_2d_array():
    # Test with 2D array where rows are of different lengths
    arr = FakeArray([[1, 2, 3], [4, 5], [6]])
    # Only the first row is sampled
    expected = str(arr.shape) + " - sample: " + str([1, 2, 3])
    codeflash_output = _print_matrix(arr)  # 2.12μs -> 1.97μs (7.73% faster)
def test_single_element_3d_array():
    # Test with 3D array with a single element
    arr = FakeArray([[[99]]])
    expected = str(arr.shape) + " - sample: " + str([99])
    codeflash_output = _print_matrix(arr)  # 2.17μs -> 2.07μs (4.63% faster)
def test_zero_length_inner_list():
    # Test with 2D array whose first row is empty
    arr = FakeArray([[], [1, 2, 3]])
    expected = str(arr.shape) + " - sample: " + str([])
    codeflash_output = _print_matrix(arr)  # 2.06μs -> 1.82μs (13.2% faster)
def test_non_list_string():
    # Test with a scalar string value (not a list)
    arr = FakeArray("hello")
    expected = str(arr.shape) + " - sample: " + str([])
    codeflash_output = _print_matrix(arr)  # 2.55μs -> 1.93μs (31.8% faster)
# ------------------------
# Large Scale Test Cases
# ------------------------
def test_large_1d_array():
    # Test with large 1D array (1000 elements)
    arr = FakeArray(list(range(1000)))
    expected = str(arr.shape) + " - sample: " + str(list(range(5)))
    codeflash_output = _print_matrix(arr)  # 2.24μs -> 2.26μs (0.754% slower)
def test_large_2d_array():
    # Test with large 2D array (1000x1000, but only first row is sampled)
    arr = FakeArray([list(range(1000)) for _ in range(1000)])
    expected = str(arr.shape) + " - sample: " + str(list(range(5)))
    codeflash_output = _print_matrix(arr)  # 3.59μs -> 3.77μs (4.73% slower)
def test_large_3d_array():
    # Test with large 3D array (10x10x10, but only first row and first sub-row is sampled)
    arr = FakeArray([[[i + j + k for k in range(10)] for j in range(10)] for i in range(10)])
    # The sample is the first 5 elements of arr[0][0]
    expected = str(arr.shape) + " - sample: " + str([0, 1, 2, 3, 4])
    codeflash_output = _print_matrix(arr)  # 2.75μs -> 2.71μs (1.36% faster)
def test_large_edge_short_first_row():
    # Test with large 2D array, but first row is short
    arr = FakeArray([[1, 2], *[list(range(1000)) for _ in range(999)]])
    expected = str(arr.shape) + " - sample: " + str([1, 2])
    codeflash_output = _print_matrix(arr)  # 3.38μs -> 3.43μs (1.40% slower)
def test_large_edge_empty_first_row():
    # Test with large 2D array, but first row is empty
    arr = FakeArray([[], *[list(range(1000)) for _ in range(999)]])
    expected = str(arr.shape) + " - sample: " + str([])
    codeflash_output = _print_matrix(arr)  # 3.47μs -> 3.26μs (6.40% faster)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run
`git checkout codeflash/optimize-_print_matrix-mhtr07n7` and push.