From 605e7d988e1947d3637876ec076f1f9e0383b826 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Thu, 14 Sep 2023 08:32:29 +0200 Subject: [PATCH 1/2] make cond parameter optional and improve doc in it --- src/equisolve/numpy/models/linear_model.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/equisolve/numpy/models/linear_model.py b/src/equisolve/numpy/models/linear_model.py index 630355b..fb64d81 100644 --- a/src/equisolve/numpy/models/linear_model.py +++ b/src/equisolve/numpy/models/linear_model.py @@ -106,7 +106,7 @@ def _solve( y: TensorBlock, alpha: TensorBlock, sample_weight: TensorBlock, - cond: float, + cond: Optional[float] = None, ) -> TensorBlock: """A regularized solver using ``np.linalg.lstsq``.""" self._used_auto_solver = None @@ -197,10 +197,12 @@ def _solve( # and b is [y*sqrt(w), 0] X_eff = np.vstack([sqrt_sw_arr * X_arr, np.diag(np.sqrt(alpha_arr))]) y_eff = np.hstack([y_arr * sqrt_sw_arr.flatten(), np.zeros(num_properties)]) + if cond is None: + cond = max(X_arr.shape) * np.finfo(X_arr.dtype.char.lower()).eps w = scipy.linalg.lstsq(X_eff, y_eff, cond=cond, overwrite_a=True)[0].ravel() else: raise ValueError( - f"Unknown solver {self._solver} only 'auto', 'cholesky'," + f"Unknown solver {self._solver!r} only 'auto', 'cholesky'," " 'cholesky_dual' and 'lstsq' are supported." ) @@ -225,7 +227,7 @@ def fit( alpha: Union[float, TensorMap] = 1.0, sample_weight: Union[float, TensorMap] = None, solver="auto", - cond: float = None, + cond: Optional[float] = None, ) -> None: """Fit a regression model to each block in `X`. @@ -264,7 +266,8 @@ def fit( :param cond: Cut-off ratio for small singular values during the fit. For the purposes of rank determination, singular values are treated as zero if they are smaller - than `cond` times the largest singular value in "weights" matrix. + than `cond` times the largest singular value in "weights" matrix. Only + important when solver "lstsq" is used. """ self._solver = solver From 9772d31cbf5a06d3f089f05344c02dc76782f497 Mon Sep 17 00:00:00 2001 From: Alexander Goscinski Date: Tue, 19 Sep 2023 09:37:19 +0200 Subject: [PATCH 2/2] rename cond -> rcond There is a slight mess with the naming condition number in scipy and numpy. scipy uses word `cond` and numpy `rcond`. rcond makes actually more sense because it is a lower bound on the reciprocal condition number. --- src/equisolve/numpy/models/linear_model.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/equisolve/numpy/models/linear_model.py b/src/equisolve/numpy/models/linear_model.py index fb64d81..1020aa2 100644 --- a/src/equisolve/numpy/models/linear_model.py +++ b/src/equisolve/numpy/models/linear_model.py @@ -106,7 +106,7 @@ def _solve( y: TensorBlock, alpha: TensorBlock, sample_weight: TensorBlock, - cond: Optional[float] = None, + rcond: Optional[float] = None, ) -> TensorBlock: """A regularized solver using ``np.linalg.lstsq``.""" self._used_auto_solver = None @@ -197,9 +197,11 @@ def _solve( # and b is [y*sqrt(w), 0] X_eff = np.vstack([sqrt_sw_arr * X_arr, np.diag(np.sqrt(alpha_arr))]) y_eff = np.hstack([y_arr * sqrt_sw_arr.flatten(), np.zeros(num_properties)]) - if cond is None: - cond = max(X_arr.shape) * np.finfo(X_arr.dtype.char.lower()).eps - w = scipy.linalg.lstsq(X_eff, y_eff, cond=cond, overwrite_a=True)[0].ravel() + if rcond is None: + rcond = max(X_arr.shape) * np.finfo(X_arr.dtype.char.lower()).eps + w = scipy.linalg.lstsq(X_eff, y_eff, cond=rcond, overwrite_a=True)[ + 0 + ].ravel() else: raise ValueError( f"Unknown solver {self._solver!r} only 'auto', 'cholesky'," @@ -227,7 +229,7 @@ def fit( alpha: Union[float, TensorMap] = 1.0, sample_weight: Union[float, TensorMap] = None, solver="auto", - cond: Optional[float] = None, + rcond: Optional[float] = None, ) -> None: """Fit a regression model to each block in `X`. @@ -263,7 +265,7 @@ def fit( on the dual problem (X@X.T) w_dual = y, the primal weights are obtained by w = X.T @ w_dual - **"lstsq"**: using :func:`scipy.linalg.lstsq` on the linear system X w = y - :param cond: + :param rcond: Cut-off ratio for small singular values during the fit. For the purposes of rank determination, singular values are treated as zero if they are smaller than `cond` times the largest singular value in "weights" matrix. Only @@ -306,7 +308,7 @@ def fit( alpha_block = alpha.block(key) sw_block = sample_weight.block(key) - weight_block = self._solve(X_block, y_block, alpha_block, sw_block, cond) + weight_block = self._solve(X_block, y_block, alpha_block, sw_block, rcond) weights_blocks.append(weight_block)