diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index 26187432..547e91d0 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -127,6 +127,10 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u - `atol::T = √eps(T)`: absolute tolerance; - `sub_atol::T = atol`: subsolver absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); - `neg_tol::T = zero(T): negative tolerance; - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -142,7 +146,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u - `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration. - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ₁` is the Cauchy step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -193,6 +197,10 @@ function SolverCore.solve!( atol::T = √eps(T), sub_atol::T = atol, rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = zero(T), verbose::Int = 0, max_iter::Int = 10000, @@ -251,12 +259,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:outer, :inner, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :ν, :arrow], - [Int, Int, T, T, T, T, T, T, T, T, Char], + [:outer, :inner, :fx, :hx, :xi, :norms1dnu, :ρ, :Δ, :normx, :norms, :ν, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ1/ν)", + :norms1dnu => "‖s₁‖/ν", :normx => "‖x‖", :norms => "‖s‖", :arrow => "LMTR", @@ -302,14 +311,21 @@ function SolverCore.solve!( # Take first proximal gradient step s1 and see if current xk is nearly stationary. # s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)) prox!(s, ψ, mν∇fk, ν) + + # Estimate optimality and check stopping criteria ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("LMTR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative sub_atol += rtol * sqrt_ξ1_νInv + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν + atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) + set_status!( stats, get_status( @@ -390,6 +406,7 @@ function SolverCore.solve!( fk, hk, sqrt_ξ1_νInv, + norm_s_cauchydν, ρk, ∆_effective, χ(xk), @@ -455,12 +472,17 @@ function SolverCore.solve!( prox!(s, ψ, mν∇fk, ν) mks = mk1(s) + # Estimate optimality and check stopping criteria ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) + set_status!( stats, get_status( @@ -481,11 +503,11 @@ function SolverCore.solve!( end if verbose > 0 && stats.status == :first_order - @info log_row(Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1) - @info "LMTR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + @info log_row(Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1) + @info "LMTR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s₁‖/ν = $(norm_s_cauchydν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(T), sqrt_ξ1_νInv) + set_residuals!(stats, zero(T), min(sqrt_ξ1_νInv, norm_s_cauchydν)) return stats end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index ef514b44..186a3ada 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -127,6 +127,10 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u - `nonlinear::Bool = true`: whether the function `F` is nonlinear or not. - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); - `neg_tol::T = zero(T): negative tolerance; - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -142,7 +146,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u - `subsolver = R2Solver`: the solver used to solve the subproblems. - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀₁‖/ν₀` where `sₖ₁` is the Cauchy step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -192,6 +196,10 @@ function SolverCore.solve!( nonlinear::Bool = true, atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = zero(T), verbose::Int = 0, max_iter::Int = 10000, @@ -254,12 +262,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normJ, :arrow], - [Int, Int, T, T, T, T, T, T, T, T, Char], + [:outer, :inner, :fx, :hx, :xi, :normsdnu, :ρ, :σ, :normx, :norms, :normJ, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ1/ν)", + :normsdnu => "‖s₁‖/ν", :normx => "‖x‖", :norms => "‖s‖", :normJ => "‖J‖²", @@ -306,12 +315,19 @@ function SolverCore.solve!( end prox!(s, ψ, mν∇fk, ν) + + # Estimate optimality and check stopping criteria ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative + + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν + atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) set_status!( stats, @@ -384,6 +400,7 @@ function SolverCore.solve!( fk, hk, sqrt_ξ1_νInv, + norm_s_cauchydν, ρk, σk, norm(xk), @@ -443,14 +460,17 @@ function SolverCore.solve!( prox!(s, ψ, mν∇fk, ν) mks = mk1(s) + # Estimate optimality and check stopping criteria ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps() - sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) - (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) + set_status!( stats, get_status( @@ -472,13 +492,13 @@ function SolverCore.solve!( if verbose > 0 && stats.status == :first_order @info log_row( - Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), 1 / ν, ""], + Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, σk, norm(xk), norm(s), 1 / ν, ""], colsep = 1, ) - @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s‖/ν = $(norm_s_cauchydν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(T), sqrt_ξ1_νInv) + set_residuals!(stats, zero(T), min(sqrt_ξ1_νInv, norm_s_cauchydν)) return stats end diff --git a/src/R2DH.jl b/src/R2DH.jl index de93ffcb..f9e10ae7 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -121,6 +121,10 @@ or - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); - `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -136,7 +140,7 @@ or - `compute_obj::Bool = true`: (advanced) whether `f(x₀)` should be computed or not. If set to false, then the value is retrieved from `stats.solver_specific[:smooth_obj]`; - `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`; -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ` is the current step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -168,7 +172,7 @@ function R2DH( η1 = options.η1, η2 = options.η2, γ = options.γ, - θ = options.θ, + θ = options.θ; kwargs_dict..., ) end @@ -225,6 +229,10 @@ function SolverCore.solve!( x::V = reg_nlp.model.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, max_iter::Int = 10000, @@ -286,12 +294,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:iter, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :arrow], - [Int, T, T, T, T, T, T, T, Char], + [:iter, :fx, :hx, :xi, :normsdnu, :ρ, :σ, :normx, :norms, :arrow], + [Int, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ/ν)", + :normsdnu => "‖s‖/ν", :normx => "‖x‖", :norms => "‖s‖", :arrow => "R2DH", @@ -341,12 +350,18 @@ function SolverCore.solve!( mks = mk(s) + # Estimate optimality and check stopping criteria ξ = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν₁) : sqrt(-ξ / ν₁) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") - atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ_νInv # make stopping test absolute and relative + + norm_s = norm(s) + norm_sdν = norm_s / ν₁ + atol_step += rtol_step * norm_sdν # make stopping test absolute and relative + + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) set_status!( stats, @@ -386,10 +401,11 @@ function SolverCore.solve!( fk, hk, sqrt_ξ_νInv, + norm_sdν, ρk, σk, norm(xk), - norm(s), + norm_s, (η2 ≤ ρk < Inf) ? '↘' : (ρk < η1 ? '↗' : '='), ], colsep = 1, @@ -439,12 +455,17 @@ function SolverCore.solve!( spectral_test ? prox!(s, ψ, mν∇fk, ν₁) : iprox!(s, ψ, ∇fk, dkσk) mks = mk(s) + # Estimate optimality and check stopping criteria ξ = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν₁) : sqrt(-ξ / ν₁) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("R2DH: prox-gradient step should produce a decrease but ξ = $(ξ)") + norm_s = norm(s) + norm_sdν = norm_s / ν₁ + + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) + set_status!( stats, get_status( @@ -465,11 +486,11 @@ function SolverCore.solve!( end if verbose > 0 && stats.status == :first_order - @info log_row(Any[stats.iter, fk, hk, sqrt_ξ_νInv, ρk, σk, norm(xk), norm(s), ""], colsep = 1) - @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" + @info log_row(Any[stats.iter, fk, hk, sqrt_ξ_νInv, norm_sdν, ρk, σk, norm(xk), norm_s, ""], colsep = 1) + @info "R2DH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv) and ‖s‖/ν = $(norm_sdν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) + set_residuals!(stats, zero(eltype(xk)), min(sqrt_ξ_νInv, norm_sdν)) return stats end diff --git a/src/R2N.jl b/src/R2N.jl index 51ad3c56..22ba6e36 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -136,6 +136,10 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); - `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance; - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -153,7 +157,7 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory - `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`; - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ₁` is the Cauchy step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -215,6 +219,10 @@ function SolverCore.solve!( x::V = reg_nlp.model.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, max_iter::Int = 10000, @@ -278,12 +286,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normB, :arrow], - [Int, Int, T, T, T, T, T, T, T, T, Char], + [:outer, :inner, :fx, :hx, :xi, :norms1dnu, :ρ, :σ, :normx, :norms, :normB, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ1/ν)", + :norms1dnu => "‖s₁‖/ν", :normx => "‖x‖", :norms => "‖s‖", :normB => "‖B‖", @@ -344,12 +353,18 @@ function SolverCore.solve!( prox!(s1, ψ, mν∇fk, ν₁) mks = mk1(s1) + # Estimate optimality and check stopping criteria ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative + + norm_s_cauchy = norm(s1) + norm_s_cauchydν = norm_s_cauchy / ν₁ + atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) set_status!( stats, @@ -428,6 +443,7 @@ function SolverCore.solve!( fk, hk, sqrt_ξ1_νInv, + norm_s_cauchydν, ρk, σk, norm(xk), @@ -491,13 +507,17 @@ function SolverCore.solve!( prox!(s1, ψ, mν∇fk, ν₁) mks = mk1(s1) + # Estimate optimality and check stopping criteria ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() - sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) - (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + norm_s_cauchy = norm(s1) + norm_s_cauchydν = norm_s_cauchy / ν₁ + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) + set_status!( stats, get_status( @@ -519,14 +539,14 @@ function SolverCore.solve!( if verbose > 0 && stats.status == :first_order @info log_row( - Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), λmax, ""], + Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, σk, norm(xk), norm(s), λmax, ""], colsep = 1, ) - @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + @info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s₁‖/ν = $(norm_s_cauchydν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) + set_residuals!(stats, zero(eltype(xk)), min(sqrt_ξ1_νInv, norm_s_cauchydν)) return stats end diff --git a/src/R2_alg.jl b/src/R2_alg.jl index 48bfc54e..516d97e1 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -143,6 +143,10 @@ For advanced usage, first define a solver "R2Solver" to preallocate the memory u - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); - `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -156,7 +160,7 @@ For advanced usage, first define a solver "R2Solver" to preallocate the memory u - `compute_obj::Bool = true`: (advanced) whether `f(x₀)` should be computed or not. If set to false, then the value is retrieved from `stats.solver_specific[:smooth_obj]`; - `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`; -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ` is the current step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -315,6 +319,10 @@ function SolverCore.solve!( x::V = reg_nlp.model.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, max_iter::Int = 10000, @@ -367,13 +375,14 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:iter, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :arrow], - [Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Char], + [:iter, :fx, :hx, :xi, :normsdnu, :ρ, :σ, :normx, :norms, :arrow], + [Int, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere :iter => "iter", :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ/ν)", + :normsdnu => "‖s‖/ν", :ρ => "ρ", :σ => "σ", :normx => "‖x‖", @@ -408,14 +417,18 @@ function SolverCore.solve!( prox!(s, ψ, mν∇fk, ν) mks = mk(s) + # Estimate optimality and check stopping criteria ξ = hk - mks + max(1, abs(hk)) * 10 * eps() - sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative - - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("R2: prox-gradient step should produce a decrease but ξ = $(ξ)") + atol_decr += rtol_decr * sqrt_ξ_νInv # make stopping test absolute and relative + + norm_s = norm(s) + norm_sdν = norm_s / ν + atol_step += rtol_step * norm_sdν # make stopping test absolute and relative + + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) set_status!( stats, @@ -454,10 +467,11 @@ function SolverCore.solve!( fk, hk, sqrt_ξ_νInv, + norm_sdν, ρk, σk, norm(xk), - norm(s), + norm_s, (η2 ≤ ρk < Inf) ? '↘' : (ρk < η1 ? '↗' : '='), ], colsep = 1, @@ -499,10 +513,14 @@ function SolverCore.solve!( ξ = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("R2: prox-gradient step should produce a decrease but ξ = $(ξ)") + norm_s = norm(s) + norm_sdν = norm_s / ν + + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) + set_status!( stats, get_status( @@ -523,11 +541,11 @@ function SolverCore.solve!( end if verbose > 0 && stats.status == :first_order - @info log_row(Any[stats.iter, fk, hk, sqrt_ξ_νInv, ρk, σk, norm(xk), norm(s), ""], colsep = 1) - @info "R2: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" + @info log_row(Any[stats.iter, fk, hk, sqrt_ξ_νInv, norm_sdν, ρk, σk, norm(xk), norm_s, ""], colsep = 1) + @info "R2: terminating with √(ξ/ν) = $(sqrt_ξ_νInv) and ‖s‖/ν = $(norm_sdν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) + set_residuals!(stats, zero(eltype(xk)), min(sqrt_ξ_νInv, norm_sdν)) return stats end diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 6a76ce0c..612462f3 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -131,6 +131,10 @@ For advanced usage, first define a solver "TRDHSolver" to preallocate the memory - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ‖/ν₁` (see below); - `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance; - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -146,7 +150,7 @@ For advanced usage, first define a solver "TRDHSolver" to preallocate the memory - `compute_obj::Bool = true`: (advanced) whether `f(x₀)` should be computed or not. If set to false, then the value is retrieved from `stats.solver_specific[:smooth_obj]`; - `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`; -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ` is the current step. Alternatively, if `reduce_TR = true`, then ξₖ₁ := f(xₖ) + h(xₖ) - φ(sₖ₁; xₖ) - ψ(sₖ₁; xₖ) is used instead of ξₖ, where sₖ₁ is the Cauchy point. # Output @@ -239,6 +243,10 @@ function SolverCore.solve!( x::V = reg_nlp.model.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, max_iter::Int = 10000, @@ -304,12 +312,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:iter, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normD, :arrow], - [Int, T, T, T, T, T, T, T, T, Char], + [:iter, :fx, :hx, :xi, :normsdnu, :ρ, :Δ, :normx, :norms, :normD, :arrow], + [Int, T, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ/ν)", + :normsdnu => "‖s‖/ν", :normx => "‖x‖", :norms => "‖s‖", :normD => "‖D‖", @@ -372,10 +381,9 @@ function SolverCore.solve!( ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv ≤ atol) (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && error("TR: prox-gradient step should produce a decrease but ξ = $(ξ)") - atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ_νInv # make stopping test absolute and relative end Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk @@ -394,12 +402,17 @@ function SolverCore.solve!( if !reduce_TR sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") - atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative #TODO : this is redundant code with the other case of the test. + atol_decr += rtol_decr * sqrt_ξ_νInv # make stopping test absolute and relative #TODO : this is redundant code with the other case of the test. end + norm_s = norm(s) + norm_sdν = norm_s / ν + atol_step += rtol_step * norm_sdν # make stopping test absolute and relative + + solved = reduce_TR ? (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) : + (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) set_status!( stats, get_status( @@ -434,6 +447,7 @@ function SolverCore.solve!( fk, hk, sqrt_ξ_νInv, + norm_sdν, ρk, Δk, χ(xk), @@ -491,7 +505,6 @@ function SolverCore.solve!( prox!(s, ψ, mν∇fk, ν) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν) - solved = (ξ1 < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ_νInv < atol) (ξ1 < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") end @@ -503,11 +516,15 @@ function SolverCore.solve!( if !reduce_TR ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() sqrt_ξ_νInv = ξ ≥ 0 ? sqrt(ξ / ν) : sqrt(-ξ / ν) - solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv < atol) (ξ < 0 && sqrt_ξ_νInv > neg_tol) && error("TRDH: prox-gradient step should produce a decrease but ξ = $(ξ)") end + norm_s = norm(s) + norm_sdν = norm_s / ν + + solved = (ξ < 0 && sqrt_ξ_νInv ≤ neg_tol) || (ξ ≥ 0 && sqrt_ξ_νInv ≤ atol_decr) || (norm_sdν ≤ atol_step) + set_status!( stats, get_status( @@ -529,13 +546,13 @@ function SolverCore.solve!( if verbose > 0 && stats.status == :first_order @info log_row( - Any[stats.iter, fk, hk, sqrt_ξ_νInv, ρk, Δk, χ(xk), sNorm, norm(D.d), ""], + Any[stats.iter, fk, hk, sqrt_ξ_νInv, norm_sdν, ρk, Δk, χ(xk), sNorm, norm(D.d), ""], colsep = 1, ) - @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)" + @info "TRDH: terminating with √(ξ/ν) = $(sqrt_ξ_νInv) and ‖s‖/ν = $(norm_sdν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv) + set_residuals!(stats, zero(eltype(xk)), min(sqrt_ξ_νInv, norm_sdν)) return stats end diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 7d3d9bbe..ec771916 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -136,6 +136,10 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `x::V = nlp.meta.x0`: the initial guess; - `atol::T = √eps(T)`: absolute tolerance; - `rtol::T = √eps(T)`: relative tolerance; +- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below); +- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); +- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below); - `neg_tol::T = eps(T)^(1 / 4)`: negative tolerance (see stopping conditions below); - `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited); - `max_time::Float64 = 30.0`: maximum time limit in seconds; @@ -153,7 +157,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u - `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`; - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`. -The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure. +The algorithm stops either when `√(ξₖ/νₖ) < atol_decr + rtol_decr*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ₁` is the Cauchy step. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -211,6 +215,10 @@ function SolverCore.solve!( atol::T = √eps(T), sub_atol::T = √eps(T), rtol::T = √eps(T), + atol_decr::T = atol, + rtol_decr::T = rtol, + atol_step::T = atol, + rtol_step::T = rtol, neg_tol::T = eps(T)^(1 / 4), verbose::Int = 0, subsolver_logger::Logging.AbstractLogger = Logging.SimpleLogger(), @@ -276,12 +284,13 @@ function SolverCore.solve!( if verbose > 0 @info log_header( - [:outer, :inner, :fx, :hx, :xi, :ρ, :Δ, :normx, :norms, :normB, :arrow], - [Int, Int, T, T, T, T, T, T, T, T, Char], + [:outer, :inner, :fx, :hx, :xi, :norms1dnu, :ρ, :Δ, :normx, :norms, :normB, :arrow], + [Int, Int, T, T, T, T, T, T, T, T, T, Char], hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere :fx => "f(x)", :hx => "h(x)", :xi => "√(ξ1/ν)", + :norms1dnu => "‖sₖ₁‖/ν", :normx => "‖x‖", :norms => "‖s‖", :normB => "‖B‖", @@ -340,16 +349,21 @@ function SolverCore.solve!( end prox!(s, ψ, mν∇fk, ν₁) - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + # Estimate optimality and check stopping criteria + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative + atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative sub_atol += rtol * sqrt_ξ1_νInv + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν₁ + atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) + set_status!( stats, get_status( @@ -431,6 +445,7 @@ function SolverCore.solve!( fk, hk, sqrt_ξ1_νInv, + norm_s_cauchydν, ρk, Δk, χ(xk), @@ -504,12 +519,17 @@ function SolverCore.solve!( @. mν∇fk = -ν₁ * ∇fk prox!(s, ψ, mν∇fk, ν₁) - ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - sqrt_ξ1_νInv = sqrt(ξ1 / ν₁) - solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol) + # Estimate optimality and check stopping criteria + ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() + sqrt_ξ1_νInv = ξ1 ≥ 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁) (ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) && error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)") + + norm_s_cauchy = norm(s) + norm_s_cauchydν = norm_s_cauchy / ν₁ + + solved = (ξ1 < 0 && sqrt_ξ1_νInv ≤ neg_tol) || (ξ1 ≥ 0 && sqrt_ξ1_νInv ≤ atol_decr) || (norm_s_cauchydν ≤ atol_step) set_status!( stats, @@ -531,12 +551,12 @@ function SolverCore.solve!( end if verbose > 0 && stats.status == :first_order @info log_row( - Any[stats.iter, solver.substats.iter, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), λmax, ""], + Any[stats.iter, solver.substats.iter, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, Δk, χ(xk), χ(s), λmax, ""], colsep = 1, ) - @info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" + @info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s₁‖/ν = $(norm_s_cauchydν)" end set_solution!(stats, xk) - set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv) + set_residuals!(stats, zero(eltype(xk)), min(sqrt_ξ1_νInv, norm_s_cauchydν)) end diff --git a/test/runtests.jl b/test/runtests.jl index caaf8e12..3d37177c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -38,6 +38,21 @@ for (mod, mod_name) ∈ ((x -> x, "exact"), (LSR1Model, "lsr1"), (LBFGSModel, "l @test typeof(out.dual_feas) == eltype(out.solution) @test out.status == :first_order @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + # Test with the different stopping criteria + out = solver(mod(bpdn), h, args..., options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + out = solver(mod(bpdn), h, args..., options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) end end end @@ -57,6 +72,21 @@ for (mod, mod_name) ∈ ((SpectralGradientModel, "spg"),) @test typeof(out.dual_feas) == eltype(out.solution) @test out.status == :first_order @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + # Test with the different stopping criteria + out = TRDH(mod(bpdn), h, χ, options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + out = TRDH(mod(bpdn), h, χ, options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) end end end @@ -74,6 +104,21 @@ for (mod, mod_name) ∈ ((LSR1Model, "lsr1"), (LBFGSModel, "lbfgs")) @test typeof(TR_out.dual_feas) == eltype(TR_out.solution) @test TR_out.status == :first_order @test TR_out.step_status == (TR_out.iter > 0 ? :accepted : :unknown) + + # Test with the different stopping criteria + TR_out = TR(mod(bpdn), h, NormL2(1.0), options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0) + @test typeof(TR_out.solution) == typeof(bpdn.meta.x0) + @test length(TR_out.solution) == bpdn.meta.nvar + @test typeof(TR_out.dual_feas) == eltype(TR_out.solution) + @test TR_out.status == :first_order + @test TR_out.step_status == (TR_out.iter > 0 ? :accepted : :unknown) + + TR_out = TR(mod(bpdn), h, NormL2(1.0), options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6) + @test typeof(TR_out.solution) == typeof(bpdn.meta.x0) + @test length(TR_out.solution) == bpdn.meta.nvar + @test typeof(TR_out.dual_feas) == eltype(TR_out.solution) + @test TR_out.status == :first_order + @test TR_out.step_status == (TR_out.iter > 0 ? :accepted : :unknown) end end end @@ -94,6 +139,21 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * com @test typeof(out.dual_feas) == eltype(out.solution) @test out.status == :first_order @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + # Test with the different stopping criteria + out = solver(bpdn_nls, h, args..., options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + out = solver(bpdn_nls, h, args..., options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) end end end @@ -135,6 +195,21 @@ for (mod, mod_name) ∈ ( @test typeof(out.dual_feas) == eltype(out.solution) @test out.status == :first_order @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + # Test with the different stopping criteria + out = solver(mod(bpdn), h, options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) + + out = solver(mod(bpdn), h, options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6) + @test typeof(out.solution) == typeof(bpdn.meta.x0) + @test length(out.solution) == bpdn.meta.nvar + @test typeof(out.dual_feas) == eltype(out.solution) + @test out.status == :first_order + @test out.step_status == (out.iter > 0 ? :accepted : :unknown) end end end