From 1156abf07e1f6a5369821f014830f5b5c86bd70f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 4 Jun 2026 16:31:58 +0100 Subject: [PATCH 1/5] initial commit --- Project.toml | 9 ++- examples/catenary.jl | 111 ++++++++++++++++++++++++++++++++---- examples/controlproblem.jl | 22 ++++--- examples/jacobians.jl | 4 +- examples/nestedduals.jl | 4 +- examples/newtonpendulum.jl | 98 +++++++++++++++++++++++++++---- examples/plaplacian.jl | 65 +++++++++++++++++++++ src/DualArrays.jl | 4 +- src/arithmetic.jl | 42 ++++++++++---- src/types.jl | 65 +++++++++++---------- src/utilities.jl | 15 ++++- test/array_operator_test.jl | 14 +++++ test/runtests.jl | 11 ++++ 13 files changed, 382 insertions(+), 82 deletions(-) create mode 100644 examples/plaplacian.jl diff --git a/Project.toml b/Project.toml index 94ea596..43ee63b 100644 --- a/Project.toml +++ b/Project.toml @@ -9,16 +9,20 @@ BandedMatrices = "aae01518-5342-5314-be14-df237901396f" DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +SparseArrayKit = "a9a3c162-d163-4c15-8926-b8794fbefed2" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" + [compat] DiffRules = "1.15.1" ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" +SparseArrayKit = "0.4.3" SparseArrays = "1.10" -TensorOperations = "5.5.2" +TensorOperations = "5.6.1" +Zygote = "0.7.10" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" @@ -27,7 +31,8 @@ Lux = "b2108857-7c20-44ae-9111-449ecde12c47" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -examples = ["Plots", "ForwardDiff", "BandedMatrices", "Lux"] +examples = ["Plots", "ForwardDiff", "BandedMatrices", "Lux", "Zygote"] test = ["Test", "ForwardDiff", "BandedMatrices", "SparseArrays"] diff --git a/examples/catenary.jl b/examples/catenary.jl index 3ef7be6..ea2f3b7 100644 --- a/examples/catenary.jl +++ b/examples/catenary.jl @@ -1,4 +1,4 @@ -using DualArrays, LinearAlgebra, Plots, BenchmarkTools +using DualArrays, LinearAlgebra, Plots, BenchmarkTools, BandedMatrices, ForwardDiff, Zygote """ Solving the Catenary Problem using DualArrays.jl @@ -11,6 +11,19 @@ The computation of finite differences keeps the Jacobian sparse, so we can use DualArrays.jl """ +const plot_args = ( + xscale = :log10, + yscale = :log10, + lw = 2.5, + marker = :circle, + color = :steelblue4, + legend = :topleft, + framestyle = :box, + size = (960, 600), + left_margin = 10Plots.mm, + bottom_margin = 10Plots.mm, +) + function L(y, h, alpha, beta) """ Evaluate functional with boundary conditions (alpha, beta) @@ -24,33 +37,109 @@ function L(y, h, alpha, beta) return y_mid .* sqrt.(1 .+ dy.^2) end -function learn_catenary(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01) +function learn_catenary_dualarrays(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01) + n = Int(2 / h) - 1 + y = ones(n) * (alpha + beta) / 2 + for _ = 1:epochs + jac = DualArrays.jacobian(y -> L(y, h, alpha, beta), y, BandedMatrix) + grads = h * sum(jac, dims=1) + y -= lr * vec(grads) + end + return y +end + +function learn_catenary_forwarddiff(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01) + n = Int(2 / h) - 1 + y = ones(n) * (alpha + beta) / 2 + for _ = 1:epochs + jac = ForwardDiff.jacobian(y -> L(y, h, alpha, beta), y) + grads = h * sum(jac, dims=1) + y -= lr * vec(grads) + end + return y +end + +function learn_catenary_zygote(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01) n = Int(2 / h) - 1 y = ones(n) * (alpha + beta) / 2 for _ = 1:epochs - jac = jacobian(y -> L(y, h, alpha, beta), y, id="banded") + jac = only(Zygote.jacobian(y -> L(y, h, alpha, beta), y)) grads = h * sum(jac, dims=1) y -= lr * vec(grads) end return y end -function plot_solution(h) +function plot_solution(save=undef;h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 5000, lr = 0.02) x = collect(-1+h:h:1-h) - y = learn_catenary() - plot(x, y, label = "Approximation") - plot!(x, cosh.(x), label = "Exact Solution") + y = learn_catenary_dualarrays(h, alpha, beta, epochs, lr) + plot(x, y, label = "Approximate solution (DualArrays)", title="Catenary Solution", legend=:topleft) + plot!(x, cosh.(x), label = "Exact Solution (cosh(x))", ls = :dash) + if save !== undef + savefig(save) + end end -function plot_times() - hs = [0.1, 0.05, 0.02, 0.01, 0.005, 0.002] +function plot_times(save=undef; hs = [0.04, 0.02, 0.01, 0.005, 0.0025, 0.00125], epochs = 200, lr = 0.01) ns = Int.(2 ./ hs) .- 1 dualvector_times = Float64[] + forwarddiff_times = Float64[] + zygote_times = Float64[] for (h, n) in zip(hs, ns) println("Computing solution with h = $h, n = $n") - push!(dualvector_times, @belapsed learn_catenary($h, cosh(-1), cosh(1), 2000, 0.01)) + push!(dualvector_times, @belapsed learn_catenary_dualarrays($h, cosh(-1), cosh(1), $epochs, $lr)) + push!(forwarddiff_times, @belapsed learn_catenary_forwarddiff($h, cosh(-1), cosh(1), $epochs, $lr)) + push!(zygote_times, @belapsed learn_catenary_zygote($h, cosh(-1), cosh(1), $epochs, $lr)) + end + + plot( + ns, + dualvector_times, + ; + label = "DualArrays", + title = "Catenary Gradient Descent Runtime", + xlabel = "Number of points (n)", + ylabel = "Runtime (seconds)", + yticks = 10 .^ collect(-3:0.25:2), + xticks = 10 .^ collect(1.5:0.25:3.5), + plot_args..., + ) + plot!(ns, forwarddiff_times, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2) + plot!(ns, zygote_times, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen) + if save !== undef + savefig(save) end +end - plot(ns, dualvector_times, label="DualArrays") +function plot_memory(save=undef; hs = [0.04, 0.02, 0.01, 0.005, 0.0025, 0.00125], epochs = 200, lr = 0.01) + ns = Int.(2 ./ hs) .- 1 + dualvector_memory = Float64[] + forwarddiff_memory = Float64[] + zygote_memory = Float64[] + + for (h, n) in zip(hs, ns) + println("Computing solution with h = $h, n = $n") + push!(dualvector_memory, @ballocated learn_catenary_dualarrays($h, cosh(-1), cosh(1), $epochs, $lr)) + push!(forwarddiff_memory, @ballocated learn_catenary_forwarddiff($h, cosh(-1), cosh(1), $epochs, $lr)) + push!(zygote_memory, @ballocated learn_catenary_zygote($h, cosh(-1), cosh(1), $epochs, $lr)) + end + + plot( + ns, + dualvector_memory, + ; + label = "DualArrays", + title = "Catenary Gradient Descent Memory", + xlabel = "Number of points (n)", + ylabel = "Allocated bytes", + yticks = 10 .^ collect(6:0.5:11), + xticks = 10 .^ collect(1.5:0.25:3.5), + plot_args..., + ) + plot!(ns, forwarddiff_memory, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2) + plot!(ns, zygote_memory, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen) + if save !== undef + savefig(save) + end end \ No newline at end of file diff --git a/examples/controlproblem.jl b/examples/controlproblem.jl index 07d5a36..861703f 100644 --- a/examples/controlproblem.jl +++ b/examples/controlproblem.jl @@ -12,7 +12,7 @@ using LinearAlgebra, DualArrays, Plots # Provided f ∈ L²(Ω), there exists a unique solution u ∈ H₀¹(Ω) to the boundary value problem. # A problem arises that given a desired solution d, we want to find f such that the solution u # is approximately d. An intuitive way of asking this is: "given that we want a certain temperature -# field in a room, how do we heat or cool the walls?" +# field in a room, how do we position heat sources/sinks?" # # A way of doing this is to define an objective to be minimised given by: # @@ -79,7 +79,7 @@ function solve_1D(f0, d, h, kappa, alpha, iters = 30) K = (kappa / h ^ 2) * Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1)) for _ = 1:iters grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) - step = grads.jacobian \ grads.value + step = grads.jacobian.data \ grads.value fvector -= step end @@ -106,7 +106,7 @@ function solve_2D(x0, d, h, kappa, alpha, iters = 30) K = (kappa / h ^ 2) * (kron(I(n), T) + kron(T, I(n))) for _ = 1:iters grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) - step = grads.jacobian \ grads.value + step = grads.jacobian.data \ grads.value fvector -= step end @@ -115,7 +115,7 @@ end # Solve the 1D Poisson control on the unit interval and plot the solution. # We compare it to a known analytical solution. -function plot_solution_1D() +function plot_solution_1D(save=undef) # setup problem kappa = 1 @@ -132,13 +132,16 @@ function plot_solution_1D() fexact = coeff .* sin.(pi .* x[2:end-1]) f = solve_1D(zeros(length(d)), d, h, kappa, alpha) - plot(x[2:end-1], f, label="Computed Solution") - plot!(x[2:end-1], fexact, label="Exact Solution") + plot(x[2:end-1], f, label="Computed Control", title="1D Poisson Control Solution") + plot!(x[2:end-1], fexact, label="Exact Control") + if save !== undef + savefig(save) + end end # Solve the 2D Poisson control on the unit square and plot the solution. # We compare it to a known analytical solution given in the dolfin-adjoint example. -function plot_solution_2D() +function plot_solution_2D(save=undef) # setup problem kappa = 1 @@ -164,5 +167,8 @@ function plot_solution_2D() p1 = heatmap(x, x, F, title="Computed Control", aspect_ratio=1) p2 = heatmap(x, x, fexact, title="Exact Control", aspect_ratio=1) - plot(p1, p2, layout=(1, 2), size=(900, 400)) + plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12) + if save !== undef + savefig(save) + end end \ No newline at end of file diff --git a/examples/jacobians.jl b/examples/jacobians.jl index 1a94212..6538b41 100644 --- a/examples/jacobians.jl +++ b/examples/jacobians.jl @@ -9,7 +9,7 @@ # ### -using DualArrays, ForwardDiff, Plots, BenchmarkTools +using DualArrays, ForwardDiff, Plots, BenchmarkTools, BandedMatrices f(x) = sin.(x) @@ -20,7 +20,7 @@ function plot_times() for n in ns println("Computing jacobian for n = $n") - push!(dualarray_times, @belapsed DualArrays.jacobian(f, rand($n))) + push!(dualarray_times, @belapsed DualArrays.jacobian(f, rand($n), BandedMatrix)) push!(forwarddiff_times, @belapsed ForwardDiff.jacobian(f, rand($n))) end diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index 51af36d..4dd90c5 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -42,14 +42,14 @@ # # We now implement an example using f(x) = x[1] * x[2] below. -using DualArrays +using DualArrays, SparseArrayKit f(x) = x[1] * x[2] # a + Bϵ x = DualVector([2, 3], [1 0; 0 1]) # C + Dϵ -y = DualMatrix([1 0;0 1], zeros(2,2,2)) +y = DualMatrix([1 0;0 1], SparseArray{Float64, 3}(undef, (2, 2, 2))) # Setup the nested dual vector (a + Bϵ) + (C + Dϵ)ϵ' z = DualVector(x, y) diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl index a24e329..41a550f 100644 --- a/examples/newtonpendulum.jl +++ b/examples/newtonpendulum.jl @@ -9,15 +9,28 @@ # Accurately and in O(n) time. ## -using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, BenchmarkTools, BandedMatrices +using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, BenchmarkTools, BandedMatrices, Zygote #Boundary Conditions -a = 0.4 +a = 0.1 b = 0.0 Tmax = 5.0 ts = 0.01 +const plot_args = ( + xscale = :log10, + yscale = :log10, + lw = 2.5, + marker = :circle, + color = :steelblue4, + legend = :topleft, + framestyle = :box, + size = (960, 600), + left_margin = 10Plots.mm, + bottom_margin = 10Plots.mm, +) + #LHS of ode function f(x) n = length(x) @@ -38,37 +51,98 @@ end #Newton's method using DualArrays.jl function newton_method_dualvector(f, x0, n) x = x0 - l = length(x0) for i = 1:n - ∇f = jacobian(f, x, BandedMatrix) + ∇f = DualArrays.jacobian(f, x, BandedMatrix) + x = x - ∇f \ f(x) + end + x +end + +#Newton's method using Zygote.jl +function newton_method_zygote(f, x0, n) + x = x0 + for i = 1:n + ∇f = only(Zygote.jacobian(f, x)) x = x - ∇f \ f(x) end x end -# Plot times for Newton's method using DualArrays and ForwardDiff -function plot_times() - ns = [10, 50, 100, 200, 500] +# Plot times for Newton's method using DualArrays, ForwardDiff, and Zygote +function plot_times(save=undef; ns = [50, 100, 200, 400, 800], iterations = 10) dualvector_times = Float64[] forwarddiff_times = Float64[] + zygote_times = Float64[] for n in ns println("Computing solution with n = $n") x0 = zeros(Float64, n) - push!(dualvector_times, @belapsed newton_method_dualvector(f, $x0, 10)) - push!(forwarddiff_times, @belapsed newton_method_forwarddiff(f, $x0, 10)) + push!(dualvector_times, @belapsed newton_method_dualvector(f, $x0, $iterations)) + push!(forwarddiff_times, @belapsed newton_method_forwarddiff(f, $x0, $iterations)) + push!(zygote_times, @belapsed newton_method_zygote(f, $x0, $iterations)) end - plot(ns, dualvector_times, label="DualArrays") - plot!(ns, forwarddiff_times, label="ForwardDiff") + plot( + ns, + dualvector_times, + ; + label = "DualArrays", + title = "Pendulum Newton Solve Runtime", + xlabel = "Number of points (n)", + ylabel = "Runtime (seconds)", + yticks = 10 .^ collect(-4:0.25:1), + xticks = 10 .^ collect(1.5:0.25:3), + plot_args..., + ) + plot!(ns, forwarddiff_times, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2) + plot!(ns, zygote_times, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen) + if save !== undef + savefig(save) + end +end + +# Plot memory allocations for Newton's method using DualArrays, ForwardDiff, and Zygote +function plot_memory(save=undef; ns = [50, 100, 200, 400, 800], iterations = 10) + dualvector_memory = Float64[] + forwarddiff_memory = Float64[] + zygote_memory = Float64[] + + for n in ns + println("Computing solution with n = $n") + x0 = zeros(Float64, n) + push!(dualvector_memory, @ballocated newton_method_dualvector(f, $x0, $iterations)) + push!(forwarddiff_memory, @ballocated newton_method_forwarddiff(f, $x0, $iterations)) + push!(zygote_memory, @ballocated newton_method_zygote(f, $x0, $iterations)) + end + + plot( + ns, + dualvector_memory, + ; + label = "DualArrays", + title = "Pendulum Newton Solve Memory", + xlabel = "Number of points (n)", + ylabel = "Allocated bytes", + yticks = 10 .^ collect(5:0.5:12), + xticks = 10 .^ collect(1.5:0.25:3), + plot_args..., + ) + plot!(ns, forwarddiff_memory, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2) + plot!(ns, zygote_memory, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen) + if save !== undef + savefig(save) + end end # Plot solution with obtainded through newtons method with DualArrays. # Used to verify correctness. -function plot_solution() +function plot_solution(save=undef) n = Int(Tmax/ts) - 1 x0 = zeros(Float64, n) sol = newton_method_dualvector(f, x0, 10) t = ts:ts:(n * ts) plot(t, sol, label="Pendulum Solution", xlabel="Time", ylabel="Angle") + if save !== undef + savefig(save) + end end \ No newline at end of file diff --git a/examples/plaplacian.jl b/examples/plaplacian.jl new file mode 100644 index 0000000..97396f4 --- /dev/null +++ b/examples/plaplacian.jl @@ -0,0 +1,65 @@ +## +# Solve the p-Laplacian by minimising energy function using +# 2nd-order Newton's method +# +# The p-Laplacian PDE (https://en.wikipedia.org/wiki/P-Laplacian): +# +# -Δ_p u = f in Ω = (0, 1) +# u = 0 on ∂Ω +# +# Its weak solution is the minimiser of the energy functional +# +# J(u) = (1/p) ∫₀¹ |u'(x)|ᵖ dx - ∫₀¹ f(x) u(x) dx +# +# We can discretise this and solve for 0 using 2nd order Newton's method. +## + +using LinearAlgebra, DualArrays, Plots, BandedMatrices + +function energy_function(p, f, h) + n = length(f) + D = BandedMatrix(0 => ones(n), -1 => -ones(n))[:, 1:end-1] / h + return u -> (h / p) * sum(abs.(D * u) .^ p) - h * dot(f, u) +end + + +function newton_solve(J, u, n; n_iter = 20) + for _ in 1:n_iter + ∇J = J(DualVector(u, I(n))).partials + H = hessian(J, u) + u = u - H \ ∇J + end + return u +end + +# Exact solution for f = 1 on [0,1] +exact(x, p) = begin + q = p / (p - 1) + ((1 / 2)^q .- abs.(x .- 1 / 2) .^ q) ./ q +end + +function solve(n = 40, p = 2.0) + h = 1.0 / (n + 1) + xs = range(h, step = h, length = n) + f = ones(n) + J = energy_function(p, f, h) + u0 = 0.1 .* sin.(pi .* xs) + u = newton_solve(J, u0, n; n_iter = 20) + return xs, u +end + +function plot_solution(save=undef, n = 40, ps = [2.0, 2.25, 2.5, 2.75, 3.0]) + plt = plot() + for p in ps + xs, u = solve(; n, p) + plot!(plt, xs, u, label = "Newton (DualArrays), p = $p", lw = 2) + plot!(plt, xs, exact(xs, p), label = "Exact, p = $p", lw = 2, ls = :dash) + end + xlabel!(plt, "x") + ylabel!(plt, "u(x)") + title!(plt, "p-Laplacian computed vs analytical solutions") + display(plt) + if save !== undef + savefig(plt, save) + end +end diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 000fd9f..360c597 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,11 +13,11 @@ Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, DualMatrix, Dual, jacobian, ArrayOperator +export DualVector, DualMatrix, Dual, jacobian, ArrayOperator, hessian import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims -using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations +using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations, SparseArrayKit import FillArrays: elconvert diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 4217d80..3798e2c 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -10,9 +10,9 @@ for op in (:+, :-) @eval $op(x::AbstractVector, y::DualVector) = DualVector($op(x, y.value), y.jacobian) end - # Matrix multiplication with a DualVector. *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::LayoutMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) ### # this section attempts to define broadcasting rules on DualVectors for functions @@ -101,7 +101,11 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) end end -# Special cases: integer powers +# Necessary for promotion between Dual and Real numbers. +Base.signbit(x::Dual) = signbit(x.value) +Base.zero(x::Dual) = Dual(zero(x.value), zero(x.partials)) +Base.one(x::Dual) = Dual(one(x.value), zero(x.partials)) + Base.:^(x::Dual, y::Integer) = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials) Base.broadcasted(::typeof(^), x::DualVector, y::Integer) = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian) Base.broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{y}) where y = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian) @@ -122,9 +126,10 @@ Multiplication (*) for ArrayOperators generalises from matrix/vector multiplication and uses tensor contraction provided by TensorOperations.jl. Let an (A, B) ArrayOperator denote an ArrayOperator with output dimension A and input dimension B -(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only -allow multiplication between an (A, B) ArrayOperator and a (B, C) -ArrayOperator, with the result being an (A, C) ArrayOperator. +(i.e an ArrayOperator{A+B, T, A, B} for some type T). For multiplication +between an (A, B) ArrayOperator and a (C, D) ArrayOperator, we sum over the +leading B dimensions of the left ArrayOperator with the leading B dimensions +of the right ArrayOperator. This returns an (A + C - B, D) ArrayOperator. This generalises from: - An inner product: a row vector * a column vector -> a scalar: @@ -152,8 +157,8 @@ function _contract(x, y, A, B, C) end -function *(x::ArrayOperator{A, B}, y::ArrayOperator{B, C}) where {A, B, C} - return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) +function *(x::ArrayOperator{A, B}, y::ArrayOperator{C, D}) where {A, B, C, D} + return ArrayOperator{A + C - B}(_contract(x.data, y.data, A, B, C + D - B)) end function *(x::ArrayOperator{A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} @@ -166,13 +171,28 @@ function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C}) where {L, B, C} return ArrayOperator{A}(_contract(x, y.data, A, B, C)) end +# Matrix overrides to avoid contract and preserve sparsity +*(x::ArrayOperator{1, 1}, y::ArrayOperator{1, C}) where {C} = ArrayOperator{1}(x.data * y.data) +*(x::ArrayOperator{1, 1}, y::AbstractVecOrMat) = ArrayOperator{1}(x.data * y) +*(x::AbstractMatrix, y::ArrayOperator{1, C}) where {C} = ArrayOperator{1}(x * y.data) + +transpose(x::DualMatrix) = DualMatrix( + transpose(x.value), + ArrayOperator{2}(permutedims(x.jacobian.data, (2, 1, ntuple(i -> i + 2, ndims(x.jacobian.data) - 2)...))), +) + +function *(x::DualMatrix, y::AbstractVector) + jac = _contract(permutedims(x.jacobian.data, (1, 3, 2)), y, 2, 1, 0) + DualVector(x.value * y, jac) +end + +*(x::AbstractMatrix, y::DualMatrix) = DualArray(x * y.value, ArrayOperator{1}(x) * y.jacobian) +*(x::LayoutMatrix, y::DualMatrix) = DualArray(x * y.value, ArrayOperator{1}(x) * y.jacobian) + # Extra required arithmetic for ArrayOperators Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y Base.:-(x::ArrayOperator) = (-).(x) -Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} = - ArrayOperator{N, M, promote_type(typeof(a), T), L}(a .* t.data) +Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} = ArrayOperator{N}(a .* t.data) Base.:*(t::ArrayOperator, a::Number) = a * t - - diff --git a/src/types.jl b/src/types.jl index da914a3..53c2b8d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -24,17 +24,17 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{N, M, T, L} - data::AbstractArray{T, L} +struct ArrayOperator{N, M, T, L, A <: AbstractArray{T, L}} + data::A end # Constructor to wrap an array with a tensor, given a contraction rule represented by N function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} - ArrayOperator{N, L - N, T, L}(data) + ArrayOperator{N, L - N, T, L, typeof(data)}(data) end # Helper convert function -elconvert(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(elconvert(T, t.data)) +elconvert(::Type{T}, t::ArrayOperator{N, M, S, L, A}) where {T, N, M, S, L, A} = ArrayOperator{N, M, T, L, typeof(elconvert(T, t.data))}(elconvert(T, t.data)) # Basic array interface for op in (:size, :axes, :iterate) @@ -81,20 +81,20 @@ as extra information. struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() -function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} - # Julia optimises these checks at compile time. - if L >= M - ArrayOperatorBroadcastStyle{L, N}() - else - throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) - end +function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} + L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator")) +end +function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::S) where {L, N, M, S <: Broadcast.AbstractArrayStyle{M}} + L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator")) end -Base.BroadcastStyle(a::Broadcast.DefaultArrayStyle{M}, b::ArrayOperatorBroadcastStyle{L, N}) where {L, N, M} = Base.BroadcastStyle(b, a) -function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L1, N1},::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2} +Base.BroadcastStyle(s::Broadcast.AbstractArrayStyle, t::ArrayOperatorBroadcastStyle) = Base.BroadcastStyle(t, s) +function Base.BroadcastStyle(s1::ArrayOperatorBroadcastStyle{L1, N1}, s2::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2} if L1 > L2 - ArrayOperatorBroadcastStyle{L1, N1}() + s1 elseif L2 > L1 - ArrayOperatorBroadcastStyle{L2, N2}() + s2 + elseif N1 == N2 + s1 else throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) end @@ -103,30 +103,24 @@ end # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. -_unwrap(t::ArrayOperator) = t.data -_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) -_unwrap(x) = x -_unwrap_args(args::Tuple) = map(_unwrap, args) +_wrap_dual_matrix(x) = x +_unwrap_arg(t::ArrayOperator) = t.data +_unwrap_arg(bc::Broadcast.Broadcasted{<:ArrayOperatorBroadcastStyle}) = _unwrap_arg(Broadcast.materialize(bc)) +_unwrap_arg(bc::Broadcast.Broadcasted) = Broadcast.materialize(bc) +_unwrap_arg(x) = x # copy ensures that arithmetic involving a Tensor returns a Tensor function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # We create a Broadcasted of the underlying arrays and create a Tensor containing # the evaluated broadcast. We check if Base.broadcasted is a Broadcasted # or is overriden such as with DualArrays - databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) - result = databroadcast isa Broadcast.Broadcasted ? copy(Broadcast.flatten(databroadcast)) : databroadcast - ArrayOperator{N}(result) + ArrayOperator{N}(_wrap_dual_matrix(Broadcast.materialize(Broadcast.broadcasted(bc.f, map(_unwrap_arg, bc.args)...)))) end # copyto adds support for .= function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # As above - databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) - if databroadcast isa Broadcast.Broadcasted - copyto!(dest.data, Broadcast.flatten(databroadcast)) - else - copyto!(dest.data, databroadcast) - end + copyto!(dest.data, Broadcast.broadcasted(bc.f, map(_unwrap_arg, bc.args)...)) dest end @@ -223,7 +217,8 @@ DualArray{T,N}(value, jacobian) where {T, N} = DualArray{T,N}(elconvert(T, value # Constructor that forces type compatibility function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualArray{T}(value, jacobian) + N = ndims(value) + DualArray{T,N}(value, jacobian) end # Helper function to define DualArrays with AbstractArray jacobians @@ -264,6 +259,18 @@ function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) whe DualMatrix(value, ArrayOperator{2}(jacobian)) end +_wrap_dual_matrix(x::DualMatrix) = x + +# If we have a matrix of dual numbers, we construct it into a DualMatrix +# This helps (for now) resolve issues surrounding broadcasting with ArrayOperator +# and DualMatrix nested (in 2nd order autodiff). A proper fix involves +# redesigning the broadcasting of dual arrays. +function _wrap_dual_matrix(x::AbstractMatrix{<:Dual}) + npartials = length(first(x).partials) + partial_columns = reduce(hcat, vec(getfield.(x, :partials))) + partial_tensor = permutedims(reshape(partial_columns, npartials, size(x)...), (2, 3, 1)) + DualMatrix(getfield.(x, :value), partial_tensor) +end elconvert(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(elconvert(T, a.value), elconvert(T, a.jacobian)) elconvert(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(elconvert(T, a.value), elconvert(T, a.jacobian)) diff --git a/src/utilities.jl b/src/utilities.jl index 9111077..ce1b92d 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -2,9 +2,9 @@ using FillArrays, BandedMatrices, LinearAlgebra # Sum all elements of a DualVector, returning a single Dual number. +# We use dot as it provides a clean way of summing over nested dual vectors as well. function Base.sum(x::DualVector) - n = length(x.value) - Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) + dot(ones(Int, length(x)), x) end # Helper functions for vcat operations @@ -62,5 +62,14 @@ function jacobian(f::Function, x::AbstractVector, id=Eye) Matrix(I(length(x))) end d = DualVector(x, J) - return f(d).jacobian + return f(d).jacobian.data +end + +function hessian(f::Function, x::AbstractVector) + n = length(x) + d1 = DualVector(x, I(n)) + d2 = DualMatrix(I(n), zeros(n, n, n)) + d = DualVector(d1, d2) + ret = f(d) + return ret.partials.jacobian.data end diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 42b0af2..29f1921 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -55,6 +55,11 @@ using FillArrays @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) + left = ArrayOperator{1}([1 2; 3 4]) + right = ArrayOperator{2}(ones(2,2,2)) + expected = cat((left.data * right.data[:, :, k] for k in axes(right.data, 3))..., dims=3) + @test left * right == ArrayOperator{2}(expected) + @test t * 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) @test 2 * t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) @@ -75,4 +80,13 @@ using FillArrays b .= a .* 3 @test b == ArrayOperator{1}(OneElement(3, 1, 3)) end + + @testset "Arithmetic with BandedMatrix" begin + bm1 = BandedMatrix(0 => [1, 2, 3], 1 => [1, 1], -1 => [1, 1]) + bm2 = BandedMatrix(0 => [4, 5, 6], 1 => [1, 1], -1 => [1, 1]) + a = ArrayOperator{1}(bm1) + b = ArrayOperator{1}(bm2) + @test (a .+ b).data == bm1 .+ bm2 + @test (a .+ b).data isa BandedMatrix + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index ab24156..86fe037 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -148,6 +148,17 @@ using DualArrays: ArrayOperator @test d2[1].value == Dual(2, [1, 0]) @test d2[1].partials == DualVector([1.0, 0], zeros(2, 2)) end + + @testset "Hessian" begin + M = [1 1; 1 1] + dm = DualMatrix([1 2;3 4], zeros(2, 2, 2)) + + @test transpose(dm) isa DualMatrix + @test transpose(dm) * [2, -1] == DualVector([-1, 0], zeros(2, 2)) + + f(x) = sum(abs.(M * x) .^ 2) + @test hessian(f, [1, 1]) ≈ 2 .* (M' * M) + end include("broadcast_test.jl") include("array_operator_test.jl") From 264becbb46c41d5d705f68d40c394435db07cd3f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 4 Jun 2026 16:36:09 +0100 Subject: [PATCH 2/5] fix keyword mismatch --- examples/nestedduals.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index 4dd90c5..5d796ae 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -65,7 +65,7 @@ gradient = result.value.partials # equivalent to c gradient2 = result.partials.value # equivalent to d -hessian = result.partials.jacobian.data +_hessian = result.partials.jacobian.data # We expect this to be 6 print("Value: ", value, "\n") @@ -74,5 +74,5 @@ print("Gradient: ", gradient, "\n") # We expect this to be the same print("Gradient: ", gradient2, "\n") # We expect this to be [0 1; 1 0] -print("Hessian: ", hessian, "\n") +print("Hessian: ", _hessian, "\n") From a2b5e7a228564322848e0345ce9a42452eca31a9 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 4 Jun 2026 16:53:45 +0100 Subject: [PATCH 3/5] improve coverage --- examples/plaplacian.jl | 2 +- src/arithmetic.jl | 2 -- src/types.jl | 7 ------- test/runtests.jl | 6 ++++++ 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/examples/plaplacian.jl b/examples/plaplacian.jl index 97396f4..62bf32f 100644 --- a/examples/plaplacian.jl +++ b/examples/plaplacian.jl @@ -51,7 +51,7 @@ end function plot_solution(save=undef, n = 40, ps = [2.0, 2.25, 2.5, 2.75, 3.0]) plt = plot() for p in ps - xs, u = solve(; n, p) + xs, u = solve(n, p) plot!(plt, xs, u, label = "Newton (DualArrays), p = $p", lw = 2) plot!(plt, xs, exact(xs, p), label = "Exact, p = $p", lw = 2, ls = :dash) end diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 3798e2c..d7f8142 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -103,10 +103,8 @@ end # Necessary for promotion between Dual and Real numbers. Base.signbit(x::Dual) = signbit(x.value) -Base.zero(x::Dual) = Dual(zero(x.value), zero(x.partials)) Base.one(x::Dual) = Dual(one(x.value), zero(x.partials)) -Base.:^(x::Dual, y::Integer) = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials) Base.broadcasted(::typeof(^), x::DualVector, y::Integer) = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian) Base.broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{y}) where y = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian) Base.literal_pow(::typeof(^), x::Dual, ::Val{y}) where y = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials) diff --git a/src/types.jl b/src/types.jl index 53c2b8d..93b4de3 100644 --- a/src/types.jl +++ b/src/types.jl @@ -84,17 +84,12 @@ Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator")) end -function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::S) where {L, N, M, S <: Broadcast.AbstractArrayStyle{M}} - L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator")) -end Base.BroadcastStyle(s::Broadcast.AbstractArrayStyle, t::ArrayOperatorBroadcastStyle) = Base.BroadcastStyle(t, s) function Base.BroadcastStyle(s1::ArrayOperatorBroadcastStyle{L1, N1}, s2::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2} if L1 > L2 s1 elseif L2 > L1 s2 - elseif N1 == N2 - s1 else throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) end @@ -259,8 +254,6 @@ function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) whe DualMatrix(value, ArrayOperator{2}(jacobian)) end -_wrap_dual_matrix(x::DualMatrix) = x - # If we have a matrix of dual numbers, we construct it into a DualMatrix # This helps (for now) resolve issues surrounding broadcasting with ArrayOperator # and DualMatrix nested (in 2nd order autodiff). A proper fix involves diff --git a/test/runtests.jl b/test/runtests.jl index 86fe037..510c265 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -117,6 +117,12 @@ using DualArrays: ArrayOperator d = DualVector([2, 3], [4 5; 6 7]) @test M * d isa DualVector @test M * d == DualVector([5,5],[10 12;10 12]) + + L = BandedMatrix([2 0; 0 3], (0, 0)) + @test L * d == DualVector([4, 9], [8 10; 18 21]) + + dm = DualMatrix([1 2; 3 4], cat([1 3; 2 4], [5 7; 6 8]; dims=3)) + @test L * dm == DualMatrix([2 4; 9 12], cat([2 6; 6 12], [10 14; 18 24]; dims=3)) end @testset "vcat" begin x = Dual(1, [1, 2, 3]) From 7e57add537e8e3e1ece527fece3122d0004fa7db Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 8 Jun 2026 11:18:45 +0100 Subject: [PATCH 4/5] fix mutability, implement nested dual ad for poisson control --- examples/controlproblem.jl | 60 ++++++++++++++++++++++++++++---------- src/DualArrays.jl | 2 +- src/arithmetic.jl | 11 ++++++- src/indexing.jl | 6 ++++ test/broadcast_test.jl | 9 ++++++ 5 files changed, 70 insertions(+), 18 deletions(-) diff --git a/examples/controlproblem.jl b/examples/controlproblem.jl index 861703f..f1afbac 100644 --- a/examples/controlproblem.jl +++ b/examples/controlproblem.jl @@ -49,6 +49,18 @@ using LinearAlgebra, DualArrays, Plots # If we propagate f as a DualVector above, we get the f.jacobian as the Hessian of J, # and this enables us to solve via newton's method. +""" +This calculates the scalar objective + + (1/2) * ||u - d||² + (alpha / 2) * ||f||² + +where u solves K * u = f. +""" +function objective(K, f, d, alpha, h) + u = K \ f + return (h / 2) * sum((u - d) .^ 2) + (alpha * h / 2) * sum(f .^ 2) +end + """ This calculates the gradient of the objective as detailed above. K: the discretised Laplacian @@ -73,13 +85,20 @@ kappa: diffusivity constant alpha: regularisation parameter iters: number of iterations to run newton's method. """ -function solve_1D(f0, d, h, kappa, alpha, iters = 30) +function solve_1D(f0, d, h, kappa, alpha, iters = 30; method = :adjoint) fvector = copy(f0) n = length(f0) K = (kappa / h ^ 2) * Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1)) for _ = 1:iters - grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) - step = grads.jacobian.data \ grads.value + gradient, hess = if method == :adjoint + grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) + (grads.value, grads.jacobian.data) + elseif method == :hessian + J = f -> objective(K, f, d, alpha, h) + gradient = J(DualVector(fvector, I(length(fvector)))).partials + (gradient, hessian(J, fvector)) + end + step = hess \ gradient fvector -= step end @@ -97,7 +116,7 @@ kappa: diffusivity constant alpha: regularisation parameter iters: number of iterations to run newton's method. """ -function solve_2D(x0, d, h, kappa, alpha, iters = 30) +function solve_2D(x0, d, h, kappa, alpha, iters = 30; method = :adjoint) fvector = copy(x0) n = isqrt(length(x0)) T = Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1)) @@ -105,8 +124,15 @@ function solve_2D(x0, d, h, kappa, alpha, iters = 30) # source: https://www.petercheng.me/blog/discrete-laplacian-matrix K = (kappa / h ^ 2) * (kron(I(n), T) + kron(T, I(n))) for _ = 1:iters - grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) - step = grads.jacobian.data \ grads.value + gradient, hess = if method == :adjoint + grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha) + (grads.value, grads.jacobian.data) + elseif method == :hessian + J = f -> objective(K, f, d, alpha, h) + gradient = J(DualVector(fvector, I(length(fvector)))).partials + (gradient, hessian(J, fvector)) + end + step = hess \ gradient fvector -= step end @@ -115,7 +141,7 @@ end # Solve the 1D Poisson control on the unit interval and plot the solution. # We compare it to a known analytical solution. -function plot_solution_1D(save=undef) +function plot_solution_1D(save=undef; method = :adjoint) # setup problem kappa = 1 @@ -131,17 +157,18 @@ function plot_solution_1D(save=undef) coeff = (kappa * pi^2) / (1 + alpha * kappa^2 * pi^4) fexact = coeff .* sin.(pi .* x[2:end-1]) - f = solve_1D(zeros(length(d)), d, h, kappa, alpha) - plot(x[2:end-1], f, label="Computed Control", title="1D Poisson Control Solution") - plot!(x[2:end-1], fexact, label="Exact Control") + f = solve_1D(zeros(length(d)), d, h, kappa, alpha; method=method) + plt = plot(x[2:end-1], f, label="Computed Control ($(String(method)))", title="1D Poisson Control Solution") + plot!(plt, x[2:end-1], fexact, label="Exact Control") + display(plt) if save !== undef - savefig(save) + savefig(plt, save) end end # Solve the 2D Poisson control on the unit square and plot the solution. # We compare it to a known analytical solution given in the dolfin-adjoint example. -function plot_solution_2D(save=undef) +function plot_solution_2D(save=undef; method = :adjoint) # setup problem kappa = 1 @@ -162,13 +189,14 @@ function plot_solution_2D(save=undef) # analytical optimal control as specified in dolfin-adjoint fexact = (1 / (1 + 4 * alpha * pi^4)) .* sin.(pi .* X) .* sin.(pi .* Y) - f = solve_2D(zeros(n * n), vec(d), h, kappa, alpha) + f = solve_2D(zeros(n * n), vec(d), h, kappa, alpha; method=method) F = reshape(f, n, n) - p1 = heatmap(x, x, F, title="Computed Control", aspect_ratio=1) + p1 = heatmap(x, x, F, title="Computed Control ($(String(method)))", aspect_ratio=1) p2 = heatmap(x, x, fexact, title="Exact Control", aspect_ratio=1) - plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12) + plt = plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12) + display(plt) if save !== undef - savefig(save) + savefig(plt, save) end end \ No newline at end of file diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 360c597..2161772 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -15,7 +15,7 @@ module DualArrays export DualVector, DualMatrix, Dual, jacobian, ArrayOperator, hessian -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims +import Base: +, -, ==, getindex, setindex!, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations, SparseArrayKit diff --git a/src/arithmetic.jl b/src/arithmetic.jl index d7f8142..f835ffb 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -115,7 +115,16 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x) # solve -\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) +function \(x::AbstractMatrix, y::DualVector) + DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) +end + +function \(x::AbstractMatrix, y::DualMatrix) + jac = y.jacobian.data + reshaped = reshape(jac, size(jac, 1), :) + solved = x \ reshaped + DualMatrix(x \ y.value, ArrayOperator{2}(reshape(solved, size(jac)))) +end diff --git a/src/indexing.jl b/src/indexing.jl index b357811..2cbf31e 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -78,6 +78,12 @@ function getindex(x::DualMatrix, y::Vararg{Union{Colon, UnitRange}}) DualMatrix(newval, newjac) end +function setindex!(x::DualVector, y::Number, i::Int) + x.value[i] = y + x.jacobian.data[i, :] .= 0 + return x +end + # DualArray array interface for op in (:size, :axes) @eval $op(a::DualArray) = $op(a.value) diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl index e368e08..1d66fd3 100644 --- a/test/broadcast_test.jl +++ b/test/broadcast_test.jl @@ -71,4 +71,13 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test s.jacobian.data ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] @test m.jacobian.data ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] @test div.jacobian.data ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] +end + +@testset "DualVector setindex!" begin + x = DualVector([1, 2, 3], [1 0 0; 0 1 0; 0 0 1]) + + x[2] = 4 + + @test x.value == [1, 4, 3] + @test x.jacobian.data == [1 0 0; 0 0 0; 0 0 1] end \ No newline at end of file From 7138cdf9070e2e8ab4bcf081b3d65cabbb385dfe Mon Sep 17 00:00:00 2001 From: Maxim Vassiliev Date: Thu, 11 Jun 2026 10:00:42 +0100 Subject: [PATCH 5/5] add testing --- test/runtests.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 510c265..ad16e8c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -112,6 +112,16 @@ using DualArrays: ArrayOperator @test A \ b == DualVector([1, 1], [1.5 2; 1.5 2]) end + @testset "Solve (DualMatrix)" begin + A = [1 0;0 2] + dm = DualMatrix([1.0 2.0; 3.0 4.0], zeros(2, 2, 2)) + + res = A \ dm + + @test res.value == A \ dm.value + @test res.jacobian.data == zeros(2, 2, 2) + end + @testset "Matrix multiplication" begin M = [1 1; 1 1] d = DualVector([2, 3], [4 5; 6 7])