From 1156abf07e1f6a5369821f014830f5b5c86bd70f Mon Sep 17 00:00:00 2001
From: Max Vassiliev <vassiliev.max@gmail.com>
Date: Thu, 4 Jun 2026 16:31:58 +0100
Subject: [PATCH 1/5] initial commit

---
 Project.toml                |   9 ++-
 examples/catenary.jl        | 111 ++++++++++++++++++++++++++++++++----
 examples/controlproblem.jl  |  22 ++++---
 examples/jacobians.jl       |   4 +-
 examples/nestedduals.jl     |   4 +-
 examples/newtonpendulum.jl  |  98 +++++++++++++++++++++++++++----
 examples/plaplacian.jl      |  65 +++++++++++++++++++++
 src/DualArrays.jl           |   4 +-
 src/arithmetic.jl           |  42 ++++++++++----
 src/types.jl                |  65 +++++++++++----------
 src/utilities.jl            |  15 ++++-
 test/array_operator_test.jl |  14 +++++
 test/runtests.jl            |  11 ++++
 13 files changed, 382 insertions(+), 82 deletions(-)
 create mode 100644 examples/plaplacian.jl

diff --git a/Project.toml b/Project.toml
index 94ea596..43ee63b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,16 +9,20 @@ BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+SparseArrayKit = "a9a3c162-d163-4c15-8926-b8794fbefed2"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
 
+
 [compat]
 DiffRules = "1.15.1"
 ForwardDiff = "1.2.2"
 Lux = "1.29.1"
 Plots = "1.41.1"
+SparseArrayKit = "0.4.3"
 SparseArrays = "1.10"
-TensorOperations = "5.5.2"
+TensorOperations = "5.6.1"
+Zygote = "0.7.10"
 
 [extras]
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
@@ -27,7 +31,8 @@ Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-examples = ["Plots", "ForwardDiff", "BandedMatrices", "Lux"]
+examples = ["Plots", "ForwardDiff", "BandedMatrices", "Lux", "Zygote"]
 test = ["Test", "ForwardDiff", "BandedMatrices", "SparseArrays"]
diff --git a/examples/catenary.jl b/examples/catenary.jl
index 3ef7be6..ea2f3b7 100644
--- a/examples/catenary.jl
+++ b/examples/catenary.jl
@@ -1,4 +1,4 @@
-using DualArrays, LinearAlgebra, Plots, BenchmarkTools
+using DualArrays, LinearAlgebra, Plots, BenchmarkTools, BandedMatrices, ForwardDiff, Zygote
 
 """
 Solving the Catenary Problem using DualArrays.jl
@@ -11,6 +11,19 @@ The computation of finite differences keeps the Jacobian sparse,
 so we can use DualArrays.jl
 """
 
+const plot_args = (
+    xscale = :log10,
+    yscale = :log10,
+    lw = 2.5,
+    marker = :circle,
+    color = :steelblue4,
+    legend = :topleft,
+    framestyle = :box,
+    size = (960, 600),
+    left_margin = 10Plots.mm,
+    bottom_margin = 10Plots.mm,
+)
+
 function L(y, h, alpha, beta)
     """
     Evaluate functional with boundary conditions (alpha, beta)
@@ -24,33 +37,109 @@ function L(y, h, alpha, beta)
     return y_mid .* sqrt.(1 .+ dy.^2)
 end
 
-function learn_catenary(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01)
+function learn_catenary_dualarrays(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01)
+    n = Int(2 / h) - 1
+    y = ones(n) * (alpha + beta) / 2
+    for _ = 1:epochs
+        jac = DualArrays.jacobian(y -> L(y, h, alpha, beta), y, BandedMatrix)
+        grads = h * sum(jac, dims=1)
+        y -= lr * vec(grads)
+    end
+    return y
+end
+
+function learn_catenary_forwarddiff(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01)
+    n = Int(2 / h) - 1
+    y = ones(n) * (alpha + beta) / 2
+    for _ = 1:epochs
+        jac = ForwardDiff.jacobian(y -> L(y, h, alpha, beta), y)
+        grads = h * sum(jac, dims=1)
+        y -= lr * vec(grads)
+    end
+    return y
+end
+
+function learn_catenary_zygote(h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 2000, lr = 0.01)
     n = Int(2 / h) - 1
     y = ones(n) * (alpha + beta) / 2
     for _ = 1:epochs
-        jac = jacobian(y -> L(y, h, alpha, beta), y, id="banded")
+        jac = only(Zygote.jacobian(y -> L(y, h, alpha, beta), y))
         grads = h * sum(jac, dims=1)
         y -= lr * vec(grads)
     end
     return y
 end
 
-function plot_solution(h)
+function plot_solution(save=undef;h = 0.1, alpha = cosh(-1), beta = cosh(1), epochs = 5000, lr = 0.02)
     x = collect(-1+h:h:1-h)
-    y = learn_catenary()
-    plot(x, y, label = "Approximation")
-    plot!(x, cosh.(x), label = "Exact Solution")
+    y = learn_catenary_dualarrays(h, alpha, beta, epochs, lr)
+    plot(x, y, label = "Approximate solution (DualArrays)", title="Catenary Solution", legend=:topleft)
+    plot!(x, cosh.(x), label = "Exact Solution (cosh(x))", ls = :dash)
+    if save !== undef
+        savefig(save)
+    end
 end
 
-function plot_times()
-    hs = [0.1, 0.05, 0.02, 0.01, 0.005, 0.002]
+function plot_times(save=undef; hs = [0.04, 0.02, 0.01, 0.005, 0.0025, 0.00125], epochs = 200, lr = 0.01)
     ns = Int.(2 ./ hs) .- 1
     dualvector_times = Float64[]
+    forwarddiff_times = Float64[]
+    zygote_times = Float64[]
 
     for (h, n) in zip(hs, ns)
         println("Computing solution with h = $h, n = $n")
-        push!(dualvector_times, @belapsed learn_catenary($h, cosh(-1), cosh(1), 2000, 0.01))
+        push!(dualvector_times, @belapsed learn_catenary_dualarrays($h, cosh(-1), cosh(1), $epochs, $lr))
+        push!(forwarddiff_times, @belapsed learn_catenary_forwarddiff($h, cosh(-1), cosh(1), $epochs, $lr))
+        push!(zygote_times, @belapsed learn_catenary_zygote($h, cosh(-1), cosh(1), $epochs, $lr))
+    end
+
+    plot(
+        ns,
+        dualvector_times,
+        ;
+        label = "DualArrays",
+        title = "Catenary Gradient Descent Runtime",
+        xlabel = "Number of points (n)",
+        ylabel = "Runtime (seconds)",
+        yticks = 10 .^ collect(-3:0.25:2),
+        xticks = 10 .^ collect(1.5:0.25:3.5),
+        plot_args...,
+    )
+    plot!(ns, forwarddiff_times, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2)
+    plot!(ns, zygote_times, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen)
+    if save !== undef
+        savefig(save)
     end
+end
 
-    plot(ns, dualvector_times, label="DualArrays")
+function plot_memory(save=undef; hs = [0.04, 0.02, 0.01, 0.005, 0.0025, 0.00125], epochs = 200, lr = 0.01)
+    ns = Int.(2 ./ hs) .- 1
+    dualvector_memory = Float64[]
+    forwarddiff_memory = Float64[]
+    zygote_memory = Float64[]
+
+    for (h, n) in zip(hs, ns)
+        println("Computing solution with h = $h, n = $n")
+        push!(dualvector_memory, @ballocated learn_catenary_dualarrays($h, cosh(-1), cosh(1), $epochs, $lr))
+        push!(forwarddiff_memory, @ballocated learn_catenary_forwarddiff($h, cosh(-1), cosh(1), $epochs, $lr))
+        push!(zygote_memory, @ballocated learn_catenary_zygote($h, cosh(-1), cosh(1), $epochs, $lr))
+    end
+
+    plot(
+        ns,
+        dualvector_memory,
+        ;
+        label = "DualArrays",
+        title = "Catenary Gradient Descent Memory",
+        xlabel = "Number of points (n)",
+        ylabel = "Allocated bytes",
+        yticks = 10 .^ collect(6:0.5:11),
+        xticks = 10 .^ collect(1.5:0.25:3.5),
+        plot_args...,
+    )
+    plot!(ns, forwarddiff_memory, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2)
+    plot!(ns, zygote_memory, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen)
+    if save !== undef
+        savefig(save)
+    end
 end
\ No newline at end of file
diff --git a/examples/controlproblem.jl b/examples/controlproblem.jl
index 07d5a36..861703f 100644
--- a/examples/controlproblem.jl
+++ b/examples/controlproblem.jl
@@ -12,7 +12,7 @@ using LinearAlgebra, DualArrays, Plots
 # Provided f ∈ L²(Ω), there exists a unique solution u ∈ H₀¹(Ω) to the boundary value problem.
 # A problem arises that given a desired solution d, we want to find f such that the solution u
 # is approximately d. An intuitive way of asking this is: "given that we want a certain temperature
-# field in a room, how do we heat or cool the walls?"
+# field in a room, how do we position heat sources/sinks?"
 #
 # A way of doing this is to define an objective to be minimised given by:
 #
@@ -79,7 +79,7 @@ function solve_1D(f0, d, h, kappa, alpha, iters = 30)
     K = (kappa / h ^ 2) * Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1))
     for _ = 1:iters
         grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
-        step = grads.jacobian \ grads.value
+        step = grads.jacobian.data \ grads.value
         fvector -= step 
     end
 
@@ -106,7 +106,7 @@ function solve_2D(x0, d, h, kappa, alpha, iters = 30)
     K = (kappa / h ^ 2) * (kron(I(n), T) + kron(T, I(n)))
     for _ = 1:iters
         grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
-        step = grads.jacobian \ grads.value
+        step = grads.jacobian.data \ grads.value
         fvector -= step 
     end
 
@@ -115,7 +115,7 @@ end
 
 # Solve the 1D Poisson control on the unit interval and plot the solution.
 # We compare it to a known analytical solution.
-function plot_solution_1D()
+function plot_solution_1D(save=undef)
 
     # setup problem
     kappa = 1
@@ -132,13 +132,16 @@ function plot_solution_1D()
     fexact = coeff .* sin.(pi .* x[2:end-1])
 
     f = solve_1D(zeros(length(d)), d, h, kappa, alpha)
-    plot(x[2:end-1], f, label="Computed Solution")
-    plot!(x[2:end-1], fexact, label="Exact Solution")
+    plot(x[2:end-1], f, label="Computed Control", title="1D Poisson Control Solution")
+    plot!(x[2:end-1], fexact, label="Exact Control")
+    if save !== undef
+        savefig(save)
+    end
 end
     
 # Solve the 2D Poisson control on the unit square and plot the solution.
 # We compare it to a known analytical solution given in the dolfin-adjoint example.
-function plot_solution_2D()
+function plot_solution_2D(save=undef)
 
     # setup problem
     kappa = 1
@@ -164,5 +167,8 @@ function plot_solution_2D()
 
     p1 = heatmap(x, x, F, title="Computed Control", aspect_ratio=1)
     p2 = heatmap(x, x, fexact, title="Exact Control", aspect_ratio=1)
-    plot(p1, p2, layout=(1, 2), size=(900, 400))
+    plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12)
+    if save !== undef
+        savefig(save)
+    end
 end
\ No newline at end of file
diff --git a/examples/jacobians.jl b/examples/jacobians.jl
index 1a94212..6538b41 100644
--- a/examples/jacobians.jl
+++ b/examples/jacobians.jl
@@ -9,7 +9,7 @@
 #
 ###
 
-using DualArrays, ForwardDiff, Plots, BenchmarkTools
+using DualArrays, ForwardDiff, Plots, BenchmarkTools, BandedMatrices
 
 f(x) = sin.(x)
 
@@ -20,7 +20,7 @@ function plot_times()
 
     for n in ns
         println("Computing jacobian for n = $n")
-        push!(dualarray_times, @belapsed DualArrays.jacobian(f, rand($n)))
+        push!(dualarray_times, @belapsed DualArrays.jacobian(f, rand($n), BandedMatrix))
         push!(forwarddiff_times, @belapsed ForwardDiff.jacobian(f, rand($n)))
     end
 
diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl
index 51af36d..4dd90c5 100644
--- a/examples/nestedduals.jl
+++ b/examples/nestedduals.jl
@@ -42,14 +42,14 @@
 #
 # We now implement an example using f(x) = x[1] * x[2] below.
 
-using DualArrays
+using DualArrays, SparseArrayKit
 f(x) = x[1] * x[2]
 
 # a + Bϵ
 x = DualVector([2, 3], [1 0; 0 1])
 
 # C + Dϵ
-y = DualMatrix([1 0;0 1], zeros(2,2,2))
+y = DualMatrix([1 0;0 1], SparseArray{Float64, 3}(undef, (2, 2, 2)))
 
 # Setup the nested dual vector (a + Bϵ) + (C + Dϵ)ϵ'
 z = DualVector(x, y)
diff --git a/examples/newtonpendulum.jl b/examples/newtonpendulum.jl
index a24e329..41a550f 100644
--- a/examples/newtonpendulum.jl
+++ b/examples/newtonpendulum.jl
@@ -9,15 +9,28 @@
 # Accurately and in O(n) time.
 ##
 
-using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, BenchmarkTools, BandedMatrices
+using LinearAlgebra, ForwardDiff, Plots, DualArrays, FillArrays, BenchmarkTools, BandedMatrices, Zygote
 
 #Boundary Conditions
-a = 0.4
+a = 0.1
 b = 0.0
 
 Tmax = 5.0
 ts = 0.01
 
+const plot_args = (
+    xscale = :log10,
+    yscale = :log10,
+    lw = 2.5,
+    marker = :circle,
+    color = :steelblue4,
+    legend = :topleft,
+    framestyle = :box,
+    size = (960, 600),
+    left_margin = 10Plots.mm,
+    bottom_margin = 10Plots.mm,
+)
+
 #LHS of ode
 function f(x)
     n = length(x)
@@ -38,37 +51,98 @@ end
 #Newton's method using DualArrays.jl
 function newton_method_dualvector(f, x0, n)
     x = x0
-    l = length(x0)
     for i = 1:n
-        ∇f = jacobian(f, x, BandedMatrix)
+        ∇f = DualArrays.jacobian(f, x, BandedMatrix)
+        x = x - ∇f \ f(x)
+    end
+    x
+end
+
+#Newton's method using Zygote.jl
+function newton_method_zygote(f, x0, n)
+    x = x0
+    for i = 1:n
+        ∇f = only(Zygote.jacobian(f, x))
         x = x - ∇f \ f(x)
     end
     x
 end
 
-# Plot times for Newton's method using DualArrays and ForwardDiff
-function plot_times()
-    ns = [10, 50, 100, 200, 500]
+# Plot times for Newton's method using DualArrays, ForwardDiff, and Zygote
+function plot_times(save=undef; ns = [50, 100, 200, 400, 800], iterations = 10)
     dualvector_times = Float64[]
     forwarddiff_times = Float64[]
+    zygote_times = Float64[]
 
     for n in ns
         println("Computing solution with n = $n")
         x0 = zeros(Float64, n)
-        push!(dualvector_times, @belapsed newton_method_dualvector(f, $x0, 10))
-        push!(forwarddiff_times, @belapsed newton_method_forwarddiff(f, $x0, 10))
+        push!(dualvector_times, @belapsed newton_method_dualvector(f, $x0, $iterations))
+        push!(forwarddiff_times, @belapsed newton_method_forwarddiff(f, $x0, $iterations))
+        push!(zygote_times, @belapsed newton_method_zygote(f, $x0, $iterations))
     end
 
-    plot(ns, dualvector_times, label="DualArrays")
-    plot!(ns, forwarddiff_times, label="ForwardDiff")
+    plot(
+        ns,
+        dualvector_times,
+        ;
+        label = "DualArrays",
+        title = "Pendulum Newton Solve Runtime",
+        xlabel = "Number of points (n)",
+        ylabel = "Runtime (seconds)",
+        yticks = 10 .^ collect(-4:0.25:1),
+        xticks = 10 .^ collect(1.5:0.25:3),
+        plot_args...,
+    )
+    plot!(ns, forwarddiff_times, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2)
+    plot!(ns, zygote_times, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen)
+    if save !== undef
+        savefig(save)
+    end
+end
+
+# Plot memory allocations for Newton's method using DualArrays, ForwardDiff, and Zygote
+function plot_memory(save=undef; ns = [50, 100, 200, 400, 800], iterations = 10)
+    dualvector_memory = Float64[]
+    forwarddiff_memory = Float64[]
+    zygote_memory = Float64[]
+
+    for n in ns
+        println("Computing solution with n = $n")
+        x0 = zeros(Float64, n)
+        push!(dualvector_memory, @ballocated newton_method_dualvector(f, $x0, $iterations))
+        push!(forwarddiff_memory, @ballocated newton_method_forwarddiff(f, $x0, $iterations))
+        push!(zygote_memory, @ballocated newton_method_zygote(f, $x0, $iterations))
+    end
+
+    plot(
+        ns,
+        dualvector_memory,
+        ;
+        label = "DualArrays",
+        title = "Pendulum Newton Solve Memory",
+        xlabel = "Number of points (n)",
+        ylabel = "Allocated bytes",
+        yticks = 10 .^ collect(5:0.5:12),
+        xticks = 10 .^ collect(1.5:0.25:3),
+        plot_args...,
+    )
+    plot!(ns, forwarddiff_memory, label = "ForwardDiff", lw = 2.5, marker = :square, color = :darkorange2)
+    plot!(ns, zygote_memory, label = "Zygote", lw = 2.5, marker = :diamond, color = :forestgreen)
+    if save !== undef
+        savefig(save)
+    end
 end
 
 # Plot solution with obtainded through newtons method with DualArrays.
 # Used to verify correctness.
-function plot_solution()
+function plot_solution(save=undef)
     n = Int(Tmax/ts) - 1
     x0 = zeros(Float64, n)
     sol = newton_method_dualvector(f, x0, 10)
     t = ts:ts:(n * ts)
     plot(t, sol, label="Pendulum Solution", xlabel="Time", ylabel="Angle")
+    if save !== undef
+        savefig(save)
+    end
 end
\ No newline at end of file
diff --git a/examples/plaplacian.jl b/examples/plaplacian.jl
new file mode 100644
index 0000000..97396f4
--- /dev/null
+++ b/examples/plaplacian.jl
@@ -0,0 +1,65 @@
+##
+# Solve the p-Laplacian by minimising energy function using
+# 2nd-order Newton's method
+#
+# The p-Laplacian PDE (https://en.wikipedia.org/wiki/P-Laplacian):
+#
+#   -Δ_p u  =  f   in Ω = (0, 1)
+#        u  =  0   on ∂Ω
+#
+# Its weak solution is the minimiser of the energy functional
+#
+#   J(u) = (1/p) ∫₀¹ |u'(x)|ᵖ dx  -  ∫₀¹ f(x) u(x) dx
+#
+# We can discretise this and solve for 0 using 2nd order Newton's method.
+##
+
+using LinearAlgebra, DualArrays, Plots, BandedMatrices
+
+function energy_function(p, f, h)
+    n = length(f)
+    D = BandedMatrix(0 => ones(n), -1 => -ones(n))[:, 1:end-1] / h
+    return u -> (h / p) * sum(abs.(D * u) .^ p) - h * dot(f, u)
+end
+
+
+function newton_solve(J, u, n; n_iter = 20)
+    for _ in 1:n_iter
+        ∇J = J(DualVector(u, I(n))).partials
+        H  = hessian(J, u)
+        u  = u - H \ ∇J
+    end
+    return u
+end
+
+# Exact solution for f = 1 on [0,1]
+exact(x, p) = begin
+    q = p / (p - 1)
+    ((1 / 2)^q .- abs.(x .- 1 / 2) .^ q) ./ q
+end
+
+function solve(n = 40, p = 2.0)
+    h  = 1.0 / (n + 1)
+    xs = range(h, step = h, length = n)
+    f  = ones(n)
+    J  = energy_function(p, f, h)
+    u0 = 0.1 .* sin.(pi .* xs)
+    u  = newton_solve(J, u0, n; n_iter = 20)
+    return xs, u
+end
+
+function plot_solution(save=undef, n = 40, ps = [2.0, 2.25, 2.5, 2.75, 3.0])
+    plt = plot()
+    for p in ps
+        xs, u = solve(; n, p)
+        plot!(plt, xs, u, label = "Newton (DualArrays), p = $p", lw = 2)
+        plot!(plt, xs, exact(xs, p), label = "Exact, p = $p", lw = 2, ls = :dash)
+    end
+    xlabel!(plt, "x")
+    ylabel!(plt, "u(x)")
+    title!(plt, "p-Laplacian computed vs analytical solutions")
+    display(plt)
+    if save !== undef
+        savefig(plt, save)
+    end
+end
diff --git a/src/DualArrays.jl b/src/DualArrays.jl
index 000fd9f..360c597 100644
--- a/src/DualArrays.jl
+++ b/src/DualArrays.jl
@@ -13,11 +13,11 @@ Differentiation rules are mostly provided by ChainRules.jl.
 """
 module DualArrays
 
-export DualVector, DualMatrix, Dual, jacobian, ArrayOperator
+export DualVector, DualMatrix, Dual, jacobian, ArrayOperator, hessian
 
 import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims
 
-using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations
+using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations, SparseArrayKit
 
 import FillArrays: elconvert
 
diff --git a/src/arithmetic.jl b/src/arithmetic.jl
index 4217d80..3798e2c 100644
--- a/src/arithmetic.jl
+++ b/src/arithmetic.jl
@@ -10,9 +10,9 @@ for op in (:+, :-)
     @eval $op(x::AbstractVector, y::DualVector) = DualVector($op(x, y.value), y.jacobian)
 end
 
-
 # Matrix multiplication with a DualVector.
 *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
+*(x::LayoutMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian)
 
 ###
 # this section attempts to define broadcasting rules on DualVectors for functions
@@ -101,7 +101,11 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,))
     end
 end
 
-# Special cases: integer powers
+# Necessary for promotion between Dual and Real numbers.
+Base.signbit(x::Dual) = signbit(x.value)
+Base.zero(x::Dual) = Dual(zero(x.value), zero(x.partials))
+Base.one(x::Dual) = Dual(one(x.value), zero(x.partials))
+
 Base.:^(x::Dual, y::Integer) = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials)
 Base.broadcasted(::typeof(^), x::DualVector, y::Integer) = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian)
 Base.broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{y}) where y = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian)
@@ -122,9 +126,10 @@ Multiplication (*) for ArrayOperators generalises from matrix/vector
 multiplication and uses tensor contraction provided by
 TensorOperations.jl. Let an (A, B) ArrayOperator denote
 an ArrayOperator with output dimension A and input dimension B
-(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only
-allow multiplication between an (A, B) ArrayOperator and a (B, C)
-ArrayOperator, with the result being an (A, C) ArrayOperator.
+(i.e an ArrayOperator{A+B, T, A, B} for some type T). For multiplication
+between an (A, B) ArrayOperator and a (C, D) ArrayOperator, we sum over the
+leading B dimensions of the left ArrayOperator with the leading B dimensions
+of the right ArrayOperator. This returns an (A + C - B, D) ArrayOperator.
 This generalises from:
 
 - An inner product: a row vector * a column vector -> a scalar:
@@ -152,8 +157,8 @@ function _contract(x, y, A, B, C)
 end
 
 
-function *(x::ArrayOperator{A, B}, y::ArrayOperator{B, C}) where {A, B, C}
-    return ArrayOperator{A}(_contract(x.data, y.data, A, B, C))
+function *(x::ArrayOperator{A, B}, y::ArrayOperator{C, D}) where {A, B, C, D}
+    return ArrayOperator{A + C - B}(_contract(x.data, y.data, A, B, C + D - B))
 end
 
 function *(x::ArrayOperator{A, B}, y::AbstractArray{<:Any, L}) where {A, B, L}
@@ -166,13 +171,28 @@ function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C}) where {L, B, C}
     return ArrayOperator{A}(_contract(x, y.data, A, B, C))
 end
 
+# Matrix overrides to avoid contract and preserve sparsity
+*(x::ArrayOperator{1, 1}, y::ArrayOperator{1, C}) where {C} = ArrayOperator{1}(x.data * y.data)
+*(x::ArrayOperator{1, 1}, y::AbstractVecOrMat) = ArrayOperator{1}(x.data * y)
+*(x::AbstractMatrix, y::ArrayOperator{1, C}) where {C} = ArrayOperator{1}(x * y.data)
+
+transpose(x::DualMatrix) = DualMatrix(
+    transpose(x.value),
+    ArrayOperator{2}(permutedims(x.jacobian.data, (2, 1, ntuple(i -> i + 2, ndims(x.jacobian.data) - 2)...))),
+)
+
+function *(x::DualMatrix, y::AbstractVector)
+    jac = _contract(permutedims(x.jacobian.data, (1, 3, 2)), y, 2, 1, 0)
+    DualVector(x.value * y, jac)
+end
+
+*(x::AbstractMatrix, y::DualMatrix) = DualArray(x * y.value, ArrayOperator{1}(x) * y.jacobian)
+*(x::LayoutMatrix, y::DualMatrix) = DualArray(x * y.value, ArrayOperator{1}(x) * y.jacobian)
+
 # Extra required arithmetic for ArrayOperators
 Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y
 Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y
 Base.:-(x::ArrayOperator) = (-).(x)
 
-Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} =
-    ArrayOperator{N, M, promote_type(typeof(a), T), L}(a .* t.data)
+Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} = ArrayOperator{N}(a .* t.data)
 Base.:*(t::ArrayOperator, a::Number) = a * t
-
-
diff --git a/src/types.jl b/src/types.jl
index da914a3..53c2b8d 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -24,17 +24,17 @@ a + Jϵ
 Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts.
 In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array.
 """
-struct ArrayOperator{N, M, T, L}
-    data::AbstractArray{T, L}
+struct ArrayOperator{N, M, T, L, A <: AbstractArray{T, L}}
+    data::A
 end
 
 # Constructor to wrap an array with a tensor, given a contraction rule represented by N
 function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N}
-    ArrayOperator{N, L - N, T, L}(data)
+    ArrayOperator{N, L - N, T, L, typeof(data)}(data)
 end
 
 # Helper convert function
-elconvert(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(elconvert(T, t.data))
+elconvert(::Type{T}, t::ArrayOperator{N, M, S, L, A}) where {T, N, M, S, L, A} = ArrayOperator{N, M, T, L, typeof(elconvert(T, t.data))}(elconvert(T, t.data))
 
 # Basic array interface
 for op in (:size, :axes, :iterate)
@@ -81,20 +81,20 @@ as extra information.
 struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end
 
 Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}()
-function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M}
-    # Julia optimises these checks at compile time.
-    if L >= M
-        ArrayOperatorBroadcastStyle{L, N}()
-    else
-        throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator"))
-    end
+function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M}
+    L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator"))
+end
+function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::S) where {L, N, M, S <: Broadcast.AbstractArrayStyle{M}}
+    L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator"))
 end
-Base.BroadcastStyle(a::Broadcast.DefaultArrayStyle{M}, b::ArrayOperatorBroadcastStyle{L, N}) where {L, N, M} = Base.BroadcastStyle(b, a)
-function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L1, N1},::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2}
+Base.BroadcastStyle(s::Broadcast.AbstractArrayStyle, t::ArrayOperatorBroadcastStyle) = Base.BroadcastStyle(t, s)
+function Base.BroadcastStyle(s1::ArrayOperatorBroadcastStyle{L1, N1}, s2::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2}
     if L1 > L2
-        ArrayOperatorBroadcastStyle{L1, N1}()
+        s1
     elseif L2 > L1
-        ArrayOperatorBroadcastStyle{L2, N2}()
+        s2
+    elseif N1 == N2
+        s1
     else
         throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator"))
     end
@@ -103,30 +103,24 @@ end
 # Helper functions to help define broadcasting/arithmetic with ArrayOperators.
 # By converting a broadcast involving ArrayOperators into a broadcast
 # involving the underlying arrays.
-_unwrap(t::ArrayOperator) = t.data
-_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes)
-_unwrap(x) = x
-_unwrap_args(args::Tuple) = map(_unwrap, args)
+_wrap_dual_matrix(x) = x
+_unwrap_arg(t::ArrayOperator) = t.data
+_unwrap_arg(bc::Broadcast.Broadcasted{<:ArrayOperatorBroadcastStyle}) = _unwrap_arg(Broadcast.materialize(bc))
+_unwrap_arg(bc::Broadcast.Broadcasted) = Broadcast.materialize(bc)
+_unwrap_arg(x) = x
 
 # copy ensures that arithmetic involving a Tensor returns a Tensor
 function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N}
     # We create a Broadcasted of the underlying arrays and create a Tensor containing
     # the evaluated broadcast. We check if Base.broadcasted is a Broadcasted
     # or is overriden such as with DualArrays
-    databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...)
-    result = databroadcast isa Broadcast.Broadcasted ? copy(Broadcast.flatten(databroadcast)) : databroadcast
-    ArrayOperator{N}(result)
+    ArrayOperator{N}(_wrap_dual_matrix(Broadcast.materialize(Broadcast.broadcasted(bc.f, map(_unwrap_arg, bc.args)...))))
 end
 
 # copyto adds support for .=
 function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N}
     # As above
-    databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...)
-    if databroadcast isa Broadcast.Broadcasted
-        copyto!(dest.data, Broadcast.flatten(databroadcast))
-    else
-        copyto!(dest.data, databroadcast)
-    end
+    copyto!(dest.data, Broadcast.broadcasted(bc.f, map(_unwrap_arg, bc.args)...))
     dest
 end
 
@@ -223,7 +217,8 @@ DualArray{T,N}(value, jacobian) where {T, N} = DualArray{T,N}(elconvert(T, value
 # Constructor that forces type compatibility
 function DualArray(value::AbstractArray, jacobian::ArrayOperator)
     T = promote_type(eltype(value), eltype(jacobian))
-    DualArray{T}(value, jacobian)
+    N = ndims(value)
+    DualArray{T,N}(value, jacobian)
 end
 
 # Helper function to define DualArrays with AbstractArray jacobians
@@ -264,6 +259,18 @@ function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) whe
     DualMatrix(value, ArrayOperator{2}(jacobian))
 end
 
+_wrap_dual_matrix(x::DualMatrix) = x
+
+# If we have a matrix of dual numbers, we construct it into a DualMatrix
+# This helps (for now) resolve issues surrounding broadcasting with ArrayOperator
+# and DualMatrix nested (in 2nd order autodiff). A proper fix involves
+# redesigning the broadcasting of dual arrays.
+function _wrap_dual_matrix(x::AbstractMatrix{<:Dual})
+    npartials = length(first(x).partials)
+    partial_columns = reduce(hcat, vec(getfield.(x, :partials)))
+    partial_tensor = permutedims(reshape(partial_columns, npartials, size(x)...), (2, 3, 1))
+    DualMatrix(getfield.(x, :value), partial_tensor)
+end
 
 elconvert(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(elconvert(T, a.value), elconvert(T, a.jacobian))
 elconvert(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(elconvert(T, a.value), elconvert(T, a.jacobian))
diff --git a/src/utilities.jl b/src/utilities.jl
index 9111077..ce1b92d 100644
--- a/src/utilities.jl
+++ b/src/utilities.jl
@@ -2,9 +2,9 @@
 using FillArrays, BandedMatrices, LinearAlgebra
 
 # Sum all elements of a DualVector, returning a single Dual number.
+# We use dot as it provides a clean way of summing over nested dual vectors as well.
 function Base.sum(x::DualVector)
-    n = length(x.value)
-    Dual(sum(x.value), vec(sum(x.jacobian; dims=1)))
+    dot(ones(Int, length(x)), x)
 end
 
 # Helper functions for vcat operations
@@ -62,5 +62,14 @@ function jacobian(f::Function, x::AbstractVector, id=Eye)
         Matrix(I(length(x)))
     end
     d = DualVector(x, J)
-    return f(d).jacobian
+    return f(d).jacobian.data
+end
+
+function hessian(f::Function, x::AbstractVector)
+    n = length(x)
+    d1 = DualVector(x, I(n))
+    d2 = DualMatrix(I(n), zeros(n, n, n))
+    d = DualVector(d1, d2)
+    ret = f(d)
+    return ret.partials.jacobian.data
 end
diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl
index 42b0af2..29f1921 100644
--- a/test/array_operator_test.jl
+++ b/test/array_operator_test.jl
@@ -55,6 +55,11 @@ using FillArrays
         @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7])
         @test [1 0 0] * t == ArrayOperator{1}([1 2 3])
 
+        left = ArrayOperator{1}([1 2; 3 4])
+        right = ArrayOperator{2}(ones(2,2,2))
+        expected = cat((left.data * right.data[:, :, k] for k in axes(right.data, 3))..., dims=3)
+        @test left * right == ArrayOperator{2}(expected)
+
         @test t * 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18])
         @test 2 * t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18])
 
@@ -75,4 +80,13 @@ using FillArrays
         b .= a .* 3
         @test b == ArrayOperator{1}(OneElement(3, 1, 3))
     end
+
+    @testset "Arithmetic with BandedMatrix" begin
+        bm1 = BandedMatrix(0 => [1, 2, 3], 1 => [1, 1], -1 => [1, 1])
+        bm2 = BandedMatrix(0 => [4, 5, 6], 1 => [1, 1], -1 => [1, 1])
+        a = ArrayOperator{1}(bm1)
+        b = ArrayOperator{1}(bm2)
+        @test (a .+ b).data == bm1 .+ bm2
+        @test (a .+ b).data isa BandedMatrix
+    end
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index ab24156..86fe037 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -148,6 +148,17 @@ using DualArrays: ArrayOperator
         @test d2[1].value == Dual(2, [1, 0])
         @test d2[1].partials == DualVector([1.0, 0], zeros(2, 2))
     end
+
+    @testset "Hessian" begin
+        M = [1 1; 1 1]
+        dm = DualMatrix([1 2;3 4], zeros(2, 2, 2))
+
+        @test transpose(dm) isa DualMatrix
+        @test transpose(dm) * [2, -1] == DualVector([-1, 0], zeros(2, 2))
+
+        f(x) = sum(abs.(M * x) .^ 2)
+        @test hessian(f, [1, 1]) ≈ 2 .* (M' * M)
+    end
     
     include("broadcast_test.jl")
     include("array_operator_test.jl")

From 264becbb46c41d5d705f68d40c394435db07cd3f Mon Sep 17 00:00:00 2001
From: Max Vassiliev <vassiliev.max@gmail.com>
Date: Thu, 4 Jun 2026 16:36:09 +0100
Subject: [PATCH 2/5] fix keyword mismatch

---
 examples/nestedduals.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl
index 4dd90c5..5d796ae 100644
--- a/examples/nestedduals.jl
+++ b/examples/nestedduals.jl
@@ -65,7 +65,7 @@ gradient = result.value.partials
 # equivalent to c
 gradient2 = result.partials.value
 # equivalent to d
-hessian = result.partials.jacobian.data
+_hessian = result.partials.jacobian.data
 
 # We expect this to be 6
 print("Value: ", value, "\n")
@@ -74,5 +74,5 @@ print("Gradient: ", gradient, "\n")
 # We expect this to be the same
 print("Gradient: ", gradient2, "\n")
 # We expect this to be [0 1; 1 0]
-print("Hessian: ", hessian, "\n")
+print("Hessian: ", _hessian, "\n")
 

From a2b5e7a228564322848e0345ce9a42452eca31a9 Mon Sep 17 00:00:00 2001
From: Max Vassiliev <vassiliev.max@gmail.com>
Date: Thu, 4 Jun 2026 16:53:45 +0100
Subject: [PATCH 3/5] improve coverage

---
 examples/plaplacian.jl | 2 +-
 src/arithmetic.jl      | 2 --
 src/types.jl           | 7 -------
 test/runtests.jl       | 6 ++++++
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/examples/plaplacian.jl b/examples/plaplacian.jl
index 97396f4..62bf32f 100644
--- a/examples/plaplacian.jl
+++ b/examples/plaplacian.jl
@@ -51,7 +51,7 @@ end
 function plot_solution(save=undef, n = 40, ps = [2.0, 2.25, 2.5, 2.75, 3.0])
     plt = plot()
     for p in ps
-        xs, u = solve(; n, p)
+        xs, u = solve(n, p)
         plot!(plt, xs, u, label = "Newton (DualArrays), p = $p", lw = 2)
         plot!(plt, xs, exact(xs, p), label = "Exact, p = $p", lw = 2, ls = :dash)
     end
diff --git a/src/arithmetic.jl b/src/arithmetic.jl
index 3798e2c..d7f8142 100644
--- a/src/arithmetic.jl
+++ b/src/arithmetic.jl
@@ -103,10 +103,8 @@ end
 
 # Necessary for promotion between Dual and Real numbers.
 Base.signbit(x::Dual) = signbit(x.value)
-Base.zero(x::Dual) = Dual(zero(x.value), zero(x.partials))
 Base.one(x::Dual) = Dual(one(x.value), zero(x.partials))
 
-Base.:^(x::Dual, y::Integer) = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials)
 Base.broadcasted(::typeof(^), x::DualVector, y::Integer) = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian)
 Base.broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::DualVector, ::Val{y}) where y = DualVector(x.value .^ y, y * x.value .^ (y - 1) .* x.jacobian)
 Base.literal_pow(::typeof(^), x::Dual, ::Val{y}) where y = Dual(x.value ^ y, y * x.value^(y - 1) * x.partials)
diff --git a/src/types.jl b/src/types.jl
index 53c2b8d..93b4de3 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -84,17 +84,12 @@ Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} =
 function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M}
     L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator"))
 end
-function Base.BroadcastStyle(s::ArrayOperatorBroadcastStyle{L, N}, ::S) where {L, N, M, S <: Broadcast.AbstractArrayStyle{M}}
-    L >= M ? s : throw(ArgumentError("Array has higher dimensionality than ArrayOperator"))
-end
 Base.BroadcastStyle(s::Broadcast.AbstractArrayStyle, t::ArrayOperatorBroadcastStyle) = Base.BroadcastStyle(t, s)
 function Base.BroadcastStyle(s1::ArrayOperatorBroadcastStyle{L1, N1}, s2::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2}
     if L1 > L2
         s1
     elseif L2 > L1
         s2
-    elseif N1 == N2
-        s1
     else
         throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator"))
     end
@@ -259,8 +254,6 @@ function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) whe
     DualMatrix(value, ArrayOperator{2}(jacobian))
 end
 
-_wrap_dual_matrix(x::DualMatrix) = x
-
 # If we have a matrix of dual numbers, we construct it into a DualMatrix
 # This helps (for now) resolve issues surrounding broadcasting with ArrayOperator
 # and DualMatrix nested (in 2nd order autodiff). A proper fix involves
diff --git a/test/runtests.jl b/test/runtests.jl
index 86fe037..510c265 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -117,6 +117,12 @@ using DualArrays: ArrayOperator
         d = DualVector([2, 3], [4 5; 6 7])
         @test M * d isa DualVector
         @test M * d == DualVector([5,5],[10 12;10 12])
+
+        L = BandedMatrix([2 0; 0 3], (0, 0))
+        @test L * d == DualVector([4, 9], [8 10; 18 21])
+
+        dm = DualMatrix([1 2; 3 4], cat([1 3; 2 4], [5 7; 6 8]; dims=3))
+        @test L * dm == DualMatrix([2 4; 9 12], cat([2 6; 6 12], [10 14; 18 24]; dims=3))
     end
     @testset "vcat" begin
         x = Dual(1, [1, 2, 3])

From 7e57add537e8e3e1ece527fece3122d0004fa7db Mon Sep 17 00:00:00 2001
From: Max Vassiliev <vassiliev.max@gmail.com>
Date: Mon, 8 Jun 2026 11:18:45 +0100
Subject: [PATCH 4/5] fix mutability, implement nested dual ad for poisson
 control

---
 examples/controlproblem.jl | 60 ++++++++++++++++++++++++++++----------
 src/DualArrays.jl          |  2 +-
 src/arithmetic.jl          | 11 ++++++-
 src/indexing.jl            |  6 ++++
 test/broadcast_test.jl     |  9 ++++++
 5 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/examples/controlproblem.jl b/examples/controlproblem.jl
index 861703f..f1afbac 100644
--- a/examples/controlproblem.jl
+++ b/examples/controlproblem.jl
@@ -49,6 +49,18 @@ using LinearAlgebra, DualArrays, Plots
 # If we propagate f as a DualVector above, we get the f.jacobian as the Hessian of J,
 # and this enables us to solve via newton's method.
 
+"""
+This calculates the scalar objective
+
+    (1/2) * ||u - d||² + (alpha / 2) * ||f||²
+
+where u solves K * u = f.
+"""
+function objective(K, f, d, alpha, h)
+    u = K \ f
+    return (h / 2) * sum((u - d) .^ 2) + (alpha * h / 2) * sum(f .^ 2)
+end
+
 """
 This calculates the gradient of the objective as detailed above.
 K: the discretised Laplacian
@@ -73,13 +85,20 @@ kappa: diffusivity constant
 alpha: regularisation parameter
 iters: number of iterations to run newton's method.
 """
-function solve_1D(f0, d, h, kappa, alpha, iters = 30)
+function solve_1D(f0, d, h, kappa, alpha, iters = 30; method = :adjoint)
     fvector = copy(f0)
     n = length(f0)
     K = (kappa / h ^ 2) * Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1))
     for _ = 1:iters
-        grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
-        step = grads.jacobian.data \ grads.value
+        gradient, hess = if method == :adjoint
+            grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
+            (grads.value, grads.jacobian.data)
+        elseif method == :hessian
+            J = f -> objective(K, f, d, alpha, h)
+            gradient = J(DualVector(fvector, I(length(fvector)))).partials
+            (gradient, hessian(J, fvector))
+        end
+        step = hess \ gradient
         fvector -= step 
     end
 
@@ -97,7 +116,7 @@ kappa: diffusivity constant
 alpha: regularisation parameter
 iters: number of iterations to run newton's method.
 """
-function solve_2D(x0, d, h, kappa, alpha, iters = 30)
+function solve_2D(x0, d, h, kappa, alpha, iters = 30; method = :adjoint)
     fvector = copy(x0)
     n = isqrt(length(x0))
     T = Tridiagonal(-ones(n - 1), 2 * ones(n), -ones(n - 1))
@@ -105,8 +124,15 @@ function solve_2D(x0, d, h, kappa, alpha, iters = 30)
     # source: https://www.petercheng.me/blog/discrete-laplacian-matrix
     K = (kappa / h ^ 2) * (kron(I(n), T) + kron(T, I(n)))
     for _ = 1:iters
-        grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
-        step = grads.jacobian.data \ grads.value
+        gradient, hess = if method == :adjoint
+            grads = grad_objective(K, DualVector(fvector, I(length(fvector))), d, alpha)
+            (grads.value, grads.jacobian.data)
+        elseif method == :hessian
+            J = f -> objective(K, f, d, alpha, h)
+            gradient = J(DualVector(fvector, I(length(fvector)))).partials
+            (gradient, hessian(J, fvector))
+        end
+        step = hess \ gradient
         fvector -= step 
     end
 
@@ -115,7 +141,7 @@ end
 
 # Solve the 1D Poisson control on the unit interval and plot the solution.
 # We compare it to a known analytical solution.
-function plot_solution_1D(save=undef)
+function plot_solution_1D(save=undef; method = :adjoint)
 
     # setup problem
     kappa = 1
@@ -131,17 +157,18 @@ function plot_solution_1D(save=undef)
     coeff = (kappa * pi^2) / (1 + alpha * kappa^2 * pi^4)
     fexact = coeff .* sin.(pi .* x[2:end-1])
 
-    f = solve_1D(zeros(length(d)), d, h, kappa, alpha)
-    plot(x[2:end-1], f, label="Computed Control", title="1D Poisson Control Solution")
-    plot!(x[2:end-1], fexact, label="Exact Control")
+    f = solve_1D(zeros(length(d)), d, h, kappa, alpha; method=method)
+    plt = plot(x[2:end-1], f, label="Computed Control ($(String(method)))", title="1D Poisson Control Solution")
+    plot!(plt, x[2:end-1], fexact, label="Exact Control")
+    display(plt)
     if save !== undef
-        savefig(save)
+        savefig(plt, save)
     end
 end
     
 # Solve the 2D Poisson control on the unit square and plot the solution.
 # We compare it to a known analytical solution given in the dolfin-adjoint example.
-function plot_solution_2D(save=undef)
+function plot_solution_2D(save=undef; method = :adjoint)
 
     # setup problem
     kappa = 1
@@ -162,13 +189,14 @@ function plot_solution_2D(save=undef)
     # analytical optimal control as specified in dolfin-adjoint
     fexact = (1 / (1 + 4 * alpha * pi^4)) .* sin.(pi .* X) .* sin.(pi .* Y)
 
-    f = solve_2D(zeros(n * n), vec(d), h, kappa, alpha)
+    f = solve_2D(zeros(n * n), vec(d), h, kappa, alpha; method=method)
     F = reshape(f, n, n)
 
-    p1 = heatmap(x, x, F, title="Computed Control", aspect_ratio=1)
+    p1 = heatmap(x, x, F, title="Computed Control ($(String(method)))", aspect_ratio=1)
     p2 = heatmap(x, x, fexact, title="Exact Control", aspect_ratio=1)
-    plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12)
+    plt = plot(p1, p2, layout=(1, 2), size=(900, 400), plot_title="2D Poisson Control Solution", plot_titlevspan=0.12)
+    display(plt)
     if save !== undef
-        savefig(save)
+        savefig(plt, save)
     end
 end
\ No newline at end of file
diff --git a/src/DualArrays.jl b/src/DualArrays.jl
index 360c597..2161772 100644
--- a/src/DualArrays.jl
+++ b/src/DualArrays.jl
@@ -15,7 +15,7 @@ module DualArrays
 
 export DualVector, DualMatrix, Dual, jacobian, ArrayOperator, hessian
 
-import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims
+import Base: +, -, ==, getindex, setindex!, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims
 
 using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations, SparseArrayKit
 
diff --git a/src/arithmetic.jl b/src/arithmetic.jl
index d7f8142..f835ffb 100644
--- a/src/arithmetic.jl
+++ b/src/arithmetic.jl
@@ -115,7 +115,16 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran
 LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x)
 
 # solve
-\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data))
+function \(x::AbstractMatrix, y::DualVector)
+    DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data))
+end
+
+function \(x::AbstractMatrix, y::DualMatrix)
+    jac = y.jacobian.data
+    reshaped = reshape(jac, size(jac, 1), :)
+    solved = x \ reshaped
+    DualMatrix(x \ y.value, ArrayOperator{2}(reshape(solved, size(jac))))
+end
 
 
 
diff --git a/src/indexing.jl b/src/indexing.jl
index b357811..2cbf31e 100644
--- a/src/indexing.jl
+++ b/src/indexing.jl
@@ -78,6 +78,12 @@ function getindex(x::DualMatrix, y::Vararg{Union{Colon, UnitRange}})
     DualMatrix(newval, newjac)
 end
 
+function setindex!(x::DualVector, y::Number, i::Int)
+    x.value[i] = y
+    x.jacobian.data[i, :] .= 0
+    return x
+end
+
 # DualArray array interface
 for op in (:size, :axes)
     @eval $op(a::DualArray) = $op(a.value)
diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl
index e368e08..1d66fd3 100644
--- a/test/broadcast_test.jl
+++ b/test/broadcast_test.jl
@@ -71,4 +71,13 @@ using DualArrays, Test, SparseArrays, LinearAlgebra
     @test s.jacobian.data ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0]
     @test m.jacobian.data ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0]
     @test div.jacobian.data ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75]
+end
+
+@testset "DualVector setindex!" begin
+    x = DualVector([1, 2, 3], [1 0 0; 0 1 0; 0 0 1])
+
+    x[2] = 4
+
+    @test x.value == [1, 4, 3]
+    @test x.jacobian.data == [1 0 0; 0 0 0; 0 0 1]
 end
\ No newline at end of file

From 7138cdf9070e2e8ab4bcf081b3d65cabbb385dfe Mon Sep 17 00:00:00 2001
From: Maxim Vassiliev <vassiliev.max@gmail.com>
Date: Thu, 11 Jun 2026 10:00:42 +0100
Subject: [PATCH 5/5] add testing

---
 test/runtests.jl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 510c265..ad16e8c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -112,6 +112,16 @@ using DualArrays: ArrayOperator
         @test A \ b == DualVector([1, 1], [1.5 2; 1.5 2])
     end
 
+    @testset "Solve (DualMatrix)" begin
+        A = [1 0;0 2]
+        dm = DualMatrix([1.0 2.0; 3.0 4.0], zeros(2, 2, 2))
+
+        res = A \ dm
+
+        @test res.value == A \ dm.value
+        @test res.jacobian.data == zeros(2, 2, 2)
+    end
+
     @testset "Matrix multiplication" begin
         M = [1 1; 1 1]
         d = DualVector([2, 3], [4 5; 6 7])