diff --git a/src/JuMP.jl b/src/JuMP.jl
index 98a37e60aa8..aec66bb98e0 100644
--- a/src/JuMP.jl
+++ b/src/JuMP.jl
@@ -138,6 +138,8 @@ mutable struct GenericModel{T<:Real} <: AbstractModel
     # A dictionary to store timing information from the JuMP macros.
     enable_macro_timing::Bool
     macro_times::Dict{Tuple{LineNumberNode,String},Float64}
+    # We use `Any` as key because we haven't defined `GenericNonlinearExpr` yet
+    subexpressions::Dict{UInt64,MOI.ScalarNonlinearFunction}
 end
 
 value_type(::Type{GenericModel{T}}) where {T} = T
@@ -251,6 +253,7 @@ function direct_generic_model(
         Dict{Any,MOI.ConstraintIndex}(),
         false,
         Dict{Tuple{LineNumberNode,String},Float64}(),
+        Dict{UInt64,MOI.ScalarNonlinearFunction}(),
     )
 end
 
diff --git a/src/constraints.jl b/src/constraints.jl
index 157907ccd5e..451e0c5347d 100644
--- a/src/constraints.jl
+++ b/src/constraints.jl
@@ -760,6 +760,10 @@ function moi_function(constraint::AbstractConstraint)
     return moi_function(jump_function(constraint))
 end
 
+function moi_function(model, constraint::AbstractConstraint)
+    return moi_function(model, jump_function(constraint))
+end
+
 """
     moi_set(constraint::AbstractConstraint)
 
@@ -1016,6 +1020,17 @@ function _moi_add_constraint(
     return MOI.add_constraint(model, f, s)
 end
 
+function check_belongs_to_model(f::Vector, model)
+    for func in f
+        check_belongs_to_model(func, model)
+    end
+end
+
+function moi_function(model, f)
+    check_belongs_to_model(f, model)
+    return moi_function(f)
+end
+
 """
     add_constraint(
         model::GenericModel,
@@ -1032,10 +1047,9 @@ function add_constraint(
     name::String = "",
 )
     con = model_convert(model, con)
+    func, set = moi_function(model, con), moi_set(con)
     # The type of backend(model) is unknown so we directly redirect to another
     # function.
-    check_belongs_to_model(con, model)
-    func, set = moi_function(con), moi_set(con)
     cindex = _moi_add_constraint(
         backend(model),
         func,
diff --git a/src/nlp_expr.jl b/src/nlp_expr.jl
index 31e243f2f12..b2777624f8c 100644
--- a/src/nlp_expr.jl
+++ b/src/nlp_expr.jl
@@ -569,18 +569,33 @@ end
 
 moi_function(x::Number) = x
 
-function moi_function(f::GenericNonlinearExpr{V}) where {V}
+function moi_function(
+    model::JuMP.GenericModel,
+    f::GenericNonlinearExpr{V},
+) where {V}
+    cache = model.subexpressions
+    key = objectid(f)
+    if haskey(cache, key)
+        return cache[key]
+    end
     ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args))
     stack = Tuple{MOI.ScalarNonlinearFunction,Int,GenericNonlinearExpr{V}}[]
     for i in length(f.args):-1:1
         if f.args[i] isa GenericNonlinearExpr{V}
             push!(stack, (ret, i, f.args[i]))
+        elseif f.args[i] isa AbstractJuMPScalar
+            ret.args[i] = moi_function(model, f.args[i])
         else
             ret.args[i] = moi_function(f.args[i])
         end
     end
     while !isempty(stack)
         parent, i, arg = pop!(stack)
+        arg_key = objectid(arg)
+        if haskey(cache, arg_key)
+            parent.args[i] = cache[arg_key]
+            continue
+        end
         child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args))
         parent.args[i] = child
         for j in length(arg.args):-1:1
@@ -590,7 +605,9 @@ function moi_function(f::GenericNonlinearExpr{V}) where {V}
                 child.args[j] = moi_function(arg.args[j])
             end
         end
+        cache[arg_key] = child
     end
+    cache[key] = ret
     return ret
 end
 
@@ -1187,7 +1204,8 @@ function moi_function(f::AbstractVector{<:GenericNonlinearExpr})
 end
 
 function MOI.VectorNonlinearFunction(f::Vector{<:AbstractJuMPScalar})
-    return MOI.VectorNonlinearFunction(map(moi_function, f))
+    model = owner_model(first(f))
+    return MOI.VectorNonlinearFunction(moi_function.(model, f))
 end
 
 """
@@ -1233,7 +1251,7 @@ x
 ```
 """
 function simplify(model::GenericModel, f::AbstractJuMPScalar)
-    g = MOI.Nonlinear.SymbolicAD.simplify(moi_function(f))
+    g = MOI.Nonlinear.SymbolicAD.simplify(moi_function(model, f))
     return jump_function(model, g)
 end
 
@@ -1284,7 +1302,8 @@ function derivative(
     f::AbstractJuMPScalar,
     x::GenericVariableRef{T},
 ) where {T}
-    df_dx = MOI.Nonlinear.SymbolicAD.derivative(moi_function(f), index(x))
+    df_dx =
+        MOI.Nonlinear.SymbolicAD.derivative(moi_function(model, f), index(x))
     return jump_function(model, MOI.Nonlinear.SymbolicAD.simplify!(df_dx))
 end
 
@@ -1329,7 +1348,7 @@ julia> ∇f[y]
 ```
 """
 function gradient(model::GenericModel{T}, f::AbstractJuMPScalar) where {T}
-    g = moi_function(f)
+    g = moi_function(model, f)
     ∇f = Dict{GenericVariableRef{T},Any}()
     for xi in MOI.Nonlinear.SymbolicAD.variables(g)
         df_dx = MOI.Nonlinear.SymbolicAD.simplify!(
diff --git a/src/objective.jl b/src/objective.jl
index c9dcbbde3ac..41f16dc0be1 100644
--- a/src/objective.jl
+++ b/src/objective.jl
@@ -277,7 +277,7 @@ end
 
 function set_objective_function(model::GenericModel, func::AbstractJuMPScalar)
     check_belongs_to_model(func, model)
-    set_objective_function(model, moi_function(func))
+    set_objective_function(model, moi_function(model, func))
     return
 end
 
@@ -296,7 +296,7 @@ function set_objective_function(
     for f in func
         check_belongs_to_model(f, model)
     end
-    set_objective_function(model, moi_function(func))
+    set_objective_function(model, moi_function(model, func))
     return
 end
 
diff --git a/test/Containers/test_DenseAxisArray.jl b/test/Containers/test_DenseAxisArray.jl
index 29948d79530..d57e8dea9cc 100644
--- a/test/Containers/test_DenseAxisArray.jl
+++ b/test/Containers/test_DenseAxisArray.jl
@@ -740,11 +740,11 @@ end
 
 function test_containers_denseaxisarray_kwarg_indexing_slicing()
     Containers.@container(x[i=2:3, j=1:2], i + j)
-    y = x[i=2, j = :]
+    y = x[i=2, j=:]
     @test y[j=2] == 4
-    y = x[i = :, j=1]
+    y = x[i=:, j=1]
     @test y[i=3] == 4
-    y = x[i = :, j = :]
+    y = x[i=:, j=:]
     @test y[i=3, j=1] == 4
     return
 end
@@ -801,11 +801,11 @@ end
 function test_containers_denseaxisarrayview_kwarg_indexing_slicing()
     Containers.@container(a[i=2:3, j=1:2], i + j)
     x = view(a, :, :)
-    y = x[i=2, j = :]
+    y = x[i=2, j=:]
     @test y[j=2] == 4
-    y = x[i = :, j=1]
+    y = x[i=:, j=1]
     @test y[i=3] == 4
-    y = x[i = :, j = :]
+    y = x[i=:, j=:]
     @test y[i=3, j=1] == 4
     return
 end
diff --git a/test/Containers/test_SparseAxisArray.jl b/test/Containers/test_SparseAxisArray.jl
index ee8c8820c07..95226bd2cc5 100644
--- a/test/Containers/test_SparseAxisArray.jl
+++ b/test/Containers/test_SparseAxisArray.jl
@@ -296,11 +296,11 @@ end
 
 function test_containers_sparseaxisarray_kwarg_indexing_slicing()
     Containers.@container(x[i=2:3, j=1:2], i + j, container = SparseAxisArray)
-    y = x[i=2, j = :]
+    y = x[i=2, j=:]
     @test y[j=2] == 4
-    y = x[i = :, j=1]
+    y = x[i=:, j=1]
     @test y[i=3] == 4
-    y = x[i = :, j = :]
+    y = x[i=:, j=:]
     @test y[i=3, j=1] == 4
     return
 end
diff --git a/test/perf/benchmark_cache.jl b/test/perf/benchmark_cache.jl
new file mode 100644
index 00000000000..52a661c9736
--- /dev/null
+++ b/test/perf/benchmark_cache.jl
@@ -0,0 +1,322 @@
+# Benchmark for PR #4032: caching strategies for moi_function on
+# GenericNonlinearExpr.
+#
+# Four modes are compared:
+#   :none           — master behaviour: NO cache. `add_constraint` runs
+#                      `check_belongs_to_model` and then `moi_function`, both
+#                      of which traverse the expression independently.
+#   :per_call_oid   — odow's actual PR snippet: a fresh `Dict{UInt64, ...}`
+#                      allocated INSIDE every call to `moi_function`, keyed by
+#                      `objectid(arg)` (so cache lookups are O(1)).
+#                      `check_belongs_to_model` still runs as a separate walk.
+#   :model_struct   — this branch as-is: cache stored on the model, keyed by
+#                      the JuMP `GenericNonlinearExpr` itself. Default `==`/
+#                      `hash` on that struct is structural and walks the whole
+#                      sub-tree, so a cache hit still costs O(size) — which
+#                      can defeat the cache for deeply aliased DAGs.
+#   :model_oid      — model-level cache, but keyed by `objectid` like odow's
+#                      snippet. Shows what the branch could evolve to once
+#                      blegat's "use hash as keys" suggestion is applied.
+#
+# Run from the repo root, in an environment that has BenchmarkTools added on
+# top of dev'd JuMP (do NOT add BenchmarkTools to JuMP's own Project.toml):
+#
+#   julia> using Pkg
+#   julia> Pkg.activate(temp = true)
+#   julia> Pkg.develop(path = ".")
+#   julia> Pkg.add("BenchmarkTools")
+#   julia> include("benchmark_cache.jl"); run_all()
+#
+# The script monkey-patches `JuMP.moi_function(::GenericNonlinearExpr,
+# ::GenericModel)` so the same JuMP build can be exercised under each mode
+# without rebuilding the package.
+#
+# Sample numbers from this branch (Julia 1.12, K=14, etc.):
+#
+#   A: aliased tree (one big DAG, K=14)        ~46 ms — all four modes within
+#                                               1% (MOI-tree alloc cost is
+#                                               small vs. the rest of
+#                                               add_constraint here).
+#   B: many independent constraints (N=5000)   none 84 / per_call_oid 86 /
+#                                               model_struct 91 / model_oid 86
+#                                               → the *current branch* is the
+#                                               slowest; struct keys hurt the
+#                                               common case. Switching to
+#                                               objectid keys fixes it.
+#   C: shared big subexpr (N=200, M=200)       model_struct 171 / model_oid 170
+#                                               vs none 191 / per_call_oid 195
+#                                               → 12–14% win for the model-
+#                                               level cache. Per-call cannot
+#                                               see the cross-constraint
+#                                               sharing.
+#   D: many aliased trees (M=200, K=8)         all ~140 ms (per-constraint
+#                                               sharing is small and JuMP's
+#                                               `+` would flatten without the
+#                                               explicit GenericNonlinearExpr
+#                                               construction we use here).
+#
+# Takeaways:
+#  * The model-level scope is the right call when subexpressions are shared
+#    across constraints (scenario C).
+#  * Using the JuMP struct as the cache key (current branch) is what costs
+#    in scenario B — the deep `==`/`hash` adds overhead per node. Switching
+#    to `objectid` keys (blegat's "use hash as keys" suggestion) recovers
+#    most of it.
+#  * Scenario A's exponential blow-up is more visible in MEMORY than in
+#    TIME at K=14; bump K higher (or use the bench.jl example with
+#    |R|*|S| ≈ 3600) to make the time gap obvious.
+
+using Printf
+using JuMP
+using BenchmarkTools
+import MathOptInterface as MOI
+
+const _G = JuMP.GenericNonlinearExpr
+
+# ---------------------------------------------------------------------------
+# Three implementations of the GenericNonlinearExpr → ScalarNonlinearFunction
+# walk. They share the same skeleton; only the cache differs.
+# ---------------------------------------------------------------------------
+
+function _moi_no_cache(f::_G{V}) where {V}
+    ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args))
+    stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[]
+    for i in length(f.args):-1:1
+        if f.args[i] isa _G{V}
+            push!(stack, (ret, i, f.args[i]))
+        else
+            ret.args[i] = JuMP.moi_function(f.args[i])
+        end
+    end
+    while !isempty(stack)
+        parent, i, arg = pop!(stack)
+        child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args))
+        parent.args[i] = child
+        for j in length(arg.args):-1:1
+            if arg.args[j] isa _G{V}
+                push!(stack, (child, j, arg.args[j]))
+            else
+                child.args[j] = JuMP.moi_function(arg.args[j])
+            end
+        end
+    end
+    return ret
+end
+
+# Cache walk parameterised by the `keyfn` that maps a JuMP NL expression to
+# the dict key. `keyfn = identity` matches the branch (struct keys, deep
+# hash). `keyfn = objectid` matches odow's snippet (UInt64 keys, O(1) hash).
+function _moi_with_cache(f::_G{V}, cache, keyfn::F) where {V,F}
+    fk = keyfn(f)
+    if haskey(cache, fk)
+        return cache[fk]
+    end
+    ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args))
+    stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[]
+    for i in length(f.args):-1:1
+        if f.args[i] isa _G{V}
+            push!(stack, (ret, i, f.args[i]))
+        else
+            ret.args[i] = JuMP.moi_function(f.args[i])
+        end
+    end
+    while !isempty(stack)
+        parent, i, arg = pop!(stack)
+        argk = keyfn(arg)
+        if haskey(cache, argk)
+            parent.args[i] = cache[argk]
+            continue
+        end
+        child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args))
+        parent.args[i] = child
+        for j in length(arg.args):-1:1
+            if arg.args[j] isa _G{V}
+                push!(stack, (child, j, arg.args[j]))
+            else
+                child.args[j] = JuMP.moi_function(arg.args[j])
+            end
+        end
+        cache[argk] = child
+    end
+    cache[fk] = ret
+    return ret
+end
+
+# ---------------------------------------------------------------------------
+# Switching mechanism. We re-define JuMP's `moi_function(::GenericNonlinearExpr,
+# ::GenericModel)` so that `add_constraint` dispatches differently per mode.
+#
+# Modes :none and :per_call mirror what those PR options would actually
+# look like in production: a separate `check_belongs_to_model` walk, then
+# the (cached or uncached) build walk.
+# Mode :model_level matches the branch: a single integrated walk.
+# ---------------------------------------------------------------------------
+
+const BENCH_MODE = Ref(:model_struct)
+
+# Per-mode caches that need a UInt64 (objectid) key type rather than the
+# branch's `Dict{Any,...}`. We store them on the model as a side table so
+# they survive across constraints inside one build but are cleared between
+# builds (because each scenario builds a fresh `Model()`).
+const _OID_CACHE =
+    IdDict{JuMP.GenericModel,Dict{UInt64,MOI.ScalarNonlinearFunction}}()
+
+function _oid_cache(model)
+    get!(_OID_CACHE, model) do
+        return Dict{UInt64,MOI.ScalarNonlinearFunction}()
+    end
+end
+
+@eval JuMP function moi_function(
+    f::JuMP.GenericNonlinearExpr{V},
+    model::JuMP.GenericModel,
+) where {V}
+    mode = Main.BENCH_MODE[]
+    if mode === :none
+        JuMP.check_belongs_to_model(f, model)
+        return Main._moi_no_cache(f)
+    elseif mode === :per_call_oid
+        JuMP.check_belongs_to_model(f, model)
+        cache = Dict{UInt64,$(MOI.ScalarNonlinearFunction)}()
+        return Main._moi_with_cache(f, cache, objectid)
+    elseif mode === :model_struct
+        # branch behaviour: belongs-check is integrated; struct keys (deep hash)
+        return Main._moi_with_cache(f, model.subexpressions, identity)
+    else  # :model_oid
+        return Main._moi_with_cache(f, Main._oid_cache(model), objectid)
+    end
+end
+
+# ---------------------------------------------------------------------------
+# Scenarios. Each `scenario_*` returns a 0-arg closure that builds a fresh
+# model from scratch, so each BenchmarkTools sample sees an empty
+# `model.subexpressions` cache.
+# ---------------------------------------------------------------------------
+
+# Scenario A — odow's case: ONE constraint with a deeply aliased binary tree.
+# `e_k = e_{k-1} + e_{k-1}`. With no cache this expands to 2^K leaves; with
+# either cache it stays linear in K.
+#
+# We build use `atan` as with `+`, it gets flattened.
+function scenario_aliased_tree(; K::Int)
+    return function ()
+        model = Model()
+        @variable(model, x)
+        e = sin(x)
+        for _ in 1:K
+            e = atan(e, e)
+        end
+        @constraint(model, e <= 1)
+        return model
+    end
+end
+
+# Scenario B — many small INDEPENDENT NL constraints. No subexpression
+# sharing within or across constraints. The model-level cache pays an
+# insertion + hash cost for every new node; the per-call cache pays an
+# extra Dict allocation per constraint; master pays nothing.
+#
+# This is the case blegat warned about in the PR thread: "we start
+# creating many small dictionaries if there are a lot of constraints,
+# but that's probably negligible". The benchmark tells us whether it
+# really is negligible.
+function scenario_many_independent(; N::Int = 5_000)
+    return function ()
+        model = Model()
+        @variable(model, x[1:N])
+        for i in 1:N
+            @constraint(model, sin(x[i]) + cos(x[i]) * exp(x[i]) <= 1)
+        end
+        return model
+    end
+end
+
+# Scenario C — many constraints sharing one big subexpression.
+# `big = sum(sin(x[i]) for i in 1:N)` appears once in each of M constraints.
+# The per-call cache cannot help here (each call sees `big` only once); only
+# the model-level cache deduplicates `big` across constraints. This is the
+# case that genuinely motivates the model-level cache.
+function scenario_shared_big(; N::Int = 200, M::Int = 200)
+    return function ()
+        model = Model()
+        @variable(model, x[1:N])
+        big = sum(sin(x[i]) for i in 1:N)
+        for j in 1:M
+            @constraint(model, big * x[j] <= 1)
+        end
+        return model
+    end
+end
+
+# Scenario D — many constraints, each with internal aliasing (a smaller
+# binary tree per constraint, distinct leaf variable per constraint).
+# Both per-call and model-level caches help WITHIN each constraint;
+# across constraints there is nothing to share. This is the natural
+# territory of odow's per-call dict.
+function scenario_many_aliased(; M::Int = 200, K::Int = 8)
+    return function ()
+        model = Model()
+        @variable(model, x[1:M])
+        for i in 1:M
+            e = sin(x[i])
+            for _ in 1:K
+                e = atan(e, e)
+            end
+            @constraint(model, e <= 1)
+        end
+        return model
+    end
+end
+
+# ---------------------------------------------------------------------------
+# Runner
+# ---------------------------------------------------------------------------
+
+const MODES = [:none, :per_call_oid, :model_struct, :model_oid]
+
+function run_bench(label, build)
+    println("="^72)
+    println(label)
+    println("="^72)
+    for m in MODES
+        BENCH_MODE[] = m
+        display(@benchmark $build())
+    end
+end
+
+function one_aliased_tree(; K = 16)
+    return run_bench(
+        "A: aliased tree (one big DAG, K=$K)",
+        scenario_aliased_tree(; K),
+    )
+end
+
+function independent()
+    return run_bench(
+        "B: many independent constraints (N=5000)",
+        scenario_many_independent(; N = 5_000),
+    )
+end
+
+function shared_subexpr()
+    return run_bench(
+        "C: shared big subexpr (N=200, M=200)",
+        scenario_shared_big(; N = 200, M = 200),
+    )
+end
+
+function many_aliased_tree()
+    return run_bench(
+        "D: many aliased trees (M=200, K=8)",
+        scenario_many_aliased(; M = 200, K = 8),
+    )
+end
+
+function run_all()
+    one_aliased_tree()
+    independent()
+    shared_subexpr()
+    return many_aliased_tree()
+end
+
+run_all()
diff --git a/test/test_model.jl b/test/test_model.jl
index accdea0a76d..558731b110a 100644
--- a/test/test_model.jl
+++ b/test/test_model.jl
@@ -1151,7 +1151,7 @@ function test_keyword_getindex()
     # Vector{VariableRef}
     @test_throws err x[i=1]
     @test_throws err x[i=2]
-    @test_throws err x[i = :]
+    @test_throws err x[i=:]
     @test_throws err x[i=1:2]
     @test_throws BoundsError x[]
     # Matrix{VariableRef}
@@ -1168,7 +1168,7 @@ function test_keyword_getindex()
     @constraint(model, c[i=1:2], x[i] + i <= 1)
     @test_throws err c[i=1]
     @test_throws err c[i=1:2]
-    @test_throws err c[i = :]
+    @test_throws err c[i=:]
     @test_throws BoundsError c[]
     return
 end