JuliaSymbolics · DhairyaLGandhi · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025 · Oct 23, 2025
diff --git a/src/SymbolicUtils.jl b/src/SymbolicUtils.jl
@@ -156,6 +156,25 @@ export substitute
 include("substitute.jl")
 
 include("code.jl")
+import .Code
+import .Code: CSEState, Let, Assignment, Func, MakeArray,
+    MakeSparseArray, AtIndex, DestructuredArgs, SpawnFetch,
+    LiteralExpr, BasicSymbolic, ForLoop, SetArray, MakeTuple,
+    lhs, rhs
+include("matmuladd.jl")
+
+# Entry point of the rule-based rewrite: run the optimization rules over `expr`
+# and hand back the rewritten expression, or `expr` itself when no rule applied.
+function mul5_cse2(expr, state::CSEState)
+    rewritten = apply_optimization_rules(expr, state)
+    return rewritten === nothing ? expr : rewritten
+end
 
 PrecompileTools.@recompile_invalidations begin
     include("despecialize.jl")

diff --git a/src/code.jl b/src/code.jl
@@ -13,7 +13,8 @@ import SymbolicUtils: @matchable, BasicSymbolic, Sym, Term, iscall, operation, a
                       symtype, sorted_arguments, metadata, isterm, term, maketerm, unwrap_const,
                       ArgsT, Const, SymVariant, _is_array_of_symbolics, _is_tuple_of_symbolics,
                       ArrayOp, isarrayop, IdxToAxesT, ROArgsT, shape, Unknown, ShapeVecT, BSImpl,
-                      search_variables!, _is_index_variable, RangesT, IDXS_SYM, is_array_shape
+                      search_variables!, _is_index_variable, RangesT, IDXS_SYM, is_array_shape,
+                      vartype, symtype
 using Moshi.Match: @match
 import SymbolicIndexingInterface: symbolic_type, NotSymbolic
 
@@ -492,6 +493,8 @@ Func
 toexpr_kw(f, st) = Expr(:kw, toexpr(f, st).args...)
 
 function toexpr(f::Func, st)
+    # @show st
+    # @show f.args
     funkyargs = get_rewrites(vcat(f.args, map(lhs, f.kwargs)))
     union_rewrites!(st.rewrites, funkyargs)
     dargs = filter(x->x isa DestructuredArgs, f.args)
@@ -1019,7 +1022,6 @@ function cse!(expr::BasicSymbolic{T}, state::CSEState) where {T}
                 return sym
             end
         end
-
     end
 end
 

diff --git a/src/matmuladd.jl b/src/matmuladd.jl
@@ -0,0 +1,219 @@
+# Pattern-based optimization templates for CSE: a rule pairs a detector with a transformer.
+struct OptimizationRule
+    name::String  # label of the rule
+    detector::Function  # invoked as `detector(expr, state)` by `apply_optimization_rules`
+    transformer::Function  # invoked as `transformer(expr, match_data, state)` with the detector's flattened result
+    priority::Int  # rules are tried in descending `priority` order
+end
+
+# Scan `state.sorted_exprs` for the first entry whose left-hand side carries the
+# same name as `x`. Returns `nothing` when no entry matches, otherwise the named
+# tuple `(; expr, x)` holding that entry's right-hand side and `x` itself.
+function find_cse_expr(x, state)
+    for entry in state.sorted_exprs
+        if nameof(lhs(entry)) == nameof(x)
+            return (; expr = rhs(entry), x)
+        end
+    end
+    return nothing
+end
+
+# True when the name of `x`, rendered as a string, begins with "##cse".
+is_cse_var(x) = startswith(string(nameof(x)), "##cse")
+
+# Check that `C` has the shape expected of `A * B`: the first axis of `A`
+# followed by the second axis of `B`.
+function validate_mul_shapes(A, B, C)
+    product_shape = [shape(A)[1], shape(B)[2]]
+    return product_shape == shape(C)
+end
+
+# Variadic fallback, selected whenever the number of addends is not exactly one
+# (the three-argument method is more specific and handles that case).
+# No shape check is performed for this arity: the method always returns `true`.
+# NOTE(review): confirm whether every element of `C` should be validated against
+# the shape of `A * B` here.
+function validate_mul_shapes(A, B, C...)
+    return true
+end
+
+# Indices of the assignments in `pairs` whose right-hand side is a call to `op`
+# in which every argument has an `AbstractArray` symtype.
+function _array_call_indices(pairs, op)
+    return findall(pairs) do pair
+        ex = rhs(pair)
+        iscall(ex) || return false
+        operation(ex) === op || return false
+        return all(arg -> symtype(arg) <: AbstractArray, arguments(ex))
+    end
+end
+
+# Detect `A * B + C...` opportunities among the assignments of a `Let` block.
+#
+# An assignment is a product candidate when its right-hand side is an all-array
+# `*` call, and a sum candidate when it is an all-array `+` call. A match is a
+# sum candidate that has the left-hand side of a product candidate (compared by
+# name) among its arguments.
+#
+# Returns `nothing` when there is no sum candidate at all. Otherwise returns a
+# vector with one entry per sum candidate; each entry is the (possibly empty)
+# vector of named tuples
+# `(; A, B, Cs, mul_candidate, plus_candidate, mul_idx, plus_idx, pattern)`
+# describing its matches. `state` is accepted for the rule interface and is not
+# read here.
+function detect_matmul_add_pattern(expr::Code.Let, state::Code.CSEState)
+    mul_candidates_idx = _array_call_indices(expr.pairs, *)
+    plus_candidates_idx = _array_call_indices(expr.pairs, +)
+
+    pattern = map(plus_candidates_idx) do plus_idx
+        plus_candidate = expr.pairs[plus_idx]
+        plus_args = arguments(rhs(plus_candidate))
+        matches = map(mul_candidates_idx) do mul_idx
+            mul_candidate = expr.pairs[mul_idx]
+            mul_name = nameof(lhs(mul_candidate))
+            mul_name in nameof.(plus_args) || return nothing
+
+            # Only a two-factor product maps onto `mul!(tmp, A, B, 1, 1)`;
+            # destructuring a longer product would silently drop factors.
+            mul_args = arguments(rhs(mul_candidate))
+            length(mul_args) == 2 || return nothing
+            A, B = mul_args
+
+            # Everything in the sum except the product itself is an addend.
+            Cs = filter(x -> nameof(x) != mul_name, plus_args)
+            validate_mul_shapes(A, B, Cs...) || return nothing
+            return (; A, B, Cs, mul_candidate, plus_candidate, mul_idx, plus_idx,
+                pattern = "A*B + C")
+        end
+        filter(!isnothing, matches)
+    end
+    return isempty(pattern) ? nothing : pattern
+end
+
+function transform_to_mul5_assignment(expr, match_data_, state::Code.CSEState)
+    Cset = Set(filter(!is_cse_var, reduce(vcat,getproperty.(match_data_, :Cs))))
+    plus_candidates_idx = getproperty.(match_data_, :plus_idx)
+
+    final_temps = []
+
+    m_ = map(match_data_) do match_data
+
+        A, B = match_data.A, match_data.B
+        C = pop!(Cset)
+        T = vartype(C)
-function transform_to_mul5_assignment(expr, match_data_, state::Code.CSEState)
-    Cset = Set(filter(!is_cse_var, reduce(vcat,getproperty.(match_data_, :Cs))))
-    plus_candidates_idx = getproperty.(match_data_, :plus_idx)
-
-    final_temps = []
-
-    m_ = map(match_data_) do match_data
-
-        A, B = match_data.A, match_data.B
-        C = pop!(Cset)
-        T = vartype(C)
+function transform_to_mul5_assignment(expr::BasicSymbolic{T}, match_data_, state::Code.CSEState) where {T}
+    Cset = Set(filter(!is_cse_var, reduce(vcat,getproperty.(match_data_, :Cs))))
+    plus_candidates_idx = getproperty.(match_data_, :plus_idx)
+
+    final_temps = BasicSymbolic{T}[]
+
+    m_ = map(match_data_) do match_data
+
+        A, B = match_data.A, match_data.B
+        C = pop!(Cset)
-function transform_to_mul5_assignment(expr, match_data_, state::Code.CSEState)
-    Cset = Set(filter(!is_cse_var, reduce(vcat,getproperty.(match_data_, :Cs))))
-    plus_candidates_idx = getproperty.(match_data_, :plus_idx)
-
-    final_temps = []
-
-    m_ = map(match_data_) do match_data
-
-        A, B = match_data.A, match_data.B
-        C = pop!(Cset)
-        T = vartype(C)
+function transform_to_mul5_assignment(expr::BasicSymbolic{T}, match_data_, state::Code.CSEState) where {T}
+    Cset = Set(filter(!is_cse_var, reduce(vcat,getproperty.(match_data_, :Cs))))
+    plus_candidates_idx = getproperty.(match_data_, :plus_idx)
+    # One temporary per match is pushed into `final_temps`; it is not read again in this function.
+    final_temps = BasicSymbolic{T}[]
+
+    m_ = map(match_data_) do match_data
+        # NOTE(review): `pop!` on a `Set` removes an arbitrary element, so `C` need not be this match's own addend — confirm.
+        A, B = match_data.A, match_data.B
+        C = pop!(Cset)
+
+        # Create temporary variable for the result
+        temp_var_sym = gensym("mul5_temp")
+        temp_var = Sym{T}(temp_var_sym; type=symtype(C))
+
+        copy_call = Term{T}(copy, [C]; type=symtype(C))
+        mul_call = Term{T}(LinearAlgebra.mul!,
+            [temp_var, A, B, Const{T}(1), Const{T}(1)];
+            type=symtype(C))
+
+        # Add assignments to CSE state
+        copy_assignment = Assignment(temp_var, copy_call)
+        mul_assignment = Assignment(temp_var, mul_call)  # This overwrites temp_var with mul! result
+        final_assignment = Assignment(temp_var, temp_var)  # self-assignment; `get_substitution_map` reads its lhs through `t[end].lhs`
+        push!(final_temps, temp_var)
+
+        [copy_assignment, mul_assignment, final_assignment]
+    end
+    m = m_ |> Base.Fix1(reduce, vcat)
+
+    transformed_idxs = getproperty.(match_data_, :plus_idx)
-    transformed_idxs = getproperty.(match_data_, :plus_idx)
+    transformed_idxs = plus_candidates_idx
-    transformed_idxs = getproperty.(match_data_, :plus_idx)
+    transformed_idxs = plus_candidates_idx
+    substitution_map = get_substitution_map(match_data_, m_)
+    rm_idxs = getproperty.(match_data_, :mul_idx)
+    transformations = Dict()
+    map(transformed_idxs, m_) do i, mm
+        bank(transformations, i, mm)
+    end
+    # Rebuild the pair list: matched `+` entries are replaced by their generated assignments, matched `*` entries are dropped.
+    new_pairs = []
+    for (i, e) in enumerate(expr.pairs)
+        if i in transformed_idxs
+            push!(new_pairs, transformations[i]...)
-            push!(new_pairs, transformations[i]...)
+            append!(new_pairs, transformations[i])
-            push!(new_pairs, transformations[i]...)
+            append!(new_pairs, transformations[i])
+            @show e
-            @show e
-            @show e
+        elseif i in rm_idxs
+            push!(new_pairs, nothing)
+        else
+            push!(new_pairs, e)
+        end
+    end
+    new_pairs = filter(!isnothing, new_pairs)
+    # The generated assignments are also recorded in the CSE state.
+    push!(state.sorted_exprs, m...)
-    push!(state.sorted_exprs, m...)
+    append!(state.sorted_exprs, m)
-    push!(state.sorted_exprs, m...)
+    append!(state.sorted_exprs, m)
+    temp_var = last(m).lhs  # NOTE(review): not used by the remaining statements of this function
+    new_let = Code.Let(new_pairs, expr.body, expr.let_block)
+    apply_substitution_map(new_let, substitution_map)
+end
+
+# Build the substitution map used after the rewrite: for every match, the
+# left-hand side of its `+` assignment is mapped to the left-hand side of the
+# last assignment generated for that match.
+#
+# `match_data` and `transformations` are paired by position and must have the
+# same length. Entries that share a key are accumulated through `bank`.
+# Returns the populated `Dict`.
+function get_substitution_map(match_data, transformations)
+    @assert length(match_data) == length(transformations)
+
+    dic = Dict()
+    for (m, t) in zip(match_data, transformations)
+        bank(dic, m.plus_candidate.lhs, t[end].lhs)
+    end
+    return dic
+end
+
+function bank(dic, key, value)  # accumulate `value` under `key` in `dic`
+    if haskey(dic, key)
+        dic[key] = vcat(dic[key], value)
+    else
+        dic[key] = value
-        dic[key] = vcat(dic[key], value)
-    else
-        dic[key] = value
+        push!(dic[key], value)
+    else
+        dic[key] = copy(value)
-        dic[key] = vcat(dic[key], value)
-    else
-        dic[key] = value
+        push!(dic[key], value)  # key already present: `value` becomes one more element of the stored collection
+    else
+        dic[key] = copy(value)  # first occurrence: store a copy; NOTE(review): callers pass both vectors of assignments and single lhs symbols — confirm `copy`/`push!` suit both
+    end
+end
+
+# Rewrite a `Let` block according to `substitution_map`; thin wrapper over
+# `substitute_in_ir`.
+apply_substitution_map(expr::Code.Let, substitution_map::Dict) =
+    substitute_in_ir(expr, substitution_map)
+
+# Plain `Symbol`s are replaced by their mapped value when the map has one and
+# are returned unchanged otherwise.
+function substitute_in_ir(s::Symbol, substitution_map::Dict)
+    return haskey(substitution_map, s) ? substitution_map[s] : s
+end
+
+function substitute_in_ir_base(s, substitution_map::Dict)  # replace `s` by its mapped value, if the map has one
+    if haskey(substitution_map, s)
+        v = substitution_map[s]
+        if issym(v)  # a single replacement symbol is returned as-is
+            v
+        else
+            +(v...)
-            +(v...)
+            SymbolicUtils.add_worker(vartype(first(v)), v...)
-            +(v...)
+            SymbolicUtils.add_worker(vartype(first(v)), v...)  # otherwise `v` is splatted into `add_worker`; presumably sums the replacements — confirm
+        end
+    else
+        s  # no entry: `s` is left unchanged
+    end
+end
+
+function substitute_in_ir(expr, substitution_map::Dict)  # generic method: recurse through calls, look up symbols
+    if iscall(expr)  # rebuild the call from substituted arguments, keeping its operation and symtype
+        new_args = map(arguments(expr)) do arg
+            substitute_in_ir(arg, substitution_map)
+        end
+        return Code.Term{Code.vartype(expr)}(operation(expr), new_args; type=Code.symtype(expr))
-        return Code.Term{Code.vartype(expr)}(operation(expr), new_args; type=Code.symtype(expr))
+        return Term{vartype(expr)}(operation(expr), new_args; type=symtype(expr))
-        return Code.Term{Code.vartype(expr)}(operation(expr), new_args; type=Code.symtype(expr))
+        return Term{vartype(expr)}(operation(expr), new_args; type=symtype(expr))
+    elseif issym(expr)  # a bare symbol goes through the map lookup
+        substitute_in_ir_base(expr, substitution_map)
+    else
+        expr  # anything else passes through untouched
+    end
+end
+
+# Substitute on both sides of an assignment and wrap the results in a new
+# `Code.Assignment`.
+function substitute_in_ir(x::Code.Assignment, substitution_map::Dict)
+    return Code.Assignment(
+        substitute_in_ir(Code.lhs(x), substitution_map),
+        substitute_in_ir(Code.rhs(x), substitution_map),
+    )
+end
+
+# Substitute inside every pair and the body of a `Let`, producing a new `Let`
+# with the same `let_block` flag. An empty map returns `expr` itself.
+function substitute_in_ir(expr::Code.Let, substitution_map::Dict)
+    if isempty(substitution_map)
+        return expr
+    end
+
+    rewritten_pairs = map(p -> substitute_in_ir(p, substitution_map), expr.pairs)
+    rewritten_body = substitute_in_ir(expr.body, substitution_map)
+    return Code.Let(rewritten_pairs, rewritten_body, expr.let_block)
+end
+
+const MATMUL_ADD_RULE = OptimizationRule(  # the default entry of `rules` in `apply_optimization_rules`
+    "MatMul+Add",  # name
+    detect_matmul_add_pattern,  # detector
+    transform_to_mul5_assignment,  # transformer
+    10  # priority
+)
+
+# A `Let` counts as empty when it has no assignment pairs.
+function Base.isempty(l::Code.Let)
+    return isempty(l.pairs)
+end
+
+# Apply optimization rules during CSE.
+#
+# Rules are tried in descending `priority` order. For each rule the detector is
+# called as `detector(expr, state)`; its result is flattened with
+# `reduce(vcat, …)` and, if any match remains, handed to
+# `transformer(expr, match_data, state)`, whose result is returned immediately.
+# Returns `nothing` when no rule produced a match.
+function apply_optimization_rules(expr, state::Code.CSEState, rules=[MATMUL_ADD_RULE])
+    for rule in sort(rules, by=r->r.priority, rev=true)
+        detected = rule.detector(expr, state)
+        # A detector reports "nothing found" as `nothing` (or an empty
+        # collection); reducing over either would throw.
+        (detected === nothing || isempty(detected)) && continue
+
+        match_data = reduce(vcat, detected)
+        # Every per-candidate match list can be empty; there is then nothing
+        # for the transformer to rewrite.
+        isempty(match_data) && continue
+
+        return rule.transformer(expr, match_data, state)
+    end
+    return nothing
+end
diff --git a/src/simplify_rules.jl b/src/simplify_rules.jl
@@ -145,10 +145,19 @@ const NUMBER_SIMPLIFIER = RestartedChain((
     If(is_operation(^), Chain(POW_RULES)),
 ))
 
+is_array(x) = symtype(x) <: AbstractArray  # predicate: does `x` have an array symtype?
+const ARRAY_RULES = (
+    @rule( ~a::is_array * ~b::is_array + ~c::is_array => begin  # matches a*b + c where all three satisfy `is_array`
+        tmp = copy(~c)  # the product is accumulated into a copy of the matched `c`
+        LinearAlgebra.mul!(tmp, ~a, ~b, 1, 1)  # five-argument `mul!`: tmp = a*b*1 + tmp*1, in place
+    end),
+)
+
 const TRIG_EXP_SIMPLIFIER = Chain(TRIG_EXP_RULES)
 
 const BOOLEAN_SIMPLIFIER = Chain(BOOLEAN_RULES)
 
+const ARRAY_SIMPLIFIER = Chain(ARRAY_RULES)  # `Chain` over the array rules, built like the simplifiers above
 
 function get_default_simplifier(; kw...)
     IfElse(has_trig_exp,
@@ -159,7 +168,9 @@ function get_default_simplifier(; kw...)
            Postwalk(Chain((If(x->symtype(x) <: Number,
                               NUMBER_SIMPLIFIER),
                            If(x->symtype(x) <: Bool,
-                              BOOLEAN_SIMPLIFIER)))
+                              BOOLEAN_SIMPLIFIER),
+                           If(x -> symtype(x) <: AbstractArray,
+                              ARRAY_SIMPLIFIER)))
                     ; kw...))
 end
 

diff --git a/test/mul5_opt.jl b/test/mul5_opt.jl
@@ -0,0 +1,72 @@
+using SymbolicUtils
+using SymbolicUtils.Code
+import SymbolicUtils as SU
+using LinearAlgebra
+using Test
+
+# Helper: report whether `ir` is a `Code.Let` in which at least one assignment
+# has a call to `LinearAlgebra.mul!` as its right-hand side.
+function has_mul5_optimization(ir)
+    ir isa Code.Let || return false
+    return any(ir.pairs) do assignment
+        rhs_expr = Code.rhs(assignment)
+        SU.iscall(rhs_expr) && SU.operation(rhs_expr) === LinearAlgebra.mul!
+    end
+end
+
+# Helper: run `expr` through CSE and the mul5 rewrite, check that the rewrite
+# introduced a `mul!` call, then compile both versions and compare their
+# results on random 3×3 inputs.
+#
+# `args` are the symbolic arguments of the generated functions; one concrete
+# matrix is drawn per entry, so any number of arguments is supported.
+function test_optimization(expr, args...)
+    cse_ir = SU.Code.cse(expr)
+    state = SU.Code.CSEState()
+    optimized_ir = SU.mul5_cse2(cse_ir, state)
+
+    # Check if optimization was applied
+    @test has_mul5_optimization(optimized_ir)
+
+    f_cse = eval(toexpr(Func(collect(args), [], cse_ir)))
+    f_opt = eval(toexpr(Func(collect(args), [], optimized_ir)))
+
+    # One concrete matrix per symbolic argument keeps the call arity in sync
+    # with the generated functions
+    test_args = ntuple(_ -> randn(3, 3), length(args))
+
+    # The functions were just `eval`ed, hence `invokelatest`
+    result_cse = invokelatest(f_cse, test_args...)
+    result_opt = invokelatest(f_opt, test_args...)
+
+    # Assert correctness
+    @test isapprox(result_cse, result_opt, rtol=1e-10)
+end
+
+@testset "Mul5 Optimization Tests" begin
+    @syms A[1:3, 1:3] B[1:3, 1:3] C[1:3, 1:3] D[1:3, 1:3]  # four 3×3 symbolic arrays
+
+    expr1 = A * B + C  # one product, one addend
+    test_optimization(expr1, A, B, C)
+
+    expr2 = A * B + C + D  # one product, two addends
+    test_optimization(expr2, A, B, C, D)
+
+    expr4 = A * B + C + D + C * D # multiple correct patterns
+    test_optimization(expr4, A, B, C, D)
+
+    expr5 = sin.(A * B + C + D + C * D)  # the same sum wrapped in a broadcasted `sin`
+    test_optimization(expr5, A, B, C, D)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -30,5 +30,8 @@ using Pkg, Test, SafeTestsets
         @safetestset "Recursive utilities" begin include("recursive_utils.jl") end
         @safetestset "Misc" begin include("misc.jl") end
         @safetestset "Method library" begin include("methods.jl") end
+
+        # Optimization
+        @safetestset "MatmulAdd Optimization" begin include("mul5_opt.jl") end
     end
 end