diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index b21cdc40..00000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ff3d9a31..13c0e7c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,13 +23,6 @@
 if(${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC")
   message(FATAL_ERROR "You cannot build ASSET with the Microsoft Compiler. Please use Clang or GCC and try again.")
 endif()
 
-if(NOT WIN32)
-  if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
-    set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld")
-  endif()
-endif()
-
-
 ################################################################################
 ############### Build Settings #################################################
 ################################################################################
@@ -57,27 +50,40 @@
 list(APPEND RELEASE_FLAGS "-O2")
 
 ## Generic Binary Flags
 if(BUILD_ASSET_WHEEL)
-  #list(APPEND RELEASE_FLAGS "-mcx16;-mpopcnt;-msse3;-msse4.1;-msse4.2;-mssse3;-mavx;-mavx2;-mbmi;-mbmi2;-mf16c;-mfma;-mlzcnt;-mmovbe;-mxsave")
-  # x86-64-v3 flags written out for older compilers
-  list(APPEND RELEASE_FLAGS "-mcx16")
-  list(APPEND RELEASE_FLAGS "-mpopcnt")
-  list(APPEND RELEASE_FLAGS "-msse3")
-  list(APPEND RELEASE_FLAGS "-msse4.1")
-  list(APPEND RELEASE_FLAGS "-msse4.2")
-  list(APPEND RELEASE_FLAGS "-mssse3")
-  list(APPEND RELEASE_FLAGS "-mavx")
-  list(APPEND RELEASE_FLAGS "-mavx2")
-  list(APPEND RELEASE_FLAGS "-mbmi")
-  list(APPEND RELEASE_FLAGS "-mbmi2")
-  list(APPEND RELEASE_FLAGS "-mf16c")
-  list(APPEND RELEASE_FLAGS "-mfma")
-  list(APPEND RELEASE_FLAGS "-mlzcnt")
-  list(APPEND RELEASE_FLAGS "-mmovbe")
-  list(APPEND RELEASE_FLAGS "-mxsave")
+  # Detect architecture for wheel builds
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|ARM64")
+    # ARM64 (Apple Silicon, ARM servers) - use NEON
+    list(APPEND RELEASE_FLAGS "-march=armv8-a+simd")
+    message(STATUS "Building ARM64 wheel with NEON support")
+  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+    # x86-64 (Intel/AMD) - use AVX2 (x86-64-v3 flags written out for older compilers)
+    list(APPEND RELEASE_FLAGS "-mcx16")
+    list(APPEND RELEASE_FLAGS "-mpopcnt")
+    list(APPEND RELEASE_FLAGS "-msse3")
+    list(APPEND RELEASE_FLAGS "-msse4.1")
+    list(APPEND RELEASE_FLAGS "-msse4.2")
+    list(APPEND RELEASE_FLAGS "-mssse3")
+    list(APPEND RELEASE_FLAGS "-mavx")
+    list(APPEND RELEASE_FLAGS "-mavx2")
+    list(APPEND RELEASE_FLAGS "-mbmi")
+    list(APPEND RELEASE_FLAGS "-mbmi2")
+    list(APPEND RELEASE_FLAGS "-mf16c")
+    list(APPEND RELEASE_FLAGS "-mfma")
+    list(APPEND RELEASE_FLAGS "-mlzcnt")
+    list(APPEND RELEASE_FLAGS "-mmovbe")
+    list(APPEND RELEASE_FLAGS "-mxsave")
+    message(STATUS "Building x86-64 wheel with AVX2 support")
+  else()
+    # Fallback for other architectures
+    list(APPEND RELEASE_FLAGS "-march=native")
+    message(WARNING "Unknown architecture ${CMAKE_SYSTEM_PROCESSOR}, using -march=native")
+  endif()
 
   # Windows runs out of RAM with LINK_TIME_OPT on gh-actions
   if(NOT WIN32)
-    list(APPEND RELEASE_FLAGS "-mtune=skylake")
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+      list(APPEND RELEASE_FLAGS "-mtune=skylake")
+    endif()
   endif()
   set(LINK_TIME_OPT TRUE)  ### DO LTO - Recommended for full release
   set(CLANG_MAX_INLINE_DEPTH 400)
@@ -154,18 +160,14 @@
 endif()
 
 if(NOT WIN32)
   list(APPEND RELEASE_FLAGS "-fomit-frame-pointer")
   list(APPEND RELEASE_FLAGS "-fno-stack-protector")
-  #list(APPEND RELEASE_FLAGS "-fno-stack-clash-protection")
   #list(APPEND RELEASE_FLAGS "-fcf-protection=none")
   list(APPEND RELEASE_FLAGS "-fno-asynchronous-unwind-tables")
"-fno-asynchronous-unwind-tables") list(APPEND RELEASE_FLAGS "-ffast-math") + if (NOT APPLE) + #list(APPEND RELEASE_FLAGS "-fno-stack-clash-protection") + endif() endif() -if(APPLE) - list(APPEND COMMON_FLAGS "-Xlinker -undefined") - list(APPEND COMMON_FLAGS "-Xlinker dynamic_lookup") -endif() - - # Combine Flags set(COMPILE_FLAGS PUBLIC ${COMMON_FLAGS}) @@ -210,17 +212,20 @@ endif() ###################### Find and set our dependencies ########################### ################################################################################ -if(APPLE) - include(FindPythonEnv) -endif() +#if(APPLE) +# include(FindPythonEnv) +#endif() + # Set up submodule dependencies include_directories(dep/eigen) include_directories(dep/fmt/include) include_directories(dep/autodiff) -add_subdirectory(dep) +find_package(Python ${PYVERSION_EXACT} COMPONENTS Interpreter Development REQUIRED) set(PYBIND11_CPP_STANDARD -std=c++17) +set(PYBIND11_FINDPYTHON ON) +add_subdirectory(dep) # Set up external dependencies find_package(Threads REQUIRED) @@ -233,38 +238,70 @@ if(UNIX AND NOT APPLE) list(APPEND COMPILE_FLAGS "-fopenmp") endif() elseif(APPLE) - if(FALSE) - include_directories("/usr/local/opt/llvm/include") - link_directories("/usr/local/opt/llvm/lib") - set(CMAKE_LIBRARY_PATH /usr/local/opt/llvm/lib ${CMAKE_LIBRARY_PATH}) - set(CMAKE_LIBRARY_PATH $ENV{MKLROOT}/lib ${CMAKE_LIBRARY_PATH}) - if(CMAKE_C_COMPILER_ID MATCHES "Clang") - set(OpenMP_C "${CMAKE_C_COMPILER}") - set(OpenMP_C_FLAGS "-fopenmp=libomp") - set(OpenMP_C_LIB_NAMES "libomp") - set(OpenMP_libomp_LIBRARY "omp") - endif() - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(OpenMP_CXX "${CMAKE_CXX_COMPILER}") - set(OpenMP_CXX_FLAGS "-fopenmp=libomp") - set(OpenMP_CXX_LIB_NAMES "libomp") - set(OpenMP_libomp_LIBRARY "omp") + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + # fina_package(OpenMP REQUIRED) will fail with AppleClang so + # we need this to point towards the homebrew libomp + include_directories("/opt/homebrew/Cellar/libomp/20.1.8/include/") + link_directories("/opt/homebrew/Cellar/libomp/20.1.8/lib/") + list(APPEND COMPILE_FLAGS "-Xclang") + list(APPEND COMPILE_FLAGS "-fopenmp") + list(APPEND OpenMP_CXX_FLAGS "-lomp") + else() + # With homebrew LLVM clang compiler on Mac, this seems to work well for + # finding homebrew OpenMP find_package(OpenMP REQUIRED) - list(APPEND COMPILE_FLAGS ${OpenMP_CXX_FLAGS}) - endif() - + list(APPEND COMPILE_FLAGS "-fopenmp") + list(APPEND OpenMP_CXX_FLAGS "-lomp") + + # Add flags to make LLVM Clang more compatible with Apple SDK headers + list(APPEND COMPILE_FLAGS "-Wno-elaborated-enum-base") + endif() endif() - endif() -find_package(MKL REQUIRED) +if(APPLE) + find_package(AccelerateSparse REQUIRED) + add_compile_definitions(USE_ACCELERATE_SPARSE) +else() + find_package(MKL REQUIRED) +endif() find_package(Python ${PYVERSION_EXACT} REQUIRED COMPONENTS Interpreter Development) # Set dependency variables -set(INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS} ${MKL_INCLUDE_DIRS}) -set(LINK_LIBS ${PYTHON_LIBRARIES} ${MKL_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS}) +set(INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR} ${MKL_INCLUDE_DIRS} ${AccelerateSparse_INCLUDE_DIRS}) +set(LINK_LIBS ${MKL_LIBRARIES} Threads::Threads ${CMAKE_DL_LIBS} ${AccelerateSparse_LIBRARIES}) + +# Handle OpenMP for Apple +if (APPLE) + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + list(APPEND LINK_LIBS ${OpenMP_CXX_FLAGS}) + else() + list(APPEND LINK_LIBS OpenMP::OpenMP_CXX) + endif() +endif() + 
+# Handle Python executable variable
+if (NOT DEFINED PYTHON_EXECUTABLE)
+  set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
+endif()
+if(DEFINED Python_INCLUDE_DIRS)
+  list(APPEND INCLUDE_DIRS ${Python_INCLUDE_DIRS})
+elseif(DEFINED PYTHON_INCLUDE_DIRS)
+  list(APPEND INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS})
+endif()
+if (NOT APPLE)
+  # Don't directly link with the python library on mac; instead rely on -undefined dynamic_lookup
+  # to bind to the python symbols at runtime from the interpreter
+  if(DEFINED Python_LIBRARIES)
+    list(APPEND LINK_LIBS ${Python_LIBRARIES})
+  elseif(DEFINED PYTHON_LIBRARIES)
+    list(APPEND LINK_LIBS ${PYTHON_LIBRARIES})
+  endif()
+endif()
+
 if(UNIX)
   list(APPEND LINK_LIBS m)
 endif()
@@ -273,13 +310,22 @@ endif()
 ################################################################################
 ################ Linker Flags ##################################################
+
+# Set linker for Clang after threading detection is complete
+if(NOT WIN32)
+  if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+    set(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=lld")
+  endif()
+endif()
+
 if(BUILD_SHARED_LIBS AND NOT APPLE AND NOT WIN32)  # Windows and Linux Dynamic Linking
   list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Wl,--no-undefined -Wl,--start-group ${MKL_LIBRARIES_LIST} -Wl, --end-group")
 elseif(UNIX AND NOT APPLE)  # Linux Static Linking
   list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Wl,--no-undefined -Wl,--start-group ${MKL_LIBRARIES_LIST} -Wl,--end-group")
 elseif(APPLE)  # Apple Dynamic and Static Linking
-
-  list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS} -Xlinker -undefined -Xlinker dynamic_lookup")
+  list(APPEND LINKER_FLAGS ${OpenMP_CXX_FLAGS})
+  list(APPEND LINKER_FLAGS "-Wl,-bind_at_load")
+  list(APPEND LINKER_FLAGS "-Wl,-undefined,dynamic_lookup")
 else()  # Windows Static Linking
   list(APPEND LINKER_FLAGS "${OpenMP_CXX_FLAGS}")
 endif()
@@ -328,4 +374,3 @@ add_subdirectory(pypiwheel)
 
 # Formatting
 include(cmake/clang-format.cmake)
-
diff --git a/cmake/FindAccelerateSparse.cmake b/cmake/FindAccelerateSparse.cmake
new file mode 100644
index 00000000..f15c60b2
--- /dev/null
+++ b/cmake/FindAccelerateSparse.cmake
@@ -0,0 +1,143 @@
+# Ceres Solver - A fast non-linear least squares minimizer
+# Copyright 2023 Google Inc. All rights reserved.
+# http://ceres-solver.org/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Google Inc. nor the names of its contributors may be
+#   used to endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: alexs.mac@gmail.com (Alex Stewart)
+#
+# FindAccelerateSparse.cmake - Find the sparse solvers in Apple's Accelerate
+#                              framework, introduced in Xcode 9.0 (2017).
+#                              Note that this is distinct from the Accelerate
+#                              framework on its own, which existed in previous
+#                              versions but without the sparse solvers.
+#
+# This module defines the following variables which should be referenced
+# by the caller to use the library.
+#
+# AccelerateSparse_FOUND: TRUE iff an Accelerate framework including the sparse
+#                         solvers, and all dependencies, has been found.
+# AccelerateSparse_INCLUDE_DIRS: Include directories for Accelerate framework.
+# AccelerateSparse_LIBRARIES: Libraries for Accelerate framework and all
+#                             dependencies.
+#
+# The following variables are also defined by this module, but in line with
+# CMake recommended FindPackage() module style should NOT be referenced directly
+# by callers (use the plural variables detailed above instead). These variables
+# do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which
+# are NOT re-called (i.e. search for library is not repeated) if these variables
+# are set with valid values _in the CMake cache_. This means that if these
+# variables are set directly in the cache, either by the user in the CMake GUI,
+# or by the user passing -DVAR=VALUE directives to CMake when called (which
+# explicitly defines a cache variable), then they will be used verbatim,
+# bypassing the HINTS variables and other hard-coded search locations.
+#
+# AccelerateSparse_INCLUDE_DIR: Include directory for Accelerate framework, not
+#                               including the include directory of any
+#                               dependencies.
+# AccelerateSparse_LIBRARY: Accelerate framework, not including the libraries of
+#                           any dependencies.
+
+# Called if we failed to find the Accelerate framework with the sparse solvers.
+# Unsets all public (designed to be used externally) variables and reports
+# error message at priority depending upon [REQUIRED/QUIET/<NONE>] argument.
+macro(accelerate_sparse_report_not_found REASON_MSG)
+  unset(AccelerateSparse_FOUND)
+  unset(AccelerateSparse_INCLUDE_DIRS)
+  unset(AccelerateSparse_LIBRARIES)
+
+  # Make results of search visible in the CMake GUI if Accelerate has not
+  # been found so that user does not have to toggle to advanced view.
+  mark_as_advanced(CLEAR AccelerateSparse_INCLUDE_DIR
+                         AccelerateSparse_LIBRARY)
+
+  # Note <package>_FIND_[REQUIRED/QUIETLY] variables defined by FindPackage()
+  # use the camelcase library name, not uppercase.
+  if (AccelerateSparse_FIND_QUIETLY)
+    message(STATUS "Failed to find Accelerate framework with sparse solvers - "
+      ${REASON_MSG} ${ARGN})
+  elseif (AccelerateSparse_FIND_REQUIRED)
+    message(FATAL_ERROR "Failed to find Accelerate framework with sparse solvers - "
+      ${REASON_MSG} ${ARGN})
+  else()
+    # Neither QUIETLY nor REQUIRED, use no priority which emits a message
+    # but continues configuration and allows generation.
+ message("-- Failed to find Accelerate framework with sparse solvers - " + ${REASON_MSG} ${ARGN}) + endif() + return() +endmacro() +unset(AccelerateSparse_FOUND) +find_path(AccelerateSparse_INCLUDE_DIR NAMES Accelerate.h) +if (NOT AccelerateSparse_INCLUDE_DIR OR + NOT EXISTS ${AccelerateSparse_INCLUDE_DIR}) + accelerate_sparse_report_not_found( + "Could not find Accelerate framework headers. Set " + "AccelerateSparse_INCLUDE_DIR to the directory containing Accelerate.h") +endif() +find_library(AccelerateSparse_LIBRARY NAMES Accelerate) +if (NOT AccelerateSparse_LIBRARY OR + NOT EXISTS ${AccelerateSparse_LIBRARY}) + accelerate_sparse_report_not_found( + "Could not find Accelerate framework. Set AccelerateSparse_LIBRARY " + "to the Accelerate.framework directory") +endif() +set(AccelerateSparse_FOUND TRUE) +# Determine if the Accelerate framework detected includes the sparse solvers. +# Skip the test for LLVM Clang due to compatibility issues with Apple SDK headers +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND NOT "${CMAKE_CXX_COMPILER_VERSION}" MATCHES "Apple") + # For LLVM Clang, assume the sparse solvers are available on macOS + set(ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER TRUE) + message(STATUS "Skipping Accelerate framework sparse solver test for LLVM Clang") +else() + # For other compilers, perform the test + include(CheckCXXSourceCompiles) + set(CMAKE_REQUIRED_INCLUDES ${AccelerateSparse_INCLUDE_DIR}) + set(CMAKE_REQUIRED_LIBRARIES ${AccelerateSparse_LIBRARY}) + check_cxx_source_compiles( + "#include + int main() { + SparseMatrix_Double A; + SparseFactor(SparseFactorizationCholesky, A); + return 0; + }" + ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER) + unset(CMAKE_REQUIRED_INCLUDES) + unset(CMAKE_REQUIRED_LIBRARIES) + if (NOT ACCELERATE_FRAMEWORK_HAS_SPARSE_SOLVER) + accelerate_sparse_report_not_found( + "Detected Accelerate framework: ${AccelerateSparse_LIBRARY} does not " + "include the sparse solvers.") + endif() +endif() +if (AccelerateSparse_FOUND) + set(AccelerateSparse_INCLUDE_DIRS ${AccelerateSparse_INCLUDE_DIR}) + set(AccelerateSparse_LIBRARIES ${AccelerateSparse_LIBRARY}) +endif() +# Handle REQUIRED / QUIET optional arguments and version. 
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(AccelerateSparse
+  REQUIRED_VARS AccelerateSparse_INCLUDE_DIRS AccelerateSparse_LIBRARIES)
+
+if (AccelerateSparse_FOUND)
+  mark_as_advanced(FORCE AccelerateSparse_INCLUDE_DIR
+                         AccelerateSparse_LIBRARY)
+endif()
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ce502b62..8f1ecdf1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -155,11 +155,9 @@ target_link_libraries(asset PRIVATE ${LINK_LIBS})
 
 #set_target_properties(asset PROPERTIES CXX_VISIBILITY_PRESET hidden)
 
 if(UNIX)
-  set_target_properties(asset PROPERTIES LINK_FLAGS ${LINKER_FLAGS})
+  target_link_options(asset PRIVATE ${LINKER_FLAGS})
 endif()
 
-
-
 add_custom_target(pyassetsrc
   ALL
   DEPENDS ${CMAKE_SOURCE_DIR}/asset_asrl)
@@ -184,13 +182,3 @@ if(PYTHON_LOCAL_INSTALL_DIR)
     COMMENT "Copied asset_asrl to ${PYTHON_LOCAL_INSTALL_DIR}")
 endif()
-
-
-
-
-
-
-
-
-
-
diff --git a/src/Solvers/ASSET_Solvers.h b/src/Solvers/ASSET_Solvers.h
index 94d25cbc..6247eb08 100644
--- a/src/Solvers/ASSET_Solvers.h
+++ b/src/Solvers/ASSET_Solvers.h
@@ -7,7 +7,11 @@
 #include "OptimizationProblem.h"
 #include "OptimizationProblemBase.h"
 #include "PSIOPT.h"
+#ifdef USE_ACCELERATE_SPARSE
+#include "AccelerateInterface.h"
+#else
 #include "mkl.h"
+#endif
 
 namespace ASSET {
 
@@ -15,7 +19,9 @@
   // auto sol = m.def_submodule("Solvers","SubModule Containing PSIOPT,NLP, and Solver Flags");
   auto& sol = reg.getSolversModule();
 
+#ifndef USE_ACCELERATE_SPARSE
   int DSECOND = dsecnd();
+#endif
 
   PSIOPT::Build(sol);
   OptimizationProblemBase::Build(sol);
   Jet::Build(sol);
diff --git a/src/Solvers/AccelerateInterface.h b/src/Solvers/AccelerateInterface.h
new file mode 100644
index 00000000..f6e1189d
--- /dev/null
+++ b/src/Solvers/AccelerateInterface.h
@@ -0,0 +1,841 @@
+#ifndef EIGEN_ACCELERATESUPPORT_H
+#define EIGEN_ACCELERATESUPPORT_H
+
+#include "AccelerateUtils.h"
+
+#include <memory>
+#include <vector>
+
+#include <Accelerate/Accelerate.h>
+
+#include <Eigen/Sparse>
+#include <fmt/color.h>
+
+/*
+The classes in this file are directly based on the AccelerateSupport module from Eigen 3.4 and
+are subject to Eigen's MPL2 license, which can be found in the notices folder of the GitHub repository.
+Changes include the addition of several member variables and functions to provide more fine-grained
+control of the Accelerate Sparse solvers, to avoid making unnecessary copies/allocations, to add an
+iterative refinement implementation (to bring solution accuracy more in line with PARDISO), as well as
+to more closely align with the methods of the PARDISO interface in PardisoInterface.h (particularly
+for methods that ASSET employs extensively). Note that the MPL2 license is only applied to this particular
+file and not the rest of the project, as per the MPL2 license.
+*/
+
+namespace Eigen {
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+class AccelerateImpl;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateLLT
+ * \brief A direct Cholesky (LLT) factorization and solver based on Accelerate
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateLLT
+ */
+template <typename MatrixType, int UpLo = Lower>
+using AccelerateLLT = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationCholesky, true>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateLDLT
+ * \brief The default Cholesky (LDLT) factorization and solver based on Accelerate
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLT
+ */
+template <typename MatrixType, int UpLo = Lower>
+using AccelerateLDLT = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLT, true>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateLDLTUnpivoted
+ * \brief A direct Cholesky-like LDL^T factorization and solver based on Accelerate with only 1x1 pivots and no pivoting
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTUnpivoted
+ */
+template <typename MatrixType, int UpLo = Lower>
+using AccelerateLDLTUnpivoted = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTUnpivoted, true>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateLDLTSBK
+ * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with Supernode Bunch-Kaufman and static
+ * pivoting
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTSBK
+ */
+template <typename MatrixType, int UpLo = Lower>
+using AccelerateLDLTSBK = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTSBK, true>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateLDLTTPP
+ * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with full threshold partial pivoting
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ * \tparam UpLo_ additional information about the matrix structure. Default is Lower.
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTTPP
+ */
+template <typename MatrixType, int UpLo = Lower>
+using AccelerateLDLTTPP = AccelerateImpl<MatrixType, UpLo | Symmetric, SparseFactorizationLDLTTPP, true>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateQR
+ * \brief A QR factorization and solver based on Accelerate
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateQR
+ */
+template <typename MatrixType>
+using AccelerateQR = AccelerateImpl<MatrixType, 0, SparseFactorizationQR, false>;
+
+/** \ingroup AccelerateSupport_Module
+ * \typedef AccelerateCholeskyAtA
+ * \brief A QR factorization and solver based on Accelerate without storing Q (equivalent to A^TA = R^T R)
+ *
+ * \warning Only single and double precision real scalar types are supported by Accelerate
+ *
+ * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<>
+ *
+ * \sa \ref TutorialSparseSolverConcept, class AccelerateCholeskyAtA
+ */
+template <typename MatrixType>
+using AccelerateCholeskyAtA = AccelerateImpl<MatrixType, 0, SparseFactorizationCholeskyAtA, false>;
+
+namespace internal {
+template <typename T>
+struct AccelFactorizationDeleter {
+  void operator()(T* sym) {
+    if (sym) {
+      SparseCleanup(*sym);
+      delete sym;
+      sym = nullptr;
+    }
+  }
+};
+
+template <typename DenseVecT, typename DenseMatT, typename SparseMatT, typename NumFactT>
+struct SparseTypesTraitBase {
+  typedef DenseVecT AccelDenseVector;
+  typedef DenseMatT AccelDenseMatrix;
+  typedef SparseMatT AccelSparseMatrix;
+
+  typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
+  typedef NumFactT NumericFactorization;
+
+  typedef AccelFactorizationDeleter<SymbolicFactorization> SymbolicFactorizationDeleter;
+  typedef AccelFactorizationDeleter<NumericFactorization> NumericFactorizationDeleter;
+};
+
+template <typename Scalar>
+struct SparseTypesTrait {};
+
+template <>
+struct SparseTypesTrait<double> : SparseTypesTraitBase<DenseVector_Double, DenseMatrix_Double,
+                                                       SparseMatrix_Double, SparseOpaqueFactorization_Double> {};
+
+template <>
+struct SparseTypesTrait<float> : SparseTypesTraitBase<DenseVector_Float, DenseMatrix_Float,
+                                                      SparseMatrix_Float, SparseOpaqueFactorization_Float> {
+};
+
+// Taken from https://github.com/ceres-solver/ceres-solver/blob/master/internal/ceres/accelerate_sparse.cc
+inline void* resizeForAccelerateAlignment(const size_t required_size,
+                                          std::vector<uint8_t>* mem_vec)
+{
+  // As per the Accelerate documentation, all workspace memory passed to the
+  // sparse solver functions must be 16-byte aligned.
+  constexpr int kAccelerateRequiredAlignment = 16;
+  // Although malloc() on macOS should always be 16-byte aligned, it is unclear
+  // if this holds for new(), or on other Apple OSs (phoneOS, watchOS etc).
+  // As such we assume it is not and use std::align() to create a (potentially
+  // offset) 16-byte aligned sub-buffer of the specified size within workspace.
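+  // (The extra kAccelerateRequiredAlignment bytes reserved by the resize below
+  // guarantee that std::align() can always carve out the requested sub-buffer.)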
+  mem_vec->resize(required_size + kAccelerateRequiredAlignment);
+  size_t size_from_aligned_start = mem_vec->size();
+  void* aligned_solve_workspace_start =
+      reinterpret_cast<void*>(mem_vec->data());
+  aligned_solve_workspace_start = std::align(kAccelerateRequiredAlignment,
+                                             required_size,
+                                             aligned_solve_workspace_start,
+                                             size_from_aligned_start);
+  return aligned_solve_workspace_start;
+}
+
+}  // end namespace internal
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+class AccelerateImpl : public SparseSolverBase<AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_> > {
+ protected:
+  using Base = SparseSolverBase<AccelerateImpl>;
+  using Base::derived;
+  using Base::m_isInitialized;
+
+ public:
+  using Base::_solve_impl;
+
+  typedef MatrixType_ MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  enum { ColsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic };
+  enum { UpLo = UpLo_ };
+
+  using AccelDenseVector = typename internal::SparseTypesTrait<Scalar>::AccelDenseVector;
+  using AccelDenseMatrix = typename internal::SparseTypesTrait<Scalar>::AccelDenseMatrix;
+  using AccelSparseMatrix = typename internal::SparseTypesTrait<Scalar>::AccelSparseMatrix;
+  using SymbolicFactorization = typename internal::SparseTypesTrait<Scalar>::SymbolicFactorization;
+  using NumericFactorization = typename internal::SparseTypesTrait<Scalar>::NumericFactorization;
+  using SymbolicFactorizationDeleter = typename internal::SparseTypesTrait<Scalar>::SymbolicFactorizationDeleter;
+  using NumericFactorizationDeleter = typename internal::SparseTypesTrait<Scalar>::NumericFactorizationDeleter;
+
+  AccelerateImpl() {
+    m_isInitialized = false;
+
+    auto check_flag_set = [](int value, int flag) { return ((value & flag) == flag); };
+
+    if (check_flag_set(UpLo_, Symmetric)) {
+      m_sparseKind = SparseSymmetric;
+      m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle;
+    } else if (check_flag_set(UpLo_, UnitLower)) {
+      m_sparseKind = SparseUnitTriangular;
+      m_triType = SparseLowerTriangle;
+    } else if (check_flag_set(UpLo_, UnitUpper)) {
+      m_sparseKind = SparseUnitTriangular;
+      m_triType = SparseUpperTriangle;
+    } else if (check_flag_set(UpLo_, StrictlyLower)) {
+      m_sparseKind = SparseTriangular;
+      m_triType = SparseLowerTriangle;
+    } else if (check_flag_set(UpLo_, StrictlyUpper)) {
+      m_sparseKind = SparseTriangular;
+      m_triType = SparseUpperTriangle;
+    } else if (check_flag_set(UpLo_, Lower)) {
+      m_sparseKind = SparseTriangular;
+      m_triType = SparseLowerTriangle;
+    } else if (check_flag_set(UpLo_, Upper)) {
+      m_sparseKind = SparseTriangular;
+      m_triType = SparseUpperTriangle;
+    } else {
+      m_sparseKind = SparseOrdinary;
+      m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle;
+    }
+
+    m_order = SparseOrderMetis;  // Use METIS ordering by default for better performance
+    m_doIterativeRefinement = false;
+    m_iterativeRefinementIterations = 2;
+  }
+
+  explicit AccelerateImpl(const MatrixType& matrix) : AccelerateImpl() { compute(matrix); }
+
+  ~AccelerateImpl() {}
+
+  inline Index cols() const { return m_nCols; }
+  inline Index rows() const { return m_nRows; }
+
+  ComputationInfo info() const {
+    eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+    return m_info;
+  }
+
+  void setMatrix(const MatrixType& matrix);
+
+  void analyzePattern(const MatrixType& matrix);
+
+  void factorize(const MatrixType& matrix);
+
+  void compute(const MatrixType& matrix);
+
+  template <typename Rhs, typename Dest>
+  void _solve_impl(const MatrixBase<Rhs>& b, MatrixBase<Dest>& dest) const;
+
+  /** Sets the ordering algorithm to use. */
+  void setOrder(SparseOrder_t order) { m_order = order; }
+
+  /** Sets the number of threads for Accelerate */
+  inline void setNumThreads(int num_threads) {
+    accelerate_set_num_threads(num_threads);
+  }
+
+  void setIterativeRefinement(bool iterativeRefinement) {
+    m_doIterativeRefinement = iterativeRefinement;
+  }
+
+  void setIterativeRefinementIterations(int iterations) {
+    eigen_assert(iterations >= 0 && "Number of iterations must be non-negative.");
+    m_iterativeRefinementIterations = iterations;
+  }
+
+  MatrixType& getMatrix() { return m_matrix; }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) == 0, void>::type
+  getMatrix(const MatrixType& matrix) {
+    m_matrix = matrix;
+    m_matrix.makeCompressed();
+  }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) != 0, void>::type
+  getMatrix(const MatrixType& matrix) {
+    // Similar to Pardiso, use selfadjointView to ensure symmetry
+    PermutationMatrix<Dynamic, Dynamic, StorageIndex> p_null;
+    m_matrix.resize(matrix.rows(), matrix.cols());
+
+    constexpr int TriangleType = (UpLo & Lower) ? Lower : Upper;
+    m_matrix.template selfadjointView<TriangleType>() =
+        matrix.template selfadjointView<TriangleType>().twistedBy(p_null);
+    m_matrix.makeCompressed();
+  }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) != 0, MatrixType>::type
+  getMatrixTwisted(const MatrixType& matrix) {
+    eigen_assert(!m_permutation.empty() && "Permutation not available. Call compute() or analyzePattern() first.");
+    eigen_assert(matrix.rows() == matrix.cols() && "Matrix must be square for twisted operation.");
+    eigen_assert(static_cast<size_t>(matrix.rows()) == m_permutation.size() && "Matrix size must match permutation size.");
+
+    // Create permutation matrix from the stored permutation vector
+    PermutationMatrix<Dynamic, Dynamic, int> p_perm;
+    p_perm.indices() = Map<Matrix<int, Dynamic, 1>>(m_permutation.data(), m_permutation.size());
+
+    MatrixType result;
+    result.resize(matrix.rows(), matrix.cols());
+
+    constexpr int TriangleType = (U & Lower) ? Lower : Upper;
+    result.template selfadjointView<TriangleType>() =
+        matrix.template selfadjointView<TriangleType>().twistedBy(p_perm);
+
+    result.makeCompressed();
+    return result;
+  }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) != 0, int>::type
+  peigs() const {
+    eigen_assert(m_numericFactorization && "Numerical factorization must be computed first.");
+
+    int num_positive = 0, num_zero = 0, num_negative = 0;
+    int status = SparseGetInertia(*m_numericFactorization, &num_positive, &num_zero, &num_negative);
+
+    if (status != 0) {
+      // If SparseGetInertia fails, return -1 to indicate error
+      return -1;
+    }
+
+    return static_cast<int>(num_positive);
+  }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) != 0, int>::type
+  neigs() const {
+    eigen_assert(m_numericFactorization && "Numerical factorization must be computed first.");
+
+    int num_positive = 0, num_zero = 0, num_negative = 0;
+    int status = SparseGetInertia(*m_numericFactorization, &num_positive, &num_zero, &num_negative);
+
+    if (status != 0) {
+      // If SparseGetInertia fails, return -1 to indicate error
+      return -1;
+    }
+
+    return static_cast<int>(num_negative);
+  }
+
+  template <int U = UpLo>
+  typename std::enable_if<(U & Symmetric) != 0, int>::type
+  zeigs() const {
+    eigen_assert(m_numericFactorization && "Numerical factorization must be computed first.");
+
+    int num_positive = 0, num_zero = 0, num_negative = 0;
+    int status = SparseGetInertia(*m_numericFactorization, &num_positive, &num_zero, &num_negative);
+
+    if (status != 0) {
+      // If SparseGetInertia fails, return -1 to indicate error
+      return -1;
+    }
+
+    return static_cast<int>(num_zero);
+  }
+
+  inline int ppivs() const {
+    // Accelerate doesn't provide direct pivot perturbation count
+    // Return 0 as a safe default for now
+    return 0;
+  }
+
+  // This method initializes the internal AccelSparseMatrix. This must be called
+  // after changing the sparsity pattern of m_matrix via the reference returned
+  // from MatrixType& getMatrix()
+  void reinitializeInternalMatrixRepresentation();
+
+  // Internal factorization methods
+  AccelerateImpl& analyzePattern_internal();
+  AccelerateImpl& factorize_internal();
+  AccelerateImpl& compute_internal();
+
+  // Cleanup method
+  void release();
+
+  // Performance metrics
+  mutable int m_flops = 0;
+  mutable int m_mem = 0;
+
+ private:
+  void buildAccelSparseMatrix() {
+    SparseAttributes_t attributes{};
+    attributes.kind = m_sparseKind;
+
+    SparseMatrixStructure structure{};
+    structure.blockSize = 1;
+
+    if (!(MatrixType::Flags & Eigen::RowMajorBit)) {  // CSC format
+      const Index nColumnsStarts = m_matrix.cols() + 1;
+      m_columnStarts.resize(nColumnsStarts);
+      std::copy_n(m_matrix.outerIndexPtr(), nColumnsStarts, m_columnStarts.data());
+
+      structure.rowCount = static_cast<int>(m_matrix.rows());
+      structure.columnCount = static_cast<int>(m_matrix.cols());
+      structure.columnStarts = m_columnStarts.data();
+      structure.rowIndices = const_cast<int*>(m_matrix.innerIndexPtr());
+      attributes.transpose = false;
+      attributes.triangle = m_triType;
+    } else {  // RowMajor (CSR) format
+      // For CSR, Accelerate expects CSC. We use the 'transpose' attribute
+      // to tell Accelerate to interpret the CSR matrix as a transposed CSC matrix.
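+      // (Only the outer-index array is copied below so it can be widened to the
+      // integer type Accelerate expects; the inner indices and values are passed
+      // through unchanged, with row/column roles swapped in the structure.)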
+      const Index nRowStarts = m_matrix.rows() + 1;
+      m_columnStarts.resize(nRowStarts);  // Reuse m_columnStarts for rowStarts
+      std::copy_n(m_matrix.outerIndexPtr(), nRowStarts, m_columnStarts.data());
+
+      structure.rowCount = static_cast<int>(m_matrix.cols());  // Swapped
+      structure.columnCount = static_cast<int>(m_matrix.rows());  // Swapped
+      structure.columnStarts = m_columnStarts.data();  // These are now rowStarts
+      structure.rowIndices = const_cast<int*>(m_matrix.innerIndexPtr());  // These are now columnIndices
+      attributes.transpose = true;
+
+      // When transposing, we need to flip the triangle type for symmetric matrices
+      if (m_sparseKind == SparseSymmetric) {
+        attributes.triangle = (m_triType == SparseLowerTriangle) ? SparseUpperTriangle : SparseLowerTriangle;
+      } else {
+        attributes.triangle = m_triType;
+      }
+    }
+
+    structure.attributes = attributes;
+    m_accel_matrix.structure = structure;
+    m_accel_matrix.data = const_cast<Scalar*>(m_matrix.valuePtr());
+  }
+
+  void doAnalysis() {
+    m_numericFactorization.reset(nullptr);
+
+    // Only resize permutation if necessary to avoid unnecessary allocations
+    if (m_permutation.size() != static_cast<size_t>(m_nRows)) {
+      m_permutation.resize(m_nRows);
+    }
+    std::iota(m_permutation.begin(), m_permutation.end(), 0);  // Initialize with identity
+
+    SparseSymbolicFactorOptions fopts{};
+    fopts.control = SparseDefaultControl;
+    fopts.orderMethod = m_order;
+    fopts.order = m_permutation.data();  // Provide storage for computed permutation
+    fopts.ignoreRowsAndColumns = nullptr;
+    fopts.malloc = malloc;
+    fopts.free = free;
+
+    fopts.reportError = [](const char* msg) {
+      fmt::print(fmt::fg(fmt::color::red), "Accelerate Sparse Symbolic Factorization Error: {}\n", msg);
+    };
+
+    m_symbolicFactorization.reset(new SymbolicFactorization(SparseFactor(Solver_, m_accel_matrix.structure, fopts)));
+
+    SparseStatus_t status = m_symbolicFactorization->status;
+
+    updateInfoStatus(status);
+
+    if (status != SparseStatusOK) {
+      m_symbolicFactorization.reset(nullptr);
+    }
+  }
+
+  void doFactorization() {
+    SparseStatus_t status = SparseStatusReleased;
+
+    if (m_symbolicFactorization) {
+
+      SparseNumericFactorOptions nopts{};
+      nopts.control = SparseDefaultControl;
+      nopts.scalingMethod = SparseScalingDefault;
+      nopts.scaling = nullptr;
+      // Default values set by Apple
+      nopts.pivotTolerance = 0.01;  // Recommended value for difficult matrices in double
+      nopts.zeroTolerance = 1e-4 * __DBL_EPSILON__;  // "A few" orders of magnitude below epsilon.
+
+      // Get factor and workspace size
+      const int factorSize =
+          std::is_same<Scalar, double>::value
+              ? m_symbolicFactorization->factorSize_Double
+              : m_symbolicFactorization->factorSize_Float;
+      const int workspaceSize =
+          std::is_same<Scalar, double>::value
+              ? m_symbolicFactorization->workspaceSize_Double
+              : m_symbolicFactorization->workspaceSize_Float;
+
+      m_numericFactorization.reset(new NumericFactorization(SparseFactor(
+          *m_symbolicFactorization, m_accel_matrix, nopts,
+          internal::resizeForAccelerateAlignment(factorSize, &m_factorStorage),
+          internal::resizeForAccelerateAlignment(workspaceSize, &m_workspace))));
+
+      status = m_numericFactorization->status;
+
+      if (status != SparseStatusOK) m_numericFactorization.reset(nullptr);
+    }
+
+    updateInfoStatus(status);
+  }
+
+ protected:
+  void updateInfoStatus(SparseStatus_t status) const {
+    switch (status) {
+      case SparseStatusOK:
+        m_info = Success;
+        break;
+      case SparseFactorizationFailed:
+      case SparseMatrixIsSingular:
+        m_info = NumericalIssue;
+        break;
+      case SparseInternalError:
+      case SparseParameterError:
+      case SparseStatusReleased:
+      default:
+        m_info = InvalidInput;
+        break;
+    }
+  }
+
+  std::vector<long> m_columnStarts;
+  mutable MatrixType m_matrix;
+  mutable AccelSparseMatrix m_accel_matrix;
+  mutable ComputationInfo m_info;
+  mutable std::vector<uint8_t> m_factorStorage;
+  mutable std::vector<uint8_t> m_workspace;
+  mutable std::vector<uint8_t> m_solve_workspace;  // Cache solve workspace
+  mutable std::vector<Scalar> m_r_mem;
+  mutable int m_cached_solve_workspace_size = 0;  // Track cached size
+  Index m_nRows, m_nCols;
+  std::unique_ptr<SymbolicFactorization, SymbolicFactorizationDeleter> m_symbolicFactorization;
+  std::unique_ptr<NumericFactorization, NumericFactorizationDeleter> m_numericFactorization;
+  SparseKind_t m_sparseKind;
+  SparseTriangle_t m_triType;
+  SparseOrder_t m_order;
+  bool m_doIterativeRefinement;
+  int m_iterativeRefinementIterations;
+  mutable std::vector<int> m_permutation;  // Store permutation from factorization
+};
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::setMatrix(const MatrixType& matrix) {
+  if (EnforceSquare_) {
+    eigen_assert(matrix.rows() == matrix.cols());
+  }
+
+  //m_matrix = matrix;
+  getMatrix(matrix);
+  m_nRows = m_matrix.rows();
+  m_nCols = m_matrix.cols();
+
+  buildAccelSparseMatrix();
+
+  m_isInitialized = false;
+  m_symbolicFactorization.reset(nullptr);
+  m_numericFactorization.reset(nullptr);
+  m_cached_solve_workspace_size = 0;  // Clear cached workspace size
+  m_info = Success;
+}
+
+/** Computes the symbolic and numeric decomposition of matrix \a a */
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::compute(const MatrixType& a) {
+  setMatrix(a);
+
+  doAnalysis();
+
+  if (m_symbolicFactorization) doFactorization();
+
+  m_isInitialized = true;
+}
+
+/** Performs a symbolic decomposition on the sparsity pattern of matrix \a a.
+ *
+ * This function is particularly useful when solving for several problems having the same structure.
+ *
+ * \sa factorize()
+ */
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::analyzePattern(const MatrixType& a) {
+  if (EnforceSquare_) {
+    eigen_assert(a.rows() == a.cols());
+  }
+  setMatrix(a);
+
+  doAnalysis();
+
+  m_isInitialized = true;
+}
+
+/** Performs a numeric decomposition of matrix \a a.
+ *
+ * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been
+ * performed.
+ *
+ * \sa analyzePattern()
+ */
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::factorize(const MatrixType& a) {
+  eigen_assert(m_symbolicFactorization && "You must first call analyzePattern()");
+  eigen_assert(m_nRows == a.rows() && m_nCols == a.cols());
+
+  if (EnforceSquare_) {
+    eigen_assert(a.rows() == a.cols());
+  }
+
+  m_matrix = a;
+  buildAccelSparseMatrix();
+  m_numericFactorization.reset(nullptr);
+
+  doFactorization();
+}
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+template <typename Rhs, typename Dest>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::_solve_impl(const MatrixBase<Rhs>& b,
+                                                                              MatrixBase<Dest>& x) const {
+  if (!m_numericFactorization) {
+    m_info = InvalidInput;
+    return;
+  }
+
+  eigen_assert(m_nRows == b.rows());
+  eigen_assert(((b.cols() == 1) || b.outerStride() == b.rows()));
+
+  SparseStatus_t status = SparseStatusOK;
+
+  Scalar* b_ptr = const_cast<Scalar*>(b.derived().data());
+  Scalar* x_ptr = const_cast<Scalar*>(x.derived().data());
+
+  AccelDenseMatrix xmat{};
+  xmat.attributes = SparseAttributes_t();
+  xmat.columnCount = static_cast<int>(x.cols());
+  xmat.rowCount = static_cast<int>(x.rows());
+  xmat.columnStride = xmat.rowCount;
+  xmat.data = x_ptr;
+
+  AccelDenseMatrix bmat{};
+  bmat.attributes = SparseAttributes_t();
+  bmat.columnCount = static_cast<int>(b.cols());
+  bmat.rowCount = static_cast<int>(b.rows());
+  bmat.columnStride = bmat.rowCount;
+  bmat.data = b_ptr;
+
+  const int nrhs = (bmat.attributes.transpose) ? bmat.rowCount : bmat.columnCount;
+  const int workspaceSize = m_numericFactorization->solveWorkspaceRequiredStatic
+                            + nrhs * m_numericFactorization->solveWorkspaceRequiredPerRHS;
+
+  // Use cached solve workspace to avoid repeated allocations
+  void* ws;
+  if (workspaceSize != m_cached_solve_workspace_size) {
+    ws = internal::resizeForAccelerateAlignment(workspaceSize, &m_solve_workspace);
+    m_cached_solve_workspace_size = workspaceSize;
+  } else {
+    // Reuse existing aligned workspace
+    constexpr int kAccelerateRequiredAlignment = 16;
+    ws = reinterpret_cast<void*>(
+        m_solve_workspace.data()
+        + (kAccelerateRequiredAlignment
+           - reinterpret_cast<uintptr_t>(m_solve_workspace.data()) % kAccelerateRequiredAlignment)
+              % kAccelerateRequiredAlignment);
+  }
+  assert(ws != nullptr && "Accelerate workspace alignment failed");
+
+  SparseSolve(*m_numericFactorization, bmat, xmat, ws);
+
+  updateInfoStatus(status);
+
+  if (m_doIterativeRefinement)
+  {
+    auto n = vDSP_Length(x.rows() * x.cols());
+    if (m_r_mem.size() < n) {
+      m_r_mem.resize(n);
+    }
+    AccelDenseMatrix ref_mat{};
+    ref_mat.attributes = SparseAttributes_t();
+    ref_mat.columnCount = static_cast<int>(x.cols());
+    ref_mat.rowCount = static_cast<int>(x.rows());
+    ref_mat.columnStride = ref_mat.rowCount;
+    ref_mat.data = m_r_mem.data();
+
+    for (int i = 0; i < m_iterativeRefinementIterations; ++i) {
+      // Calculate residual and store in ref_mat
+      vDSP_vnegD(
+          bmat.data, 1,
+          ref_mat.data, 1, n);
+      SparseMultiplyAdd(m_accel_matrix, xmat, ref_mat);
+
+      // Solve for correction and store in ref_mat
+      SparseSolve(*m_numericFactorization, ref_mat, ws);
+
+      // vDSP operation that calculates x -= correction
+      vDSP_vsubD(
+          ref_mat.data, 1,
+          xmat.data, 1,
+          xmat.data, 1,
+          n);
+    }
+  }
+}
+
+/** Initializes the internal AccelSparseMatrix from the internal Eigen sparse matrix
+ *
+ * This method must be called after changing the sparsity pattern of the m_matrix
+ * member via the reference returned from MatrixType& getMatrix().
+ *
+ */
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::reinitializeInternalMatrixRepresentation()
+{
+  // Update matrix dimensions in case they changed when the sparsity pattern was modified
+  m_nRows = m_matrix.rows();
+  m_nCols = m_matrix.cols();
+
+  // Build/rebuild the internal AccelSparseMatrix from the current state of m_matrix
+  buildAccelSparseMatrix();
+
+  // Reset factorizations since the matrix structure has changed
+  m_symbolicFactorization.reset(nullptr);
+  m_numericFactorization.reset(nullptr);
+
+  // Clear cached workspace size since matrix structure changed
+  m_cached_solve_workspace_size = 0;
+
+  // Reset initialization state - will need to recompute factorizations
+  m_isInitialized = false;
+
+  // Reset computation info
+  m_info = Success;
+}
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>&
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::analyzePattern_internal() {
+  eigen_assert(m_matrix.rows() > 0 && m_matrix.cols() > 0 && "Matrix must be set before calling analyzePattern_internal()");
+
+  m_symbolicFactorization.reset(nullptr);
+  doAnalysis();
+
+  m_isInitialized = true;
+  return *this;
+}
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>&
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::factorize_internal() {
+  eigen_assert(m_symbolicFactorization && "You must first call analyzePattern_internal()");
+
+  m_numericFactorization.reset(nullptr);
+  doFactorization();
+
+  // Update performance metrics after factorization
+  if (m_numericFactorization) {
+    // Estimate memory usage based on factorization storage
+    m_mem = static_cast<int>(m_factorStorage.size() + m_workspace.size());
+
+    // Estimate FLOP count based on matrix size and sparsity
+    // This is a rough estimate since Accelerate doesn't provide exact FLOP counts
+    Index nnz = m_matrix.nonZeros();
+    m_flops = static_cast<int>(nnz * std::log(static_cast<double>(m_nRows)));
+  }
+
+  return *this;
+}
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>&
+AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::compute_internal() {
+  eigen_assert(m_matrix.rows() > 0 && m_matrix.cols() > 0 && "Matrix must be set before calling compute_internal()");
+
+  m_symbolicFactorization.reset(nullptr);
+  m_numericFactorization.reset(nullptr);
+
+  doAnalysis();
+
+  if (m_symbolicFactorization) {
+    doFactorization();
+
+    // Update performance metrics after factorization
+    if (m_numericFactorization) {
+      // Estimate memory usage based on factorization storage
+      m_mem = static_cast<int>(m_factorStorage.size() + m_workspace.size());
+
+      // Estimate FLOP count based on matrix size and sparsity
+      Index nnz = m_matrix.nonZeros();
+      m_flops = static_cast<int>(nnz * std::log(static_cast<double>(m_nRows)));
+    }
+  }
+
+  m_isInitialized = true;
+  return *this;
+}
+
+template <typename MatrixType_, int UpLo_, SparseFactorization_t Solver_, bool EnforceSquare_>
+void AccelerateImpl<MatrixType_, UpLo_, Solver_, EnforceSquare_>::release() {
+  // Clean up factorizations
+  m_numericFactorization.reset(nullptr);
+  m_symbolicFactorization.reset(nullptr);
+
+  // Clear storage vectors
+  m_factorStorage.clear();
+  m_workspace.clear();
+  m_solve_workspace.clear();
+  m_r_mem.clear();
+
+  // Clear matrix data
+  m_matrix.resize(0, 0);
+  m_matrix.data().squeeze();
+
+  // Clear permutation
+  m_permutation.clear();
+
+  // Reset cached sizes
+  m_cached_solve_workspace_size = 0;
+
+  // Reset performance metrics
+  m_flops = 0;
+  m_mem = 0;
+
+  // Reset initialization state
+  m_isInitialized = false;
+  m_info = Success;
+}
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_ACCELERATESUPPORT_H
diff --git a/src/Solvers/AccelerateUtils.h b/src/Solvers/AccelerateUtils.h
new file mode 100644
index 00000000..a94225b6
--- /dev/null
+++ b/src/Solvers/AccelerateUtils.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <cstdlib>
+#include <string>
+
+// Sets the number of threads for Accelerate via the environment variable VECLIB_MAXIMUM_THREADS.
+// Currently, VECLIB_MAXIMUM_THREADS is the only way to specify the number of threads
+// Accelerate will use. This should be swapped to an Accelerate API call if Apple ever adds one.
+inline void accelerate_set_num_threads(int num_threads) {
+  setenv("VECLIB_MAXIMUM_THREADS", std::to_string(num_threads).c_str(), 1);
+}
diff --git a/src/Solvers/Jet.h b/src/Solvers/Jet.h
index 2320750b..d1f91670 100644
--- a/src/Solvers/Jet.h
+++ b/src/Solvers/Jet.h
@@ -1,6 +1,10 @@
 #pragma once
 
 #include "OptimizationProblemBase.h"
+#ifdef USE_ACCELERATE_SPARSE
+#include "AccelerateUtils.h"
+#else
 #include "mkl.h"
+#endif
 
 namespace ASSET {
 
@@ -97,6 +101,10 @@
                     int nt,
                     bool verbose) {
 
+#ifdef USE_ACCELERATE_SPARSE
+      accelerate_set_num_threads(1);
+#endif
+
       int NumJobs = args.size();
       int NumConv = 0;
 
@@ -110,7 +118,9 @@
       Utils::Timer t;
 
       auto Job = [&](int threadid, int i) {
+#ifndef USE_ACCELERATE_SPARSE
         mkl_set_num_threads_local(1);
+#endif
 
         int gfidx = genfidxes[i];
         if constexpr (std::is_same_v) {
@@ -218,4 +228,4 @@
   };
 
-}  // namespace ASSET
\ No newline at end of file
+}  // namespace ASSET
diff --git a/src/Solvers/PSIOPT.cpp b/src/Solvers/PSIOPT.cpp
index ba44f490..ad7fb742 100644
--- a/src/Solvers/PSIOPT.cpp
+++ b/src/Solvers/PSIOPT.cpp
@@ -1,6 +1,8 @@
 #include "PSIOPT.h"
 
+#ifndef USE_ACCELERATE_SPARSE
 #include <mkl.h>
+#endif
 
 #include "PyDocString/Solvers/PSIOPT_doc.h"
 
@@ -12,10 +14,19 @@
 void ASSET::PSIOPT::setNLP(std::shared_ptr<NonLinearProgram> np) {
 
   this->SlackVars = this->nlp->SlackVars;
   this->KKTdim = this->nlp->KKTdim;
   this->setQPParams();
+#ifdef USE_ACCELERATE_SPARSE
+  accelerate_set_num_threads(QPThreads);
+#else
   mkl_set_num_threads(QPThreads);
+#endif
 
   this->nlp->analyzeSparsity(this->KKTSol.getMatrix());
+#ifdef USE_ACCELERATE_SPARSE
+  // we need to call this to update the internal AccelSparseMatrix since
+  // we changed the sparsity pattern via the reference returned from getMatrix.
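+  // (Otherwise the cached SparseMatrixStructure would still describe the old
+  // pattern and the next symbolic factorization would operate on stale data.)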
+  this->KKTSol.reinitializeInternalMatrixRepresentation();
+#endif
 
   if (storespmat)
     spmat = this->KKTSol.getMatrix();
 
   this->QPanalyzed = false;
diff --git a/src/Solvers/PSIOPT.h b/src/Solvers/PSIOPT.h
index 4bd5a367..1075b8f2 100644
--- a/src/Solvers/PSIOPT.h
+++ b/src/Solvers/PSIOPT.h
@@ -2,7 +2,13 @@
 #pragma once
 #include "IterateInfo.h"
 #include "NonLinearProgram.h"
+
+#ifdef USE_ACCELERATE_SPARSE
+#include "AccelerateInterface.h"
+#else
 #include "PardisoInterface.h"
+#endif
+
 #include "Utils/ColorText.h"
 #include "pch.h"
 
@@ -125,7 +131,11 @@
     int InequalCons = 0;
     int KKTdim = 0;
 
+#ifdef USE_ACCELERATE_SPARSE
+    Eigen::AccelerateLDLTTPP<Eigen::SparseMatrix<double, Eigen::RowMajor>, Eigen::Upper> KKTSol;
+#else
     Eigen::PardisoLDLT<Eigen::SparseMatrix<double, Eigen::RowMajor>, Eigen::Upper> KKTSol;
+#endif
 
     int QPThreads = ASSET_DEFAULT_QP_THREADS;
     QPAlgModes QPAlg = QPAlgModes::Classic;
@@ -475,6 +485,21 @@
     }
 
     void setQPParams() {
+#ifdef USE_ACCELERATE_SPARSE
+      // Accelerate interface uses different configuration methods
+      switch (QPOrd) {
+        case MINDEG:
+          this->KKTSol.setOrder(SparseOrderAMD);
+          break;
+        case METIS:
+        case PARMETIS:
+          // Note: the next version of Apple Accelerate will provide a parallel METIS factorization
+          this->KKTSol.setOrder(SparseOrderMetis);
+      }
+      this->KKTSol.setNumThreads(QPThreads);
+      this->KKTSol.setIterativeRefinement(QPRefSteps > 0);
+      this->KKTSol.setIterativeRefinementIterations(QPRefSteps);
+#else
       this->KKTSol.m_ord = QPOrd;
       this->KKTSol.m_pivotstrat = QPPivotStrategy;
       this->KKTSol.m_pivotpert = QPPivotPerturb;
@@ -487,8 +512,8 @@
       if (this->CNRMode)
        this->KKTSol.m_threads = this->QPThreads;
      this->KKTSol.m_parsolve = this->QPParSolve;
 
-      // mkl_set_num_threads(QPThreads);
       this->KKTSol.setParams();
+#endif
     }
 
     void set_early_callback(const EarlyCallBackType& f) {
diff --git a/src/TypeDefs/EigenTypes.h b/src/TypeDefs/EigenTypes.h
index 7f91a6c9..6a5a6668 100644
--- a/src/TypeDefs/EigenTypes.h
+++ b/src/TypeDefs/EigenTypes.h
@@ -72,7 +72,11 @@ using DomainMatrix = Eigen::Matrix;
 
 template <class Scalar, int Size>
 using SuperScalarType = Eigen::Array<Scalar, Size, 1>;
 
+#ifdef __ARM_NEON
+using DefaultSuperScalar = SuperScalarType<double, 2>;
+#else
 using DefaultSuperScalar = SuperScalarType<double, 4>;
+#endif
 
 }  // namespace ASSET
diff --git a/src/main.cpp b/src/main.cpp
index 470ba11a..ceb75fe7 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -34,12 +34,17 @@ void SoftwareInfo() {
       std::string(ASSET_COMPILERSTRING) + std::string(" ") + std::string(ASSET_COMPILERVERSION);
   std::string pythonv = std::to_string(PY_MAJOR_VERSION) + "." + std::to_string(PY_MINOR_VERSION);
   std::string vecversion;
-  if (vsize == 2)
+  if (vsize == 2) {
+#ifdef __ARM_NEON
+    vecversion = "ARM NEON - 128 bit - 2 doubles";
+#else
     vecversion = "SSE - 128 bit - 2 doubles";
-  else if (vsize == 4)
+#endif
+  } else if (vsize == 4) {
     vecversion = "AVX2 - 256 bit - 4 doubles";
-  else if (vsize == 8)
+  } else if (vsize == 8) {
     vecversion = "AVX512 - 512 bit - 8 doubles";
+  }
 
   std::string ASSET_STR(" ___ _____ _____ ______ ______ \n"
@@ -91,7 +96,11 @@ void SoftwareInfo() {
   fmt::print(fmt::fg(fmt::color::white), vecversion);
   fmt::print("\n");
   fmt::print(fmt::fg(fmt::color::royal_blue), "  Linear Solver : ");
+  #ifndef USE_ACCELERATE_SPARSE
   fmt::print(fmt::fg(fmt::color::white), "Intel MKL Pardiso");
+  #else
+  fmt::print(fmt::fg(fmt::color::white), "Apple Accelerate Sparse");
+  #endif
   fmt::print("\n");
   fmt::print(fmt::fg(fmt::color::royal_blue), "  Compiled With : ");
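
For reference, a minimal usage sketch of the Accelerate-backed solver path added above. This is illustrative only: the matrix assembly, dimensions, thread count, and the RowMajor/Upper storage assumptions here are hypothetical, but the member functions shown (setNumThreads, setIterativeRefinement, setIterativeRefinementIterations, analyzePattern, factorize, info, solve) are exactly the ones introduced in AccelerateInterface.h.

    #include <Eigen/Sparse>
    #include "AccelerateInterface.h"

    int main() {
      using SpMat = Eigen::SparseMatrix<double, Eigen::RowMajor>;
      SpMat A;            // assume A is assembled elsewhere, upper triangle stored
      Eigen::VectorXd b;  // assume b is sized to match A

      Eigen::AccelerateLDLTTPP<SpMat, Eigen::Upper> solver;
      solver.setNumThreads(4);                     // routes through VECLIB_MAXIMUM_THREADS
      solver.setIterativeRefinement(true);         // brings accuracy closer to PARDISO
      solver.setIterativeRefinementIterations(2);

      solver.analyzePattern(A);  // symbolic factorization; reusable for same-pattern solves
      solver.factorize(A);       // numeric factorization
      if (solver.info() == Eigen::Success) {
        Eigen::VectorXd x = solver.solve(b);
      }
      return 0;
    }

The split between analyzePattern() and factorize() mirrors the PARDISO interface this replaces, which is what lets PSIOPT reuse one symbolic analysis across the many same-structure KKT factorizations of an optimization run.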