From 49f95f461cf31d438c89155d9fdd60a0174576d5 Mon Sep 17 00:00:00 2001 From: Jaiveer Singh Date: Wed, 5 Apr 2023 17:51:51 -0700 Subject: [PATCH] Isaac ROS 0.30.0 (DP3) --- .gitattributes | 6 +- README.md | 42 +- docs/tutorial-ess-realsense.md | 2 +- docs/tutorial-isaac-sim.md | 16 +- isaac_ros_ess/CMakeLists.txt | 41 +- isaac_ros_ess/config/isaac_ros_ess_hawk.rviz | 212 +++ isaac_ros_ess/gxf/AMENT_IGNORE | 0 .../gxf/ess/3dv/include/cv/ess/ESS.h | 203 +++ isaac_ros_ess/gxf/ess/3dv/src/ESS.cpp | 230 ++++ .../gxf/ess/3dv/src/ESSPostProcess.cpp | 129 ++ .../gxf/ess/3dv/src/ESSPreProcess.cpp | 190 +++ isaac_ros_ess/gxf/ess/CMakeLists.txt | 129 ++ isaac_ros_ess/gxf/ess/ESS.cpp | 325 +++++ isaac_ros_ess/gxf/ess/ESSRegistry.cpp | 27 + .../gxf/ess/cvcore/include/cv/core/Array.h | 386 ++++++ .../gxf/ess/cvcore/include/cv/core/BBox.h | 142 ++ .../gxf/ess/cvcore/include/cv/core/CVError.h | 116 ++ .../ess/cvcore/include/cv/core/CameraModel.h | 292 ++++ .../cvcore/include/cv/core/ComputeEngine.h | 43 + .../gxf/ess/cvcore/include/cv/core/Core.h | 35 + .../gxf/ess/cvcore/include/cv/core/Image.h | 893 +++++++++++++ .../cvcore/include/cv/core/Instrumentation.h | 65 + .../ess/cvcore/include/cv/core/MathTypes.h | 234 ++++ .../gxf/ess/cvcore/include/cv/core/Memory.h | 135 ++ .../gxf/ess/cvcore/include/cv/core/Model.h | 50 + .../ess/cvcore/include/cv/core/ProfileUtils.h | 40 + .../gxf/ess/cvcore/include/cv/core/Tensor.h | 1189 +++++++++++++++++ .../ess/cvcore/include/cv/core/TensorList.h | 37 + .../ess/cvcore/include/cv/core/TensorMap.h | 534 ++++++++ .../gxf/ess/cvcore/include/cv/core/Traits.h | 478 +++++++ .../ess/cvcore/include/cv/inferencer/Errors.h | 58 + .../include/cv/inferencer/IInferenceBackend.h | 185 +++ .../cvcore/include/cv/inferencer/Inferencer.h | 79 ++ .../cvcore/include/cv/tensor_ops/BBoxUtils.h | 135 ++ .../ess/cvcore/include/cv/tensor_ops/DBScan.h | 91 ++ .../ess/cvcore/include/cv/tensor_ops/Errors.h | 48 + .../cvcore/include/cv/tensor_ops/IImageWarp.h | 63 + .../cv/tensor_ops/ITensorOperatorContext.h | 65 + .../cv/tensor_ops/ITensorOperatorStream.h | 251 ++++ .../cvcore/include/cv/tensor_ops/ImageUtils.h | 1091 +++++++++++++++ .../include/cv/tensor_ops/OneEuroFilter.h | 82 ++ .../include/cv/tensor_ops/TensorOperators.h | 48 + .../cvcore/include/cv/trtbackend/TRTBackend.h | 203 +++ .../gxf/ess/cvcore/src/core/cvcore/Array.cpp | 145 ++ .../gxf/ess/cvcore/src/core/cvcore/Dummy.cu | 0 .../ess/cvcore/src/core/cvcore/MathTypes.cpp | 244 ++++ .../gxf/ess/cvcore/src/core/cvcore/Tensor.cpp | 270 ++++ .../ess/cvcore/src/core/utility/CVError.cpp | 123 ++ .../src/core/utility/Instrumentation.cpp | 95 ++ .../ess/cvcore/src/core/utility/Memory.cpp | 124 ++ .../cvcore/src/core/utility/ProfileUtils.cpp | 127 ++ .../gxf/ess/cvcore/src/inferencer/Errors.cpp | 129 ++ .../ess/cvcore/src/inferencer/Inferencer.cpp | 130 ++ .../tensorrt/TensorRTInferencer.cpp | 275 ++++ .../inferencer/tensorrt/TensorRTInferencer.h | 78 ++ .../src/inferencer/tensorrt/TensorRTUtils.cpp | 64 + .../src/inferencer/tensorrt/TensorRTUtils.h | 45 + .../triton/TritonGrpcInferencer.cpp | 342 +++++ .../inferencer/triton/TritonGrpcInferencer.h | 75 ++ .../src/inferencer/triton/TritonUtils.cpp | 84 ++ .../src/inferencer/triton/TritonUtils.h | 47 + .../src/tensor_ops/ArithmeticOperations.cpp | 329 +++++ .../ess/cvcore/src/tensor_ops/BBoxUtils.cpp | 173 +++ .../src/tensor_ops/ColorConversions.cpp | 447 +++++++ .../gxf/ess/cvcore/src/tensor_ops/DBScan.cpp | 214 +++ .../gxf/ess/cvcore/src/tensor_ops/Errors.cpp | 104 ++ 
.../gxf/ess/cvcore/src/tensor_ops/Filters.cpp | 112 ++ .../gxf/ess/cvcore/src/tensor_ops/Filters.h | 105 ++ .../cvcore/src/tensor_ops/FusedOperations.cpp | 261 ++++ .../src/tensor_ops/GeometryTransforms.cpp | 754 +++++++++++ .../ess/cvcore/src/tensor_ops/IImageWarp.cpp | 24 + .../ess/cvcore/src/tensor_ops/NppUtils.cpp | 116 ++ .../gxf/ess/cvcore/src/tensor_ops/NppUtils.h | 31 + .../cvcore/src/tensor_ops/OneEuroFilter.cpp | 288 ++++ .../cvcore/src/tensor_ops/TensorOperators.cpp | 116 ++ .../tensor_ops/vpi/VPIColorConvertImpl.cpp | 135 ++ .../src/tensor_ops/vpi/VPIColorConvertImpl.h | 65 + .../src/tensor_ops/vpi/VPIEnumMapping.h | 196 +++ .../cvcore/src/tensor_ops/vpi/VPIImageWarp.h | 37 + .../src/tensor_ops/vpi/VPIRemapImpl.cpp | 160 +++ .../cvcore/src/tensor_ops/vpi/VPIRemapImpl.h | 82 ++ .../src/tensor_ops/vpi/VPIResizeImpl.cpp | 139 ++ .../cvcore/src/tensor_ops/vpi/VPIResizeImpl.h | 66 + .../src/tensor_ops/vpi/VPIStatusMapping.cpp | 122 ++ .../src/tensor_ops/vpi/VPIStatusMapping.h | 38 + .../vpi/VPIStereoDisparityEstimatorImpl.cpp | 211 +++ .../vpi/VPIStereoDisparityEstimatorImpl.h | 83 ++ .../src/tensor_ops/vpi/VPITensorOperators.cpp | 709 ++++++++++ .../src/tensor_ops/vpi/VPITensorOperators.h | 272 ++++ .../ess/cvcore/src/trtbackend/TRTBackend.cpp | 632 +++++++++ isaac_ros_ess/gxf/ess/extensions/ess/ESS.hpp | 119 ++ .../ess/extensions/tensor_ops/CameraModel.cpp | 86 ++ .../ess/extensions/tensor_ops/CameraModel.hpp | 60 + .../tensor_ops/ConvertColorFormat.cpp | 214 +++ .../tensor_ops/ConvertColorFormat.hpp | 51 + .../extensions/tensor_ops/CropAndResize.cpp | 161 +++ .../extensions/tensor_ops/CropAndResize.hpp | 53 + .../gxf/ess/extensions/tensor_ops/Frame3D.cpp | 56 + .../gxf/ess/extensions/tensor_ops/Frame3D.hpp | 53 + .../extensions/tensor_ops/ImageAdapter.cpp | 78 ++ .../extensions/tensor_ops/ImageAdapter.hpp | 101 ++ .../ess/extensions/tensor_ops/ImageUtils.cpp | 175 +++ .../ess/extensions/tensor_ops/ImageUtils.hpp | 65 + .../tensor_ops/InterleavedToPlanar.cpp | 146 ++ .../tensor_ops/InterleavedToPlanar.hpp | 45 + .../ess/extensions/tensor_ops/Normalize.cpp | 183 +++ .../ess/extensions/tensor_ops/Normalize.hpp | 47 + .../gxf/ess/extensions/tensor_ops/Reshape.cpp | 98 ++ .../gxf/ess/extensions/tensor_ops/Reshape.hpp | 49 + .../gxf/ess/extensions/tensor_ops/Resize.cpp | 194 +++ .../gxf/ess/extensions/tensor_ops/Resize.hpp | 55 + .../extensions/tensor_ops/TensorOperator.cpp | 235 ++++ .../extensions/tensor_ops/TensorOperator.hpp | 95 ++ .../ess/extensions/tensor_ops/TensorOps.cpp | 75 ++ .../extensions/tensor_ops/TensorStream.cpp | 124 ++ .../extensions/tensor_ops/TensorStream.hpp | 59 + .../ess/extensions/tensor_ops/Undistort.cpp | 285 ++++ .../ess/extensions/tensor_ops/Undistort.hpp | 69 + .../detail/ImageAdapterTensorImpl.cpp | 105 ++ .../detail/ImageAdapterTensorImpl.hpp | 105 ++ .../detail/ImageAdapterVideoBufferImpl.cpp | 88 ++ .../detail/ImageAdapterVideoBufferImpl.hpp | 294 ++++ .../isaac_ros_ess/ess_disparity_node.hpp | 2 +- .../launch/isaac_ros_argus_ess.launch.py | 2 +- isaac_ros_ess/launch/isaac_ros_ess.launch.py | 2 +- .../launch/isaac_ros_ess_isaac_sim.launch.py | 2 +- .../launch/isaac_ros_ess_realsense.launch.py | 2 +- isaac_ros_ess/package.xml | 5 +- .../scripts/isaac_ros_ess_visualizer.py | 2 +- isaac_ros_ess/src/ess_disparity_node.cpp | 18 +- isaac_ros_ess/test/isaac_ros_ess_test.py | 2 +- resources/Isaac_sim_enable_stereo.png | 4 +- resources/Isaac_sim_play.png | 4 +- resources/Isaac_sim_set_stereo_offset.png | 4 +- resources/isaac_ros_ess_nodegraph.png | 3 + 135 
files changed, 20631 insertions(+), 82 deletions(-) create mode 100644 isaac_ros_ess/config/isaac_ros_ess_hawk.rviz create mode 100644 isaac_ros_ess/gxf/AMENT_IGNORE create mode 100644 isaac_ros_ess/gxf/ess/3dv/include/cv/ess/ESS.h create mode 100644 isaac_ros_ess/gxf/ess/3dv/src/ESS.cpp create mode 100644 isaac_ros_ess/gxf/ess/3dv/src/ESSPostProcess.cpp create mode 100644 isaac_ros_ess/gxf/ess/3dv/src/ESSPreProcess.cpp create mode 100644 isaac_ros_ess/gxf/ess/CMakeLists.txt create mode 100644 isaac_ros_ess/gxf/ess/ESS.cpp create mode 100644 isaac_ros_ess/gxf/ess/ESSRegistry.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Array.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/BBox.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CVError.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CameraModel.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ComputeEngine.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Core.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Image.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Instrumentation.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/MathTypes.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Memory.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Model.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ProfileUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Tensor.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorList.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorMap.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Traits.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Errors.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/IInferenceBackend.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Inferencer.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/BBoxUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/DBScan.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/Errors.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/IImageWarp.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorContext.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorStream.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ImageUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/OneEuroFilter.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/TensorOperators.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/include/cv/trtbackend/TRTBackend.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Array.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Dummy.cu create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Tensor.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/utility/CVError.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Instrumentation.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Memory.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/core/utility/ProfileUtils.cpp create mode 100644 
isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Errors.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Inferencer.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ArithmeticOperations.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/BBoxUtils.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ColorConversions.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/DBScan.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Errors.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/FusedOperations.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/GeometryTransforms.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/IImageWarp.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/OneEuroFilter.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/TensorOperators.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIEnumMapping.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIImageWarp.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.cpp create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.h create mode 100644 isaac_ros_ess/gxf/ess/cvcore/src/trtbackend/TRTBackend.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/ess/ESS.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.hpp create mode 100644 
isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOps.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.hpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.cpp create mode 100644 isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.hpp create mode 100644 resources/isaac_ros_ess_nodegraph.png diff --git a/.gitattributes b/.gitattributes index f53b7af..cb44273 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,6 @@ +# Ignore Python files in linguist +*.py linguist-detectable=false + # Images *.gif filter=lfs diff=lfs merge=lfs -text *.jpg filter=lfs diff=lfs merge=lfs -text @@ -19,6 +22,7 @@ # ROS Bags **/resources/**/*.db3 filter=lfs diff=lfs merge=lfs -text **/resources/**/*.yaml filter=lfs diff=lfs merge=lfs -text +**/resources/**/*.yaml filter=lfs diff=lfs merge=lfs -text # DNN Model files -*.onnx filter=lfs diff=lfs merge=lfs -text \ No newline at end of file +*.onnx filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 9baad94..43844d8 100644 --- a/README.md +++ b/README.md @@ -5,36 +5,41 @@ DNN Stereo Disparity includes packages for predicting disparity of stereo input.
 ---
+
 ## Webinar Available
-Learn how to use this package by watching our on-demand webinar: [Using ML Models in ROS2 to Robustly Estimate Distance to Obstacles](https://gateway.on24.com/wcc/experience/elitenvidiabrill/1407606/3998202/isaac-ros-webinar-series)
+
+Learn how to use this package by watching our on-demand webinar: [Using ML Models in ROS 2 to Robustly Estimate Distance to Obstacles](https://gateway.on24.com/wcc/experience/elitenvidiabrill/1407606/3998202/isaac-ros-webinar-series)
 
 ---
 
 ## Overview
 
-This repository provides an NVIDIA hardware-accelerated package for DNN-based stereo disparity. Stereo disparity (with additional processing) can produce a depth image or point cloud of a scene for robot navigation. The `isaac_ros_ess` package uses the [ESS](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/isaac/models/dnn_stereo_disparity) DNN to perform stereo depth estimation via continuous disparity prediction. Given a pair of stereo input images, the package generates a disparity map of the left input image.
+Isaac ROS DNN Disparity provides a GPU-accelerated package for DNN-based stereo disparity. Stereo disparity is calculated from a time-synchronized image pair sourced from a stereo camera and is used to produce a depth image or a point cloud of a scene. The `isaac_ros_ess` package uses the [ESS DNN model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/isaac/models/dnn_stereo_disparity) to perform stereo depth estimation via continuous disparity prediction. Given a pair of stereo input images, the package generates a disparity map of the left input image.
+
+<div align="center"><img src="resources/isaac_ros_ess_nodegraph.png"/></div>
-> Check your requirements against package [input limitations](#input-restrictions).
+ESS is used in a graph of nodes to provide a disparity prediction from an input left and right stereo image pair. Images to ESS need to be rectified and resized to the appropriate input resolution. Because the input aspect ratio must be maintained, the image may need to be cropped before it is resized to the network input resolution. The DNN encode, DNN inference, and DNN decode steps are all contained within the ESS node. Inference is performed using TensorRT, as the ESS DNN model is designed to use optimizations supported by TensorRT.
 
 ### ESS DNN
 
-[ESS](https://arxiv.org/pdf/1803.09719.pdf) stands for Enhanced Semi-Supervised stereo disparity DNN, which was developed by NVIDIA. The ESS DNN is used to predict the disparity for each pixel from stereo camera image pairs. This network has improvements over classic CV approaches that use epi-polar geometry to compute disparity, as the DNN can learn to predict disparity in cases where epi-polar geometry feature matching fails. The semi-supervised learning and stereo disparity matching makes the DNN robust to environment that is unseen in the training datasets and occluded objects. This DNN is optimized for and evaluated with RGB global shutter stereo camera images, and accuracy may vary with monochrome images.
+[ESS](https://arxiv.org/pdf/1803.09719.pdf) stands for Enhanced Semi-Supervised stereo disparity, developed by NVIDIA. The ESS DNN is used to predict the disparity for each pixel from stereo camera image pairs. This network has improvements over classic CV approaches that use epipolar geometry to compute disparity, as the DNN can learn to predict disparity in cases where epipolar geometry feature matching fails. The semi-supervised learning and stereo disparity matching make the ESS DNN robust in environments unseen in the training datasets and with occluded objects. This DNN is optimized for and evaluated with color (RGB) global shutter stereo camera images, and accuracy may vary with monochrome stereo images used in analytic computer vision approaches to stereo disparity.
 
 The predicted [disparity](https://en.wikipedia.org/wiki/Binocular_disparity) values represent the distance a point moves from one image to the other in a stereo image pair (a.k.a. the binocular image pair). The disparity is inversely proportional to the depth (i.e. `disparity = focalLength x baseline / depth`). Given the [focal length](https://en.wikipedia.org/wiki/Focal_length) and [baseline](https://en.wikipedia.org/wiki/Stereo_camera) of the camera that generates a stereo image pair, the predicted disparity map from the `isaac_ros_ess` package can be used to compute depth and generate a [point cloud](https://en.wikipedia.org/wiki/Point_cloud).
 
+> **Note**: Compare the requirements of your use case against the package [input limitations](#input-restrictions).
+
 ### Isaac ROS NITROS Acceleration
 
 This package is powered by [NVIDIA Isaac Transport for ROS (NITROS)](https://developer.nvidia.com/blog/improve-perception-performance-for-ros-2-applications-with-nvidia-isaac-transport-for-ros/), which leverages type adaptation and negotiation to optimize message formats and dramatically accelerate communication between participating nodes.
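To make the disparity-to-depth relationship above concrete, here is a minimal C++ sketch of the conversion; the function and parameter names are illustrative assumptions, not part of the `isaac_ros_ess` API. The focal length (in pixels) and baseline (in meters) come from the calibration of the rectified stereo pair.

```cpp
#include <limits>

// depth = focalLength x baseline / disparity, rearranged from the formula above.
// Returns +infinity for non-positive disparities (no valid stereo match).
float DisparityToDepth(float disparity_px, float focal_length_px, float baseline_m)
{
    if (disparity_px <= 0.0F) {
        return std::numeric_limits<float>::infinity();
    }
    return focal_length_px * baseline_m / disparity_px;
}
```

For example, with a 12 cm baseline, a 480 px focal length, and a predicted disparity of 24 px, the point is 480 x 0.12 / 24 = 2.4 m away.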
 ## Performance
 
-The following are the benchmark performance results of the prepared pipelines in this package, by supported platform:
-
-| Pipeline                   | AGX Orin         | Orin Nano      | x86_64 w/ RTX 3060 Ti |
-| -------------------------- | ---------------- | -------------- | --------------------- |
-| ESS Disparity Node (1080p) | 51 fps<br>17.3ms | 16 fps<br>60ms | 98 fps<br>7.6ms       |
+The following table summarizes the per-platform performance statistics of sample graphs that use this package, with links included to the full benchmark output. These benchmark configurations are taken from the [Isaac ROS Benchmark](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark#list-of-isaac-ros-benchmarks) collection, based on the [`ros2_benchmark`](https://github.com/NVIDIA-ISAAC-ROS/ros2_benchmark) framework.
 
-These data have been collected per the methodology described [here](https://github.com/NVIDIA-ISAAC-ROS/.github/blob/main/profile/performance-summary.md#methodology).
+| Sample Graph | Input Size | AGX Orin | Orin NX | x86_64 w/ RTX 3060 Ti |
+| ------------ | ---------- | -------- | ------- | --------------------- |
+| [DNN Stereo Disparity Node](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/scripts//isaac_ros_ess_node.py) | 1080p | [63.6 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_node-agx_orin.json)<br>2.6 ms | [24.5 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_node-orin_nx.json)<br>3.0 ms | [131 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_node-x86_64_rtx_3060Ti.json)<br>0.73 ms |
+| [DNN Stereo Disparity Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/scripts//isaac_ros_ess_graph.py) | 1080p | [52.7 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_graph-agx_orin.json)<br>20 ms | [20.8 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_graph-orin_nx.json)<br>50 ms | [116 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_ess_graph-x86_64_rtx_3060Ti.json)<br>8.7 ms |
 
 ## Table of Contents
@@ -76,24 +81,24 @@ These data have been collected per the methodology described [here](https://gith
 ## Latest Update
 
-Update 2022-10-19: Updated OSS licensing
+Update 2023-04-05: Source available GXF extensions
 
 ## Supported Platforms
 
-This package is designed and tested to be compatible with ROS2 Humble running on [Jetson](https://developer.nvidia.com/embedded-computing) or an x86_64 system with an NVIDIA GPU.
+This package is designed and tested to be compatible with ROS 2 Humble running on [Jetson](https://developer.nvidia.com/embedded-computing) or an x86_64 system with an NVIDIA GPU.
 
-> **Note**: Versions of ROS2 earlier than Humble are **not** supported. This package depends on specific ROS2 implementation features that were only introduced beginning with the Humble release.
+> **Note**: Versions of ROS 2 earlier than Humble are **not** supported. This package depends on specific ROS 2 implementation features that were only introduced beginning with the Humble release.
 
-| Platform | Hardware | Software | Notes |
-| -------- | -------- | -------- | ----- |
-| Jetson   | [AGX Orin](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/)<br>[Orin Nano](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-agx-xavier/) | [JetPack 5.0.2](https://developer.nvidia.com/embedded/jetpack) | For best performance, ensure that [power settings](https://docs.nvidia.com/jetson/archives/r34.1/DeveloperGuide/text/SD/PlatformPowerAndPerformance.html) are configured appropriately. |
-| x86_64   | NVIDIA GPU | [Ubuntu 20.04+](https://releases.ubuntu.com/20.04/)<br>[CUDA 11.6.1+](https://developer.nvidia.com/cuda-downloads) |
+| Platform | Hardware | Software | Notes |
+| -------- | -------- | -------- | ----- |
+| Jetson   | [AGX Orin](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/)<br>[Orin Nano](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/) | [JetPack 5.1.1](https://developer.nvidia.com/embedded/jetpack) | For best performance, ensure that [power settings](https://docs.nvidia.com/jetson/archives/r34.1/DeveloperGuide/text/SD/PlatformPowerAndPerformance.html) are configured appropriately. |
+| x86_64   | NVIDIA GPU | [Ubuntu 20.04+](https://releases.ubuntu.com/20.04/)<br>[CUDA 11.8+](https://developer.nvidia.com/cuda-downloads) |
 
 ### Docker
 
 To simplify development, we strongly recommend leveraging the Isaac ROS Dev Docker images by following [these steps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_common/blob/main/docs/dev-env-setup.md). This will streamline your development environment setup with the correct versions of dependencies on both Jetson and x86_64 platforms.
 
-> **Note:** All Isaac ROS quick start guides, tutorials, and examples have been designed with the Isaac ROS Docker images as a prerequisite.
+> **Note**: All Isaac ROS quick start guides, tutorials, and examples have been designed with the Isaac ROS Docker images as a prerequisite.
 
 ## Quickstart
@@ -297,6 +302,7 @@ Make sure that the ROS bag has a reasonable size and publish rate.
 
 | Date       | Changes                                    |
 | ---------- | ------------------------------------------ |
+| 2023-04-05 | Source available GXF extensions            |
 | 2022-10-19 | Updated OSS licensing                      |
 | 2022-08-31 | Update to be compatible with JetPack 5.0.2 |
 | 2022-06-30 | Initial release                            |
diff --git a/docs/tutorial-ess-realsense.md b/docs/tutorial-ess-realsense.md
index 9cff5e3..cf6afbf 100644
--- a/docs/tutorial-ess-realsense.md
+++ b/docs/tutorial-ess-realsense.md
@@ -6,7 +6,7 @@
 This tutorial demonstrates how to perform stereo-camera-based reconstruction using a [RealSense](https://www.intel.com/content/www/us/en/architecture-and-technology/realsense-overview.html) camera and [isaac_ros_ess](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_dnn_stereo_disparity).
 
-> Note: This tutorial has been tested with a RealSense D455/D435 camera connected to a Jetson Xavier AGX, as well as an x86 PC with an NVIDIA graphics card.
+> **Note**: This tutorial requires a compatible RealSense camera from the list available [here](https://github.com/NVIDIA-ISAAC-ROS/.github/blob/main/profile/realsense-setup.md#camera-compatibility).
 
 ## Tutorial Walkthrough
diff --git a/docs/tutorial-isaac-sim.md b/docs/tutorial-isaac-sim.md
index 791b716..675bc76 100644
--- a/docs/tutorial-isaac-sim.md
+++ b/docs/tutorial-isaac-sim.md
@@ -4,22 +4,24 @@
 
 ## Overview
 
-This tutorial walks you through a pipeline to [estimate depth](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_dnn_stereo_disparity) with stereo images from Isaac Sim.
+This tutorial walks you through a graph to [estimate depth](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_dnn_stereo_disparity) with stereo images from Isaac Sim.
+
+Last validated with [Isaac Sim 2022.2.1](https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/release_notes.html#id1)
 
 ## Tutorial Walkthrough
 
 1. Complete steps 1-7 listed in the [Quickstart section](../README.md#quickstart) of the main README.
 2. Install and launch Isaac Sim following the steps in the [Isaac ROS Isaac Sim Setup Guide](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_common/blob/main/docs/isaac-sim-sil-setup.md)
-3. Open the Isaac ROS Common USD scene (using the **Content** window) located at:
+3. Open the Isaac ROS Common USD scene (using the *Content* tab) located at:
 
-   `omniverse://localhost/NVIDIA/Assets/Isaac/2022.1/Isaac/Samples/ROS2/Scenario/carter_warehouse_apriltags_worker.usd`.
+   ```text
+   http://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/2022.2.1/Isaac/Samples/ROS2/Scenario/carter_warehouse_apriltags_worker.usd
+   ```
 
    Wait for the scene to load completely.
 
-   > **Note:** To use a different server, replace `localhost` with ``
-4. Go to the **Stage** tab and select `/World/Carter_ROS/ROS_Cameras/ros2_create_camera_right_info`. In the **Property** tab, locate the **Compute Node -> Inputs -> stereoOffset -> X** value and change it from `0` to `-175.92`.
+4. Go to the *Stage* tab and select `/World/Carter_ROS/ROS_Cameras/ros2_create_camera_right_info`, then in *Property* tab *-> OmniGraph Node -> Inputs -> stereoOffset X* change `0` to `-175.92`.
 
   <div align="center"><img src="../resources/Isaac_sim_set_stereo_offset.png"/></div>
 
-5. Enable the right camera for a stereo image pair. Go to the **Stage** tab and select `/World/Carter_ROS/ROS_Cameras/enable_camera_right`, then tick the **Condition** checkbox.
+5. Enable the right camera for a stereo image pair. Go to the *Stage* tab and select `/World/Carter_ROS/ROS_Cameras/enable_camera_right`, then tick the *Condition* checkbox.
 
   <div align="center"><img src="../resources/Isaac_sim_enable_stereo.png"/></div>
 
 6. Press **Play** to start publishing data from the Isaac Sim application.
 
  <div align="center"><img src="../resources/Isaac_sim_play.png"/></div>
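Once the simulation is playing and the ESS graph is running, it can help to confirm that disparity messages are actually flowing. The following standalone listener is a minimal sketch for that check, not part of the tutorial; it assumes the default `disparity` topic and `stereo_msgs/DisparityImage` output of the ESS disparity node, so adjust the topic name if your launch file remaps it.

```cpp
#include <rclcpp/rclcpp.hpp>
#include <stereo_msgs/msg/disparity_image.hpp>

// Logs the size and stereo parameters of each incoming disparity message.
int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  auto node = rclcpp::Node::make_shared("ess_disparity_listener");
  auto sub = node->create_subscription<stereo_msgs::msg::DisparityImage>(
    "disparity", 10,
    [node](const stereo_msgs::msg::DisparityImage::SharedPtr msg) {
      RCLCPP_INFO(node->get_logger(), "disparity %ux%u (f=%.1f px, T=%.3f m)",
        msg->image.width, msg->image.height, msg->f, msg->t);
    });
  rclcpp::spin(node);
  rclcpp::shutdown();
  return 0;
}
```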
diff --git a/isaac_ros_ess/CMakeLists.txt b/isaac_ros_ess/CMakeLists.txt index c12cfbb..1ab88da 100644 --- a/isaac_ros_ess/CMakeLists.txt +++ b/isaac_ros_ess/CMakeLists.txt @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,54 +15,31 @@ # # SPDX-License-Identifier: Apache-2.0 -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.23.2) project(isaac_ros_ess LANGUAGES C CXX) - -# Default to C++17 -if(NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) -endif() - if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() -# Default to Release build -if(NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) -endif() -message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") - -execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE) -message(STATUS "Architecture: ${ARCHITECTURE}") - find_package(ament_cmake_auto REQUIRED) ament_auto_find_build_dependencies() - # isaac_ros_ess_node ament_auto_add_library(isaac_ros_ess_node SHARED src/ess_disparity_node.cpp) -target_compile_definitions(isaac_ros_ess_node - PRIVATE "COMPOSITION_BUILDING_DLL" -) target_link_libraries(isaac_ros_ess_node) rclcpp_components_register_nodes(isaac_ros_ess_node "nvidia::isaac_ros::dnn_stereo_disparity::ESSDisparityNode") set(node_plugins "${node_plugins}nvidia::isaac_ros::dnn_stereo_disparity::ESSDisparityNode;$\n") -# Install visualizer python scripts +### Install ESS extension built from source -install(PROGRAMS - scripts/isaac_ros_ess_visualizer.py - DESTINATION lib/${PROJECT_NAME} -) +add_subdirectory(gxf/ess) +install(TARGETS gxf_cvcore_ess DESTINATION share/${PROJECT_NAME}/gxf/lib/ess) -# Install package executable -install(TARGETS isaac_ros_ess_node - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin -) +### End extensions + +# Install visualizer python scripts +install(PROGRAMS scripts/isaac_ros_ess_visualizer.py DESTINATION lib/${PROJECT_NAME}) if(BUILD_TESTING) find_package(ament_lint_auto REQUIRED) diff --git a/isaac_ros_ess/config/isaac_ros_ess_hawk.rviz b/isaac_ros_ess/config/isaac_ros_ess_hawk.rviz new file mode 100644 index 0000000..d074741 --- /dev/null +++ b/isaac_ros_ess/config/isaac_ros_ess_hawk.rviz @@ -0,0 +1,212 @@ +Panels: + - Class: rviz_common/Displays + Help Height: 78 + Name: Displays + Property Tree Widget: + Expanded: + - /Global Options1 + - /Status1 + - /PointCloud21 + Splitter Ratio: 0.5 + Tree Height: 220 + - Class: rviz_common/Selection + Name: Selection + - Class: rviz_common/Tool Properties + Expanded: + - /2D Goal Pose1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 0.5886790156364441 + - Class: rviz_common/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz_common/Time + Experimental: false + Name: Time + SyncMode: 0 + SyncSource: PointCloud2 +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz_default_plugins/Grid + Color: 160; 160; 164 + Enabled: true + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + 
Plane Cell Count: 10 + Reference Frame: + Value: true + - Class: rviz_default_plugins/Image + Enabled: false + Max Value: 1 + Median window: 5 + Min Value: 0 + Name: Image + Normalize Range: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /left/image_raw + Value: false + - Class: rviz_default_plugins/Image + Enabled: true + Max Value: 1 + Median window: 5 + Min Value: 0 + Name: Image + Normalize Range: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /left/image_rect + Value: true + - Class: rviz_default_plugins/Image + Enabled: true + Max Value: 1 + Median window: 5 + Min Value: 0 + Name: Image + Normalize Range: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /right/image_rect + Value: true + - Alpha: 1 + Autocompute Intensity Bounds: true + Autocompute Value Bounds: + Max Value: 10 + Min Value: -10 + Value: true + Axis: Z + Channel Name: intensity + Class: rviz_default_plugins/PointCloud2 + Color: 255; 255; 255 + Color Transformer: Intensity + Decay Time: 0 + Enabled: true + Invert Rainbow: false + Max Color: 255; 255; 255 + Max Intensity: 4096 + Min Color: 0; 0; 0 + Min Intensity: 0 + Name: PointCloud2 + Position Transformer: XYZ + Selectable: true + Size (Pixels): 3 + Size (m): 0.0010000000474974513 + Style: Flat Squares + Topic: + Depth: 5 + Durability Policy: Volatile + Filter size: 10 + History Policy: Keep Last + Reliability Policy: Reliable + Value: /points2 + Use Fixed Frame: true + Use rainbow: true + Value: true + Enabled: true + Global Options: + Background Color: 48; 48; 48 + Fixed Frame: left/image_rect + Frame Rate: 30 + Name: root + Tools: + - Class: rviz_default_plugins/Interact + Hide Inactive Objects: true + - Class: rviz_default_plugins/MoveCamera + - Class: rviz_default_plugins/Select + - Class: rviz_default_plugins/FocusCamera + - Class: rviz_default_plugins/Measure + Line color: 128; 128; 0 + - Class: rviz_default_plugins/SetInitialPose + Covariance x: 0.25 + Covariance y: 0.25 + Covariance yaw: 0.06853891909122467 + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /initialpose + - Class: rviz_default_plugins/SetGoal + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /goal_pose + - Class: rviz_default_plugins/PublishPoint + Single click: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /clicked_point + Transformation: + Current: + Class: rviz_default_plugins/TF + Value: true + Views: + Current: + Class: rviz_default_plugins/Orbit + Distance: 10 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Focal Point: + X: 0 + Y: 0 + Z: 0 + Focal Shape Fixed Size: true + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: -0.6146020889282227 + Target Frame: + Value: Orbit (rviz) + Yaw: 3.9685781002044678 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 1136 + Hide Left Dock: false + Hide Right Dock: false + Image: + collapsed: false + QMainWindow State: 
000000ff00000000fd000000040000000000000156000003d6fc020000000bfb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005c00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073000000003b00000165000000c700fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d0061006700650000000233000000bc0000002800fffffffb0000000a0049006d006100670065010000003b000001980000002800fffffffb0000000a0049006d00610067006501000001d9000002380000002800ffffff000000010000010f000002b4fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003b000002b4000000a000fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000007380000003efc0100000002fb0000000800540069006d00650100000000000007380000024400fffffffb0000000800540069006d00650100000000000004500000000000000000000005dc000003d600000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: false + Width: 1848 + X: 72 + Y: 27 diff --git a/isaac_ros_ess/gxf/AMENT_IGNORE b/isaac_ros_ess/gxf/AMENT_IGNORE new file mode 100644 index 0000000..e69de29 diff --git a/isaac_ros_ess/gxf/ess/3dv/include/cv/ess/ESS.h b/isaac_ros_ess/gxf/ess/3dv/include/cv/ess/ESS.h new file mode 100644 index 0000000..277165d --- /dev/null +++ b/isaac_ros_ess/gxf/ess/3dv/include/cv/ess/ESS.h @@ -0,0 +1,203 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef NV_ESS_H_ +#define NV_ESS_H_ + +#include + +#include + +#include +#include +#include +#include + +namespace cvcore { namespace ess { + +/** + * Describes the algorithm supported for ESS Preprocessing + */ +enum class PreProcessType : uint8_t +{ + RESIZE = 0, // Resize to network dimensions without maintaining aspect ratio + CENTER_CROP // Crop to network dimensions from center of image +}; + +/** + * Describes the parameters for ESS Preprocessing + */ +struct ESSPreProcessorParams +{ + /* Preprocessing steps for ESS */ + PreProcessType preProcessType; +}; + +/** + * Default parameters for the preprocessing pipeline. 
+ */
+CVCORE_API extern const ImagePreProcessingParams defaultPreProcessorParams;
+
+/**
+ * Default parameters to describe the input expected for the model.
+ */
+CVCORE_API extern const ModelInputParams defaultModelInputParams;
+
+/**
+ * Default parameters to describe the model inference parameters.
+ */
+CVCORE_API extern const ModelInferenceParams defaultInferenceParams;
+
+/**
+ * Default parameters for the ESS Preprocessing
+ */
+CVCORE_API extern const ESSPreProcessorParams defaultESSPreProcessorParams;
+
+/*
+ * Interface for running pre-processing on ESS model.
+ */
+class CVCORE_API ESSPreProcessor
+{
+public:
+    /**
+     * Default constructor is deleted
+     */
+    ESSPreProcessor() = delete;
+
+    /**
+     * Constructor of ESSPreProcessor.
+     * @param preProcessorParams image pre-processing parameters.
+     * @param modelInputParams model parameters for network.
+     * @param essPreProcessorParams parameters specific for ess preprocessing.
+     */
+    ESSPreProcessor(const ImagePreProcessingParams &preProcessorParams, const ModelInputParams &modelInputParams,
+                    const ESSPreProcessorParams &essPreProcessorParams);
+
+    /**
+     * Destructor of ESSPreProcessor.
+     */
+    ~ESSPreProcessor();
+
+    /**
+     * Main interface to run pre-processing.
+     * @param stream cuda stream.
+     */
+
+    void execute(Tensor &leftOutput, Tensor &rightOutput,
+                 const Tensor &leftInput, const Tensor &rightInput,
+                 cudaStream_t stream = 0);
+
+private:
+    /**
+     * Implementation of ESSPreProcessor.
+     */
+    struct ESSPreProcessorImpl;
+    std::unique_ptr m_pImpl;
+};
+
+/**
+ * ESS parameters and implementation
+ */
+class CVCORE_API ESS
+{
+public:
+    /**
+     * Constructor for ESS.
+     * @param imgparams image pre-processing parameters.
+     * @param modelParams model parameters for network.
+     * @param modelInferParams model input inference parameters.
+     * @param essPreProcessorParams parameters specific for ess preprocessing.
+     */
+    ESS(const ImagePreProcessingParams &imgparams, const ModelInputParams &modelParams,
+        const ModelInferenceParams &modelInferParams, const ESSPreProcessorParams &essPreProcessorParams);
+
+    /**
+     * Default constructor not supported.
+     */
+    ESS() = delete;
+
+    /**
+     * Destructor for ESS.
+     */
+    ~ESS();
+
+    /**
+     * Inference function for a given BGR image
+     * @param disparityMap Disparity map (CPU/GPU tensor supported)
+     * @param leftInput RGB/BGR Interleaved Left image (Only GPU Input Tensor supported)
+     * @param rightInput RGB/BGR Interleaved Right image (Only GPU Input Tensor supported)
+     * @param stream Cuda stream
+     */
+    void execute(Tensor &disparityMap, const Tensor &leftInput,
+                 const Tensor &rightInput, cudaStream_t stream = 0);
+
+    /**
+     * Inference function for a given Preprocessed image
+     * @param disparityMap Disparity map (CPU/GPU tensor supported)
+     * @param leftInput RGB Planar Left image resized to network input dimensions (Only GPU Input Tensor supported)
+     * @param rightInput RGB Planar Right image resized to network input dimensions (Only GPU Input Tensor supported)
+     * @param stream Cuda stream
+     */
+    void execute(Tensor &disparityMap, const Tensor &leftInput,
+                 const Tensor &rightInput, cudaStream_t stream = 0);
+
+private:
+    struct ESSImpl;
+    std::unique_ptr m_pImpl;
+};
+
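+// Usage sketch (illustrative only, not part of this header): constructing ESS
+// with the default parameters declared above and running end-to-end inference.
+// The tensor names below are assumptions for a GPU-resident interleaved stereo
+// input and an F32 disparity output; the exact Tensor template types are
+// declared in cv/core/Tensor.h.
+//
+//   cvcore::ess::ESS net(defaultPreProcessorParams, defaultModelInputParams,
+//                        defaultInferenceParams, defaultESSPreProcessorParams);
+//   // leftImage/rightImage: GPU interleaved stereo pair,
+//   // disparity: output disparity map of the left image:
+//   net.execute(disparity, leftImage, rightImage, stream);
+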
+/**
+ * ESS post-processor parameters and implementation
+ */
+class CVCORE_API ESSPostProcessor
+{
+public:
+    /**
+     * Constructor for ESSPostProcessor.
+     * @param modelInputParams model parameters for network.
+     */
+    ESSPostProcessor(const ModelInputParams &modelParams);
+    /**
+     * Default constructor not supported.
+     */
+    ESSPostProcessor() = delete;
+
+    /**
+     * Destructor for ESSPostProcessor.
+     */
+    ~ESSPostProcessor();
+
+    /**
+     * Post-processing function for a predicted disparity map
+     * @param outputdisparityMap Disparity map rescaled to original resolution (CPU/GPU tensor)
+     * @param inputDisparityMap input Disparity map (GPU tensor)
+     * @param stream Cuda stream
+     */
+    void execute(Tensor &outputdisparityMap, const Tensor &inputdisparityMap,
+                 cudaStream_t stream = 0);
+
+private:
+    struct ESSPostProcessorImpl;
+    std::unique_ptr m_pImpl;
+};
+
+}} // namespace cvcore::ess
+#endif
diff --git a/isaac_ros_ess/gxf/ess/3dv/src/ESS.cpp b/isaac_ros_ess/gxf/ess/3dv/src/ESS.cpp
new file mode 100644
index 0000000..151545c
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/3dv/src/ESS.cpp
@@ -0,0 +1,230 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include
+
+#include
+
+#ifdef NVBENCH_ENABLE
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace cvcore { namespace ess {
+
+/* Default parameters used for the model provided */
+const ImagePreProcessingParams defaultPreProcessorParams = {
+    BGR_U8,                                  /**< Input type of image.*/
+    {-128, -128, -128},                      /**< Mean value */
+    {1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0}, /**< Normalization factor */
+    {0.5, 0.5, 0.5}};                        /**< Standard deviation */
+
+const ModelInputParams defaultModelInputParams = {1,       /**< Max batch size supported */
+                                                  960,     /**< Input layer width of the network */
+                                                  576,     /**< Input layer height of the network */
+                                                  RGB_U8}; /**< Input image type the network is trained with */
+
+const ModelInferenceParams defaultInferenceParams = {
+    "models/ess.engine",           /**< Engine file path */
+    {"input_left", "input_right"}, /**< Input layer name of the model */
+    {"output_left"}};              /**< Output layer name of the network */
+
+const ESSPreProcessorParams defaultESSPreProcessorParams = {
+    PreProcessType::RESIZE}; // Resize the input image to the network input dimensions
+
+struct ESS::ESSImpl
+{
+    inferencer::TensorRTInferenceParams tensorrtParams;
+    inferencer::InferenceBackendClient client;
+
+    // Model includes 2 input layers and 1 output layer
+    Tensor m_outputDevice;
+    Tensor m_inputLeftPlanar;
+    Tensor m_inputRightPlanar;
+
+    // Preprocess and PostProcess Objects
+    std::unique_ptr m_preprocess;
+    std::unique_ptr m_postprocess;
+
+    // Max batch size supported
+    size_t m_maxBatchSize;
+
+    std::string m_leftInputLayerName, m_rightInputLayerName;
+
+    size_t m_networkInputWidth, m_networkInputHeight;
+
+    ESSImpl(const ImagePreProcessingParams &imgParams, const ModelInputParams &modelParams,
+            const ModelInferenceParams &modelInferParams, const ESSPreProcessorParams &essParams)
+        : m_maxBatchSize(modelParams.maxBatchSize)
+    {
+        if (modelInferParams.inputLayers.size() != 2 || modelInferParams.outputLayers.size() != 1 ||
+            modelParams.maxBatchSize <= 0)
+        {
+            throw
ErrorCode::INVALID_ARGUMENT; + } + + // Initialize Preprocessor and postprocessor + m_preprocess.reset(new ESSPreProcessor(imgParams, modelParams, essParams)); + m_postprocess.reset(new ESSPostProcessor(modelParams)); + + // Initialize TRT backend + tensorrtParams = {inferencer::TRTInferenceType::TRT_ENGINE, + nullptr, + modelInferParams.engineFilePath, + modelParams.maxBatchSize, + modelInferParams.inputLayers, + modelInferParams.outputLayers}; + + std::error_code err = + inferencer::InferenceBackendFactory::CreateTensorRTInferenceBackendClient(client, tensorrtParams); + + if (err != cvcore::make_error_code(cvcore::ErrorCode::SUCCESS)) + { + throw err; + } + + inferencer::ModelMetaData modelInfo = client->getModelMetaData(); + + m_networkInputHeight = modelParams.inputLayerHeight; + m_networkInputWidth = modelParams.inputLayerWidth; + m_inputLeftPlanar = {m_networkInputWidth, m_networkInputHeight, modelParams.maxBatchSize, false}; + m_inputRightPlanar = {m_networkInputWidth, m_networkInputHeight, modelParams.maxBatchSize, false}; + size_t outputWidth = modelInfo.outputLayers[modelInferParams.outputLayers[0]].shape[2]; + size_t outputHeight = modelInfo.outputLayers[modelInferParams.outputLayers[0]].shape[1]; + m_outputDevice = {outputWidth, outputHeight, modelParams.maxBatchSize, false}; + m_leftInputLayerName = modelInferParams.inputLayers[0]; + m_rightInputLayerName = modelInferParams.inputLayers[1]; + CHECK_ERROR_CODE(client->setInput(m_inputLeftPlanar, modelInferParams.inputLayers[0])); + CHECK_ERROR_CODE(client->setInput(m_inputRightPlanar, modelInferParams.inputLayers[1])); + CHECK_ERROR_CODE(client->setOutput(m_outputDevice, modelInferParams.outputLayers[0])); + } + + ~ESSImpl() + { + CHECK_ERROR_CODE(client->unregister()); + inferencer::InferenceBackendFactory::DestroyTensorRTInferenceBackendClient(client); + } + + void execute(Tensor &output, const Tensor &inputLeft, + const Tensor &inputRight, cudaStream_t stream) + { + size_t batchSize = inputLeft.getDepth(); + if (inputLeft.isCPU() || inputRight.isCPU()) + { + throw std::invalid_argument("ESS : Input type should be GPU buffer"); + } + + if (inputLeft.getDepth() > m_maxBatchSize || inputRight.getDepth() > m_maxBatchSize) + { + throw std::invalid_argument("ESS : Input batch size cannot exceed max batch size\n"); + } + + if (inputLeft.getDepth() != inputRight.getDepth() || output.getDepth() != inputLeft.getDepth()) + { + throw std::invalid_argument("ESS : Batch size of input and output images don't match!\n"); + } + m_preprocess->execute(m_inputLeftPlanar, m_inputRightPlanar, inputLeft, inputRight, stream); + +#ifdef NVBENCH_ENABLE + size_t inputWidth = inputLeft.getWidth(); + size_t inputHeight = inputLeft.getHeight(); + const std::string testName = "ESSInference_batch" + std::to_string(batchSize) + "_" + + std::to_string(inputWidth) + "x" + std::to_string(inputHeight) + "x" + + std::to_string(inputLeft.getChannelCount()) + "_GPU"; + nv::bench::Timer timerFunc = nv::bench::GPU(testName.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + + CHECK_ERROR_CODE(client->setCudaStream(stream)); + CHECK_ERROR_CODE(client->infer(batchSize)); + // PostProcess + m_postprocess->execute(output, m_outputDevice, stream); + } + + void execute(Tensor &output, const Tensor &inputLeft, + const Tensor &inputRight, cudaStream_t stream) + { + size_t batchSize = inputLeft.getDepth(); + if (inputLeft.isCPU() || inputRight.isCPU()) + { + throw std::invalid_argument("ESS : Input type should be GPU buffer"); + } + + if (inputLeft.getDepth() > 
m_maxBatchSize || inputRight.getDepth() > m_maxBatchSize) + { + throw std::invalid_argument("ESS : Input batch size cannot exceed max batch size\n"); + } + + if (inputLeft.getDepth() != inputRight.getDepth() || output.getDepth() != inputLeft.getDepth()) + { + throw std::invalid_argument("ESS : Batch size of input and output images don't match!\n"); + } + + if (inputLeft.getWidth() != m_networkInputWidth || inputLeft.getHeight() != m_networkInputHeight) + { + throw std::invalid_argument("ESS : Left preprocessed input does not match network input dimensions!\n"); + } + + if (inputRight.getWidth() != m_networkInputWidth || inputRight.getHeight() != m_networkInputHeight) + { + throw std::invalid_argument("ESS : Right preprocessed input does not match network input dimensions!\n"); + } +#ifdef NVBENCH_ENABLE + size_t inputWidth = inputLeft.getWidth(); + size_t inputHeight = inputLeft.getHeight(); + const std::string testName = "ESSInference_batch" + std::to_string(batchSize) + "_" + + std::to_string(inputWidth) + "x" + std::to_string(inputHeight) + "x" + + std::to_string(inputLeft.getChannelCount()) + "_GPU"; + nv::bench::Timer timerFunc = nv::bench::GPU(testName.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + // inference + CHECK_ERROR_CODE(client->setInput(inputLeft, m_leftInputLayerName)); + CHECK_ERROR_CODE(client->setInput(inputRight, m_rightInputLayerName)); + + CHECK_ERROR_CODE(client->setCudaStream(stream)); + CHECK_ERROR_CODE(client->infer(batchSize)); + // PostProcess + m_postprocess->execute(output, m_outputDevice, stream); + } +}; + +ESS::ESS(const ImagePreProcessingParams &imgParams, const ModelInputParams &modelParams, + const ModelInferenceParams &modelInferParams, const ESSPreProcessorParams &essParams) + : m_pImpl(new ESSImpl(imgParams, modelParams, modelInferParams, essParams)) +{ +} + +ESS::~ESS() {} + +void ESS::execute(Tensor &output, const Tensor &inputLeft, + const Tensor &inputRight, cudaStream_t stream) +{ + m_pImpl->execute(output, inputLeft, inputRight, stream); +} + +void ESS::execute(Tensor &output, const Tensor &inputLeft, + const Tensor &inputRight, cudaStream_t stream) +{ + m_pImpl->execute(output, inputLeft, inputRight, stream); +} +}} // namespace cvcore::ess diff --git a/isaac_ros_ess/gxf/ess/3dv/src/ESSPostProcess.cpp b/isaac_ros_ess/gxf/ess/3dv/src/ESSPostProcess.cpp new file mode 100644 index 0000000..ed823a0 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/3dv/src/ESSPostProcess.cpp @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +#include +#include +#include +#include + +namespace cvcore { namespace ess { + +struct ESSPostProcessor::ESSPostProcessorImpl +{ + ESSPostProcessorImpl(const ModelInputParams &modelParams) + : m_maxBatchSize(modelParams.maxBatchSize) + , m_networkWidth(modelParams.inputLayerWidth) + , m_networkHeight(modelParams.inputLayerHeight) + { + m_scaledDisparityDevice = {m_networkWidth, m_networkHeight, m_maxBatchSize, false}; + m_outputDisparityDevice = {m_networkWidth, m_networkHeight, m_maxBatchSize, false}; + } + + void resizeBuffers(std::size_t width, std::size_t height) + { + if (m_outputDisparityDevice.getWidth() == width && m_outputDisparityDevice.getHeight() == height) + { + return; + } + m_outputDisparityDevice = {width, height, m_maxBatchSize, false}; + } + + void process(Tensor &outputDisparity, const Tensor &inputDisparity, + cudaStream_t stream) + { + if (inputDisparity.isCPU()) + { + throw std::invalid_argument("ESSPostProcessor : Input Tensor must be GPU Tensor."); + } + + if (inputDisparity.getWidth() != m_networkWidth || inputDisparity.getHeight() != m_networkHeight) + { + throw std::invalid_argument( + "ESSPostProcessor : Input Tensor dimension " + "does not match network input " + "requirement"); + } + + if (inputDisparity.getDepth() != outputDisparity.getDepth()) + { + throw std::invalid_argument("ESSPostProcessor : Input/Output Tensor batchsize mismatch."); + } + + const size_t batchSize = inputDisparity.getDepth(); + if (batchSize > m_maxBatchSize) + { + throw std::invalid_argument("ESSPostProcessor : Input batchsize exceeds Max Batch size."); + } + const size_t outputWidth = outputDisparity.getWidth(); + const size_t outputHeight = outputDisparity.getHeight(); + + // Disparity map values are scaled based on the outputWidth/networkInputWidth ratio + const float scale = static_cast(outputWidth) / m_networkWidth; + Tensor scaledDisparity(m_scaledDisparityDevice.getWidth(), m_scaledDisparityDevice.getHeight(), + batchSize, m_scaledDisparityDevice.getData(), false); + + tensor_ops::Normalize(scaledDisparity, inputDisparity, scale, 0, stream); + if (!outputDisparity.isCPU()) + { + tensor_ops::Resize(outputDisparity, m_scaledDisparityDevice, stream); + } + else + { + resizeBuffers(outputWidth, outputHeight); + Tensor outputDisparityDevice(m_outputDisparityDevice.getWidth(), + m_outputDisparityDevice.getHeight(), batchSize, + m_outputDisparityDevice.getData(), false); + tensor_ops::Resize(outputDisparityDevice, m_scaledDisparityDevice, stream); + cvcore::Copy(outputDisparity, outputDisparityDevice, stream); + CHECK_ERROR(cudaStreamSynchronize(stream)); + } + } + + size_t m_maxBatchSize; + size_t m_networkWidth, m_networkHeight; + Tensor m_scaledDisparityDevice; + Tensor m_outputDisparityDevice; +}; + +void ESSPostProcessor::execute(Tensor &outputDisparity, const Tensor &inputDisparity, + cudaStream_t stream) +{ +#ifdef NVBENCH_ENABLE + const std::string testName = "ESSPostProcessor_batch" + std::to_string(outputDisparity.getDepth()) + "_" + + std::to_string(outputDisparity.getWidth()) + "x" + + std::to_string(outputDisparity.getHeight()) + "_GPU"; + nv::bench::Timer timerFunc = nv::bench::GPU(testName.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + m_pImpl->process(outputDisparity, inputDisparity, stream); +} + +ESSPostProcessor::ESSPostProcessor(const ModelInputParams &modelInputParams) + : m_pImpl(new ESSPostProcessor::ESSPostProcessorImpl(modelInputParams)) +{ 
+} + +ESSPostProcessor::~ESSPostProcessor() {} + +}} // namespace cvcore::ess diff --git a/isaac_ros_ess/gxf/ess/3dv/src/ESSPreProcess.cpp b/isaac_ros_ess/gxf/ess/3dv/src/ESSPreProcess.cpp new file mode 100644 index 0000000..001c528 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/3dv/src/ESSPreProcess.cpp @@ -0,0 +1,190 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +#include +#include +#include +#include + +namespace cvcore { namespace ess { + +struct ESSPreProcessor::ESSPreProcessorImpl +{ + size_t m_maxBatchSize; + size_t m_outputWidth; + size_t m_outputHeight; + PreProcessType m_processType; + ImagePreProcessingParams m_preProcessorParams; + Tensor m_resizedDeviceLeftInput; + Tensor m_resizedDeviceRightInput; + Tensor m_normalizedDeviceLeftInput; + Tensor m_normalizedDeviceRightInput; + bool m_swapRB; + + ESSPreProcessorImpl(const ImagePreProcessingParams &imgParams, const ModelInputParams &modelParams, + const ESSPreProcessorParams &essParams) + : m_maxBatchSize(modelParams.maxBatchSize) + , m_outputHeight(modelParams.inputLayerHeight) + , m_outputWidth(modelParams.inputLayerWidth) + , m_processType(essParams.preProcessType) + , m_preProcessorParams(imgParams) + { + if (imgParams.imgType != BGR_U8 && imgParams.imgType != RGB_U8) + { + throw std::invalid_argument("ESSPreProcessor : Only image types RGB_U8/BGR_U8 are supported\n"); + } + m_resizedDeviceLeftInput = {modelParams.inputLayerWidth, modelParams.inputLayerHeight, modelParams.maxBatchSize, + false}; + m_resizedDeviceRightInput = {modelParams.inputLayerWidth, modelParams.inputLayerHeight, + modelParams.maxBatchSize, false}; + m_normalizedDeviceLeftInput = {modelParams.inputLayerWidth, modelParams.inputLayerHeight, + modelParams.maxBatchSize, false}; + m_normalizedDeviceRightInput = {modelParams.inputLayerWidth, modelParams.inputLayerHeight, + modelParams.maxBatchSize, false}; + m_swapRB = imgParams.imgType != modelParams.modelInputType; + } + + void process(Tensor &outputLeft, Tensor &outputRight, + const Tensor &inputLeft, const Tensor &inputRight, cudaStream_t stream) + { + if (inputLeft.isCPU() || inputRight.isCPU() || outputLeft.isCPU() || outputRight.isCPU()) + { + throw std::invalid_argument("ESSPreProcessor : Input/Output Tensor must be GPU Tensor."); + } + + if (outputLeft.getWidth() != m_outputWidth || outputLeft.getHeight() != m_outputHeight || + outputRight.getWidth() != m_outputWidth || outputRight.getHeight() != m_outputHeight) + { + throw std::invalid_argument( + "ESSPreProcessor : Output Tensor dimension does not match network input requirement"); + } + + if (inputLeft.getWidth() != inputRight.getWidth() || inputLeft.getHeight() != inputRight.getHeight()) + { + throw std::invalid_argument("ESSPreProcessor : Input tensor dimensions don't match"); + } + + if 
(outputLeft.getDepth() != inputLeft.getDepth() || outputRight.getDepth() != inputRight.getDepth() || + inputLeft.getDepth() != inputRight.getDepth()) + { + throw std::invalid_argument("ESSPreProcessor : Input/Output Tensor batchsize mismatch."); + } + + if (outputLeft.getDepth() > m_maxBatchSize) + { + throw std::invalid_argument("ESSPreProcessor : Input/Output batchsize exceeds max batch size."); + } + + const size_t batchSize = inputLeft.getDepth(); + const size_t inputWidth = inputLeft.getWidth(); + const size_t inputHeight = inputLeft.getHeight(); + + if (m_processType == PreProcessType::RESIZE) + { + tensor_ops::Resize(m_resizedDeviceLeftInput, inputLeft, stream); + tensor_ops::Resize(m_resizedDeviceRightInput, inputRight, stream); + } + else + { + const float centerX = inputWidth / 2; + const float centerY = inputHeight / 2; + const float offsetX = m_outputWidth / 2; + const float offsetY = m_outputHeight / 2; + BBox srcCrop, dstCrop; + dstCrop = {0, 0, static_cast(m_outputWidth - 1), static_cast(m_outputHeight - 1)}; + srcCrop.xmin = std::max(0, static_cast(centerX - offsetX)); + srcCrop.ymin = std::max(0, static_cast(centerY - offsetY)); + srcCrop.xmax = std::min(static_cast(m_outputWidth - 1), static_cast(centerX + offsetX)); + srcCrop.ymax = std::min(static_cast(m_outputHeight - 1), static_cast(centerY + offsetY)); + for (size_t i = 0; i < batchSize; i++) + { + Tensor inputLeftCrop( + inputWidth, inputHeight, + const_cast(inputLeft.getData()) + i * inputLeft.getStride(TensorDimension::DEPTH), + false); + Tensor outputLeftCrop( + m_outputWidth, m_outputHeight, + m_resizedDeviceLeftInput.getData() + i * m_resizedDeviceLeftInput.getStride(TensorDimension::DEPTH), + false); + Tensor inputRightCrop( + inputWidth, inputHeight, + const_cast(inputRight.getData()) + i * inputRight.getStride(TensorDimension::DEPTH), + false); + Tensor outputRightCrop(m_outputWidth, m_outputHeight, + m_resizedDeviceRightInput.getData() + + i * m_resizedDeviceRightInput.getStride(TensorDimension::DEPTH), + false); + tensor_ops::CropAndResize(outputLeftCrop, inputLeftCrop, dstCrop, srcCrop, + tensor_ops::InterpolationType::INTERP_LINEAR, stream); + tensor_ops::CropAndResize(outputRightCrop, inputRightCrop, dstCrop, srcCrop, + tensor_ops::InterpolationType::INTERP_LINEAR, stream); + } + } + + if (m_swapRB) + { + tensor_ops::ConvertColorFormat(m_resizedDeviceLeftInput, m_resizedDeviceLeftInput, tensor_ops::BGR2RGB, + stream); + tensor_ops::ConvertColorFormat(m_resizedDeviceRightInput, m_resizedDeviceRightInput, tensor_ops::BGR2RGB, + stream); + } + + float scale[3]; + for (size_t i = 0; i < 3; i++) + { + scale[i] = m_preProcessorParams.normalization[i] / m_preProcessorParams.stdDev[i]; + } + + tensor_ops::Normalize(m_normalizedDeviceLeftInput, m_resizedDeviceLeftInput, scale, + m_preProcessorParams.pixelMean, stream); + tensor_ops::Normalize(m_normalizedDeviceRightInput, m_resizedDeviceRightInput, scale, + m_preProcessorParams.pixelMean, stream); + tensor_ops::InterleavedToPlanar(outputLeft, m_normalizedDeviceLeftInput, stream); + tensor_ops::InterleavedToPlanar(outputRight, m_normalizedDeviceRightInput, stream); + } +}; + +void ESSPreProcessor::execute(Tensor &outputLeft, Tensor &outputRight, + const Tensor &inputLeft, const Tensor &inputRight, + cudaStream_t stream) +{ +#ifdef NVBENCH_ENABLE + const std::string testName = "ESSPreProcessor_batch" + std::to_string(inputLeft.getDepth()) + "_" + + std::to_string(inputLeft.getWidth()) + "x" + std::to_string(inputLeft.getHeight()) + + "_GPU"; + nv::bench::Timer 
timerFunc = nv::bench::GPU(testName.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + m_pImpl->process(outputLeft, outputRight, inputLeft, inputRight, stream); +} + +ESSPreProcessor::ESSPreProcessor(const ImagePreProcessingParams &preProcessorParams, + const ModelInputParams &modelInputParams, const ESSPreProcessorParams &essParams) + : m_pImpl(new ESSPreProcessor::ESSPreProcessorImpl(preProcessorParams, modelInputParams, essParams)) +{ +} + +ESSPreProcessor::~ESSPreProcessor() {} + +}} // namespace cvcore::ess diff --git a/isaac_ros_ess/gxf/ess/CMakeLists.txt b/isaac_ros_ess/gxf/ess/CMakeLists.txt new file mode 100644 index 0000000..da68934 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/CMakeLists.txt @@ -0,0 +1,129 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +project(gxf_cvcore_ess LANGUAGES C CXX) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-fPIC -w) +endif() + +# Dependencies +find_package(CUDAToolkit) +include(YamlCpp) +find_package(GXF ${ISAAC_ROS_GXF_VERSION} MODULE REQUIRED + COMPONENTS + core + cuda + multimedia + serialization +) +find_package(TENSORRT) + +# Create extension +add_library(gxf_cvcore_ess SHARED + ESS.cpp + extensions/ess/ESS.hpp + ESSRegistry.cpp + + extensions/tensor_ops/ImageAdapter.cpp + extensions/tensor_ops/ImageAdapter.hpp + extensions/tensor_ops/ImageUtils.cpp + extensions/tensor_ops/ImageUtils.hpp + + extensions/tensor_ops/detail/ImageAdapterTensorImpl.cpp + extensions/tensor_ops/detail/ImageAdapterTensorImpl.hpp + extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.cpp + extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.hpp +) + +set(CMAKE_INCLUDE_CURRENT_DIR TRUE) +target_include_directories(gxf_cvcore_ess PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/cvcore/include + ${CMAKE_CURRENT_SOURCE_DIR}/3dv/include) + +add_library(cvcore_ess STATIC + # Tensorops + cvcore/src/tensor_ops/ArithmeticOperations.cpp + cvcore/src/tensor_ops/BBoxUtils.cpp + cvcore/src/tensor_ops/ColorConversions.cpp + cvcore/src/tensor_ops/DBScan.cpp + cvcore/src/tensor_ops/Errors.cpp + cvcore/src/tensor_ops/Filters.cpp + cvcore/src/tensor_ops/Filters.h + cvcore/src/tensor_ops/FusedOperations.cpp + cvcore/src/tensor_ops/GeometryTransforms.cpp + cvcore/src/tensor_ops/IImageWarp.cpp + cvcore/src/tensor_ops/NppUtils.cpp + cvcore/src/tensor_ops/NppUtils.h + cvcore/src/tensor_ops/OneEuroFilter.cpp + cvcore/src/tensor_ops/TensorOperators.cpp + + # Core + cvcore/src/core/cvcore/Array.cpp + cvcore/src/core/cvcore/Dummy.cu + cvcore/src/core/cvcore/MathTypes.cpp + cvcore/src/core/cvcore/Tensor.cpp + cvcore/src/core/utility/CVError.cpp + cvcore/src/core/utility/Instrumentation.cpp + cvcore/src/core/utility/Memory.cpp + cvcore/src/core/utility/ProfileUtils.cpp + + # Inferencer (ESS only) + cvcore/src/inferencer/tensorrt/TensorRTInferencer.cpp + 
cvcore/src/inferencer/Inferencer.cpp + cvcore/src/inferencer/Errors.cpp + cvcore/src/inferencer/tensorrt/TensorRTUtils.h + cvcore/src/inferencer/tensorrt/TensorRTUtils.cpp + cvcore/src/inferencer/tensorrt/TensorRTInferencer.h + + # TRTBackend + cvcore/src/trtbackend/TRTBackend.cpp +) + +target_include_directories(cvcore_ess PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cvcore/include) +target_compile_options(cvcore_ess PUBLIC -fPIC) +target_link_libraries(cvcore_ess PUBLIC + CUDA::cudart + CUDA::nppc + CUDA::nppial + CUDA::nppicc + CUDA::nppidei + CUDA::nppif + CUDA::nppig + CUDA::nppisu + TENSORRT::nvinfer +) + +add_library(ess_3dv STATIC + 3dv/src/ESS.cpp + 3dv/src/ESSPostProcess.cpp + 3dv/src/ESSPreProcess.cpp +) +target_include_directories(ess_3dv PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/3dv/include) +target_compile_options(ess_3dv PUBLIC -fPIC) +target_link_libraries(ess_3dv PUBLIC + cvcore_ess +) +target_link_libraries(gxf_cvcore_ess + PUBLIC + GXF::cuda + GXF::multimedia + yaml-cpp + PRIVATE + cvcore_ess + ess_3dv +) diff --git a/isaac_ros_ess/gxf/ess/ESS.cpp b/isaac_ros_ess/gxf/ess/ESS.cpp new file mode 100644 index 0000000..593e34c --- /dev/null +++ b/isaac_ros_ess/gxf/ess/ESS.cpp @@ -0,0 +1,325 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "extensions/ess/ESS.hpp"
+
+#include
+#include "gxf/cuda/cuda_stream_id.hpp"
+#include "gxf/cuda/cuda_stream_pool.hpp"
+#include "gxf/multimedia/video.hpp"
+#include "gxf/std/tensor.hpp"
+#include "gxf/std/timestamp.hpp"
+
+namespace nvidia {
+namespace cvcore {
+
+namespace detail {
+
+// Binds a CUDA stream with the given cid into the downstream message
+gxf_result_t BindCudaStream(gxf::Entity& message, gxf_uid_t cid) {
+  if (cid == kNullUid) {
+    GXF_LOG_ERROR("stream_cid is null");
+    return GXF_FAILURE;
+  }
+  auto output_stream_id = message.add("stream");
+  if (!output_stream_id) {
+    GXF_LOG_ERROR("failed to add CudaStreamId.");
+    return GXF_FAILURE;
+  }
+  output_stream_id.value()->stream_cid = cid;
+  return GXF_SUCCESS;
+}
+
+// Records a new CUDA event on the given stream
+gxf_result_t RecordCudaEvent(gxf::Entity& message, gxf::Handle& stream) {
+  // Create a new event
+  cudaEvent_t cuda_event;
+  cudaEventCreateWithFlags(&cuda_event, 0);
+  gxf::CudaEvent event;
+  auto ret = event.initWithEvent(cuda_event, stream->dev_id(), [](auto) {});
+  if (!ret) {
+    GXF_LOG_ERROR("failed to init cuda event");
+    return GXF_FAILURE;
+  }
+  // Record the event
+  // A callback such as []() { GXF_LOG_DEBUG("tensorops event synced"); } can be supplied for debugging
+  ret = stream->record(event.event().value(),
+                       [event = cuda_event, entity = message.clone().value()](auto) { cudaEventDestroy(event); });
+  if (!ret) {
+    GXF_LOG_ERROR("record event failed");
+    return ret.error();
+  }
+  return GXF_SUCCESS;
+}
+
+}  // namespace detail
+
+gxf_result_t ESS::registerInterface(gxf::Registrar* registrar) {
+  gxf::Expected result;
+
+  result &= registrar->parameter(left_image_name_, "left_image_name", "The name of the left image to be received", "",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(right_image_name_, "right_image_name", "The name of the right image to be received",
+                                 "", gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(output_name_, "output_name", "The name of the tensor to be passed to the next node",
+                                 "");
+  result &= registrar->parameter(stream_pool_, "stream_pool", "The CUDA stream pool used to allocate CUDA streams",
+                                 "", gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(pool_, "pool", "Memory pool for allocating output data", "");
+  result &= registrar->parameter(left_image_receiver_, "left_image_receiver", "Receiver to get the left image", "");
+  result &= registrar->parameter(right_image_receiver_, "right_image_receiver", "Receiver to get the right image", "");
+  result &= registrar->parameter(output_transmitter_, "output_transmitter", "Transmitter to send the data", "");
+  result &= registrar->parameter(output_adapter_, "output_adapter", "Adapter to send output data", "");
+
+  result &= registrar->parameter(image_type_, "image_type", "Type of input image: BGR_U8 or RGB_U8", "");
+  result &= registrar->parameter(pixel_mean_, "pixel_mean", "The mean for each channel", "");
+  result &= registrar->parameter(normalization_, "normalization", "The normalization for each channel", "");
+  result &=
+      registrar->parameter(standard_deviation_, "standard_deviation", "The standard deviation for each channel", "");
+
+  result &= registrar->parameter(max_batch_size_, "max_batch_size", "The max batch size to run inference on", "");
+  result &= registrar->parameter(input_layer_width_, "input_layer_width", "The model input layer width", "");
+  result &= registrar->parameter(input_layer_height_, "input_layer_height", "The model input layer height", "");
+  result &= registrar->parameter(model_input_type_, "model_input_type", "The model input image: BGR_U8 or RGB_U8", "");
+
+  result &= registrar->parameter(engine_file_path_, "engine_file_path", "The path to the serialized TRT engine", "");
+  result &= registrar->parameter(input_layers_name_, "input_layers_name", "The names of the input layers", "");
+  result &= registrar->parameter(output_layers_name_, "output_layers_name", "The names of the output layers", "");
+
+  result &= registrar->parameter(preprocess_type_, "preprocess_type",
+                                 "The type of ESS preprocessing: RESIZE / CENTER_CROP", "");
+  result &= registrar->parameter(output_width_, "output_width", "The width of the output result", "",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(output_height_, "output_height", "The height of the output result", "",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(timestamp_policy_, "timestamp_policy",
+                                 "Input channel to get the timestamp: 0 (left) / 1 (right)", "", 0);
+
+  return gxf::ToResultCode(result);
+}
+
+gxf_result_t ESS::start() {
+  // Allocate a CUDA stream from the stream pool if one is provided
+  if (stream_pool_.try_get()) {
+    auto stream = stream_pool_.try_get().value()->allocateStream();
+    if (!stream) {
+      GXF_LOG_ERROR("allocating stream failed.");
+      return GXF_FAILURE;
+    }
+    cuda_stream_ = std::move(stream.value());
+    if (!cuda_stream_->stream()) {
+      GXF_LOG_ERROR("allocated stream is not initialized.");
+      return GXF_FAILURE;
+    }
+  }
+  // Set image pre-processing params for ESS
+  const auto& pixel_mean_vec = pixel_mean_.get();
+  const auto& normalization_vec = normalization_.get();
+  const auto& standard_deviation_vec = standard_deviation_.get();
+  if (pixel_mean_vec.size() != 3 || normalization_vec.size() != 3 || standard_deviation_vec.size() != 3) {
+    GXF_LOG_ERROR("Invalid preprocessing params.");
+    return GXF_FAILURE;
+  }
+
+  if (image_type_.get() == "BGR_U8") {
+    preProcessorParams.imgType = ::cvcore::BGR_U8;
+  } else if (image_type_.get() == "RGB_U8") {
+    preProcessorParams.imgType = ::cvcore::RGB_U8;
+  } else {
+    GXF_LOG_ERROR("Invalid input image type: only BGR_U8 and RGB_U8 are supported.");
+    return GXF_FAILURE;
+  }
+  std::copy(pixel_mean_vec.begin(), pixel_mean_vec.end(), preProcessorParams.pixelMean);
+  std::copy(normalization_vec.begin(), normalization_vec.end(), preProcessorParams.normalization);
+  std::copy(standard_deviation_vec.begin(), standard_deviation_vec.end(), preProcessorParams.stdDev);
+
+  // Set model input params for ESS
+  modelInputParams.maxBatchSize = max_batch_size_.get();
+  modelInputParams.inputLayerWidth = input_layer_width_.get();
+  modelInputParams.inputLayerHeight = input_layer_height_.get();
+  if (model_input_type_.get() == "BGR_U8") {
+    modelInputParams.modelInputType = ::cvcore::BGR_U8;
+  } else if (model_input_type_.get() == "RGB_U8") {
+    modelInputParams.modelInputType = ::cvcore::RGB_U8;
+  } else {
+    GXF_LOG_ERROR("Invalid model input type. 
BGR_U8 and RGB_U8 are only supported."); + return GXF_FAILURE; + } + + // Setting inference params for ESS + inferenceParams = {engine_file_path_.get(), input_layers_name_.get(), output_layers_name_.get()}; + + // Setting extra params for ESS + if (preprocess_type_.get() == "RESIZE") { + extraParams = {::cvcore::ess::PreProcessType::RESIZE}; + } else if (preprocess_type_.get() == "CENTER_CROP") { + extraParams = {::cvcore::ess::PreProcessType::CENTER_CROP}; + } else { + GXF_LOG_ERROR("Invalid preprocessing type."); + return GXF_FAILURE; + } + + // Setting ESS object with the provided params + objESS.reset(new ::cvcore::ess::ESS(preProcessorParams, modelInputParams, inferenceParams, extraParams)); + + return GXF_SUCCESS; +} + +gxf_result_t ESS::tick() { + // Get a CUDA stream for execution + cudaStream_t cuda_stream = 0; + if (!cuda_stream_.is_null()) { + cuda_stream = cuda_stream_->stream().value(); + } + // Receiving the data + auto inputLeftMessage = left_image_receiver_->receive(); + if (!inputLeftMessage) { + return GXF_FAILURE; + } + if (cuda_stream != 0) { + detail::RecordCudaEvent(inputLeftMessage.value(), cuda_stream_); + auto inputLeftStreamId = inputLeftMessage.value().get("stream"); + if (inputLeftStreamId) { + auto inputLeftStream = gxf::Handle::Create(inputLeftStreamId.value().context(), + inputLeftStreamId.value()->stream_cid); + // NOTE: This is an expensive call. It will halt the current CPU thread until all events + // previously associated with the stream are cleared + if (!inputLeftStream.value()->syncStream()) { + GXF_LOG_ERROR("sync left stream failed."); + return GXF_FAILURE; + } + } + } + + auto inputRightMessage = right_image_receiver_->receive(); + if (!inputRightMessage) { + return GXF_FAILURE; + } + if (cuda_stream != 0) { + detail::RecordCudaEvent(inputRightMessage.value(), cuda_stream_); + auto inputRightStreamId = inputRightMessage.value().get("stream"); + if (inputRightStreamId) { + auto inputRightStream = gxf::Handle::Create(inputRightStreamId.value().context(), + inputRightStreamId.value()->stream_cid); + // NOTE: This is an expensive call. It will halt the current CPU thread until all events + // previously associated with the stream are cleared + if (!inputRightStream.value()->syncStream()) { + GXF_LOG_ERROR("sync right stream failed."); + return GXF_FAILURE; + } + } + } + + auto maybeLeftName = left_image_name_.try_get(); + auto leftInputBuffer = inputLeftMessage.value().get(maybeLeftName ? maybeLeftName.value().c_str() : nullptr); + if (!leftInputBuffer) { + return GXF_FAILURE; + } + auto maybeRightName = right_image_name_.try_get(); + auto rightInputBuffer = inputRightMessage.value().get(maybeRightName ? maybeRightName.value().c_str() : nullptr); + if (!rightInputBuffer) { + return GXF_FAILURE; + } + if (leftInputBuffer.value()->storage_type() != gxf::MemoryStorageType::kDevice || + rightInputBuffer.value()->storage_type() != gxf::MemoryStorageType::kDevice) { + GXF_LOG_ERROR("input images must be on GPU."); + return GXF_FAILURE; + } + + const auto& leftBufferInfo = leftInputBuffer.value()->video_frame_info(); + const auto& rightBufferInfo = rightInputBuffer.value()->video_frame_info(); + if (leftBufferInfo.width != rightBufferInfo.width || leftBufferInfo.height != rightBufferInfo.height || + leftBufferInfo.color_format != rightBufferInfo.color_format) { + GXF_LOG_ERROR("left/right images mismatch."); + return GXF_FAILURE; + } + const size_t outputWidth = output_width_.try_get() ? 
output_width_.try_get().value() : leftBufferInfo.width; + const size_t outputHeight = output_height_.try_get() ? output_height_.try_get().value() : leftBufferInfo.height; + + // Creating GXF Tensor or VideoBuffer to hold the data to be transmitted + gxf::Expected outputMessage = gxf::Entity::New(context()); + if (!outputMessage) { + return outputMessage.error(); + } + auto error = output_adapter_.get()->AddImageToMessage<::cvcore::ImageType::Y_F32>( + outputMessage.value(), outputWidth, outputHeight, pool_.get(), false, output_name_.get().c_str()); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + auto outputImage = output_adapter_.get()->WrapImageFromMessage<::cvcore::ImageType::Y_F32>( + outputMessage.value(), output_name_.get().c_str()); + if (!outputImage) { + return GXF_FAILURE; + } + + // Creating CVCore Tensors to hold the input and output data + ::cvcore::Tensor<::cvcore::NHWC, ::cvcore::C1, ::cvcore::F32> outputImageDevice(outputWidth, outputHeight, 1, + outputImage.value().getData(), false); + + // Running the inference + auto inputColorFormat = leftBufferInfo.color_format; + if (inputColorFormat == gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB || + inputColorFormat == gxf::VideoFormat::GXF_VIDEO_FORMAT_BGR) { + ::cvcore::Tensor<::cvcore::NHWC, ::cvcore::C3, ::cvcore::U8> leftImageDevice( + leftBufferInfo.width, leftBufferInfo.height, 1, reinterpret_cast(leftInputBuffer.value()->pointer()), + false); + ::cvcore::Tensor<::cvcore::NHWC, ::cvcore::C3, ::cvcore::U8> rightImageDevice( + leftBufferInfo.width, leftBufferInfo.height, 1, reinterpret_cast(rightInputBuffer.value()->pointer()), + false); + objESS->execute(outputImageDevice, leftImageDevice, rightImageDevice, cuda_stream); + } else if (inputColorFormat == gxf::VideoFormat::GXF_VIDEO_FORMAT_R32_G32_B32 || + inputColorFormat == gxf::VideoFormat::GXF_VIDEO_FORMAT_B32_G32_R32) { + ::cvcore::Tensor<::cvcore::NCHW, ::cvcore::C3, ::cvcore::F32> leftImageDevice( + leftBufferInfo.width, leftBufferInfo.height, 1, reinterpret_cast(leftInputBuffer.value()->pointer()), + false); + ::cvcore::Tensor<::cvcore::NCHW, ::cvcore::C3, ::cvcore::F32> rightImageDevice( + leftBufferInfo.width, leftBufferInfo.height, 1, reinterpret_cast(rightInputBuffer.value()->pointer()), + false); + objESS->execute(outputImageDevice, leftImageDevice, rightImageDevice, cuda_stream); + } else { + GXF_LOG_ERROR("invalid input image type."); + return GXF_FAILURE; + } + + // Allocate a cuda event that can be used to record on each tick + if (!cuda_stream_.is_null()) { + detail::BindCudaStream(outputMessage.value(), cuda_stream_.cid()); + detail::RecordCudaEvent(outputMessage.value(), cuda_stream_); + } + + // Pass down timestamp if necessary + auto maybeDaqTimestamp = timestamp_policy_.get() == 0 ? 
inputLeftMessage.value().get() + : inputRightMessage.value().get(); + if (maybeDaqTimestamp) { + auto outputTimestamp = outputMessage.value().add(maybeDaqTimestamp.value().name()); + if (!outputTimestamp) { + return outputTimestamp.error(); + } + *outputTimestamp.value() = *maybeDaqTimestamp.value(); + } + + // Send the data + output_transmitter_->publish(outputMessage.value()); + return GXF_SUCCESS; +} + +gxf_result_t ESS::stop() { + objESS.reset(nullptr); + return GXF_SUCCESS; +} + +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/ESSRegistry.cpp b/isaac_ros_ess/gxf/ess/ESSRegistry.cpp new file mode 100644 index 0000000..d99b5b4 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/ESSRegistry.cpp @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "extensions/ess/ESS.hpp" +#include "gxf/std/extension_factory_helper.hpp" + +GXF_EXT_FACTORY_BEGIN() +GXF_EXT_FACTORY_SET_INFO(0xfa198e2ff99642fc, 0x8fda0d23c0251610, "NvCvESSExtension", "CVCORE ESS module", "Nvidia_Gxf", + "1.0.1", "LICENSE"); + +GXF_EXT_FACTORY_ADD(0x1aa1eea914344afe, 0x97fddaaedb594120, nvidia::cvcore::ESS, nvidia::gxf::Codelet, + "ESS GXF Extension"); +GXF_EXT_FACTORY_END() diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Array.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Array.h new file mode 100644 index 0000000..c9f23d8 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Array.h @@ -0,0 +1,386 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_ARRAY_H +#define CVCORE_ARRAY_H + +#include +#include +#include + +namespace cvcore { + +/** + * Base implementation of Array. + */ +class ArrayBase +{ +public: + /** + * Constructor of a non-owning arrays. + * @param capacity capacity of the array. + * @param elemSize byte size of each element. + * @param dataPtr data pointer to the raw source array. + * @param isCPU whether to allocate the array on CPU or GPU. + */ + ArrayBase(std::size_t capacity, std::size_t elemSize, void *dataPtr, bool isCPU); + + /** + * Constructor of a memory-owning arrays + * @param capacity capacity of the array. 
+ * @param elemSize byte size of each element. + * @param isCPU whether to allocate the array on CPU or GPU. + */ + ArrayBase(std::size_t capacity, std::size_t elemSize, bool isCPU); + + /** + * Destructor of ArrayBase. + */ + ~ArrayBase(); + + /** + * ArrayBase is non-copyable. + */ + ArrayBase(const ArrayBase &) = delete; + + /** + * ArrayBase is non-copyable. + */ + ArrayBase &operator=(const ArrayBase &) = delete; + + /** + * Move constructor of ArrayBase. + */ + ArrayBase(ArrayBase &&); + + /** + * Move assignment operator of ArrayBase. + */ + ArrayBase &operator=(ArrayBase &&); + + /** + * Get the pointer to specified index. + * @param idx element index. + * @return pointer to the specified element. + */ + void *getElement(int idx) const; + + /** + * Get the size of the array. + * @return size of the array. + */ + std::size_t getSize() const; + + /** + * Get the capacity of the array. + * @return size of the array. + */ + std::size_t getCapacity() const; + + /** + * Get the size of each element. + * @return size of each element. + */ + std::size_t getElementSize() const; + + /** + * Set the size of the array. + * @param size size of the array. + */ + void setSize(std::size_t size); + + /** + * Get the flag whether the array is CPU or GPU array. + * @return whether the array is CPU array. + */ + bool isCPU() const; + + /** + * Get the flag whether the array is owning memory space. + * @return whether the array owns memory. + */ + bool isOwning() const; + + /** + * Get the raw pointer to the array data. + * @return void pointer to the first element of the array. + */ + void *getData() const; + +private: + ArrayBase(); + + void *m_data; + std::size_t m_size; + std::size_t m_capacity; + std::size_t m_elemSize; + bool m_isOwning; + bool m_isCPU; +}; + +/** + * Implementation of Array class. + * @tparam T type of element in array. + */ +template +class Array : public ArrayBase +{ +public: + /** + * Default constructor of an array. + */ + Array() + : ArrayBase{0, sizeof(T), nullptr, true} + { + } + + /** + * Constructor of a non-owning array. + * @param size size of the array. + * @param capacity capacity of the array. + * @param dataPtr data pointer to the raw source array. + * @param isCPU whether to allocate array on CPU or GPU. + */ + Array(std::size_t size, std::size_t capacity, void *dataPtr, bool isCPU = true) + : ArrayBase{capacity, sizeof(T), dataPtr, isCPU} + { + ArrayBase::setSize(size); + } + + /** + * Constructor of a memory-owning array. + * @param capacity capacity of the array. + * @param isCPU whether to allocate array on CPU or GPU. + */ + Array(std::size_t capacity, bool isCPU = true) + : ArrayBase{capacity, sizeof(T), isCPU} + { + } + + /** + * Destructor of the Array. + */ + ~Array() + { + // call resize here such that CPU-based destructor + // will call destructors of the objects stored + // in the array before deallocating the storage + setSize(0); + } + + /** + * Array is non-copyable. + */ + Array(const Array &) = delete; + + /** + * Array is non-copyable. + */ + Array &operator=(const Array &) = delete; + + /** + * Move constructor of Array. + */ + Array(Array &&t) + : Array() + { + *this = std::move(t); + } + + /** + * Move assignment operator of Array. + */ + Array &operator=(Array &&t) + { + static_cast(*this) = std::move(t); + return *this; + } + + /** + * Set size of the Array. + * @param size size of the Array. 
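+ * On CPU-resident arrays this default-constructs new elements when growing
+ * and runs element destructors when shrinking; on GPU arrays only the size
+ * field is updated.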
+ */ + void setSize(std::size_t size) + { + const std::size_t oldSize = getSize(); + ArrayBase::setSize(size); + if (isCPU()) + { + // shrinking case + for (std::size_t i = size; i < oldSize; ++i) + { + reinterpret_cast(getElement(i))->~T(); + } + // expanding case + for (std::size_t i = oldSize; i < size; ++i) + { + new (getElement(i)) T; + } + } + } + + /** + * Const array index operator. + * @param idx index of element. + * @return const reference to the specified element. + */ + const T &operator[](int idx) const + { + assert(idx >= 0 && idx < getSize()); + return *reinterpret_cast(getElement(idx)); + } + + /** + * Array index operator. + * @param idx index of element. + * @return reference to the specified element. + */ + T &operator[](int idx) + { + assert(idx >= 0 && idx < getSize()); + return *reinterpret_cast(getElement(idx)); + } +}; + +/** + * Implementation of ArrayN class. + * @tparam T type of element in array. + * @tparam N capacity of array. + */ +template +class ArrayN : public ArrayBase +{ +public: + /** + * Default constructor of ArrayN (create an owning Tensor with capacity N). + */ + ArrayN() + : ArrayBase{N, sizeof(T), true} + { + setSize(N); + } + + /** + * Constructor of a non-owning ArrayN. + * @param size size of the array. + * @param dataPtr data pointer to the raw source array. + * @param isCPU whether to allocate array on CPU or GPU. + */ + ArrayN(std::size_t size, void *dataPtr, bool isCPU = true) + : ArrayBase{N, sizeof(T), dataPtr, isCPU} + { + ArrayBase::setSize(size); + } + + /** + * Constructor of a memory-owning ArrayN. + * @param isCPU whether to allocate array on CPU or GPU. + */ + ArrayN(bool isCPU) + : ArrayBase{N, sizeof(T), isCPU} + { + setSize(N); + } + + /** + * Destructor of the ArrayN. + */ + ~ArrayN() + { + // call resize here such that CPU-based destructor + // will call destructors of the objects stored + // in the array before deallocating the storage + setSize(0); + } + + /** + * ArrayN is non-copyable. + */ + ArrayN(const ArrayN &) = delete; + + /** + * ArrayN is non-copyable. + */ + ArrayN &operator=(const ArrayN &) = delete; + + /** + * Move constructor of ArrayN. + */ + ArrayN(ArrayN &&t) + : ArrayN() + { + *this = std::move(t); + } + + /** + * Move assignment operator of ArrayN. + */ + ArrayN &operator=(ArrayN &&t) + { + static_cast(*this) = std::move(t); + return *this; + } + + /** + * Set size of the ArrayN. + * @param size size of the ArrayN. + */ + void setSize(std::size_t size) + { + const std::size_t oldSize = getSize(); + ArrayBase::setSize(size); + if (isCPU()) + { + // shrinking case + for (std::size_t i = size; i < oldSize; ++i) + { + reinterpret_cast(getElement(i))->~T(); + } + // expanding case + for (std::size_t i = oldSize; i < size; ++i) + { + new (getElement(i)) T; + } + } + } + + /** + * Const ArrayN index operator. + * @param idx index of element. + * @return const reference to the specified element. + */ + const T &operator[](int idx) const + { + assert(idx >= 0 && idx < getSize()); + return *reinterpret_cast(getElement(idx)); + } + + /** + * ArrayN index operator. + * @param idx index of element. + * @return reference to the specified element. 
+ */ + T &operator[](int idx) + { + assert(idx >= 0 && idx < getSize()); + return *reinterpret_cast(getElement(idx)); + } +}; + +} // namespace cvcore + +#endif // CVCORE_ARRAY_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/BBox.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/BBox.h new file mode 100644 index 0000000..93100d3 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/BBox.h @@ -0,0 +1,142 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_BBOX_H +#define CVCORE_BBOX_H + +#include +#include +#include + +namespace cvcore { + +/** + * A struct. + * Structure used to store bounding box. + */ +struct BBox +{ + int xmin{0}; /**< minimum x coordinate. */ + int ymin{0}; /**< minimum y coordinate. */ + int xmax{0}; /**< maximum x coordinate. */ + int ymax{0}; /**< maximum y coordinate. */ + + /** + * Clamp a bounding box based on a restricting clamp box + * @param Clamping bounding box (xmin, ymin, xmax, ymax) + * @return Clamped bounding box + */ + BBox clamp(const BBox &clampBox) const + { + BBox outbox; + outbox.xmin = std::max(clampBox.xmin, xmin); + outbox.xmax = std::min(clampBox.xmax, xmax); + outbox.ymin = std::max(clampBox.ymin, ymin); + outbox.ymax = std::min(clampBox.ymax, ymax); + return outbox; + } + + /** + * @return Width of the bounding box + */ + size_t getWidth() const + { + return xmax - xmin; + } + + /** + * @return Height of the bounding box + */ + size_t getHeight() const + { + return ymax - ymin; + } + + /** + * Checks if the bounding box is valid. + */ + bool isValid() const + { + return (xmin < xmax) && (ymin < ymax) && (getWidth() > 0) && (getHeight() > 0); + } + + /** + * Returns the center of the bounding box + * @return X,Y coordinate tuple + */ + std::pair getCenter() const + { + int centerX = xmin + getWidth() / 2; + int centerY = ymin + getHeight() / 2; + return std::pair(centerX, centerY); + } + + /** + * Scales bounding box based along the width and height retaining the same center. + * @param Scale in X direction along the width + * @param Scale in Y direction along the height + * @return Scaled bounding box + */ + BBox scale(float scaleW, float scaleH) const + { + auto center = getCenter(); + float newW = getWidth() * scaleW; + float newH = getHeight() * scaleH; + BBox outbox; + outbox.xmin = center.first - newW / 2; + outbox.xmax = center.first + newW / 2; + outbox.ymin = center.second - newH / 2; + outbox.ymax = center.second + newH / 2; + + return outbox; + } + + /** + * Resizes bounding box to a square bounding box based on + * the longest edge and clamps the bounding box based on the limits provided. 
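+ * For example, squarifying the box (0, 0, 100, 50) against the clamp box
+ * (0, 0, 200, 200) scales it to (0, -25, 100, 75), clamps it to
+ * (0, 0, 100, 75), and finally trims both edges to the shorter side,
+ * yielding the square (0, 0, 75, 75).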
+ * @param clampBox Clamping bounding box (xmin, ymin, xmax, ymax)
+ * @return Square bounding box
+ */
+ BBox squarify(const BBox &clampBox) const
+ {
+ size_t w = getWidth();
+ size_t h = getHeight();
+
+ BBox clampedBox1 = clamp(clampBox);
+ if (!clampedBox1.isValid())
+ {
+ throw std::range_error("Invalid bounding box generated\n");
+ }
+ float scaleW = static_cast(std::max(w, h)) / w;
+ float scaleH = static_cast(std::max(w, h)) / h;
+ BBox scaledBBox = clampedBox1.scale(scaleW, scaleH);
+ BBox clampedBox2 = scaledBBox.clamp(clampBox);
+ if (!clampedBox2.isValid())
+ {
+ throw std::range_error("Invalid bounding box generated\n");
+ }
+ size_t newW = clampedBox2.getWidth();
+ size_t newH = clampedBox2.getHeight();
+ size_t minW = std::min(newH, newW);
+ clampedBox2.ymax = clampedBox2.ymin + minW;
+ clampedBox2.xmax = clampedBox2.xmin + minW;
+ return clampedBox2;
+ }
+};
+
+} // namespace cvcore
+#endif // CVCORE_BBOX_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CVError.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CVError.h
new file mode 100644
index 0000000..82c16c1
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CVError.h
@@ -0,0 +1,116 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_CVERROR_H
+#define CVCORE_CVERROR_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace cvcore {
+
+// CVCORE ERROR CODES
+// -----------------------------------------------------------------------------
+// Defining the CVCORE Error Codes on std::error_condition
+// std::error_condition creates a set of sub-system independent codes which may
+// be used to describe ANY downstream error in a broad sense. An std::error_code
+// is defined within the sub-system context (i.e. tensor_ops, trtbackend, ...)
+// which is mapped to the cvcore::ErrorCode.
+// As an example, cvcore::ErrorCode -1 may not ABSOLUTELY mean the same as
+// tensor_ops::FaultCode -1, but does mean the same as tensor_ops::FaultCode 4.
+// Thus, tensor_ops::FaultCode 4 needs to be mapped to cvcore::ErrorCode -1.
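+//
+// As a minimal illustrative sketch (hypothetical caller code, not part of
+// this header), the std specializations below let a sub-system error code be
+// compared directly against the cvcore condition it maps to:
+//
+//   std::error_code err = cvcore::make_error_code(cvcore::ErrorCode::INVALID_ARGUMENT);
+//   if (err == cvcore::ErrorCode::INVALID_ARGUMENT) {
+//       // matched through make_error_condition()
+//   }
+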
+enum class ErrorCode : std::int32_t +{ + SUCCESS = 0, + NOT_READY, + NOT_IMPLEMENTED, + INVALID_ARGUMENT, + INVALID_IMAGE_FORMAT, + INVALID_STORAGE_TYPE, + INVALID_ENGINE_TYPE, + INVALID_OPERATION, + DETECTED_NAN_IN_RESULT, + OUT_OF_MEMORY, + DEVICE_ERROR, + SYSTEM_ERROR, +}; + +} // namespace cvcore + +// WARNING: Extending base C++ namespace to cover cvcore error codes +namespace std { + +template<> +struct is_error_condition_enum : true_type +{ +}; + +template<> +struct is_error_code_enum : true_type +{ +}; + +} // namespace std + +namespace cvcore { + +std::error_condition make_error_condition(ErrorCode) noexcept; + +std::error_code make_error_code(ErrorCode) noexcept; + +// ----------------------------------------------------------------------------- + +inline void CheckCudaError(cudaError_t code, const char *file, const int line) +{ + if (code != cudaSuccess) + { + const char *errorMessage = cudaGetErrorString(code); + const std::string message = "CUDA error returned at " + std::string(file) + ":" + std::to_string(line) + + ", Error code: " + std::to_string(code) + " (" + std::string(errorMessage) + ")"; + throw std::runtime_error(message); + } +} + +inline void CheckErrorCode(std::error_code err, const char *file, const int line) +{ + const std::string message = "Error returned at " + std::string(file) + ":" + std::to_string(line) + + ", Error code: " + std::string(err.message()); + + if (err != cvcore::make_error_code(cvcore::ErrorCode::SUCCESS)) + { + throw std::runtime_error(message); + } +} + +} // namespace cvcore + +#define CHECK_ERROR(val) \ + { \ + cvcore::CheckCudaError((val), __FILE__, __LINE__); \ + } + +#define CHECK_ERROR_CODE(val) \ + { \ + cvcore::CheckErrorCode((val), __FILE__, __LINE__); \ + } + +#endif // CVCORE_CVERROR_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CameraModel.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CameraModel.h new file mode 100644 index 0000000..157acf4 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/CameraModel.h @@ -0,0 +1,292 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_CAMERAMODEL_H +#define CVCORE_CAMERAMODEL_H + +#include + +#include "cv/core/Array.h" +#include "cv/core/MathTypes.h" + +namespace cvcore { + +/** + * An enum. + * Enum type for Camera Distortion type. + */ +enum class CameraDistortionType : uint8_t +{ + UNKNOWN, /**< Unknown arbitrary distortion model. */ + NONE, /**< No distortion applied. */ + Polynomial, /**< Polynomial distortion model. */ + FisheyeEquidistant, /**< Equidistant Fisheye distortion model. */ + FisheyeEquisolid, /**< Equisolid Fisheye distortion model. */ + FisheyeOrthoGraphic, /**< Orthographic Fisheye distortion model. */ + FisheyeStereographic /**< Stereographic Fisheye distortion model. 
*/ +}; + +/** + * Struct type used to store Camera Distortion model type and coefficients. + */ +struct CameraDistortionModel +{ + CameraDistortionType type; /**< Camera distortion model type. */ + union /**< Camera distortion model coefficients. */ + { + float coefficients[8]; + struct + { + float k1, k2, k3, k4, k5, k6; + float p1, p2; + }; + }; + + CameraDistortionModel() + : type(CameraDistortionType::UNKNOWN), + k1(0.0), k2(0.0), k3(0.0), k4(0.0), k5(0.0), k6(0.0), + p1(0.0), p2(0.0) {} + + /** + * Camera Distortion Model creation using array of coefficients. + * @param distortionType Camera distortion model type + * @param distortionCoefficients An array of camera distortion model coefficients + * @return Camera Distortion Model + */ + CameraDistortionModel(CameraDistortionType distortionType, std::array & distortionCoefficients) + : type(distortionType) + { + std::copy(distortionCoefficients.begin(), distortionCoefficients.end(), std::begin(coefficients)); + } + + /** + * Camera Distortion Model creation using individual coefficients. + * @param distortionType Camera distortion model type + * @param k1 Camera distortion model coefficient - k1 + * @param k2 Camera distortion model coefficient - k2 + * @param k3 Camera distortion model coefficient - k3 + * @param k4 Camera distortion model coefficient - k4 + * @param k5 Camera distortion model coefficient - k5 + * @param k6 Camera distortion model coefficient - k6 + * @param p1 Camera distortion model coefficient - p1 + * @param p2 Camera distortion model coefficient - p2 + * @return Camera Distortion Model + */ + CameraDistortionModel(CameraDistortionType distortionType, float k1, float k2, float k3, \ + float k4, float k5, float k6, float p1, float p2) + : type(distortionType) + , k1(k1) + , k2(k2) + , k3(k3) + , k4(k4) + , k5(k5) + , k6(k6) + , p1(p1) + , p2(p2) + { + + } + + /** + * Get camera distortion model type. + * @return Camera distortion model type + */ + CameraDistortionType getDistortionType() const + { + return type; + } + + /** + * Get camera distortion model coefficients. + * @return Camera distortion model coefficients array + */ + const float * getCoefficients() const + { + return &coefficients[0]; + } + + inline bool operator==(const CameraDistortionModel & other) const noexcept + { + return this->k1 == other.k1 && + this->k2 == other.k2 && + this->k3 == other.k3 && + this->k4 == other.k4 && + this->k5 == other.k5 && + this->k6 == other.k6 && + this->p1 == other.p1 && + this->p2 == other.p2; + } + + inline bool operator!=(const CameraDistortionModel & other) const noexcept + { + return !(*this == other); + } +}; + +/** + * Struct type used to store Camera Intrinsics. + */ +struct CameraIntrinsics +{ + CameraIntrinsics() = default; + + /** + * Camera Instrinsics creation with given intrinsics values + * @param fx Camera axis x focal length in pixels + * @param fy Camera axis y focal length in pixels + * @param cx Camera axis x principal point in pixels + * @param cy Camera axis y principal point in pixels + * @param s Camera slanted pixel + * @return Camera Intrinsics + */ + CameraIntrinsics(float fx, float fy, float cx, float cy, float s = 0.0) + { + m_intrinsics[0][0] = fx; + m_intrinsics[0][1] = s; + m_intrinsics[0][2] = cx; + m_intrinsics[1][0] = 0.0; + m_intrinsics[1][1] = fy; + m_intrinsics[1][2] = cy; + } + + /** + * Get camera intrinsics x focal length. + * @return Camera x focal length + */ + float fx() const + { + return m_intrinsics[0][0]; + } + + /** + * Get camera intrinsics y focal length. 
+ * @return Camera y focal length
+ */
+ float fy() const
+ {
+ return m_intrinsics[1][1];
+ }
+
+ /**
+ * Get camera intrinsics x principal point.
+ * @return Camera x principal point
+ */
+ float cx() const
+ {
+ return m_intrinsics[0][2];
+ }
+
+ /**
+ * Get camera intrinsics y principal point.
+ * @return Camera y principal point
+ */
+ float cy() const
+ {
+ return m_intrinsics[1][2];
+ }
+
+ /**
+ * Get camera intrinsics slanted pixels.
+ * @return Camera slanted pixels
+ */
+ float skew() const
+ {
+ return m_intrinsics[0][1];
+ }
+
+ /**
+ * Get camera intrinsics 2D array.
+ * @return Camera intrinsics array
+ */
+ const float * getMatrix23() const
+ {
+ return &m_intrinsics[0][0];
+ }
+
+ inline bool operator==(const CameraIntrinsics & other) const noexcept
+ {
+ return m_intrinsics[0][0] == other.m_intrinsics[0][0] &&
+ m_intrinsics[0][1] == other.m_intrinsics[0][1] &&
+ m_intrinsics[0][2] == other.m_intrinsics[0][2] &&
+ m_intrinsics[1][0] == other.m_intrinsics[1][0] &&
+ m_intrinsics[1][1] == other.m_intrinsics[1][1] &&
+ m_intrinsics[1][2] == other.m_intrinsics[1][2];
+ }
+
+ inline bool operator!=(const CameraIntrinsics & other) const noexcept
+ {
+ return !(*this == other);
+ }
+
+ float m_intrinsics[2][3] {{1.0, 0.0, 0.0},{0.0, 1.0, 0.0}}; /**< Camera intrinsics 2D array. */
+};
+
+/**
+ * Struct type used to store Camera Extrinsics.
+ */
+struct CameraExtrinsics
+{
+ using RawMatrixType = float[3][4];
+
+ CameraExtrinsics() = default;
+
+ /**
+ * Camera Extrinsics creation with given extrinsics as raw 2D [3 x 4] array
+ * @param extrinsics Camera extrinsics as raw 2D array
+ * @return Camera Extrinsics
+ */
+ explicit CameraExtrinsics(const RawMatrixType & extrinsics)
+ {
+ std::copy(&extrinsics[0][0], &extrinsics[0][0] + 3 * 4, &m_extrinsics[0][0]);
+ }
+
+ inline bool operator==(const CameraExtrinsics & other) const noexcept
+ {
+ return m_extrinsics[0][0] == other.m_extrinsics[0][0] &&
+ m_extrinsics[0][1] == other.m_extrinsics[0][1] &&
+ m_extrinsics[0][2] == other.m_extrinsics[0][2] &&
+ m_extrinsics[0][3] == other.m_extrinsics[0][3] &&
+ m_extrinsics[1][0] == other.m_extrinsics[1][0] &&
+ m_extrinsics[1][1] == other.m_extrinsics[1][1] &&
+ m_extrinsics[1][2] == other.m_extrinsics[1][2] &&
+ m_extrinsics[1][3] == other.m_extrinsics[1][3] &&
+ m_extrinsics[2][0] == other.m_extrinsics[2][0] &&
+ m_extrinsics[2][1] == other.m_extrinsics[2][1] &&
+ m_extrinsics[2][2] == other.m_extrinsics[2][2] &&
+ m_extrinsics[2][3] == other.m_extrinsics[2][3];
+ }
+
+ inline bool operator!=(const CameraExtrinsics & other) const noexcept
+ {
+ return !(*this == other);
+ }
+
+ RawMatrixType m_extrinsics {{1.0, 0.0, 0.0, 0.0},
+ {0.0, 1.0, 0.0, 0.0},
+ {0.0, 0.0, 1.0, 0.0}};
+};
+
+struct CameraModel
+{
+ CameraIntrinsics intrinsic;
+ CameraExtrinsics extrinsic;
+ CameraDistortionModel distortion;
+};
+
+} // namespace cvcore
+
+#endif // CVCORE_CAMERAMODEL_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ComputeEngine.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ComputeEngine.h
new file mode 100644
index 0000000..65fe7ca
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ComputeEngine.h
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_COMPUTEENGINE_H +#define CVCORE_COMPUTEENGINE_H + +#include + +namespace cvcore { + +enum class ComputeEngine : unsigned int +{ + UNKNOWN = 0x00, // 0000_0000 + + CPU = 0x01, // 0000_0001 + PVA = 0x02, // 0000_0010 + VIC = 0x04, // 0000_0100 + NVENC = 0x08, // 0000_1000 + GPU = 0x10, // 0001_0000 + DLA = 0x20, // 0010_0000 + DLA_CORE_0 = 0x40, // 0100_0000 + DLA_CORE_1 = 0x80, // 1000_0000 + + COMPUTE_FAULT = 0xFF // 1111_1111 +}; + +} // namespace cvcore + +#endif // CVCORE_COMPUTEENGINE_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Core.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Core.h new file mode 100644 index 0000000..42732d9 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Core.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CORE_H +#define CORE_H + +namespace cvcore { + +// Enable dll imports/exports in case of windows support +#ifdef _WIN32 +#ifdef CVCORE_EXPORT_SYMBOLS // Needs to be enabled in case of compiling dll +#define CVCORE_API __declspec(dllexport) // Exports symbols when compiling the library. +#else +#define CVCORE_API __declspec(dllimport) // Imports the symbols when linked with library. +#endif +#else +#define CVCORE_API +#endif + +} // namespace cvcore +#endif // CORE_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Image.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Image.h new file mode 100644 index 0000000..263a699 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Image.h @@ -0,0 +1,893 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_IMAGE_H +#define CVCORE_IMAGE_H + +#include +#include +#include +#include + +#include "Memory.h" +#include "Tensor.h" + +namespace cvcore { + +/** + * An enum. + * Enum type for image type. + */ +enum ImageType +{ + Y_U8, /**< 8-bit unsigned gray. */ + Y_U16, /**< 16-bit unsigned gray. */ + Y_S8, /**< 8-bit signed gray. */ + Y_S16, /**< 16-bit signed gray. */ + Y_F16, /**< half normalized gray. */ + Y_F32, /**< float normalized gray. */ + RGB_U8, /**< 8-bit RGB. */ + RGB_U16, /**< 16-bit RGB. */ + RGB_F16, /**< half RGB. */ + RGB_F32, /**< float RGB. */ + BGR_U8, /**< 8-bit BGR. */ + BGR_U16, /**< 16-bit BGR. */ + BGR_F16, /**< half BGR. */ + BGR_F32, /**< float BGR. */ + RGBA_U8, /**< 8-bit RGBA. */ + RGBA_U16, /**< 16-bit RGBA. */ + RGBA_F16, /**< half RGBA. */ + RGBA_F32, /**< float RGBA. */ + PLANAR_RGB_U8, /**< 8-bit planar RGB. */ + PLANAR_RGB_U16, /**< 16-bit planar RGB. */ + PLANAR_RGB_F16, /**< half planar RGB. */ + PLANAR_RGB_F32, /**< float planar RGB. */ + PLANAR_BGR_U8, /**< 8-bit planar BGR. */ + PLANAR_BGR_U16, /**< 16-bit planar BGR. */ + PLANAR_BGR_F16, /**< half planar BGR. */ + PLANAR_BGR_F32, /**< float planar BGR. */ + PLANAR_RGBA_U8, /**< 8-bit planar RGBA. */ + PLANAR_RGBA_U16, /**< 16-bit planar RGBA. */ + PLANAR_RGBA_F16, /**< half planar RGBA. */ + PLANAR_RGBA_F32, /**< float planar RGBA. */ + NV12, /**< 8-bit planar Y + interleaved and subsampled (1/4 Y samples) UV. */ + NV24, /**< 8-bit planar Y + interleaved UV. */ +}; + +/** + * Struct type for image preprocessing params + */ +struct ImagePreProcessingParams +{ + ImageType imgType; /**< Input Image Type. */ + float pixelMean[3]; /**< Image Mean value offset for R,G,B channels. Default is 0.0f */ + float normalization[3]; /**< Scale or normalization values for R,G,B channels. Default is 1.0/255.0f */ + float stdDev[3]; /**< Standard deviation values for R,G,B channels. Default is 1.0f */ +}; + +template +struct IsCompositeImage : std::integral_constant +{ +}; + +template +struct IsPlanarImage + : std::integral_constant +{ +}; + +template +struct IsInterleavedImage : std::integral_constant::value && !IsPlanarImage::value> +{ +}; + +/** + * Image traits that map ImageType to TensorLayout, ChannelCount and ChannelType. 
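+ * Each specialization below pins down the layout (TL), channel count (CC) and
+ * channel type (CT) for one ImageType: the unbatched variants use HWC/CHW
+ * layouts and the batched variants use NHWC/NCHW. For example, the RGB_U8
+ * specializations map to a C3/U8 tensor in HWC (or NHWC when batched) layout.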
+/**
+ * Image traits that map ImageType to TensorLayout, ChannelCount and ChannelType.
+ */
+template<ImageType T, size_t N>
+struct ImageTraits;
+
+template<>
+struct ImageTraits<Y_U8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<Y_U8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<Y_U16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<Y_U16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<Y_S8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::S8;
+};
+
+template<>
+struct ImageTraits<Y_S8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::S8;
+};
+
+template<>
+struct ImageTraits<Y_S16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::S16;
+};
+
+template<>
+struct ImageTraits<Y_S16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::S16;
+};
+
+template<>
+struct ImageTraits<Y_F32, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<Y_F32, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C1;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<RGB_U8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<RGB_U8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<RGB_U16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<RGB_U16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<RGB_F32, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<RGB_F32, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<BGR_U8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<BGR_U8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<BGR_U16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<BGR_U16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<BGR_F32, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::HWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<BGR_F32, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NHWC;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_U8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_U8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_U16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_U16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_F32, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<PLANAR_RGB_F32, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_U8, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_U8, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U8;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_U16, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_U16, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::U16;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_F32, 3>
+{
+    static constexpr TensorLayout TL = TensorLayout::CHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+template<>
+struct ImageTraits<PLANAR_BGR_F32, 4>
+{
+    static constexpr TensorLayout TL = TensorLayout::NCHW;
+    static constexpr ChannelCount CC = ChannelCount::C3;
+    static constexpr ChannelType CT = ChannelType::F32;
+};
+
+/**
+ * Get the bytes of each element for a specific
ImageType. + */ +inline size_t GetImageElementSize(const ImageType type) +{ + size_t imageElementSize; + + switch (type) + { + case ImageType::Y_U8: + case ImageType::Y_S8: + case ImageType::RGB_U8: + case ImageType::BGR_U8: + case ImageType::RGBA_U8: + case ImageType::PLANAR_RGB_U8: + case ImageType::PLANAR_BGR_U8: + case ImageType::PLANAR_RGBA_U8: + { + imageElementSize = 1; + break; + } + case ImageType::Y_U16: + case ImageType::Y_S16: + case ImageType::RGB_U16: + case ImageType::BGR_U16: + case ImageType::RGBA_U16: + case ImageType::PLANAR_RGB_U16: + case ImageType::PLANAR_BGR_U16: + case ImageType::PLANAR_RGBA_U16: + case ImageType::Y_F16: + case ImageType::RGB_F16: + case ImageType::BGR_F16: + case ImageType::RGBA_F16: + case ImageType::PLANAR_RGB_F16: + case ImageType::PLANAR_BGR_F16: + case ImageType::PLANAR_RGBA_F16: + { + imageElementSize = 2; + break; + } + case ImageType::Y_F32: + case ImageType::RGB_F32: + case ImageType::BGR_F32: + case ImageType::RGBA_F32: + case ImageType::PLANAR_RGB_F32: + case ImageType::PLANAR_BGR_F32: + case ImageType::PLANAR_RGBA_F32: + { + imageElementSize = 4; + break; + } + default: + { + imageElementSize = 0; + } + } + + return imageElementSize; +} + +/** + * Get the number of channels for a specific ImageType. + */ +inline size_t GetImageChannelCount(const ImageType type) +{ + size_t imageChannelCount; + + switch (type) + { + case ImageType::Y_U8: + case ImageType::Y_U16: + case ImageType::Y_S8: + case ImageType::Y_S16: + case ImageType::Y_F16: + case ImageType::Y_F32: + { + imageChannelCount = 1; + break; + } + case ImageType::RGB_U8: + case ImageType::RGB_U16: + case ImageType::RGB_F16: + case ImageType::RGB_F32: + case ImageType::BGR_U8: + case ImageType::BGR_U16: + case ImageType::BGR_F16: + case ImageType::BGR_F32: + case ImageType::PLANAR_RGB_U8: + case ImageType::PLANAR_RGB_U16: + case ImageType::PLANAR_RGB_F16: + case ImageType::PLANAR_RGB_F32: + case ImageType::PLANAR_BGR_U8: + case ImageType::PLANAR_BGR_U16: + case ImageType::PLANAR_BGR_F16: + case ImageType::PLANAR_BGR_F32: + { + imageChannelCount = 3; + break; + } + case ImageType::RGBA_U8: + case ImageType::RGBA_U16: + case ImageType::RGBA_F16: + case ImageType::RGBA_F32: + case ImageType::PLANAR_RGBA_U8: + case ImageType::PLANAR_RGBA_U16: + case ImageType::PLANAR_RGBA_F16: + case ImageType::PLANAR_RGBA_F32: + { + imageChannelCount = 4; + break; + } + default: + { + imageChannelCount = 0; + } + } + + return imageChannelCount; +}; + +template +class Image +{ +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + 
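The two lookup functions above compose naturally: for interleaved formats, the packed (unpadded) row size is width times channel count times element size. A small sketch, with GetPackedRowBytes being an illustrative addition (planar formats lay out rows per plane, so it deliberately covers interleaved types only):

#include <cstddef>

#include "cv/core/Image.h"

// Packed (unpadded) bytes per row for an interleaved image type.
inline std::size_t GetPackedRowBytes(cvcore::ImageType type, std::size_t width)
{
    return width * cvcore::GetImageChannelCount(type) * cvcore::GetImageElementSize(type);
}

// GetPackedRowBytes(cvcore::ImageType::RGB_U8, 1920) == 1920 * 3 * 1 == 5760.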
+template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image : public Tensor +{ + using Tensor::Tensor; +}; + +template<> +class Image +{ +public: + Image(std::size_t width, std::size_t height, bool isCPU = true) + : m_data(std::make_tuple(Y(width, height, isCPU), UV(width / 2, height / 2, isCPU))) + { + assert(width % 2 == 0 && height % 2 == 0); + } + + Image(std::size_t width, std::size_t height, std::uint8_t *dataPtrLuma, std::uint8_t *dataPtrChroma, + bool isCPU = true) + : m_data(std::make_tuple(Y(width, height, dataPtrLuma, isCPU), UV(width / 2, height / 2, dataPtrChroma, isCPU))) + { + assert(width % 2 == 0 && height % 2 == 0); + } + + Image(std::size_t width, std::size_t height, std::size_t rowPitchLuma, std::size_t rowPitchChroma, + std::uint8_t *dataPtrLuma, std::uint8_t *dataPtrChroma, bool isCPU = true) + : m_data(std::make_tuple(Y(width, height, rowPitchLuma, dataPtrLuma, isCPU), + UV(width / 2, height / 2, rowPitchChroma, dataPtrChroma, isCPU))) + { + assert(width % 2 == 0 && height % 2 == 0); + } + + std::size_t getLumaWidth() const + { + return std::get<0>(m_data).getWidth(); + } + + std::size_t getLumaHeight() const + { + return std::get<0>(m_data).getHeight(); + } + + std::size_t getChromaWidth() const + { + return std::get<1>(m_data).getWidth(); + } + + std::size_t getChromaHeight() const + { + return std::get<1>(m_data).getHeight(); + } + + std::size_t getLumaStride(TensorDimension dim) const + { + return std::get<0>(m_data).getStride(dim); + } + + std::size_t getChromaStride(TensorDimension dim) const + { + return std::get<1>(m_data).getStride(dim); + } + + std::uint8_t *getLumaData() + { + return std::get<0>(m_data).getData(); + } + + std::uint8_t *getChromaData() + { + return std::get<1>(m_data).getData(); + } + + const std::uint8_t *getLumaData() const + { + return std::get<0>(m_data).getData(); + } + + std::size_t getLumaDataSize() const + { + return std::get<0>(m_data).getDataSize(); + } + + const std::uint8_t *getChromaData() const + { + return std::get<1>(m_data).getData(); + } + + std::size_t getChromaDataSize() const + { + return std::get<1>(m_data).getDataSize(); + } + + bool isCPU() const + { + return std::get<0>(m_data).isCPU(); + } + + friend void Copy(Image &dst, const Image &src, cudaStream_t stream); + +private: + using Y = Tensor; + using UV = Tensor; + + std::tuple m_data; +}; + +template<> +class Image +{ +public: + Image(std::size_t width, std::size_t height, bool isCPU = true) + : 
m_data(std::make_tuple(Y(width, height, isCPU), UV(width, height, isCPU))) + { + } + + Image(std::size_t width, std::size_t height, std::uint8_t *dataPtrLuma, std::uint8_t *dataPtrChroma, + bool isCPU = true) + : m_data(std::make_tuple(Y(width, height, dataPtrLuma, isCPU), UV(width, height, dataPtrChroma, isCPU))) + { + } + + Image(std::size_t width, std::size_t height, std::size_t rowPitchLuma, std::size_t rowPitchChroma, + std::uint8_t *dataPtrLuma, std::uint8_t *dataPtrChroma, bool isCPU = true) + : m_data(std::make_tuple(Y(width, height, rowPitchLuma, dataPtrLuma, isCPU), + UV(width, height, rowPitchChroma, dataPtrChroma, isCPU))) + { + } + + std::size_t getLumaWidth() const + { + return std::get<0>(m_data).getWidth(); + } + + std::size_t getLumaHeight() const + { + return std::get<0>(m_data).getHeight(); + } + + std::size_t getChromaWidth() const + { + return std::get<1>(m_data).getWidth(); + } + + std::size_t getChromaHeight() const + { + return std::get<1>(m_data).getHeight(); + } + + std::size_t getLumaStride(TensorDimension dim) const + { + return std::get<0>(m_data).getStride(dim); + } + + std::size_t getChromaStride(TensorDimension dim) const + { + return std::get<1>(m_data).getStride(dim); + } + + std::uint8_t *getLumaData() + { + return std::get<0>(m_data).getData(); + } + + const std::uint8_t *getLumaData() const + { + return std::get<0>(m_data).getData(); + } + + std::size_t getLumaDataSize() const + { + return std::get<0>(m_data).getDataSize(); + } + + std::uint8_t *getChromaData() + { + return std::get<1>(m_data).getData(); + } + + const std::uint8_t *getChromaData() const + { + return std::get<1>(m_data).getData(); + } + + std::size_t getChromaDataSize() const + { + return std::get<1>(m_data).getDataSize(); + } + + bool isCPU() const + { + return std::get<0>(m_data).isCPU(); + } + + friend void Copy(Image &dst, const Image &src, cudaStream_t stream); + +private: + using Y = Tensor; + using UV = Tensor; + + std::tuple m_data; +}; + +void inline Copy(Image &dst, const Image &src, cudaStream_t stream = 0) +{ + Copy(std::get<0>(dst.m_data), std::get<0>(src.m_data), stream); + Copy(std::get<1>(dst.m_data), std::get<1>(src.m_data), stream); +} + +void inline Copy(Image &dst, const Image &src, cudaStream_t stream = 0) +{ + Copy(std::get<0>(dst.m_data), std::get<0>(src.m_data), stream); + Copy(std::get<1>(dst.m_data), std::get<1>(src.m_data), stream); +} + +} // namespace cvcore + +#endif // CVCORE_IMAGE_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Instrumentation.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Instrumentation.h new file mode 100644 index 0000000..6324b8d --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Instrumentation.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
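To make the composite-image API concrete, here is a minimal sketch of mirroring an NV12 frame from GPU to CPU with the Copy overload above; the constructor asserts even width and height, and the chroma plane it allocates is width/2 by height/2 with two interleaved channels:

#include <cuda_runtime.h>

#include "cv/core/Image.h"

void MirrorNv12ToHost()
{
    // 1920x1080 luma plane plus a 960x540 two-channel UV plane.
    cvcore::Image<cvcore::NV12> device(1920, 1080, /*isCPU=*/false);
    cvcore::Image<cvcore::NV12> host(1920, 1080, /*isCPU=*/true);

    // Copies both planes; uses the default stream here.
    Copy(host, device);
    cudaStreamSynchronize(0);
}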
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_INSTRUMENTATION_H +#define CVCORE_INSTRUMENTATION_H + +#include + +namespace cvcore { namespace profiler { + +/** + * A enum class used to find out the type of profiler output required + */ +enum class ProfilerJsonOutputType : uint32_t +{ + JSON_OFF, /**< print the aggregate values of each timer in pretty print format */ + JSON_AGGREGATE, /**< print the aggregate values of each timer in JSON format + along with the pretty print format. Pretty print format + gets printed on the terminal */ + JSON_SEPARATE /**< print all the elapsed times for all timers along with the + aggregate values from JSON_AGGREGATE option */ +}; + +/** +* Flush call to print the timer values in a file input +* @param jsonHelperType used to find out the type of profiler output required +* @return filename used to write the timer values +*/ +void flush(const std::string& filename, ProfilerJsonOutputType jsonHelperType); + +/** +* Flush call to print the timer values in a output stream +* @param jsonHelperType used to find out the type of profiler output required +* @return output stream used to write the timer values +*/ +void flush(std::ostream& output, ProfilerJsonOutputType jsonHelperType); + +/** +* Flush call to print the timer values on the terminal +* @param jsonHelperType used to find out the type of profiler output required +*/ +void flush(ProfilerJsonOutputType jsonHelperType); + + +/** +* Clear all the profile timers +*/ +void clear(); + +}} +#endif diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/MathTypes.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/MathTypes.h new file mode 100644 index 0000000..fd9db1a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/MathTypes.h @@ -0,0 +1,234 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_Math_H +#define CVCORE_Math_H + +#include +#include +#include +#include + +#include "Tensor.h" + +namespace cvcore { + +/** Matrix types using Tensor backend */ +using Matrixf = Tensor; +using Matrixd = Tensor; + +/** + * A struct. + * Structure used to store Vector2 Values. + */ +template +struct Vector2 +{ + T x; /**< point x coordinate. */ + T y; /**< point y coordinate. */ + + inline T &operator[](size_t i) + { + if (i == 0) + { + return x; + } + else if (i == 1) + { + return y; + } + else + { + throw std::out_of_range("cvcore::Vector2 ==> Requested index is out of bounds"); + } + } +}; + +/** + * A struct. + * Structure used to store Vector3 Values. + */ +template +struct Vector3 +{ + T x; /**< point x coordinate. */ + T y; /**< point y coordinate. */ + T z; /**< point z coordinate. 
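A short usage sketch for the profiler interface above, assuming timers have been recorded elsewhere during the run; the output filename is a placeholder:

#include "cv/core/Instrumentation.h"

// At shutdown: write aggregate timer statistics to a JSON file, then reset.
void DumpProfile()
{
    cvcore::profiler::flush("cvcore_profile.json",
                            cvcore::profiler::ProfilerJsonOutputType::JSON_AGGREGATE);
    cvcore::profiler::clear();
}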
*/ + + inline T &operator[](size_t i) + { + if (i == 0) + { + return x; + } + else if (i == 1) + { + return y; + } + else if (i == 2) + { + return z; + } + else + { + throw std::out_of_range("cvcore::Vector3 ==> Requested index is out of bounds"); + } + } +}; + +using Vector2i = Vector2; +using Vector3i = Vector3; + +using Vector2f = Vector2; +using Vector3f = Vector3; + +using Vector2d = Vector2; +using Vector3d = Vector3; + +/** + * A struct + * Structure used to store AxisAngle Rotation parameters. + */ +struct AxisAngleRotation +{ + double angle; /** Counterclockwise rotation angle [0, 2PI]. */ + Vector3d axis; /** 3d axis of rotation. */ + + AxisAngleRotation() + : angle(0.0) + , axis{0, 0, 0} + { + } + + AxisAngleRotation(double angleinput, Vector3d axisinput) + : angle(angleinput) + , axis(axisinput) + { + } +}; + +/** + * A struct. + * Structure used to store quaternion rotation representation. + * A rotation of unit vector u with rotation theta can be represented in quaternion as: + * q={cos(theta/2)+ i(u*sin(theta/2))} +*/ +struct Quaternion +{ + double qx, qy, qz; /** Axis or imaginary component of the quaternion representation. */ + double qw; /** Angle or real component of the quaternion representation. */ + + Quaternion() + : qx(0.0) + , qy(0.0) + , qz(0.0) + , qw(0.0) + { + } + + Quaternion(double qxinput, double qyinput, double qzinput, double qwinput) + : qx(qxinput) + , qy(qyinput) + , qz(qzinput) + , qw(qwinput) + { + } +}; + +/** + * Convert rotation matrix to rotation vector. + * @param rotMatrix Rotation matrix of 3x3 values. + * @return 3D Rotation vector {theta * xaxis, theta * yaxis, theta * zaxis} + * where theta is the angle of rotation in radians + */ +Vector3d RotationMatrixToRotationVector(const std::vector &rotMatrix); + +/** + * Convert rotation matrix to axis angle representation. + * @param rotMatrix Rotation matrix of 3x3 values. + * @return Axis angle rotation + */ +AxisAngleRotation RotationMatrixToAxisAngleRotation(const std::vector &rotMatrix); + +/** + * Convert axis angle representation to rotation matrix. + * @param axisangle Axis angle rotation. + * @return Rotation matrix of 3x3 values. + */ +std::vector AxisAngleToRotationMatrix(const AxisAngleRotation &axisangle); + +/** + * Convert axis angle representation to 3d rotation vector. + * Rotation vector is {theta * xaxis, theta * yaxis, theta * zaxis} + * where theta is the angle of rotation in radians. + * @param axisangle Axis angle rotation. + * @return 3D Rotation Vector + */ +Vector3d AxisAngleRotationToRotationVector(const AxisAngleRotation &axisangle); + +/** + * Convert rotation vector to axis angle representation. + * @param rotVector 3D rotation vector. + * @return Axis angle rotation. + */ +AxisAngleRotation RotationVectorToAxisAngleRotation(const Vector3d &rotVector); + +/** + * Convert axis angle representation to quaternion. + * @param axisangle Axis angle representation. + * @return Quaternion rotation. + */ +Quaternion AxisAngleRotationToQuaternion(const AxisAngleRotation &axisangle); + +/** + * Convert quaternion rotation to axis angle rotation. + * @param qrotation Quaternion rotation representation. + * @return Axis angle rotation. + */ +AxisAngleRotation QuaternionToAxisAngleRotation(const Quaternion &qrotation); + +/** + * Convert quaternion rotation to rotation matrix. + * @param qrotation Quaternion rotation representation. + * @return Rotation matrix. + */ +std::vector QuaternionToRotationMatrix(const Quaternion &qrotation); + +/** + * Convert rotation matrix to Quaternion. 
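The quaternion convention documented above is q = {u*sin(theta/2), cos(theta/2)} for a unit axis u. As a worked sketch under that convention, hand-computing what AxisAngleRotationToQuaternion is declared to return:

#include <cmath>

#include "cv/core/MathTypes.h"

// 90-degree counterclockwise rotation about +Z.
cvcore::Quaternion QuarterTurnAboutZ()
{
    const double theta = M_PI / 2.0;
    const cvcore::Vector3d axis{0.0, 0.0, 1.0};
    return cvcore::Quaternion(axis.x * std::sin(theta / 2.0),
                              axis.y * std::sin(theta / 2.0),
                              axis.z * std::sin(theta / 2.0),
                              std::cos(theta / 2.0));
    // ~ (0, 0, 0.7071, 0.7071)
}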
+ * @param rotMatrix Rotation matrix + * @return Quaternion rotation. + */ +Quaternion RotationMatrixToQuaternion(const std::vector &rotMatrix); + +/** + * A struct. + * Structure used to store Pose3D parameters. + */ +template +struct Pose3 +{ + AxisAngleRotation rotation; /**Rotation expressed in axis angle notation.*/ + Vector3 translation; /*Translation expressed as x,y,z coordinates.*/ +}; + +using Pose3d = Pose3; +using Pose3f = Pose3; + +} // namespace cvcore + +#endif // CVCORE_Math_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Memory.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Memory.h new file mode 100644 index 0000000..7d3d113 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Memory.h @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_MEMORY_H +#define CVCORE_MEMORY_H + +#include + +#include "Tensor.h" + +namespace cvcore { + +/** + * Implementation of tensor copy. + * @param dst destination TensorBase. + * @param src source TensorBase. + * @param stream cuda stream. + */ +void TensorBaseCopy(TensorBase &dst, const TensorBase &src, cudaStream_t stream = 0); + +/** + * Implementation of tensor copy for 2D pitch linear tensors. + * @param dst destination TensorBase. + * @param src source TensorBase. + * @param dstPitch pitch of destination Tensor in bytes. + * @param srcPitch pitch of source Tensor in bytes. + * @param widthInBytes width in bytes. + * @param height height of tensor. + * @param stream cuda stream. + */ +void TensorBaseCopy2D(TensorBase &dst, const TensorBase &src, int dstPitch, int srcPitch, int widthInBytes, int height, + cudaStream_t stream = 0); + +/** + * Memory copy function between two non HWC/CHW/NHWC/NCHW Tensors. + * @tparam TL TensorLayout type. + * @tparam CC Channel Count. + * @tparam CT ChannelType. + * @param dst destination Tensor. + * @param src source Tensor which copy from. + * @param stream cuda stream. + */ +template::type * = nullptr> +void Copy(Tensor &dst, const Tensor &src, cudaStream_t stream = 0) +{ + TensorBaseCopy(dst, src, stream); +} + +/** + * Memory copy function between two HWC Tensors. + * @tparam TL TensorLayout type. + * @tparam CC Channel Count. + * @tparam CT ChannelType. + * @param dst destination Tensor. + * @param src source Tensor which copy from. + * @param stream cuda stream. + */ +template::type * = nullptr> +void Copy(Tensor &dst, const Tensor &src, cudaStream_t stream = 0) +{ + TensorBaseCopy2D(dst, src, dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + dst.getWidth() * dst.getChannelCount() * GetChannelSize(CT), src.getHeight(), stream); +} + +/** + * Memory copy function between two NHWC Tensors. + * @tparam TL TensorLayout type. + * @tparam CC Channel Count. + * @tparam CT ChannelType. 
+ * @param dst destination Tensor. + * @param src source Tensor which copy from. + * @param stream cuda stream. + */ +template::type * = nullptr> +void Copy(Tensor &dst, const Tensor &src, cudaStream_t stream = 0) +{ + TensorBaseCopy2D(dst, src, dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + dst.getWidth() * dst.getChannelCount() * GetChannelSize(CT), src.getDepth() * src.getHeight(), + stream); +} + +/** + * Memory copy function between two CHW Tensors. + * @tparam TL TensorLayout type. + * @tparam CC Channel Count. + * @tparam CT ChannelType. + * @param dst destination Tensor. + * @param src source Tensor which copy from. + * @param stream cuda stream. + */ +template::type * = nullptr> +void Copy(Tensor &dst, const Tensor &src, cudaStream_t stream = 0) +{ + TensorBaseCopy2D(dst, src, dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), dst.getWidth() * GetChannelSize(CT), + src.getChannelCount() * src.getHeight(), stream); +} + +/** + * Memory copy function between two NCHW Tensors. + * @tparam TL TensorLayout type. + * @tparam CC Channel Count. + * @tparam CT ChannelType. + * @param dst destination Tensor. + * @param src source Tensor which copy from. + * @param stream cuda stream. + */ +template::type * = nullptr> +void Copy(Tensor &dst, const Tensor &src, cudaStream_t stream = 0) +{ + TensorBaseCopy2D(dst, src, dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), dst.getWidth() * GetChannelSize(CT), + src.getDepth() * src.getChannelCount() * src.getHeight(), stream); +} + +} // namespace cvcore + +#endif // CVCORE_MEMORY_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Model.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Model.h new file mode 100644 index 0000000..4a14945 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Model.h @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_MODEL_H +#define CVCORE_MODEL_H + +#include + +#include "Image.h" + +namespace cvcore { + +/** + * Struct to describe input type required by the model + */ +struct ModelInputParams +{ + size_t maxBatchSize; /**< maxbatchSize supported by network*/ + size_t inputLayerWidth; /**< Input layer width */ + size_t inputLayerHeight; /**< Input layer Height */ + ImageType modelInputType; /**< Input Layout type */ +}; + +/** + * Struct to describe the model + */ +struct ModelInferenceParams +{ + std::string engineFilePath; /**< Engine file path. */ + std::vector inputLayers; /**< names of input layers. */ + std::vector outputLayers; /**< names of output layers. 
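A brief sketch of the HWC overload above used for a device-to-host transfer; because it lowers to TensorBaseCopy2D, the two tensors may carry different row pitches (for example, a padded source and a packed destination):

#include "cv/core/Memory.h"
#include "cv/core/Tensor.h"

void DownloadFrame()
{
    // 640x480 interleaved RGB, U8: device source, host destination.
    cvcore::Tensor<cvcore::HWC, cvcore::C3, cvcore::U8> src(640, 480, /*isCPU=*/false);
    cvcore::Tensor<cvcore::HWC, cvcore::C3, cvcore::U8> dst(640, 480, /*isCPU=*/true);
    cvcore::Copy(dst, src);  // pitched 2D copy on the default stream
}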
*/ +}; + +} // namespace cvcore + +#endif // CVCORE_MODEL_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ProfileUtils.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ProfileUtils.h new file mode 100644 index 0000000..0a4e9a5 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/ProfileUtils.h @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_PROFILE_UTILS_H +#define CVCORE_PROFILE_UTILS_H + +#include + +namespace cvcore { + +/** + * Export one profiling item to specified json file. + * @param outputPath output json file path. + * @param taskName item name showing in the output json file. + * @param tMin minimum running time in milliseconds. + * @param tMax maximum running time in milliseconds. + * @param tAvg average running time in milliseconds. + * @param isCPU whether CPU or GPU time. + * @param iterations number of iterations. + */ +void ExportToJson(const std::string outputPath, const std::string taskName, float tMin, float tMax, float tAvg, + bool isCPU, int iterations = 100); + +} // namespace cvcore + +#endif // CVCORE_PROFILE_UTILS_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Tensor.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Tensor.h new file mode 100644 index 0000000..b06e531 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Tensor.h @@ -0,0 +1,1189 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TENSOR_H +#define CVCORE_TENSOR_H + +#include +#include +#include +#include + +namespace cvcore { + +// there is no CUDA dependency at the data level, so we map half type to uint16_t for now +using half = std::uint16_t; + +/** + * An enum. + * Enum type for tensor layout type. + */ +enum TensorLayout +{ + LC, /**< length, channel (channel interleaved). */ + CL, /**< channel, length (channel planar). */ + HWC, /**< height, width, channel (channel interleaved). */ + CHW, /**< channel, height, width (channel planar). */ + DHWC, /**< depth, height, width, channel (channel interleaved). */ + NHWC = DHWC, /**< alias for DHWC. */ + DCHW, /**< depth, channel, height, width (channel planar). 
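As an illustrative sketch only, populating the two descriptor structs above for a hypothetical stereo network; the engine path, layer names, and dimensions are placeholders rather than values the ESS extension actually uses:

#include "cv/core/Model.h"

cvcore::ModelInputParams MakeInputParams()
{
    return cvcore::ModelInputParams{/*maxBatchSize=*/1,
                                    /*inputLayerWidth=*/960,
                                    /*inputLayerHeight=*/576,
                                    cvcore::ImageType::RGB_U8};
}

cvcore::ModelInferenceParams MakeInferenceParams()
{
    return cvcore::ModelInferenceParams{
        "model.engine",                 // engine file path (placeholder)
        {"input_left", "input_right"},  // input layer names (placeholders)
        {"output_disparity"}};          // output layer name (placeholder)
}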
*/ + NCHW = DCHW, /**< alias for DCHW. */ + CDHW, /**< channel, depth, height, width (channel planar). */ +}; + +/** + * An enum. + * Enum type for tensor channel count. + */ +enum ChannelCount +{ + C1, /**< 1 channels. */ + C2, /**< 2 channels. */ + C3, /**< 3 channels. */ + C4, /**< 4 channels. */ + CX, /**< varying number of channels. */ +}; + +/** + * An enum. + * Enum type for channel type. + */ +enum ChannelType +{ + U8, /**< uint8_t. */ + U16, /**< uint16_t. */ + S8, /**< int8_t. */ + S16, /**< int16_t. */ + F16, /**< cvcore::half. */ + F32, /**< float. */ + F64, /**< double. */ +}; + +/** + * An enum. + * Enum type for dimension type. + */ +enum class TensorDimension +{ + LENGTH, /**< length dimension. */ + HEIGHT, /**< height dimension. */ + WIDTH, /**< width dimension. */ + CHANNEL, /**< channel dimension. */ + DEPTH, /**< depth dimension. */ +}; + +/** + * Function to get name of a TensorLayout value as string. + * @param TL the TensorLayout value. + * @return string name of TL. + */ +std::string GetTensorLayoutAsString(TensorLayout TL); + +/** + * Function to get name of a ChannelCount value as string. + * @param CC the ChannelCount value. + * @return string name of CC. + */ +std::string GetChannelCountAsString(ChannelCount CC); + +/** + * Function to get name of a ChannelType value as string. + * @param CT the ChannelType value. + * @return string name of CT. + */ +std::string GetChannelTypeAsString(ChannelType CT); + +/** + * Function to get name of a Memory type used. + * @param bool isCPU of Tensor as input + * @return string name of Memory type. + */ +std::string GetMemoryTypeAsString(bool isCPU); + +/** + * Function to get element size (in bytes) of a ChannelType. + * @param CT the ChannelType value. + * @return size in bytes. + */ +std::size_t GetChannelSize(ChannelType CT); + +/** + * Implementation of TensorBase class. + */ +class TensorBase +{ +public: + /** + * Struct for storing dimension data. + */ + struct DimData + { + std::size_t size; /**< size of each dimension. */ + std::size_t stride; /**< stride of each dimension. */ + }; + + /** + * Constructor of a non-owning tensor. + * @param type ChannelType of the Tensor. + * @param dimData pointer to the DimData array. + * @param dimCount number of dimensions. + * @param dataPtr raw pointer to the source data array. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + TensorBase(ChannelType type, const DimData *dimData, int dimCount, void *dataPtr, bool isCPU); + + /** + * Constructor of a non-owning tensor. + * @param type ChannelType of the Tensor. + * @param dimData initializer_list of DimData. + * @param dataPtr raw pointer to the source data array. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + TensorBase(ChannelType type, std::initializer_list dimData, void *dataPtr, bool isCPU); + + /** + * Constructor of a memory-owning tensor. + * @param type ChannelType of the Tensor. + * @param dimData pointer to the DimData array. + * @param dimCount number of dimensions. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + TensorBase(ChannelType type, const DimData *dimData, int dimCount, bool isCPU); + + /** + * Constructor of a memory-owning tensor. + * @param type ChannelType of the Tensor. + * @param dimData initializer_list of DimData. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + TensorBase(ChannelType type, std::initializer_list dimData, bool isCPU); + + /** + * Destructor of Tensor. + */ + ~TensorBase(); + + /** + * TensorBase is non-copyable. 
+ */ + TensorBase(const TensorBase &) = delete; + + /** + * TensorBase is non-copyable. + */ + TensorBase &operator=(const TensorBase &) = delete; + + /** + * Move Constructor of TensorBase. + */ + TensorBase(TensorBase &&); + + /** + * Move operator of TensorBase. + */ + TensorBase &operator=(TensorBase &&); + + /** + * Get the dimension count of TensorBase. + * @return number of dimensions. + */ + int getDimCount() const; + + /** + * Get the size of given dimension. + * @param dimIdx dimension index. + * @return size of the specified dimension. + */ + std::size_t getSize(int dimIdx) const; + + /** + * Get the stride of given dimension. + * @param dimIdx dimension index. + * @return stride of the specified dimension. + */ + std::size_t getStride(int dimIdx) const; + + /** + * Get the ChannelType of the Tensor. + * @return ChannelType of the Tensor. + */ + ChannelType getType() const; + + /** + * Get the raw data pointer to the Tensor. + * @return void data pointer to the Tensor. + */ + void *getData() const; + + /** + * Get the total size of the Tensor in bytes. + * @return total bytes of the Tensor. + */ + std::size_t getDataSize() const; + + /** + * Get the flag whether the Tensor is allocated in CPU or GPU. + * @return whether the Tensor is allocated in CPU. + */ + bool isCPU() const; + + /** + * Get the flag whether the Tensor owns the data. + * @return whether the Tensor owns data in memory. + */ + bool isOwning() const; + +protected: + TensorBase(); + +private: + static constexpr int kMinDimCount = 2; + static constexpr int kMaxDimCount = 4; + + void *m_data; + + int m_dimCount; + DimData m_dimData[kMaxDimCount]; + + ChannelType m_type; + bool m_isOwning; + bool m_isCPU; +}; + +namespace detail { + +template +struct DimToIndex2D +{ + static_assert(TL == LC || TL == CL, "unsupported variant!"); + static constexpr int kLength = TL == LC ? 0 : 1; + static constexpr int kChannel = TL == LC ? 1 : 0; +}; + +template +struct DimToIndex3D +{ + static_assert(TL == HWC || TL == CHW, "unsupported variant!"); + static constexpr int kWidth = TL == HWC ? 1 : 2; + static constexpr int kHeight = TL == HWC ? 0 : 1; + static constexpr int kChannel = TL == HWC ? 2 : 0; +}; + +template +struct DimToIndex4D +{ + static_assert(TL == DHWC || TL == DCHW || TL == CDHW, "unsupported variant!"); + static constexpr int kWidth = TL == DHWC ? 2 : (TL == DCHW ? 3 : 3); + static constexpr int kHeight = TL == DHWC ? 1 : (TL == DCHW ? 2 : 2); + static constexpr int kDepth = TL == DHWC ? 0 : (TL == DCHW ? 0 : 1); + static constexpr int kChannel = TL == DHWC ? 3 : (TL == DCHW ? 
1 : 0); +}; + +template +struct LayoutToIndex +{ +}; + +template +struct LayoutToIndex::type> : public DimToIndex2D +{ + static constexpr int kDimCount = 2; +}; + +template +struct LayoutToIndex::type> : public DimToIndex3D +{ + static constexpr int kDimCount = 3; +}; + +template +struct LayoutToIndex::type> + : public DimToIndex4D +{ + static constexpr int kDimCount = 4; +}; + +template +struct ChannelTypeToNative +{ +}; + +template<> +struct ChannelTypeToNative +{ + using Type = std::uint8_t; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = std::uint16_t; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = std::int8_t; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = std::int16_t; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = float; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = cvcore::half; +}; + +template<> +struct ChannelTypeToNative +{ + using Type = double; +}; + +template +constexpr std::size_t ChannelToCount() +{ + switch (CC) + { + case C1: + return 1; + case C2: + return 2; + case C3: + return 3; + case C4: + return 4; + } + return 0; // this is safe as this function will never be called for dynamic channel counts +} + +/** + * Implementation of 2D tensors. + * @tparam TL tensor layout type. + * @tparam CT channel type. + */ +template +class Tensor2D : public TensorBase +{ + using DataType = typename ChannelTypeToNative::Type; + +public: + /** + * Default Constructor. + */ + Tensor2D() = default; + + /** + * Constructor of a memory-owning 2D tensor. + * @param dimData initializer_list of DimData. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor2D(std::initializer_list dimData, bool isCPU) + : TensorBase(CT, dimData, isCPU) + { + } + + /** + * Constructor of a non-owning 2D tensor. + * @param dimData initializer_list of DimData. + * @param dataPtr raw pointer to the source data array. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor2D(std::initializer_list dimData, DataType *dataPtr, bool isCPU) + : TensorBase(CT, dimData, dataPtr, isCPU) + { + } + + /** + * Get the length of the 2D tensor. + * @return length of the 2D tensor. + */ + std::size_t getLength() const + { + return getSize(DimToIndex2D::kLength); + } + + /** + * Get the channel count of the 2D tensor. + * @return channel count of the 2D tensor. + */ + std::size_t getChannelCount() const + { + return getSize(DimToIndex2D::kChannel); + } + + /** + * Expose base getStride() function. + */ + using TensorBase::getStride; + + /** + * Get the stride of the 2D tensor. + * @param dim tensor dimension. + * @return tensor stride of the given dimension. + */ + std::size_t getStride(TensorDimension dim) const + { + switch (dim) + { + case TensorDimension::LENGTH: + return getStride(DimToIndex2D::kLength); + case TensorDimension::CHANNEL: + return getStride(DimToIndex2D::kChannel); + default: + throw std::out_of_range("cvcore::Tensor2D::getStride ==> Requested TensorDimension is out of bounds"); + } + } + + /** + * Get the raw data pointer to the 2D tensor. + * @return data pointer to the 2D tensor. + */ + DataType *getData() + { + return reinterpret_cast(TensorBase::getData()); + } + + /** + * Get the const raw data pointer to the 2D tensor. + * @return const data pointer to the 2D tensor. + */ + const DataType *getData() const + { + return reinterpret_cast(TensorBase::getData()); + } +}; + +/** + * Implementation of 3D tensors. + * @tparam TL tensor layout type. + * @tparam CT channel type. 
+ */ +template +class Tensor3D : public TensorBase +{ + using DataType = typename ChannelTypeToNative::Type; + +public: + /** + * Default Constructor. + */ + Tensor3D() = default; + + /** + * Constructor of a memory-owning 3D tensor. + * @param dimData initializer_list of DimData. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor3D(std::initializer_list dimData, bool isCPU) + : TensorBase(CT, dimData, isCPU) + { + } + + /** + * Constructor of a non-owning 3D tensor. + * @param dimData initializer_list of DimData. + * @param dataPtr raw pointer to the source data array. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor3D(std::initializer_list dimData, DataType *dataPtr, bool isCPU) + : TensorBase(CT, dimData, dataPtr, isCPU) + { + } + + /** + * Get the width of the 3D tensor. + * @return width of the 3D tensor. + */ + std::size_t getWidth() const + { + return getSize(DimToIndex3D::kWidth); + } + + /** + * Get the height of the 3D tensor. + * @return height of the 3D tensor. + */ + std::size_t getHeight() const + { + return getSize(DimToIndex3D::kHeight); + } + + /** + * Get the channel count of the 3D tensor. + * @return channel count of the 3D tensor. + */ + std::size_t getChannelCount() const + { + return getSize(DimToIndex3D::kChannel); + } + + /** + * Expose base getStride() function. + */ + using TensorBase::getStride; + + /** + * Get the stride of the 3D tensor. + * @param dim tensor dimension. + * @return tensor stride of the given dimension. + */ + std::size_t getStride(TensorDimension dim) const + { + switch (dim) + { + case TensorDimension::HEIGHT: + return getStride(DimToIndex3D::kHeight); + case TensorDimension::WIDTH: + return getStride(DimToIndex3D::kWidth); + case TensorDimension::CHANNEL: + return getStride(DimToIndex3D::kChannel); + default: + throw std::out_of_range("cvcore::Tensor3D::getStride ==> Requested TensorDimension is out of bounds"); + } + } + + /** + * Get the raw data pointer to the 3D tensor. + * @return data pointer to the 3D tensor. + */ + DataType *getData() + { + return reinterpret_cast(TensorBase::getData()); + } + + /** + * Get the const raw data pointer to the 3D tensor. + * @return const data pointer to the 3D tensor. + */ + const DataType *getData() const + { + return reinterpret_cast(TensorBase::getData()); + } +}; + +/** + * Implementation of 4D tensors. + * @tparam TL tensor layout type. + * @tparam CT channel type. + */ +template +class Tensor4D : public TensorBase +{ + using DataType = typename ChannelTypeToNative::Type; + +public: + /** + * Default Constructor. + */ + Tensor4D() = default; + + /** + * Constructor of a memory-owning 4D tensor. + * @param dimData initializer_list of DimData. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor4D(std::initializer_list dimData, bool isCPU) + : TensorBase(CT, dimData, isCPU) + { + } + + /** + * Constructor of a non-owning 4D tensor. + * @param dimData initializer_list of DimData. + * @param dataPtr raw pointer to the source data array. + * @param isCPU whether to allocate tensor on CPU or GPU. + */ + Tensor4D(std::initializer_list dimData, DataType *dataPtr, bool isCPU) + : TensorBase(CT, dimData, dataPtr, isCPU) + { + } + + /** + * Get the width of the 4D tensor. + * @return width of the 4D tensor. + */ + std::size_t getWidth() const + { + return getSize(DimToIndex4D::kWidth); + } + + /** + * Get the height of the 4D tensor. + * @return height of the 4D tensor. 
+ */ + std::size_t getHeight() const + { + return getSize(DimToIndex4D::kHeight); + } + + /** + * Get the depth of the 4D tensor. + * @return depth of the 4D tensor. + */ + std::size_t getDepth() const + { + return getSize(DimToIndex4D::kDepth); + } + + /** + * Get the channel count of the 4D tensor. + * @return channel count of the 4D tensor. + */ + std::size_t getChannelCount() const + { + return getSize(DimToIndex4D::kChannel); + } + + /** + * Expose base getStride() function. + */ + using TensorBase::getStride; + + /** + * Get the stride of the 4D tensor. + * @param dim tensor dimension. + * @return tensor stride of the given dimension. + */ + std::size_t getStride(TensorDimension dim) const + { + switch (dim) + { + case TensorDimension::HEIGHT: + return getStride(DimToIndex4D::kHeight); + case TensorDimension::WIDTH: + return getStride(DimToIndex4D::kWidth); + case TensorDimension::CHANNEL: + return getStride(DimToIndex4D::kChannel); + case TensorDimension::DEPTH: + return getStride(DimToIndex4D::kDepth); + default: + throw std::out_of_range("cvcore::Tensor4D::getStride ==> Requested TensorDimension is out of bounds"); + } + } + + /** + * Get the raw data pointer to the 4D tensor. + * @return data pointer to the 4D tensor. + */ + DataType *getData() + { + return reinterpret_cast(TensorBase::getData()); + } + + /** + * Get the const raw data pointer to the 4D tensor. + * @return const data pointer to the 4D tensor. + */ + const DataType *getData() const + { + return reinterpret_cast(TensorBase::getData()); + } +}; + +} // namespace detail + +template +class Tensor; + +// 2D Tensors + +/** + * 2D LC tensors. + * @tparam CC channel count. + * @tparam CT channel type. + */ +template +class Tensor : public detail::Tensor2D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::type> + Tensor(std::size_t length, bool isCPU = true) + : detail::Tensor2D({{length, detail::ChannelToCount()}, {detail::ChannelToCount(), 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t length, std::size_t channelCount, bool isCPU = true) + : detail::Tensor2D({{length, channelCount}, {channelCount, 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t length, DataType *dataPtr, bool isCPU = true) + : detail::Tensor2D({{length, detail::ChannelToCount()}, {detail::ChannelToCount(), 1}}, dataPtr, + isCPU) + { + } + + template::type> + Tensor(std::size_t length, std::size_t channelCount, DataType *dataPtr, bool isCPU = true) + : detail::Tensor2D({{length, channelCount}, {channelCount, 1}}, dataPtr, isCPU) + { + } +}; + +/** + * 2D CL tensors. + * @tparam CC channel count. + * @tparam CT channel type. 
+ */ +template +class Tensor : public detail::Tensor2D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::type> + Tensor(std::size_t length, bool isCPU = true) + : detail::Tensor2D({{detail::ChannelToCount(), length}, {length, 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t length, std::size_t channelCount, bool isCPU = true) + : detail::Tensor2D({{channelCount, length}, {length, 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t length, DataType *dataPtr, bool isCPU = true) + : detail::Tensor2D({{detail::ChannelToCount(), length}, {length, 1}}, dataPtr, isCPU) + { + } + + template::type> + Tensor(std::size_t length, std::size_t channelCount, DataType *dataPtr, bool isCPU = true) + : detail::Tensor2D({{channelCount, length}, {length, 1}}, dataPtr, isCPU) + { + } +}; + +// 3D Tensors + +/** + * 3D HWC tensors. + * @tparam CC channel count. + * @tparam CT channel type. + */ +template +class Tensor : public detail::Tensor3D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, B isCPU = true) + : detail::Tensor3D({{height, width * detail::ChannelToCount()}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, B isCPU = true) + : detail::Tensor3D({{height, width * channelCount}, {width, channelCount}, {channelCount, 1}}, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, DataType *dataPtr, B isCPU = true) + : detail::Tensor3D({{height, width * detail::ChannelToCount()}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t rowPitch, DataType *dataPtr, B isCPU = true) + : detail::Tensor3D({{height, rowPitch / GetChannelSize(CT)}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, DataType *dataPtr, B isCPU = true) + : detail::Tensor3D({{height, width * channelCount}, {width, channelCount}, {channelCount, 1}}, dataPtr, + isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, std::size_t rowPitch, DataType *dataPtr, + B isCPU = true) + : detail::Tensor3D({{height, rowPitch / GetChannelSize(CT)}, {width, channelCount}, {channelCount, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } +}; + +/** + * 3D CHW tensors. + * @tparam CC channel count. + * @tparam CT channel type. 
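A sketch of the stride bookkeeping the HWC constructors above establish: for an unpitched W x H x C tensor, stride(CHANNEL) = 1, stride(WIDTH) = C, and stride(HEIGHT) = W * C (or rowPitch / elementSize for pitched memory), so element (x, y, c) sits at offset y * strideH + x * strideW + c elements. The ElementOffset helper is illustrative:

#include <cstddef>

#include "cv/core/Tensor.h"

// Element offset (in elements, not bytes) of pixel (x, y), channel c.
std::size_t ElementOffset(const cvcore::Tensor<cvcore::HWC, cvcore::C3, cvcore::U8> &t,
                          std::size_t x, std::size_t y, std::size_t c)
{
    return y * t.getStride(cvcore::TensorDimension::HEIGHT) +
           x * t.getStride(cvcore::TensorDimension::WIDTH) +
           c * t.getStride(cvcore::TensorDimension::CHANNEL);
}

// For Tensor<HWC, C3, U8> t(640, 480, true):
// ElementOffset(t, 10, 2, 1) == 2 * 1920 + 10 * 3 + 1 == 3871.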
+ */ +template +class Tensor : public detail::Tensor3D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::type> + Tensor(std::size_t width, std::size_t height, bool isCPU = true) + : detail::Tensor3D({{detail::ChannelToCount(), width * height}, {height, width}, {width, 1}}, + isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, bool isCPU = true) + : detail::Tensor3D({{channelCount, width * height}, {height, width}, {width, 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, DataType *dataPtr, bool isCPU = true) + : detail::Tensor3D({{detail::ChannelToCount(), width * height}, {height, width}, {width, 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t rowPitch, DataType *dataPtr, B isCPU = true) + : detail::Tensor3D({{detail::ChannelToCount(), height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, DataType *dataPtr, bool isCPU = true) + : detail::Tensor3D({{channelCount, width * height}, {height, width}, {width, 1}}, dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t channelCount, std::size_t rowPitch, DataType *dataPtr, + B isCPU = true) + : detail::Tensor3D({{channelCount, height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } +}; + +// 4D Tensors + +/** + * 4D DHWC tensors. + * @tparam CC channel count. + * @tparam CT channel type. 
+ */ +template +class Tensor : public detail::Tensor4D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, B isCPU = true) + : detail::Tensor4D({{depth, height * width * detail::ChannelToCount()}, + {height, width * detail::ChannelToCount()}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, B isCPU = true) + : detail::Tensor4D({{depth, height * width * channelCount}, + {height, width * channelCount}, + {width, channelCount}, + {channelCount, 1}}, + isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, DataType *dataPtr, B isCPU = true) + : detail::Tensor4D({{depth, height * width * detail::ChannelToCount()}, + {height, width * detail::ChannelToCount()}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t rowPitch, DataType *dataPtr, + B isCPU = true) + : detail::Tensor4D({{depth, height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, detail::ChannelToCount()}, + {detail::ChannelToCount(), 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, DataType *dataPtr, + B isCPU = true) + : detail::Tensor4D({{depth, height * width * channelCount}, + {height, width * channelCount}, + {width, channelCount}, + {channelCount, 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, std::size_t rowPitch, + DataType *dataPtr, B isCPU = true) + : detail::Tensor4D({{depth, height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, channelCount}, + {channelCount, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } +}; + +/** + * 4D DCHW tensors. + * @tparam CC channel count. + * @tparam CT channel type. 
+ */ +template +class Tensor : public detail::Tensor4D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, bool isCPU = true) + : detail::Tensor4D({{depth, detail::ChannelToCount() * width * height}, + {detail::ChannelToCount(), width * height}, + {height, width}, + {width, 1}}, + isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, bool isCPU = true) + : detail::Tensor4D( + {{depth, channelCount * width * height}, {channelCount, width * height}, {height, width}, {width, 1}}, + isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, DataType *dataPtr, bool isCPU = true) + : detail::Tensor4D({{depth, detail::ChannelToCount() * width * height}, + {detail::ChannelToCount(), width * height}, + {height, width}, + {width, 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t rowPitch, DataType *dataPtr, + B isCPU = true) + : detail::Tensor4D({{depth, detail::ChannelToCount() * height * rowPitch / GetChannelSize(CT)}, + {detail::ChannelToCount(), height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, DataType *dataPtr, + bool isCPU = true) + : detail::Tensor4D( + {{depth, channelCount * width * height}, {channelCount, width * height}, {height, width}, {width, 1}}, + dataPtr, isCPU) + { + } + + template::value>::type * = nullptr> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, std::size_t rowPitch, + DataType *dataPtr, B isCPU = true) + : detail::Tensor4D({{depth, channelCount * height * rowPitch / GetChannelSize(CT)}, + {channelCount, height * rowPitch / GetChannelSize(CT)}, + {height, rowPitch / GetChannelSize(CT)}, + {width, 1}}, + dataPtr, isCPU) + { + if (rowPitch % GetChannelSize(CT) != 0) + { + throw std::domain_error( + "cvcore::Tensor::Tensor ==> Parameter rowPitch is not evenly divisible by channel size"); + } + } +}; + +/** + * 4D CDHW tensors. + * @tparam CC channel count. + * @tparam CT channel type. 
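+ *
+ * Data is laid out as [channel][depth][height][width], i.e. each channel
+ * plane spans the whole batch, as the constructor strides below show.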
+ */ +template +class Tensor : public detail::Tensor4D +{ +public: + using DataType = typename detail::ChannelTypeToNative::Type; + + static constexpr ChannelCount kChannelCount = CC; + + Tensor() = default; + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, bool isCPU = true) + : detail::Tensor4D({{detail::ChannelToCount(), depth * width * height}, + {depth, width * height}, + {height, width}, + {width, 1}}, + isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, bool isCPU = true) + : detail::Tensor4D( + {{channelCount, depth * width * height}, {depth, width * height}, {height, width}, {width, 1}}, isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, DataType *dataPtr, bool isCPU = true) + : detail::Tensor4D({{detail::ChannelToCount(), depth * width * height}, + {depth, width * height}, + {height, width}, + {width, 1}}, + dataPtr, isCPU) + { + } + + template::type> + Tensor(std::size_t width, std::size_t height, std::size_t depth, std::size_t channelCount, DataType *dataPtr, + bool isCPU = true) + : detail::Tensor4D( + {{channelCount, depth * width * height}, {depth, width * height}, {height, width}, {width, 1}}, dataPtr, + isCPU) + { + } +}; + +} // namespace cvcore + +#endif // CVCORE_TENSOR_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorList.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorList.h new file mode 100644 index 0000000..4ec6695 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorList.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TENSORLIST_H +#define CVCORE_TENSORLIST_H + +#include + +#include "Traits.h" +#include "Array.h" +#include "Tensor.h" + +namespace cvcore { + +/** + * @brief Implementation of a list of tensors of the same rank but, potentially, different dimensions + */ +template +using TensorList = typename std::enable_if::value, Array>::type; + +} // namespace cvcore + +#endif // CVCORE_TENSORLIST_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorMap.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorMap.h new file mode 100644 index 0000000..93c8783 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/TensorMap.h @@ -0,0 +1,534 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TENSORMAP_H +#define CVCORE_TENSORMAP_H + +#include +#include +#include +#include +#include + +#include "Traits.h" +#include "TensorList.h" + +namespace cvcore { + +/** + * @brief Implementation of a map of tensors of the same rank but, potentially, different dimensions, over the batch dimension + * + * @tparam TensorType Any CVCORE tensor type + * @tparam KeyType Any STL hashable data type + */ +template +class TensorMap {}; + +template +class TensorMap, KT, + typename std::enable_if>::value>::type> +{ + using my_type = TensorMap, KT>; + + public: + using key_type = KT; + using unit_type = Tensor; + using element_type = traits::remove_batch_t; + using frame_type = TensorList; + using buffer_type = TensorList; + + template + struct dim_data_type + { + std::size_t height; + std::size_t width; + }; + + template + struct dim_data_type::type> + { + std::size_t height; + std::size_t width; + std::size_t channels; + }; + + TensorMap() = default; + TensorMap(const my_type &) = delete; + TensorMap(my_type && other) + { + *this = std::move(other); + } + + /** + * @brief Construct a new Tensor Map object + * + * @param batchSize The batch dimension of all sub-tensors + * @param dimData The dimensional description of all sub-tensors in at least HWC format + * @param isCPU A boolean flag specifying what device to allocate the sub-tensors + */ + template::type> + TensorMap(std::size_t batchSize, + const std::vector> & dimData, + bool isCPU = true) + : m_maxBatchSize{batchSize}, m_size{dimData.size()}, + m_isCPU{isCPU}, m_buffer{dimData.size(), true} + { + m_buffer.setSize(m_size); + + int i = 0; + for(auto dim: dimData) + { + m_buffer[i] = std::move(unit_type(dim.width, + dim.height, + m_maxBatchSize, + m_isCPU)); + ++i; + } + + for(std::size_t i = 0; i < m_maxBatchSize; ++i) + { + m_pool.insert(i); + } + } + + /** + * @brief Construct a new Tensor Map object + * + * @param batchSize The batch dimension of all sub-tensors + * @param dimData The dimensional description of all sub-tensors in at least HWC format + * @param isCPU A boolean flag specifying what device to allocate the sub-tensors + */ + template::type> + TensorMap(std::size_t batchSize, + const std::vector> & dimData, + bool isCPU = true) + : m_maxBatchSize{batchSize}, m_size{dimData.size()}, + m_isCPU{isCPU}, m_buffer{dimData.size(), true} + { + m_buffer.setSize(m_size); + + int i = 0; + for(auto dim: dimData) + { + m_buffer[i] = std::move(unit_type(dim.width, + dim.height, + m_maxBatchSize, + dim.channels, + m_isCPU)); + ++i; + } + + for(std::size_t i = 0; i < m_maxBatchSize; ++i) + { + m_pool.insert(i); + } + } + + ~TensorMap() = default; + + my_type & operator=(const my_type &) = delete; + my_type & operator=(my_type && other) + { + std::swap(m_mapping, other.m_mapping); + std::swap(m_pool, other.m_pool); + std::swap(m_maxBatchSize, other.m_maxBatchSize); + std::swap(m_size, other.m_size); + std::swap(m_isCPU, other.m_isCPU); + std::swap(m_buffer, other.m_buffer); + + return *this; + } + + /** + * @brief A mapping of the batch dimension index to a given key + 
 *
+ * @details Given a set of pairs such that the keys AND values are unique
+ * respectively, the key-wise mapping of the batch dimension is reset
+ * to the provided values.
+ *
+ * @param pairs An unordered map of the unique key-value pairs
+ * @return true If the length of ``pairs`` is at most the max batch size
+ * and the key-value pairs are one-to-one and onto.
+ * @return false If the conditions of ``true`` are not met.
+ */
+    bool remap(const std::unordered_map<key_type, std::size_t> & pairs)
+    {
+        bool result = false;
+
+        if(pairs.size() <= m_maxBatchSize)
+        {
+            for(std::size_t i = 0; i < m_maxBatchSize; ++i)
+            {
+                m_pool.insert(i);
+            }
+
+            m_mapping.clear();
+            for(auto mapping: pairs)
+            {
+                if(m_pool.erase(mapping.second))
+                {
+                    m_mapping[mapping.first] = mapping.second;
+                }
+            }
+
+            if((pairs.size() + m_pool.size()) == m_maxBatchSize)
+            {
+                result = true;
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * @brief Associates a given key with the first available batch index
+     *
+     * @details Assuming the number of associated keys has not reached
+     * ``maxBatchSize``, this function associates the given key with the
+     * first available batch index and returns that index value. If no
+     * batch index is available, -1 is returned. NOTE: if ``key`` is
+     * already associated with a batch index, then that index is returned.
+     *
+     * @param key The key to be associated with a batch index value
+     * @return std::intmax_t The batch index associated with the key or -1
+     * if no index is available. NOTE: because std::intmax_t is not a full
+     * covering of std::size_t, it is possible for wrap around to happen.
+     */
+    std::intmax_t map(const key_type & key)
+    {
+        auto it = m_mapping.find(key);
+
+        if(it == m_mapping.end() && !m_pool.empty())
+        {
+            auto value = m_pool.begin();
+            it = m_mapping.insert({key, *value}).first;
+            m_pool.erase(value);
+        }
+
+        return static_cast<std::intmax_t>(it != m_mapping.end() ? it->second : -1);
+    }
+
+    /**
+     * @brief Dissociates a given key from its batch index if possible
+     *
+     * @details Assuming the given key is associated with a batch index,
+     * this function removes the association and returns the batch index
+     * it was associated with. If no batch index is found associated with
+     * the given key, -1 is returned.
+     *
+     * @param key The key to be dissociated
+     * @return std::intmax_t The batch index associated with the key or -1
+     * if not found. NOTE: because std::intmax_t is not a full covering of
+     * std::size_t, it is possible for wrap around to happen.
+     */
+    std::intmax_t unmap(const key_type & key)
+    {
+        std::intmax_t result = -1;
+
+        auto it = m_mapping.find(key);
+
+        if(it != m_mapping.end())
+        {
+            result = static_cast<std::intmax_t>(it->second);
+            m_pool.insert(it->second);
+            m_mapping.erase(it);
+        }
+
+        return result;
+    }
+
+    /**
+     * @brief The number of keys associated with a batch index
+     *
+     * @return std::size_t
+     */
+    std::size_t getKeyCount() const noexcept
+    {
+        return m_mapping.size();
+    }
+
+    /**
+     * @brief The maximum number of batch indices
+     *
+     * @return std::size_t
+     */
+    std::size_t getMaxBatchSize() const noexcept
+    {
+        return m_maxBatchSize;
+    }
+
+    /**
+     * @brief The number of sub-tensors
+     *
+     * @return std::size_t
+     */
+    std::size_t getUnitCount() const
+    {
+        return m_size;
+    }
+
+    /**
+     * Get the size of given dimension.
+     * @param dimIdx dimension index.
+     * @return size of the specified dimension.
+     */
+    std::size_t getTensorSize(std::size_t unitIdx, std::size_t dimIdx) const
+    {
+        return m_buffer[unitIdx].getSize(dimIdx);
+    }
+
+    /**
+     * Get the stride of given dimension.
+ * @param dimIdx dimension index. + * @return stride of the specified dimension. + */ + std::size_t getTensorStride(std::size_t unitIdx, std::size_t dimIdx) const + { + return m_buffer[unitIdx].getStride(dimIdx); + } + + template::type> + unit_type getUnit(std::size_t idx) + { + unit_type result{m_buffer[idx].getWidth(), + m_buffer[idx].getHeight(), + m_buffer[idx].getDepth(), + m_buffer[idx].getData(), + m_buffer[idx].isCPU()}; + return result; + } + + template::type> + unit_type getUnit(std::size_t idx, ChannelCount UNUSED = T) + { + unit_type result{m_buffer[idx].getWidth(), + m_buffer[idx].getHeight(), + m_buffer[idx].getDepth(), + m_buffer[idx].getChannelCount(), + m_buffer[idx].getData(), + m_buffer[idx].isCPU()}; + return result; + } + + template::type> + frame_type getFrame(const key_type & idx) + { + frame_type result; + + if(m_mapping.find(idx) != m_mapping.end()) + { + std::size_t at = m_mapping[idx]; + result = std::move(frame_type{m_buffer.getSize(), m_buffer.isCPU()}); + result.setSize(m_size); + for(std::size_t i = 0; i < m_size; ++i) + { + element_type element{m_buffer[i].getWidth(), + m_buffer[i].getHeight(), + m_buffer[i].getData() + + at * m_buffer[i].getStride(TensorDimension::DEPTH), + m_buffer[i].isCPU()}; + result[i] = std::move(element); + } + } + + return result; + } + + template::type> + frame_type getFrame(const key_type & idx, ChannelCount UNUSED = T) + { + frame_type result; + + if(m_mapping.find(idx) != m_mapping.end()) + { + std::size_t at = m_mapping[idx]; + result = std::move(frame_type{m_buffer.getSize(), m_buffer.isCPU()}); + result.setSize(m_size); + for(std::size_t i = 0; i < m_size; ++i) + { + element_type element{m_buffer[i].getWidth(), + m_buffer[i].getHeight(), + m_buffer[i].getChannelCount(), + m_buffer[i].getData() + + at * m_buffer[i].getStride(TensorDimension::DEPTH), + m_buffer[i].isCPU()}; + result[i] = std::move(element); + } + } + + return result; + } + + template::type> + frame_type getFrame(key_type && idx) + { + frame_type result; + + if(m_mapping.find(idx) != m_mapping.end()) + { + std::size_t at = m_mapping[idx]; + result = std::move(frame_type{m_buffer.getSize(), m_buffer.isCPU()}); + result.setSize(m_size); + for(std::size_t i = 0; i < m_size; ++i) + { + element_type element{m_buffer[i].getWidth(), + m_buffer[i].getHeight(), + m_buffer[i].getData() + + at * m_buffer[i].getStride(TensorDimension::DEPTH), + m_buffer[i].isCPU()}; + result[i] = std::move(element); + } + } + + return result; + } + + template::type> + frame_type getFrame(key_type && idx, ChannelCount UNUSED = T) + { + frame_type result; + + if(m_mapping.find(idx) != m_mapping.end()) + { + std::size_t at = m_mapping[idx]; + result = std::move(frame_type{m_buffer.getSize(), m_buffer.isCPU()}); + result.setSize(m_size); + for(std::size_t i = 0; i < m_size; ++i) + { + element_type element{m_buffer[i].getWidth(), + m_buffer[i].getHeight(), + m_buffer[i].getChannelCount(), + m_buffer[i].getData() + + at * m_buffer[i].getStride(TensorDimension::DEPTH), + m_buffer[i].isCPU()}; + result[i] = std::move(element); + } + } + + return result; + } + + template::type> + element_type getElement(const key_type & keyIdx, std::size_t unitIdx) + { + element_type element; + + if(m_mapping.find(keyIdx) != m_mapping.end()) + { + std::size_t at = m_mapping[keyIdx]; + element = std::move(element_type{m_buffer[unitIdx].getWidth(), + m_buffer[unitIdx].getHeight(), + m_buffer[unitIdx].getData() + + at * m_buffer[unitIdx].getStride(TensorDimension::DEPTH), + m_buffer[unitIdx].isCPU()}); + } + + return 
element; + } + + template::type> + element_type getElement(const key_type & keyIdx, std::size_t unitIdx, ChannelCount UNUSED = T) + { + element_type element; + + if(m_mapping.find(keyIdx) != m_mapping.end()) + { + std::size_t at = m_mapping[keyIdx]; + element = std::move(element_type{m_buffer[unitIdx].getWidth(), + m_buffer[unitIdx].getHeight(), + m_buffer[unitIdx].getChannelCount(), + m_buffer[unitIdx].getData() + + at * m_buffer[unitIdx].getStride(TensorDimension::DEPTH), + m_buffer[unitIdx].isCPU()}); + } + + return element; + } + + template::type> + element_type getElement(key_type && keyIdx, std::size_t unitIdx) + { + element_type element; + + if(m_mapping.find(keyIdx) != m_mapping.end()) + { + std::size_t at = m_mapping[keyIdx]; + element = std::move(element_type{m_buffer[unitIdx].getWidth(), + m_buffer[unitIdx].getHeight(), + m_buffer[unitIdx].getData() + + at * m_buffer[unitIdx].getStride(TensorDimension::DEPTH), + m_buffer[unitIdx].isCPU()}); + } + + return element; + } + + template::type> + element_type getElement(key_type && keyIdx, std::size_t unitIdx, ChannelCount UNUSED = T) + { + element_type element; + + if(m_mapping.find(keyIdx) != m_mapping.end()) + { + std::size_t at = m_mapping[keyIdx]; + element = std::move(element_type{m_buffer[unitIdx].getWidth(), + m_buffer[unitIdx].getHeight(), + m_buffer[unitIdx].getChannelCount(), + m_buffer[unitIdx].getData() + + at * m_buffer[unitIdx].getStride(TensorDimension::DEPTH), + m_buffer[unitIdx].isCPU()}); + } + + return element; + } + + /** + * Get the ChannelType of the Tensor. + * @return ChannelType of the Tensor. + */ + constexpr ChannelType getType() const noexcept + { + return CT; + } + + /** + * Get the flag whether the Tensor is allocated in CPU or GPU. + * @return whether the Tensor is allocated in CPU. + */ + bool isCPU() const noexcept + { + return m_isCPU; + } + + private: + // Mapping and Pool form a unique-to-unique isometry between + // the keys and indices of the batch dimension + mutable std::unordered_map m_mapping; + mutable std::set m_pool; + + std::size_t m_maxBatchSize; + std::size_t m_size; + bool m_isCPU; + + buffer_type m_buffer; +}; + +} // namespace cvcore + +#endif // CVCORE_TENSORMAP_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Traits.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Traits.h new file mode 100644 index 0000000..a78b780 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/core/Traits.h @@ -0,0 +1,478 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TRAITS_H +#define CVCORE_TRAITS_H + +#include + +#include "MathTypes.h" +#include "Tensor.h" + +namespace cvcore { namespace traits { + +// ----------------------------------------------------------------------------- +// Type Properties +// ----------------------------------------------------------------------------- + +template +struct is_tensor : std::false_type +{ +}; + +template +struct is_tensor> : std::true_type +{ +}; + +template +struct is_planar : std::false_type +{ + static_assert(is_tensor::value, ""); +}; + +template +struct is_planar> : std::integral_constant +{ +}; + +template +struct is_interleaved : std::false_type +{ + static_assert(is_tensor::value, ""); +}; + +template +struct is_interleaved> : std::integral_constant +{ +}; + +template +struct is_batch : std::false_type +{ + static_assert(is_tensor::value, ""); +}; + +template +struct is_batch> : std::integral_constant +{ +}; + +// ----------------------------------------------------------------------------- +// Type Modifications +// ----------------------------------------------------------------------------- + +template +struct to_planar +{ + static_assert(is_tensor::value, ""); + + using type = TensorType; +}; + +template +struct to_planar> +{ + using type = Tensor; +}; + +template +struct to_planar> +{ + using type = Tensor; +}; + +template +using to_planar_t = typename to_planar::type; + +template +struct to_interleaved +{ + static_assert(is_tensor::value, ""); + + using type = TensorType; +}; + +template +struct to_interleaved> +{ + using type = Tensor; +}; + +template +struct to_interleaved> +{ + using type = Tensor; +}; + +template +struct to_interleaved> +{ + using type = Tensor; +}; + +template +using to_interleaved_t = typename to_interleaved::type; + +template +struct add_batch +{ + static_assert(is_tensor::value, ""); + + using type = TensorType; +}; + +template +struct add_batch> +{ + using type = Tensor; +}; + +template +struct add_batch> +{ + using type = Tensor; +}; + +template +using add_batch_t = typename add_batch::type; + +template +struct remove_batch +{ + static_assert(is_tensor::value, ""); + + using type = TensorType; +}; + +template +struct remove_batch> +{ + using type = Tensor; +}; + +template +struct remove_batch> +{ + using type = Tensor; +}; + +template +struct remove_batch> +{ + using type = Tensor; +}; + +template +using remove_batch_t = typename remove_batch::type; + +template +struct to_c1 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_c1> +{ + using type = Tensor; +}; + +template +using to_c1_t = typename to_c1::type; + +template +struct to_c2 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_c2> +{ + using type = Tensor; +}; + +template +using to_c2_t = typename to_c2::type; + +template +struct to_c3 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_c3> +{ + using type = Tensor; +}; + +template +using to_c3_t = typename to_c3::type; + +template +struct to_c4 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_c4> +{ + using type = Tensor; +}; + +template +using to_c4_t = typename to_c4::type; + +template +struct to_cx +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_cx> +{ + using type = Tensor; +}; + +template +using to_cx_t = typename to_cx::type; + +template +struct to_u8 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_u8> +{ + using type = Tensor; +}; + +template +using to_u8_t = typename to_u8::type; 
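+
+// A minimal usage sketch of the traits above (illustrative only; the alias
+// names are hypothetical, the layout/channel enums are cvcore's own):
+//
+//   using Img    = Tensor<HWC, C3, U8>;          // interleaved 3-channel u8
+//   using Planar = traits::to_planar_t<Img>;     // -> Tensor<CHW, C3, U8>
+//   using Batch  = traits::add_batch_t<Planar>;  // -> Tensor<NCHW, C3, U8>
+//   static_assert(traits::is_planar<Batch>::value, "NCHW is planar");
+//   static_assert(traits::is_batch<Batch>::value, "NCHW is batched");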
+ +template +struct to_u16 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_u16> +{ + using type = Tensor; +}; + +template +using to_u16_t = typename to_u16::type; + +template +struct to_f16 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_f16> +{ + using type = Tensor; +}; + +template +using to_f16_t = typename to_f16::type; + +template +struct to_f32 +{ + static_assert(is_tensor::value, ""); +}; + +template +struct to_f32> +{ + using type = Tensor; +}; + +template +using to_f32_t = typename to_f32::type; + +template +using to_c1u8 = to_c1>; + +template +using to_c1u16 = to_c1>; + +template +using to_c1f16 = to_c1>; + +template +using to_c1f32 = to_c1>; + +template +using to_c2u8 = to_c2>; + +template +using to_c2u16 = to_c2>; + +template +using to_c2f16 = to_c2>; + +template +using to_c2f32 = to_c2>; + +template +using to_c3u8 = to_c3>; + +template +using to_c3u16 = to_c3>; + +template +using to_c3f16 = to_c3>; + +template +using to_c3f32 = to_c3>; + +template +using to_c4u8 = to_c4>; + +template +using to_c4u16 = to_c4>; + +template +using to_c4f16 = to_c4>; + +template +using to_c4f32 = to_c4>; + +template +using to_cxu8 = to_cx>; + +template +using to_cxu16 = to_cx>; + +template +using to_cxf16 = to_cx>; + +template +using to_cxf32 = to_cx>; + +template +using to_c1u8_t = to_c1_t>; + +template +using to_c1u16_t = to_c1_t>; + +template +using to_c1f16_t = to_c1_t>; + +template +using to_c1f32_t = to_c1_t>; + +template +using to_c2u8_t = to_c2_t>; + +template +using to_c2u16_t = to_c2_t>; + +template +using to_c2f16_t = to_c2_t>; + +template +using to_c2f32_t = to_c2_t>; + +template +using to_c3u8_t = to_c3_t>; + +template +using to_c3u16_t = to_c3_t>; + +template +using to_c3f16_t = to_c3_t>; + +template +using to_c3f32_t = to_c3_t>; + +template +using to_c4u8_t = to_c4_t>; + +template +using to_c4u16_t = to_c4_t>; + +template +using to_c4f16_t = to_c4_t>; + +template +using to_c4f32_t = to_c4_t>; + +template +using to_cxu8_t = to_cx_t>; + +template +using to_cxu16_t = to_cx_t>; + +template +using to_cxf16_t = to_cx_t>; + +template +using to_cxf32_t = to_cx_t>; + +template +struct get_dim; + +template<> +struct get_dim +{ + static constexpr int value = 1; +}; + +template<> +struct get_dim +{ + static constexpr int value = 1; +}; + +template<> +struct get_dim +{ + static constexpr int value = 2; +}; + +template<> +struct get_dim +{ + static constexpr int value = 2; +}; + +template<> +struct get_dim +{ + static constexpr int value = 3; +}; + +template<> +struct get_dim +{ + static constexpr int value = 3; +}; +}} // namespace cvcore::traits + +#endif // CVCORE_TRAITS_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Errors.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Errors.h new file mode 100644 index 0000000..c569f10 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Errors.h @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0

+#ifndef CVCORE_ERRORS_H
+#define CVCORE_ERRORS_H
+
+#include "cv/core/CVError.h"
+
+namespace cvcore { namespace inferencer {
+
+/*
+ * Enum class describing the inference error codes.
+ */
+enum class InferencerErrorCode : std::int32_t
+{
+    SUCCESS = 0,
+    INVALID_ARGUMENT,
+    INVALID_OPERATION,
+    NOT_IMPLEMENTED,
+    TRITON_SERVER_NOT_READY,
+    TRITON_CUDA_SHARED_MEMORY_ERROR,
+    TRITON_INFERENCE_ERROR,
+    TRITON_REGISTER_LAYER_ERROR,
+    TENSORRT_INFERENCE_ERROR,
+};
+
+}} // namespace cvcore::inferencer
+
+namespace std {
+
+template<>
+struct is_error_code_enum<cvcore::inferencer::InferencerErrorCode> : true_type
+{
+};
+
+} // namespace std
+
+namespace cvcore { namespace inferencer {
+
+std::error_code make_error_code(InferencerErrorCode) noexcept;
+
+}} // namespace cvcore::inferencer
+
+#endif // CVCORE_ERRORS_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/IInferenceBackend.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/IInferenceBackend.h
new file mode 100644
index 0000000..7213a06
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/IInferenceBackend.h
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0

+#ifndef CVCORE_IINFERENCEBACKEND_H
+#define CVCORE_IINFERENCEBACKEND_H
+
+#include <cuda_runtime.h>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <NvInfer.h>
+
+#include "cv/core/Tensor.h"
+
+namespace cvcore { namespace inferencer {
+
+/**
+ * Struct type to describe input and output layers.
+ */
+struct LayerInfo
+{
+    size_t index;                   /**< Block Index of layer */
+    std::string name;               /**< Name of layer */
+    std::vector<std::size_t> shape; /**< Shape of layer */
+    cvcore::ChannelType dataType;   /**< Datatype of layer */
+    cvcore::TensorLayout layout;    /**< Tensor layout of layer */
+    size_t layerSize;               /**< Size of layer */
+};
+
+/**
+ * Enum class to describe the backend protocol for Triton
+ */
+enum class BackendProtocol
+{
+    GRPC, /**< gRPC protocol */
+    HTTP  /**< HTTP protocol */
+};
+
+/**
+ * Struct type to describe the input for Triton inference.
+ */
+struct TritonRemoteInferenceParams
+{
+    std::string serverUrl;        /**< Server url created by running the Triton server for a given model path */
+    bool verbose;                 /**< Verbose log from backend */
+    BackendProtocol protocolType; /**< Backend protocol type */
+    std::string modelName;        /**< Model name as per model repository */
+    std::string modelVersion;     /**< Model version as per model repository */
+};
+
+/**
+ * Struct type to describe the model metadata parsed by the inference backend.
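+ *
+ * Filled in by the backend (e.g. from a TensorRT engine or a Triton model
+ * repository) and retrieved via IInferenceBackendClient::getModelMetaData().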
+ */
+struct ModelMetaData
+{
+    std::string modelName;    /**< Model name */
+    std::string modelVersion; /**< Model version */
+    std::unordered_map<std::string, LayerInfo> inputLayers;  /**< Map of input layer information indexed by layer name */
+    std::unordered_map<std::string, LayerInfo> outputLayers; /**< Map of output layer information indexed by layer name */
+    size_t maxBatchSize;      /**< Maximum batch size */
+};
+
+/**
+ * Enum type for TensorRT inference type
+ */
+enum class TRTInferenceType
+{
+    TRT_ENGINE,          /**< Inference using TRT engine file */
+    TRT_ENGINE_IN_MEMORY /**< Inference using TRT Cuda Engine */
+};
+
+/**
+ * TRT Logger
+ */
+class TRTLogger : public nvinfer1::ILogger
+{
+public:
+    void log(Severity severity, const char *msg) noexcept
+    {
+        if ((severity == Severity::kERROR) || (severity == Severity::kWARNING))
+        {
+            std::cerr << msg << std::endl;
+        }
+    }
+};
+
+/**
+ * Struct type to describe the input for TensorRT inference.
+ */
+struct TensorRTInferenceParams
+{
+    TRTInferenceType inferType;    /**< TensorRT inference type */
+    nvinfer1::ICudaEngine *engine; /**< Cuda engine for TRT_ENGINE_IN_MEMORY type. Nullptr if TRT_ENGINE is used */
+    std::string engineFilePath;    /**< Engine file path for TRT_ENGINE type. Set to null otherwise */
+    std::size_t maxBatchSize;      /**< Max batch size */
+    std::vector<std::string> inputLayerNames;  /**< Input layer names */
+    std::vector<std::string> outputLayerNames; /**< Output layer names */
+    int32_t dlaID{-1};
+};
+
+/**
+ * Interface for the Triton remote, Triton C API and native TensorRT inference backends.
+ */
+class IInferenceBackendClient
+{
+public:
+    virtual ~IInferenceBackendClient() noexcept = default;
+
+    /**
+     * Function to set input layer data
+     * @param trtInputBuffer Input GPU buffer
+     * @param inputLayerName Input layer name
+     * @return error code
+     */
+    virtual std::error_code setInput(const cvcore::TensorBase &trtInputBuffer, std::string inputLayerName) = 0;
+
+    /**
+     * Function to set output layer data
+     * @param trtOutputBuffer Output GPU buffer
+     * @param outputLayerName Output layer name
+     * @return error code
+     */
+    virtual std::error_code setOutput(cvcore::TensorBase &trtOutputBuffer, std::string outputLayerName) = 0;
+
+    /**
+     * Returns the model metadata parsed by the inference backend.
+     * @return ModelMetaData
+     */
+    virtual ModelMetaData getModelMetaData() const = 0;
+
+    /**
+     * Runs inference on the inputs and outputs that have been set: enqueueV2 for TensorRT and inferSync for Triton.
+     * @param batchSize Batch size of input for inference. Default set to 1. Must be <= max batch size used for buffers.
+     * @return error code
+     */
+    virtual std::error_code infer(size_t batchSize = 1) = 0;
+
+    /**
+     * Sets the cuda stream applicable only for TensorRT backend.
     * @param cudaStream_t cuda input stream
+     * @return error code
+     */
+    virtual std::error_code setCudaStream(cudaStream_t) = 0; // Only in TRT
+
+    /**
+     * Unregisters the tensor mapped from the inference backend
+     * @param layerName input/output layer name
+     * @return error code
+     */
+    virtual std::error_code unregister(std::string layerName) = 0;
+
+    /**
+     * Unregisters all the tensors mapped from the inference backend
+     * @return error code
+     */
+    virtual std::error_code unregister() = 0;
+
+protected:
+    IInferenceBackendClient() = default;
+    IInferenceBackendClient(const IInferenceBackendClient &) = default;
+    IInferenceBackendClient &operator=(const IInferenceBackendClient &) = default;
+    IInferenceBackendClient(IInferenceBackendClient &&) noexcept = default;
+    IInferenceBackendClient &operator=(IInferenceBackendClient &&) noexcept = default;
+};
+
+using InferenceBackendClient = IInferenceBackendClient *;
+}} // namespace cvcore::inferencer
+#endif
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Inferencer.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Inferencer.h
new file mode 100644
index 0000000..ad61131
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/inferencer/Inferencer.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0

+#ifndef CVCORE_INFERENCER_H
+#define CVCORE_INFERENCER_H
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "cv/core/Tensor.h"
+#include "cv/inferencer/IInferenceBackend.h"
+
+namespace cvcore { namespace inferencer {
+
+/**
+ * A class to create and destroy a client for a given inference backend type
+ */
+class InferenceBackendFactory
+{
+public:
+#ifdef ENABLE_TRITON
+
+    /**
+     * Function to create a client for the Triton remote inference backend based on the Triton remote inference parameters.
+     * @param client client object created
+     * @param params Triton remote inference config parameters.
+     * @return error code
+     */
+    static std::error_code CreateTritonRemoteInferenceBackendClient(InferenceBackendClient &client,
+                                                                    const TritonRemoteInferenceParams &);
+
+    /**
+     * Function to destroy the Triton gRPC client
+     * @param client client object created
+     * @return error code
+     */
+    static std::error_code DestroyTritonRemoteInferenceBackendClient(InferenceBackendClient &client);
+#endif
+
+    /**
+     * Function to create a client for the TensorRT inference backend based on the TensorRT inference parameters.
+     * @param client client object created
+     * @param params TensorRT inference config parameters.
     * @return error code
+     */
+    static std::error_code CreateTensorRTInferenceBackendClient(InferenceBackendClient &client,
+                                                                const TensorRTInferenceParams &);
+
+    /**
+     * Function to destroy the TensorRT client
+     * @param client client object created
+     * @return error code
+     */
+    static std::error_code DestroyTensorRTInferenceBackendClient(InferenceBackendClient &client);
+
+private:
+#ifdef ENABLE_TRITON
+    // Keeps track of any changes in url/model repo path and returns an existing / new client instance.
+    static std::unordered_map<std::string, std::pair<std::size_t, InferenceBackendClient>> tritonRemoteMap;
+#endif
+    static std::mutex inferenceMutex;
+};
+}} // namespace cvcore::inferencer
+#endif
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/BBoxUtils.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/BBoxUtils.h
new file mode 100644
index 0000000..624139b
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/BBoxUtils.h
@@ -0,0 +1,135 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0

+#ifndef CVCORE_BBOX_UTILS_H
+#define CVCORE_BBOX_UTILS_H
+
+#include "cv/core/BBox.h"
+
+namespace cvcore { namespace tensor_ops {
+
+/**
+ * An enum.
+ * Enum type for Bounding Box interpolation
+ */
+enum BBoxInterpolationType
+{
+    CONST_INTERPOLATION, /**< Uses constant value interpolation */
+    IMAGE_INTERPOLATION, /**< Interpolates based on image size */
+};
+
+/**
+ * An enum.
+ * Enum type for Bounding Box scaling operation.
+ */
+enum BBoxScaleType
+{
+    NORMAL, /**< Scale box without fixing center point. */
+    CENTER  /**< Scale box with center fixed. */
+};
+
+/**
+ * Function to calculate the intersection of two bounding boxes.
+ * @param a one of the BBox.
+ * @param b the other BBox.
+ * @return intersection area of the two bounding boxes.
+ */
+float GetIntersection(const BBox &a, const BBox &b);
+
+/**
+ * Function to calculate the union of two bounding boxes.
+ * @param a one of the BBox.
+ * @param b the other BBox.
+ * @return union area of the two bounding boxes.
+ */
+float GetUnion(const BBox &a, const BBox &b);
+
+/**
+ * Function to calculate the IoU (Intersection over Union) of two bounding boxes.
+ * @param a one of the BBox.
+ * @param b the other BBox.
+ * @return IoU of the two bounding boxes.
+ */
+float GetIoU(const BBox &a, const BBox &b);
+
+/**
+ * Function to merge two BBox together.
+ * @param a one of the BBox.
+ * @param b the other BBox.
+ * @return Merged bounding box.
+ */
+BBox MergeBoxes(const BBox &a, const BBox &b);
+
+/**
+ * Clamp BBox.
+ * @param a BBox to be clamped.
+ * @param b boundary BBox.
+ * @return clamped BBox.
+ */
+BBox ClampBox(const BBox &a, const BBox &b);
+
+/**
+ * Interpolate bounding boxes.
+ * @param currLeft left x coordinate.
+ * @param currRight right x coordinate.
+ * @param currBottom bottom y coordinate.
+ * @param currTop top y coordinate.
+ * @param xScaler scale ratio along width direction. + * @param yScaler scale ratio along height direction. + * @param currColumn current column index. + * @param currRow current row index. + * @param type bbox interpolation type. + * @param bboxNorm bounding box scaled factor. + * @return interpolated BBox. + */ +BBox InterpolateBoxes(float currLeft, float currRight, float currBottom, float currTop, float xScaler, float yScaler, + int currColumn, int currRow, BBoxInterpolationType type = IMAGE_INTERPOLATION, + float bboxNorm = 1.0); + +/** + * Scale BBox. + * @param bbox input BBox. + * @param xScaler scale ratio along width direction. + * @param yScaler scale ratio along height direction. + * @param type BBox scaling type. + * @return scaled BBox. + */ +BBox ScaleBox(const BBox &bbox, float xScaler, float yScaler, BBoxScaleType type = NORMAL); + +/** + * Transform BBox. + * @param bbox input BBox. + * @param xScaler scale ratio along width direction. + * @param yScaler scale ratio along height direction. + * @param xOffset offset along width direction in pixels. + * @param yOffset offset along height direction in pixels. + * @return transformed BBox. + */ +BBox TransformBox(const BBox &bbox, float xScaler, float yScaler, float xOffset, float yOffset); + +/** + * Squarify BBox. + * @param box input BBox. + * @param boundary boundary BBox used for clamping. + * @param scale scaling factor. + * @return squarified BBox. + */ +BBox SquarifyBox(const BBox &box, const BBox &boundary, float scale); + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_BBOX_UTILS_H \ No newline at end of file diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/DBScan.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/DBScan.h new file mode 100644 index 0000000..24b4dc4 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/DBScan.h @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_DBSCAN_H +#define CVCORE_DBSCAN_H + +#include "cv/core/Array.h" +#include "cv/core/BBox.h" + +namespace cvcore { namespace tensor_ops { + +/** + * An enum. + * Enum type for BBox merge type. + */ +enum BBoxMergeType +{ + MAXIMUM, /**< merge by expanding bounding boxes */ + WEIGHTED, /**< weighted by confidence scores. */ +}; + +/** + * DBScan implementation used for post-processing of object detection. + */ +class DBScan +{ +public: + /** + * DBScan constructor. + * @param pointsize size of the input array. + * @param minPoints minimum number of neighbors within the radius. + * @param epsilon radius of neighborhood around a point. + */ + DBScan(int pointsSize, int minPoints, float epsilon); + + /** + * Run DBScan cluster and return the cluster indices. + * @param input input unclustered BBoxes array. + * @param clusters output array containing cluster ids. 
+ */ + void doCluster(Array &input, Array &clusters); + + /** + * Run DBScan cluster and return the cluster bboxes. + * @param input input unclustered BBoxes array. + * @param output output clustered BBoxes array. + * @param type bbox merge type + */ + void doClusterAndMerge(Array &input, Array &output, BBoxMergeType type = MAXIMUM); + + /** + * Run DBScan cluster and return the cluster bboxes weighted on input weights. + * @param input input unclustered BBoxes array. + * @param weights weights needed for merging clusterd bboxes. + * @param output output clustered BBoxes array. + * @param type bbox merge type + */ + void doClusterAndMerge(Array &input, Array &weights, Array &output, + BBoxMergeType type = WEIGHTED); + + /** + * Get the number of clusters. + * @return number of clusters. + */ + int getNumClusters() const; + +private: + int m_pointsSize; + int m_numClusters; + int m_minPoints; + float m_epsilon; + Array m_clusterStates; +}; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_DBSCAN_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/Errors.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/Errors.h new file mode 100644 index 0000000..4cdc6ab --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/Errors.h @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_ERRORS_H +#define CVCORE_ERRORS_H + +#include "cv/core/CVError.h" + +namespace cvcore { namespace tensor_ops { + +enum class TensorOpsErrorCode : std::int32_t +{ + SUCCESS = 0, + COMPUTE_ENGINE_UNSUPPORTED_BY_CONTEXT, + CAMERA_DISTORTION_MODEL_UNSUPPORTED +}; + +}} // namespace cvcore::tensor_ops + +// WARNING: Extending base C++ namespace to cover cvcore error codes +namespace std { + +template <> +struct is_error_code_enum : true_type {}; + +} // namespace std + +namespace cvcore { namespace tensor_ops { + +std::error_code make_error_code(TensorOpsErrorCode) noexcept; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_ERRORS_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/IImageWarp.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/IImageWarp.h new file mode 100644 index 0000000..4a81e1a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/IImageWarp.h @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_IIMAGEWARP_H +#define CVCORE_IIMAGEWARP_H + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +struct ImageGrid +{ + static constexpr std::size_t MAX_HORIZ_REGIONS = 4; + static constexpr std::size_t MAX_VERT_REGIONS = 4; + static constexpr std::size_t MIN_REGION_WIDTH = 64; + static constexpr std::size_t MIN_REGION_HIGHT = 16; + + std::int8_t numHorizRegions{0}; + std::int8_t numVertRegions{0}; + std::array horizInterval; + std::array vertInterval; + std::array regionHeight; + std::array regionWidth; +}; + +class IImageWarp +{ + public: + // Public Destructor + virtual ~IImageWarp() = 0; + + protected: + // Protected Constructor(s) + IImageWarp() = default; + IImageWarp(const IImageWarp &) = default; + IImageWarp(IImageWarp &&) noexcept = default; + + // Protected Operator(s) + IImageWarp &operator=(const IImageWarp &) = default; + IImageWarp &operator=(IImageWarp &&) noexcept = default; +}; + +using ImageWarp = IImageWarp*; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_IIMAGEWARP_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorContext.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorContext.h new file mode 100644 index 0000000..5aa6cdb --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorContext.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_ITENSOROPERATORCONTEXT_H +#define CVCORE_ITENSOROPERATORCONTEXT_H + +#include + +#include "ITensorOperatorStream.h" +#include "cv/core/CVError.h" +#include "cv/core/ComputeEngine.h" + +namespace cvcore { namespace tensor_ops { + +enum class TensorBackend : std::uint8_t +{ + NPP, + VPI, + DALI +}; + +class ITensorOperatorContext +{ +public: + // Public Constructor(s)/Destructor + virtual ~ITensorOperatorContext() noexcept = default; + + // Public Accessor Method(s) + virtual std::error_code CreateStream(TensorOperatorStream &, const ComputeEngine &) = 0; + virtual std::error_code DestroyStream(TensorOperatorStream &) = 0; + + virtual bool IsComputeEngineCompatible(const ComputeEngine &) const noexcept = 0; + + virtual TensorBackend Backend() const noexcept = 0; + +protected: + // Protected Constructor(s) + ITensorOperatorContext() = default; + ITensorOperatorContext(const ITensorOperatorContext &) = default; + ITensorOperatorContext(ITensorOperatorContext &&) noexcept = default; + + // Protected Operator(s) + ITensorOperatorContext &operator=(const ITensorOperatorContext &) = default; + ITensorOperatorContext &operator=(ITensorOperatorContext &&) noexcept = default; +}; + +using TensorOperatorContext = ITensorOperatorContext *; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_ITENSOROPERATORCONTEXT_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorStream.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorStream.h new file mode 100644 index 0000000..c4de9df --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ITensorOperatorStream.h @@ -0,0 +1,251 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_ITENSOROPERATORSTREAM_H +#define CVCORE_ITENSOROPERATORSTREAM_H + +#include +#include +#include +#include + +#include "cv/core/CameraModel.h" +#include "cv/core/ComputeEngine.h" +#include "cv/core/Image.h" + +#include "Errors.h" +#include "IImageWarp.h" +#include "ImageUtils.h" +namespace cvcore { namespace tensor_ops { + +class NotImplementedException : public std::logic_error +{ +public: + NotImplementedException() + : std::logic_error("Method not yet implemented.") + { + } +}; + +class ITensorOperatorStream +{ +public: + // Public Constructor(s)/Destructor + virtual ~ITensorOperatorStream() noexcept = default; + + // Public Accessor Method(s) + virtual std::error_code Status() noexcept = 0; + + virtual std::error_code GenerateWarpFromCameraModel(ImageWarp & warp, + const ImageGrid & grid, + const CameraModel & source, + const CameraIntrinsics & target) = 0; + + virtual std::error_code DestroyWarp(ImageWarp & warp) noexcept = 0; + + // Public Mutator Method(s) + virtual std::error_code Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, + InterpolationType interpolation = INTERP_LINEAR, + BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, + InterpolationType interpolation = INTERP_LINEAR, + BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, + InterpolationType interpolation = INTERP_LINEAR, + BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, + InterpolationType interpolation = INTERP_LINEAR, + BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation = INTERP_LINEAR, BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation = INTERP_LINEAR, BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation = INTERP_LINEAR, BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation = INTERP_LINEAR, BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation = INTERP_LINEAR, BorderType border = BORDER_ZERO) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code Normalize(Image &outputImage, const Image 
&inputImage + /* only configuration parameters */) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Normalize(Image &outputImage, const Image &inputImage + /* only configuration parameters */) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Normalize(Image &outputImage, const Image &inputImage + /* only configuration parameters */) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + virtual std::error_code Normalize(Image &outputImage, const Image &inputImage + /* only configuration parameters */) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code ColorConvert(Image &outputImage, const Image &inputImage) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + + virtual std::error_code StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) + { + throw NotImplementedException(); + return make_error_code(ErrorCode::NOT_IMPLEMENTED); + } + +protected: + // Protected Constructor(s) + ITensorOperatorStream() = default; + 
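   // Copying and moving are restricted to subclasses; client code holds
+    // non-owning TensorOperatorStream handles instead.
+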
+    ITensorOperatorStream(const ITensorOperatorStream &) = default;
+    ITensorOperatorStream(ITensorOperatorStream &&) noexcept = default;
+
+    // Protected Operator(s)
+    ITensorOperatorStream &operator=(const ITensorOperatorStream &) = default;
+    ITensorOperatorStream &operator=(ITensorOperatorStream &&) noexcept = default;
+};
+
+using TensorOperatorStream = ITensorOperatorStream *;
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_ITENSOROPERATORSTREAM_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ImageUtils.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ImageUtils.h
new file mode 100644
index 0000000..e46cd42
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/ImageUtils.h
@@ -0,0 +1,1091 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_IMAGE_UTILS_H
+#define CVCORE_IMAGE_UTILS_H
+
+#include
+
+#include
+
+#include "cv/core/BBox.h"
+#include "cv/core/Tensor.h"
+
+namespace cvcore { namespace tensor_ops {
+
+/**
+ * An enum.
+ * Enum type for color conversion type.
+ */
+enum ColorConversionType
+{
+    BGR2RGB,  /**< convert BGR to RGB. */
+    RGB2BGR,  /**< convert RGB to BGR. */
+    BGR2GRAY, /**< convert BGR to Grayscale. */
+    RGB2GRAY, /**< convert RGB to Grayscale. */
+    GRAY2BGR, /**< convert Grayscale to BGR. */
+    GRAY2RGB, /**< convert Grayscale to RGB. */
+};
+
+/**
+ * An enum.
+ * Enum type for resize interpolation type.
+ */
+enum InterpolationType
+{
+    INTERP_NEAREST,         /**< nearest interpolation. */
+    INTERP_LINEAR,          /**< linear interpolation. */
+    INTERP_CUBIC_BSPLINE,   /**< cubic bspline interpolation. */
+    INTERP_CUBIC_CATMULLROM /**< cubic catmullrom interpolation. */
+};
+
+/**
+ * An enum.
+ * Enum type for border type.
+ */
+enum BorderType
+{
+    BORDER_ZERO,
+    BORDER_REPEAT,
+    BORDER_REVERSE,
+    BORDER_MIRROR
+};
+
+// Please note: the following functions all require GPU tensors.
+
+/**
+ * Image resizing for one channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image batch resizing for one channel NHWC uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for one channel HWC format uint_8 type Tensor.
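+ * The source region srcROI is resampled into the destination region dstROI.
+ * A usage sketch follows (coordinates are illustrative, and the BBox field
+ * order {xmin, ymin, xmax, ymax} is an assumption of this example):
+ * @code
+ * // shrink the 200x200 top-left corner of src into a 100x100 patch of dst
+ * CropAndResize(dst, src, BBox{0, 0, 100, 100}, BBox{0, 0, 200, 200},
+ *               INTERP_LINEAR, stream);
+ * @endcode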
+ * @param dst destination tensor. + * @param src source tensor. + * @param dstROI destination crop region. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel HWC format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing for one channel HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image batch resizing for one channel HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param dstROI destination crop region. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing for one channel HWC format FP32 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image batch resizing for one channel HWC format FP32 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel HWC format FP32 type Tensor. + * @param dst destination tensor. 
+ * @param src source tensor. + * @param dstROI destination crop region. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel HWC format FP32 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing for one channel CHW format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel CHW format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param dstROI destination crop region. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel CHW format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing for one channel CHW format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type interpolation type. + * @param keep_aspect_ratio whether to keep aspect ratio. + * @param stream specified cuda stream. + */ +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel CHW format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param dstROI destination crop region. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing of a region of interest for one channel CHW format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Image resizing for one channel CHW format FP32 type Tensor. + * @param dst destination tensor. 
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for one channel CHW format FP32 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param dstROI destination crop region.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for one channel CHW format FP32 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing for three channels interleaved uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image batch resizing for three channels interleaved uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param dstROI destination crop region.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing for three channels interleaved uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image batch resizing for three channels interleaved uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param dstROI destination crop region.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing for three channels interleaved float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image batch resizing for three channels interleaved float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type interpolation type.
+ * @param keep_aspect_ratio whether to keep aspect ratio.
+ * @param stream specified cuda stream.
+ */
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio = false,
+            InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param dstROI destination crop region.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Image resizing of a region of interest for three channel HWC format float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param type interpolation type.
+ * @param stream specified cuda stream.
+ */
+void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI,
+                   InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0);
+
+/**
+ * Crop a region of interest for one channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param stream specified cuda stream.
+ */
+void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0);
+
+/**
+ * Crop a region of interest for one channel HWC format uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param srcROI source crop region.
+ * @param stream specified cuda stream.
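+ *
+ * A minimal sketch (sizes are illustrative; assumes BBox holds
+ * {xmin, ymin, xmax, ymax}):
+ * @code
+ * // copy the 128x128 top-left region of src into a matching dst
+ * Crop(dst, src, BBox{0, 0, 128, 128}, stream);
+ * @endcode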
+ */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for one channel HWC format float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for one channel CHW format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for one channel CHW format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for one channel CHW format float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for three channels HWC format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for three channels HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Crop a region of interest for three channels HWC format float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param srcROI source crop region. + * @param stream specified cuda stream. + */ +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to one channel HWC format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to one channel HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to one channel HWC format float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. 
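+ *
+ * Example of passing the coefficient matrix (a sketch; the translation
+ * terms in the last column assume the conventional row-major homography
+ * layout):
+ * @code
+ * // pure translation by (16, 8) pixels expressed as a 3x3 homography
+ * const double coeffs[3][3] = {{1.0, 0.0, 16.0},
+ *                              {0.0, 1.0, 8.0},
+ *                              {0.0, 0.0, 1.0}};
+ * WarpPerspective(dst, src, coeffs, INTERP_LINEAR, stream);
+ * @endcode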
+ */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to three channels HWC format uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to three channels HWC format uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** + * Apply a perspective transformation to three channels HWC format float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param coeffs 3x3 transformation matrix. + * @param type interpolation type. + * @param stream specified cuda stream. + */ +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type = INTERP_LINEAR, cudaStream_t stream = 0); + +/** Color conversion between two three channels interleaved uint_8 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. + */ +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream = 0); + +/** Batch color conversion between three channels interleaved uint_8 type Tensors. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. + */ +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream = 0); + +/** Color conversion between two three channels interleaved uint_16 type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. + */ +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream = 0); + +/** Batch color conversion between three channels interleaved uint_16 type Tensors. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. + */ +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream = 0); + +/** Color conversion between two three channels interleaved float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. + */ +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream = 0); + +/** Batch color conversion between three channels interleaved float type Tensors. + * @param dst destination tensor. + * @param src source tensor. + * @param type color conversion type. + * @param stream specified cuda stream. 
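+ *
+ * Usage sketch (tensor operands elided):
+ * @code
+ * // reorder channels from BGR to RGB on the given stream
+ * ConvertColorFormat(dst, src, BGR2RGB, stream);
+ * @endcode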
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from three channels interleaved uint_8 type Tensor to one channel Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from three channels interleaved uint_16 type Tensor to one channel Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from three channels interleaved float type Tensor to one channel Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from one channel interleaved uint_8 type Tensor to three channels Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from one channel interleaved uint_16 type Tensor to three channels Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Color conversion from one channel interleaved float type Tensor to three channels Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param type color conversion type.
+ * @param stream specified cuda stream.
+ */
+void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type,
+                        cudaStream_t stream = 0);
+
+/** Convert bit depth from F32 to U8 of a single channel Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale multiply the pixel values by a factor.
+ * @param stream specified cuda stream.
+ */
+void ConvertBitDepth(Tensor &dst, Tensor &src, const float scale, cudaStream_t stream = 0);
+
+/** Convert bit depth from F32 to U8 of an N * single channel Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale multiply the pixel values by a factor.
+ * @param stream specified cuda stream.
+ */
+void ConvertBitDepth(Tensor &dst, Tensor &src, const float scale, cudaStream_t stream = 0);
+
+/**
+ * Normalization for three channels interleaved uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3],
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for three channels interleaved uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3],
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for three channels interleaved uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3],
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for three channels interleaved uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3],
+               const float offset[3], cudaStream_t stream = 0);
+
+/**
+ * Normalization for three channels interleaved float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3],
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for three channels interleaved float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale[3],
+               const float offset[3], cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel interleaved uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel interleaved uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel interleaved uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
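+ *
+ * For instance, scale = 1.0f / 65535.0f with offset = 0.0f maps uint16_t
+ * pixels into [0, 1] (an illustrative choice, not a library default):
+ * @code
+ * Normalize(dst, src, 1.0f / 65535.0f, 0.0f, stream);
+ * @endcode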
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel interleaved uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel interleaved float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel interleaved float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel planar uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel planar uint8_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel planar uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel planar uint16_t type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Normalization for one channel planar float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Batch normalization for one channel planar float type Tensor.
+ * Each element x will be transformed to (x + offset) * scale.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param scale scaling factor for normalization.
+ * @param offset offset constant for normalization.
+ * @param stream specified cuda stream.
+ */
+void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset,
+               cudaStream_t stream = 0);
+
+/**
+ * Convert interleaved image to planar image for three channels uint8_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Batch convert interleaved image to planar image for three channels uint8_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Convert interleaved image to planar image for three channels uint16_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Batch convert interleaved image to planar image for three channels uint16_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Convert interleaved image to planar image for three channels float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Batch convert interleaved image to planar image for three channels float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Convert interleaved image to planar image for single channel uint8_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Batch convert interleaved image to planar image for single channel uint8_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Convert interleaved image to planar image for single channel uint16_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
+ */
+void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0);
+
+/**
+ * Batch convert interleaved image to planar image for single channel uint16_t type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param stream specified cuda stream.
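+ *
+ * A layout sketch (values illustrative): interleaved storage
+ * p0c0 p0c1 ... p1c0 p1c1 ... becomes one contiguous plane per channel,
+ * c0: p0 p1 ..., then c1: p0 p1 ...; the call itself is simply:
+ * @code
+ * InterleavedToPlanar(dst, src, stream);  // dst must be pre-allocated
+ * @endcode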
+ */ +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0); + +/** + * Convert interleaved image to planar image for single channel float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param stream specified cuda stream. + */ +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0); + +/** + * Batch convert interleaved image to planar image for single channel float type Tensor. + * @param dst destination tensor. + * @param src source tensor. + * @param stream specified cuda stream. + */ +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream = 0); + +/** + * Combines various functions to imitate OpenCV blobFromImage() for various type tensor input. + * @tparam TL_IN input tensor layout (HWC/NHWC). + * @tparam TL_OUT output tensor layout(CHW/NCHW). + * @tparam CC channel count. + * @tparam CT channel type for input tensor (output is fixed: F32). + */ +template +class ImageToNormalizedPlanarTensorOperator +{ +public: + /** + * Implementation for ImageToNormalizedPlanarTensorOperator. + */ + struct ImageToNormalizedPlanarTensorOperatorImpl; + + /** + * Constructor for HWC -> CHW tensors. + */ + template::type * = nullptr> + ImageToNormalizedPlanarTensorOperator(int width, int height); + + /** + * Constructor for NHWC -> NCHW tensors. + */ + template::type * = nullptr> + ImageToNormalizedPlanarTensorOperator(int width, int height, int depth); + + /** + * Destructor for ImageToNormalizedPlanarTensorOperator. + */ + ~ImageToNormalizedPlanarTensorOperator(); + + /** + * Run the composite operations on three channels tensors. + * @param dst destination tensor. + * @param src source tensor. + * @param scale scale factor for normalization. + * @param offset offset constant for normalization. + * @param swapRB whether to swap the first and last channels. + * @param keep_aspect_ratio whether to keep aspect ratio when resizing. + * @param stream specified cuda stream. + */ + template::type * = nullptr> + void operator()(Tensor &dst, const Tensor &src, const float scale[3], + const float offset[3], bool swapRB, bool keep_aspect_ratio = false, cudaStream_t stream = 0); + + /** + * Run the composite operations on single channel tensors. + * @param dst destination tensor. + * @param src source tensor. + * @param scale scale factor for normalization. + * @param offset offset constant for normalization. + * @param keep_aspect_ratio whether to keep aspect ratio when resizing. + * @param stream specified cuda stream. + */ + template::type * = nullptr> + void operator()(Tensor &dst, const Tensor &src, float scale, float offset, + bool keep_aspect_ratio = false, cudaStream_t stream = 0); + +private: + std::unique_ptr m_pImpl; +}; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_IMAGE_UTILS_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/OneEuroFilter.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/OneEuroFilter.h new file mode 100644 index 0000000..4947042 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/OneEuroFilter.h @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_ONEEUROFILTER_H
+#define CVCORE_ONEEUROFILTER_H
+
+#include
+#include
+
+#include "cv/core/CVError.h"
+
+namespace cvcore { namespace tensor_ops {
+
+/**
+ * One Euro Filter parameters.
+ */
+struct OneEuroFilterParams
+{
+    float dataUpdateRate;  /**< Data update rate in Hz. */
+    float minCutoffFreq;   /**< Minimum cutoff frequency in Hz. */
+    float cutoffSlope;     /**< Beta or speed coefficient, which is a tuning parameter. */
+    float derivCutoffFreq; /**< Cutoff frequency for the derivative. */
+};
+
+/*
+The One Euro Filter is a low-pass filter for filtering noisy signals in
+real time. The filtering uses exponential smoothing, where the smoothing
+factor is computed dynamically from the input data update rate. The
+smoothing factor provides a trade-off between slow-speed jitter and
+high-speed lag.
+There are two main tuning parameters for the filter: the speed coefficient
+Beta and the minimum cutoff frequency.
+If high-speed lag is a problem, increase Beta; if slow-speed jitter is a
+problem, decrease fcmin.
+Reference: http://cristal.univ-lille.fr/~casiez/1euro/
+*/
+template
+class OneEuroFilter
+{
+public:
+    struct OneEuroFilterImpl;
+    /**
+     * One Euro Filter constructor.
+     * @param filterParams filter parameters.
+     */
+    OneEuroFilter(const OneEuroFilterParams &filterParams);
+
+    /**
+     * Reset the One Euro Filter parameters.
+     * @param filterParams filter parameters.
+     * @return error code.
+     */
+    std::error_code resetParams(const OneEuroFilterParams &filterParams);
+
+    /**
+     * Filter the input. Supports float, double, Vector2f, Vector3f, and
+     * Vector3d input types.
+     * @param inValue noisy input to be filtered.
+     * @param filteredValue filtered output.
+     * @return error code.
+     */
+    std::error_code execute(T &filteredValue, T inValue);
+
+    ~OneEuroFilter();
+
+private:
+    /**
+     * Implementation of the One Euro Filter.
+     */
+    std::unique_ptr m_pImpl;
+};
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_ONEEUROFILTER_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/TensorOperators.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/TensorOperators.h
new file mode 100644
index 0000000..e32a3df
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/tensor_ops/TensorOperators.h
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TENSOROPERATORS_H +#define CVCORE_TENSOROPERATORS_H + +#include +#include +#include + +#include "cv/tensor_ops/ITensorOperatorContext.h" +#include "cv/tensor_ops/ITensorOperatorStream.h" + +namespace cvcore { namespace tensor_ops { + +class TensorContextFactory +{ +public: + static std::error_code CreateContext(TensorOperatorContext &, TensorBackend backend); + static std::error_code DestroyContext(TensorOperatorContext &context); + + static bool IsBackendSupported(TensorBackend backend); + +private: + using MultitonType = std::unordered_map>>; + + static MultitonType instances; + static std::mutex instanceMutex; +}; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_TENSOROPERATORS_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/include/cv/trtbackend/TRTBackend.h b/isaac_ros_ess/gxf/ess/cvcore/include/cv/trtbackend/TRTBackend.h new file mode 100644 index 0000000..52b2e40 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/include/cv/trtbackend/TRTBackend.h @@ -0,0 +1,203 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_TRTBACKEND_H +#define CVCORE_TRTBACKEND_H + +#include +#include +#include +#include +#include + +#include + +namespace cvcore { + +/** + * Enum to specify different precision types. + */ +enum TRTBackendPrecision +{ + INT8, /**< 8-bit integer precision. */ + FP16, /**< 16-bit float precision. */ + FP32 /**< 32-bit float precision. */ +}; + +/** + * Enum to specify TensorRT blob layout type. + */ +enum TRTBackendBlobLayout +{ + PLANAR, /**< planar input type. */ + INTERLEAVED /**< interleaved input type. */ +}; + +/** + * Struct to store TensorRT blob dimensions. + */ +struct TRTBackendBlobSize +{ + int channels; /**< channels count. */ + int height; /**< blob height. */ + int width; /**< blob width. */ +}; + +/** + * Enum to specify model type. + */ +enum ModelType +{ + ONNX, /**< ONNX model. */ + UFF, /**< Uff model. */ + TRT_ENGINE, /**< serialized TensorRT engine. */ + TRT_ENGINE_IN_MEMORY /**< a memory pointer to deserialized TensorRT ICudaEngine. */ +}; + +/** + * Parameters for TRTBackend. + */ +struct TRTBackendParams +{ + // This constructor is for backward compatibility with all the other core modules which use trtbackend. 
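+    // A hypothetical parameter set (the layer names, dimensions, and engine
+    // path below are illustrative placeholders, not values shipped with this
+    // extension):
+    //
+    //   TRTBackendParams params({"input"}, {{3, 576, 960}}, {"output"},
+    //                           "", "model.engine", FP16, 1,
+    //                           TRT_ENGINE, true);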
+ TRTBackendParams(std::vector inputLayers, std::vector inputDims, + std::vector outputLayers, std::string weightsPath, std::string enginePath, + TRTBackendPrecision precision, int batchSize, ModelType modelType, bool explicitBatch, + void *engine = nullptr, TRTBackendBlobLayout layout = PLANAR) + : inputLayers(inputLayers) + , inputDims(inputDims) + , outputLayers(outputLayers) + , weightsPath(weightsPath) + , enginePath(enginePath) + , precision(precision) + , batchSize(batchSize) + , modelType(modelType) + , explicitBatch(explicitBatch) + , trtEngineInMemory(engine) + , inputLayout(layout) + { + } + + std::vector inputLayers; /**< names of input layers. */ + std::vector inputDims; /**< dimensions of input layers. */ + std::vector outputLayers; /**< names of output layers. */ + std::string weightsPath; /**< model weight path. */ + std::string enginePath; /**< TensorRT engine path. */ + TRTBackendPrecision precision; /**< TensorRT engine precision. */ + int batchSize; /**< inference batch size. */ + ModelType modelType; /**< model type. */ + bool explicitBatch; /**< whether it is explicit batch dimension. */ + void * + trtEngineInMemory; /**< pointer to hold deserialized TensorRT ICudaEngine, for ModelType::TRT_ENGINE_IN_MEMORY. */ + TRTBackendBlobLayout inputLayout; /**< input blob layout. */ +}; + +// Forward declaration +struct TRTImpl; + +/** + * TensorRT wrapper class. + */ +class TRTBackend +{ +public: + /** + * Constructor of TRTBackend. + * @param modelFilePath path of the network model. + * @param precision TensorRT precision type. + */ + TRTBackend(const char *modelFilePath, TRTBackendPrecision precision, int batchSize = 1, bool explicitBatch = false); + + /** + * Constructor of TRTBackend. + * @param inputParams parameters of TRTBackend. + */ + TRTBackend(TRTBackendParams &inputParams); + + /** + * Destructor of TRTBackend. + */ + ~TRTBackend(); + + /** + * Run inference. + * @param buffer input GPU buffers. + */ + [[deprecated]] void infer(void **buffer); + + /** + * Run inference. + * @param buffer input GPU buffers. + * @param batchSize run infer with specific batch size, passed in setBindingDimension() call. + * @param stream update cuda stream in this instance. + */ + void infer(void **buffer, int batchSize, cudaStream_t stream); + + /** + * Get the cuda stream for TRTBackend. + * @return return cuda stream ID. + */ + [[deprecated]] cudaStream_t getCUDAStream() const; + + /** + * Set the cuda stream for TRTBackend. + * @param stream specified cudaStream_t for the TensorRT Engine. + */ + [[deprecated]] void setCUDAStream(cudaStream_t stream); + + /** + * Get all input/output bindings count. + * @return number of all bindings. + */ + int getBlobCount() const; + + /** + * Get the blob dimension for given blob index. + * @param blobIndex blob index. + * @return blob dimension for the given index. + */ + TRTBackendBlobSize getTRTBackendBlobSize(int blobIndex) const; + + /** + * Get the total number of elements for the given blob index. + * @param blobIndex blob index. + * @return total size for the given index. + */ + int getBlobLinearSize(int blobIndex) const; + + /** + * Get the blob index for the given blob name. + * @param blobName blob name. + * @return blob index for the given name. + */ + int getBlobIndex(const char *blobName) const; + + /** + * Check if binding is input. + * @param index binding index. + * @return whether the binding is input. 
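+ *
+ * A condensed usage sketch of the whole interface (the engine path is
+ * illustrative and device buffer allocation is elided):
+ * @code
+ * TRTBackend backend("model.engine", FP16);
+ * std::vector<void *> buffers(backend.getBlobCount());
+ * // ... cudaMalloc each buffers[i], sized from getBlobLinearSize(i) ...
+ * backend.infer(buffers.data(), 1, stream);
+ * @endcode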
+ */ + bool bindingIsInput(const int index) const; + +private: + // TRT related variables + std::unique_ptr m_pImpl; +}; + +} // namespace cvcore + +#endif // CVCORE_TRTBACKEND_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Array.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Array.cpp new file mode 100644 index 0000000..05cd535 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Array.cpp @@ -0,0 +1,145 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/core/Array.h" + +#include + +#include +#include + +namespace cvcore { + +ArrayBase::ArrayBase() + : m_data{nullptr} + , m_size{0} + , m_capacity{0} + , m_elemSize{0} + , m_isOwning{false} + , m_isCPU{true} +{ +} + +ArrayBase::ArrayBase(std::size_t capacity, std::size_t elemSize, void *dataPtr, bool isCPU) + : ArrayBase() +{ + m_isOwning = false; + m_isCPU = isCPU; + m_elemSize = elemSize; + m_capacity = capacity; + m_data = dataPtr; +} + +ArrayBase::ArrayBase(std::size_t capacity, std::size_t elemSize, bool isCPU) + : ArrayBase(capacity, elemSize, nullptr, isCPU) +{ + m_isOwning = true; + + // allocate + const size_t arraySize = capacity * elemSize; + if (arraySize > 0) + { + if (isCPU) + { + m_data = std::malloc(arraySize); + } + else + { + if (cudaMalloc(&m_data, arraySize) != 0) + { + throw std::runtime_error("CUDA alloc() failed!"); + } + } + } +} + +ArrayBase::~ArrayBase() +{ + if (m_isOwning) + { + if (m_isCPU) + { + std::free(m_data); + } + else + { + cudaFree(m_data); + } + } +} + +ArrayBase::ArrayBase(ArrayBase &&t) + : ArrayBase() +{ + *this = std::move(t); +} + +ArrayBase &ArrayBase::operator=(ArrayBase &&t) +{ + using std::swap; + + swap(m_data, t.m_data); + swap(m_size, t.m_size); + swap(m_capacity, t.m_capacity); + swap(m_elemSize, t.m_elemSize); + swap(m_isOwning, t.m_isOwning); + swap(m_isCPU, t.m_isCPU); + + return *this; +} + +void *ArrayBase::getElement(int idx) const +{ + assert(idx >= 0 && idx < m_capacity); + return reinterpret_cast(m_data) + idx * m_elemSize; +} + +std::size_t ArrayBase::getSize() const +{ + return m_size; +} +std::size_t ArrayBase::getCapacity() const +{ + return m_capacity; +} +std::size_t ArrayBase::getElementSize() const +{ + return m_elemSize; +} + +void ArrayBase::setSize(std::size_t size) +{ + assert(size <= m_capacity); + m_size = size; +} + +bool ArrayBase::isCPU() const +{ + return m_isCPU; +} + +bool ArrayBase::isOwning() const +{ + return m_isOwning; +} + +void *ArrayBase::getData() const +{ + return m_data; +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Dummy.cu b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Dummy.cu new file mode 100644 index 0000000..e69de29 diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp new file mode 100644 
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp
new file mode 100644
index 0000000..e0b4adc
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/MathTypes.cpp
@@ -0,0 +1,244 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/core/MathTypes.h"
+
+#include <cassert>
+#include <cmath>
+
+namespace cvcore {
+
+namespace {
+
+AxisAngleRotation RotationMatrixToAxisAngleUtil(const std::vector<double> &rotMatrix)
+{
+    assert(rotMatrix.size() == 9);
+    AxisAngleRotation axisangle;
+    double row0 = 0.5 * (rotMatrix[7] - rotMatrix[5]);
+    double row1 = 0.5 * (rotMatrix[2] - rotMatrix[6]);
+    double row2 = 0.5 * (rotMatrix[3] - rotMatrix[1]);
+
+    double sin_angle = std::sqrt(row0 * row0 + row1 * row1 + row2 * row2);
+    double cos_angle = 0.5 * (rotMatrix[0] + rotMatrix[4] + rotMatrix[8] - 1.0);
+    sin_angle = sin_angle > 1.0 ? 1.0 : sin_angle;
+    cos_angle = cos_angle > 1.0 ? 1.0 : (cos_angle < -1.0 ? -1.0 : cos_angle);
+
+    if (sin_angle == 0.0)
+    {
+        axisangle.angle  = 0;
+        axisangle.axis.x = 0;
+        axisangle.axis.y = 0;
+        axisangle.axis.z = 0;
+    }
+    else
+    {
+        axisangle.angle  = std::atan2(sin_angle, cos_angle);
+        axisangle.axis.x = row0 / sin_angle;
+        axisangle.axis.y = row1 / sin_angle;
+        axisangle.axis.z = row2 / sin_angle;
+    }
+    return axisangle;
+}
+} // namespace
+
+Vector3d RotationMatrixToRotationVector(const std::vector<double> &rotMatrix)
+{
+    AxisAngleRotation axisangle = RotationMatrixToAxisAngleUtil(rotMatrix);
+    Vector3d rotVector;
+    rotVector.x = axisangle.angle * axisangle.axis.x;
+    rotVector.y = axisangle.angle * axisangle.axis.y;
+    rotVector.z = axisangle.angle * axisangle.axis.z;
+    return rotVector;
+}
+
+AxisAngleRotation RotationMatrixToAxisAngleRotation(const std::vector<double> &rotMatrix)
+{
+    AxisAngleRotation axisangle = RotationMatrixToAxisAngleUtil(rotMatrix);
+    return axisangle;
+}
+
+std::vector<double> AxisAngleToRotationMatrix(const AxisAngleRotation &axisangle)
+{
+    std::vector<double> rotMatrix;
+    rotMatrix.resize(9);
+    double cosangle = std::cos(axisangle.angle);
+    double sinangle = std::sin(axisangle.angle);
+    double temp = 1.0 - cosangle;
+
+    rotMatrix[0] = cosangle + axisangle.axis.x * axisangle.axis.x * temp;
+    rotMatrix[4] = cosangle + axisangle.axis.y * axisangle.axis.y * temp;
+    rotMatrix[8] = cosangle + axisangle.axis.z * axisangle.axis.z * temp;
+
+    double value1 = axisangle.axis.x * axisangle.axis.y * temp;
+    double value2 = axisangle.axis.z * sinangle;
+    rotMatrix[3] = value1 + value2;
+    rotMatrix[1] = value1 - value2;
+    value1 = axisangle.axis.x * axisangle.axis.z * temp;
+    value2 = axisangle.axis.y * sinangle;
+    rotMatrix[6] = value1 - value2;
+    rotMatrix[2] = value1 + value2;
+    value1 = axisangle.axis.y * axisangle.axis.z * temp;
+    value2 = axisangle.axis.x * sinangle;
+    rotMatrix[7] = value1 + value2;
+    rotMatrix[5] = value1 - value2;
+    return rotMatrix;
+}
+
+Vector3d AxisAngleRotationToRotationVector(const AxisAngleRotation &axisangle)
+{
+    double angle = axisangle.angle;
+    Vector3d rotVector;
+    rotVector.x = angle * axisangle.axis.x;
+    rotVector.y = angle * axisangle.axis.y;
+    rotVector.z = angle * axisangle.axis.z;
+    return rotVector;
+}
+
+AxisAngleRotation RotationVectorToAxisAngleRotation(const Vector3d &rotVector)
+{
+    double normVector = rotVector.x * rotVector.x + rotVector.y * rotVector.y + rotVector.z * rotVector.z;
+    normVector = std::sqrt(normVector);
+    AxisAngleRotation axisangle;
+    if (normVector == 0)
+    {
+        axisangle.angle  = 0;
+        axisangle.axis.x = 0;
+        axisangle.axis.y = 0;
+        axisangle.axis.z = 0;
+    }
+    else
+    {
+        axisangle.angle  = normVector;
+        axisangle.axis.x = rotVector.x / normVector;
+        axisangle.axis.y = rotVector.y / normVector;
+        axisangle.axis.z = rotVector.z / normVector;
+    }
+    return axisangle;
+}
+
+Quaternion AxisAngleRotationToQuaternion(const AxisAngleRotation &axisangle)
+{
+    Quaternion qrotation;
+    qrotation.qx = axisangle.axis.x * std::sin(axisangle.angle / 2);
+    qrotation.qy = axisangle.axis.y * std::sin(axisangle.angle / 2);
+    qrotation.qz = axisangle.axis.z * std::sin(axisangle.angle / 2);
+    qrotation.qw = std::cos(axisangle.angle / 2);
+    return qrotation;
+}
+
+AxisAngleRotation QuaternionToAxisAngleRotation(const Quaternion &qrotation)
+{
+    Quaternion qtemp(qrotation.qx, qrotation.qy, qrotation.qz, qrotation.qw);
+    if (qrotation.qw > 1)
+    {
+        double qnorm = qrotation.qx * qrotation.qx + qrotation.qy * qrotation.qy + qrotation.qz * qrotation.qz +
+                       qrotation.qw * qrotation.qw;
+        qnorm    = std::sqrt(qnorm);
+        qtemp.qx = qrotation.qx / qnorm;
+        qtemp.qy = qrotation.qy / qnorm;
+        qtemp.qz = qrotation.qz / qnorm;
+        qtemp.qw = qrotation.qw / qnorm;
+    }
+    AxisAngleRotation axisangle;
+    axisangle.angle = 2 * std::acos(qtemp.qw);
+    double normaxis = std::sqrt(1 - qtemp.qw * qtemp.qw);
+    if (normaxis < 0.001)
+    {
+        axisangle.axis.x = qtemp.qx;
+        axisangle.axis.y = qtemp.qy;
+        axisangle.axis.z = qtemp.qz;
+    }
+    else
+    {
+        axisangle.axis.x = qtemp.qx / normaxis;
+        axisangle.axis.y = qtemp.qy / normaxis;
+        axisangle.axis.z = qtemp.qz / normaxis;
+    }
+    return axisangle;
+}
+
+std::vector<double> QuaternionToRotationMatrix(const Quaternion &qrotation)
+{
+    std::vector<double> rotMatrix;
+    rotMatrix.resize(9);
+    double qxsquare = qrotation.qx * qrotation.qx;
+    double qysquare = qrotation.qy * qrotation.qy;
+    double qzsquare = qrotation.qz * qrotation.qz;
+    double qwsquare = qrotation.qw * qrotation.qw;
+
+    // Ensure quaternion is normalized
+    double inversenorm = 1 / (qxsquare + qysquare + qzsquare + qwsquare);
+    rotMatrix[0] = (qxsquare - qysquare - qzsquare + qwsquare) * inversenorm;
+    rotMatrix[4] = (-qxsquare + qysquare - qzsquare + qwsquare) * inversenorm;
+    rotMatrix[8] = (-qxsquare - qysquare + qzsquare + qwsquare) * inversenorm;
+
+    double value1 = qrotation.qx * qrotation.qy;
+    double value2 = qrotation.qz * qrotation.qw;
+    rotMatrix[3] = 2.0 * (value1 + value2) * inversenorm;
+    rotMatrix[1] = 2.0 * (value1 - value2) * inversenorm;
+
+    value1 = qrotation.qx * qrotation.qz;
+    value2 = qrotation.qy * qrotation.qw;
+    rotMatrix[6] = 2.0 * (value1 - value2) * inversenorm;
+    rotMatrix[2] = 2.0 * (value1 + value2) * inversenorm;
+    value1 = qrotation.qz * qrotation.qy;
+    value2 = qrotation.qx * qrotation.qw;
+    rotMatrix[7] = 2.0 * (value1 + value2) * inversenorm;
+    rotMatrix[5] = 2.0 * (value1 - value2) * inversenorm;
+    return rotMatrix;
+}
+
+Quaternion RotationMatrixToQuaternion(const std::vector<double> &rotMatrix)
+{
+    Quaternion qrotation;
+    double diagsum = rotMatrix[0] + rotMatrix[4] + rotMatrix[8];
+    if (diagsum > 0)
+    {
+        double temp  = 1 / (2 * std::sqrt(diagsum + 1.0));
+        qrotation.qw = 0.25 / temp;
+        qrotation.qx = (rotMatrix[7] - rotMatrix[5]) * temp;
+        qrotation.qy = (rotMatrix[2] - rotMatrix[6]) * temp;
+        qrotation.qz = (rotMatrix[3] - rotMatrix[1]) * temp;
+    }
+    else
+    {
+        // Pick the dominant diagonal element for numerical stability.
+        if (rotMatrix[0] > rotMatrix[4] && rotMatrix[0] > rotMatrix[8])
+        {
+            double temp  = 2 * std::sqrt(rotMatrix[0] - rotMatrix[4] - rotMatrix[8] + 1.0);
+            qrotation.qx = 0.25 * temp;
+            qrotation.qw = (rotMatrix[7] - rotMatrix[5]) / temp;
+            qrotation.qy = (rotMatrix[1] + rotMatrix[3]) / temp;
+            qrotation.qz = (rotMatrix[2] + rotMatrix[6]) / temp;
+        }
+        else if (rotMatrix[4] > rotMatrix[8])
+        {
+            double temp  = 2 * std::sqrt(rotMatrix[4] - rotMatrix[0] - rotMatrix[8] + 1.0);
+            qrotation.qy = 0.25 * temp;
+            qrotation.qw = (rotMatrix[2] - rotMatrix[6]) / temp;
+            qrotation.qx = (rotMatrix[1] + rotMatrix[3]) / temp;
+            qrotation.qz = (rotMatrix[5] + rotMatrix[7]) / temp;
+        }
+        else
+        {
+            double temp  = 2 * std::sqrt(rotMatrix[8] - rotMatrix[4] - rotMatrix[0] + 1.0);
+            qrotation.qz = 0.25 * temp;
+            qrotation.qw = (rotMatrix[3] - rotMatrix[1]) / temp;
+            qrotation.qx = (rotMatrix[2] + rotMatrix[6]) / temp;
+            qrotation.qy = (rotMatrix[5] + rotMatrix[7]) / temp;
+        }
+    }
+    return qrotation;
+}
+
+} // namespace cvcore
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Tensor.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Tensor.cpp
new file mode 100644
index 0000000..83ede60
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/cvcore/Tensor.cpp
@@ -0,0 +1,270 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
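The conversion utilities above are easiest to sanity-check as a round trip. The sketch below goes axis-angle → quaternion → axis-angle and assumes only the type definitions in cv/core/MathTypes.h, with the member names used in this file.

```cpp
#include <cassert>
#include <cmath>
#include "cv/core/MathTypes.h"

void RotationRoundTripSketch()
{
    cvcore::AxisAngleRotation aa;
    aa.angle  = std::acos(-1.0) / 3.0; // 60 degrees
    aa.axis.x = 0.0;
    aa.axis.y = 0.0;
    aa.axis.z = 1.0;                   // unit Z axis

    // Axis-angle -> quaternion -> axis-angle should reproduce the input
    // (for angles in (0, pi) and a unit axis).
    cvcore::Quaternion q = cvcore::AxisAngleRotationToQuaternion(aa);
    cvcore::AxisAngleRotation back = cvcore::QuaternionToAxisAngleRotation(q);

    assert(std::abs(back.angle - aa.angle) < 1e-9);
    assert(std::abs(back.axis.z - aa.axis.z) < 1e-9);
}
```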
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/core/Tensor.h" + +#include + +#include +#include +#include +#include +#include + +namespace cvcore { + +TensorBase::TensorBase() + : m_data(nullptr) + , m_dimCount(0) + , m_type(U8) + , m_isOwning(false) + , m_isCPU(true) +{ + for (int i = 0; i < kMaxDimCount; ++i) + { + m_dimData[i] = {0, 0}; + } +} + +TensorBase::TensorBase(ChannelType type, const DimData *dimData, int dimCount, void *dataPtr, bool isCPU) + : TensorBase() +{ + assert(dimCount >= kMinDimCount && dimCount <= kMaxDimCount); + + m_isOwning = false; + m_isCPU = isCPU; + + m_type = type; + m_dimCount = dimCount; + for (int i = 0; i < dimCount; ++i) + { + m_dimData[i] = dimData[i]; + } + + m_data = dataPtr; +} + +TensorBase::TensorBase(ChannelType type, std::initializer_list dimData, void *dataPtr, bool isCPU) + : TensorBase(type, dimData.begin(), dimData.size(), dataPtr, isCPU) +{ +} + +TensorBase::TensorBase(ChannelType type, const DimData *dimData, int dimCount, bool isCPU) + : TensorBase(type, dimData, dimCount, nullptr, isCPU) +{ + m_isOwning = true; + + // compute tensor memory block size + const std::size_t tensorSize = getDataSize(); + + // allocate + if (isCPU) + { + m_data = std::malloc(tensorSize); + } + else + { + if (cudaMalloc(&m_data, tensorSize) != 0) + { + throw std::runtime_error("CUDA alloc() failed!"); + } + } +} + +TensorBase::TensorBase(ChannelType type, std::initializer_list dimData, bool isCPU) + : TensorBase(type, dimData.begin(), dimData.size(), isCPU) +{ +} + +TensorBase::~TensorBase() +{ + if (m_isOwning) + { + if (m_isCPU) + { + std::free(m_data); + } + else + { + cudaFree(m_data); + } + } +} + +TensorBase::TensorBase(TensorBase &&t) + : TensorBase() +{ + *this = std::move(t); +} + +TensorBase &TensorBase::operator=(TensorBase &&t) +{ + using std::swap; + + swap(m_data, t.m_data); + swap(m_dimCount, t.m_dimCount); + swap(m_type, t.m_type); + swap(m_isOwning, t.m_isOwning); + swap(m_isCPU, t.m_isCPU); + + for (int i = 0; i < kMaxDimCount; ++i) + { + swap(m_dimData[i], t.m_dimData[i]); + } + + return *this; +} + +int TensorBase::getDimCount() const +{ + return m_dimCount; +} + +std::size_t TensorBase::getSize(int dimIdx) const +{ + assert(dimIdx >= 0 && dimIdx < m_dimCount); + return m_dimData[dimIdx].size; +} + +std::size_t TensorBase::getStride(int dimIdx) const +{ + assert(dimIdx >= 0 && dimIdx < m_dimCount); + return m_dimData[dimIdx].stride; +} + +ChannelType TensorBase::getType() const +{ + return m_type; +} + +void *TensorBase::getData() const +{ + return m_data; +} + +std::size_t TensorBase::getDataSize() const +{ + std::size_t tensorSize = m_dimData[0].size * m_dimData[0].stride; + for (int i = 1; i < m_dimCount; ++i) + { + tensorSize = std::max(tensorSize, m_dimData[i].size * m_dimData[i].stride); + } + tensorSize *= GetChannelSize(m_type); + return tensorSize; +} + +bool TensorBase::isCPU() const +{ + return m_isCPU; +} + +bool TensorBase::isOwning() const +{ + return m_isOwning; +} + +std::string GetTensorLayoutAsString(TensorLayout TL) +{ + switch (TL) + { + case TensorLayout::CL: + return "CL"; + case TensorLayout::LC: + return "LC"; + case TensorLayout::HWC: + return "HWC"; + case TensorLayout::CHW: + return "CHW"; + case TensorLayout::DHWC: + return "DHWC"; + case TensorLayout::DCHW: + return "DCHW"; + case TensorLayout::CDHW: + return "CDHW"; + default: + throw std::runtime_error("Invalid TensorLayout"); + } +} + +std::string GetChannelCountAsString(ChannelCount CC) +{ + switch (CC) + { + case ChannelCount::C1: + return "C1"; 
+ case ChannelCount::C2: + return "C2"; + case ChannelCount::C3: + return "C3"; + case ChannelCount::C4: + return "C4"; + case ChannelCount::CX: + return "CX"; + default: + throw std::runtime_error("Invalid ChannelCount"); + } +} + +std::string GetChannelTypeAsString(ChannelType CT) +{ + switch (CT) + { + case ChannelType::U8: + return "U8"; + case ChannelType::U16: + return "U16"; + case ChannelType::S8: + return "S8"; + case ChannelType::S16: + return "S16"; + case ChannelType::F16: + return "F16"; + case ChannelType::F32: + return "F32"; + case ChannelType::F64: + return "F64"; + default: + throw std::runtime_error("Invalid ChannelType"); + } +} + +std::size_t GetChannelSize(ChannelType CT) +{ + switch (CT) + { + case U8: + case S8: + return 1; + case F16: + case U16: + case S16: + return 2; + case F32: + return 4; + case F64: + return 8; + default: + throw std::runtime_error("Invalid ChannelType"); + } +} + +std::string GetMemoryTypeAsString(bool isCPU) +{ + return isCPU? "CPU" : "GPU"; +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/CVError.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/CVError.cpp new file mode 100644 index 0000000..a6e62c7 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/CVError.cpp @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
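To make the stride bookkeeping in TensorBase concrete, here is a small sketch. It assumes DimData is the {size, stride} pair used throughout this file, and that strides count elements rather than bytes, which is what getDataSize() implies by multiplying with GetChannelSize() only at the end.

```cpp
#include <cassert>
#include "cv/core/Tensor.h"

void TensorBaseSketch()
{
    // A 4x4 interleaved RGB (HWC) U8 tensor, owning CPU memory:
    // rows are 12 elements apart, pixels 3 apart, channels contiguous.
    cvcore::TensorBase t(cvcore::U8, {{4, 12}, {4, 3}, {3, 1}}, /*isCPU=*/true);

    assert(t.getDimCount() == 3);
    assert(t.getDataSize() == 4 * 12 * cvcore::GetChannelSize(cvcore::U8));
    assert(cvcore::GetTensorLayoutAsString(cvcore::TensorLayout::HWC) == "HWC");
}
```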
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/core/CVError.h" + +#include +#include +#include + +#ifndef __cpp_lib_to_underlying +// Using a C++23 feature by hacking std +namespace std +{ + template + constexpr underlying_type_t to_underlying(Enum e) noexcept + { + return static_cast>(e); + } +}; +#endif // __cpp_lib_to_underlying + +namespace cvcore { + +namespace detail +{ + struct CoreErrorCategory : std::error_category + { + virtual const char * name() const noexcept override final + { + return "cvcore-error"; + } + + virtual std::string message(int value) const override final + { + std::string result; + + switch(value) + { + case std::to_underlying(ErrorCode::SUCCESS): + result = "(SUCCESS) No errors detected"; + break; + case std::to_underlying(ErrorCode::NOT_READY): + result = "(NOT_READY) The execution of the requested " + "operation is not to return"; + break; + case std::to_underlying(ErrorCode::NOT_IMPLEMENTED): + result = "(NOT_IMPLEMENTED) The requested operation is not " + "implemented"; + break; + case std::to_underlying(ErrorCode::INVALID_ARGUMENT): + result = "(INVALID_ARGUMENT) The argument provided to the " + "operation is not currently supported"; + break; + case std::to_underlying(ErrorCode::INVALID_IMAGE_FORMAT): + result = "(INVALID_IMAGE_FORMAT) The requested image format " + "is not supported by the operation"; + break; + case std::to_underlying(ErrorCode::INVALID_STORAGE_TYPE): + result = "(INVALID_STORAGE_TYPE) The requested storage type " + "is not supported by the operation"; + break; + case std::to_underlying(ErrorCode::INVALID_ENGINE_TYPE): + result = "(INVALID_ENGINE_TYPE) The requested engine type " + "is not supported by the operation"; + break; + case std::to_underlying(ErrorCode::INVALID_OPERATION): + result = "(INVALID_OPERATION) The requested operation is " + "not supported"; + break; + case std::to_underlying(ErrorCode::DETECTED_NAN_IN_RESULT): + result = "(DETECTED_NAN_IN_RESULT) NaN was detected in the " + "return value of the operation"; + break; + case std::to_underlying(ErrorCode::OUT_OF_MEMORY): + result = "(OUT_OF_MEMORY) The device has run out of memory"; + break; + case std::to_underlying(ErrorCode::DEVICE_ERROR): + result = "(DEVICE_ERROR) A device level error has been " + "encountered"; + break; + case std::to_underlying(ErrorCode::SYSTEM_ERROR): + result = "(SYSTEM_ERROR) A system level error has been " + "encountered"; + break; + default: + result = "(Unrecognized Condition) Value " + std::to_string(value) + + " does not map to known error code literal " + + " defined by cvcore::ErrorCode"; + break; + } + + return result; + } + }; +} // namespace detail + +const detail::CoreErrorCategory errorCategory{}; + +std::error_condition make_error_condition(ErrorCode ec) noexcept +{ + return {std::to_underlying(ec), errorCategory}; +} + +std::error_code make_error_code(ErrorCode ec) noexcept +{ + return {std::to_underlying(ec), errorCategory}; +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Instrumentation.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Instrumentation.cpp new file mode 100644 index 0000000..583b646 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Instrumentation.cpp @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
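Because the category above plugs ErrorCode into &lt;system_error&gt;, call sites can treat cvcore failures like any other std::error_code. A short sketch using only what this file defines:

```cpp
#include <iostream>
#include <system_error>
#include "cv/core/CVError.h"

void ErrorCodeSketch()
{
    std::error_code ec = cvcore::make_error_code(cvcore::ErrorCode::OUT_OF_MEMORY);

    // Prints: cvcore-error: (OUT_OF_MEMORY) The device has run out of memory
    std::cout << ec.category().name() << ": " << ec.message() << "\n";

    // Conditions compare through the same category.
    if (ec == cvcore::make_error_condition(cvcore::ErrorCode::OUT_OF_MEMORY))
    {
        // handle allocation failure
    }
}
```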
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/core/Instrumentation.h" + +#ifdef NVBENCH_ENABLE +#include +#include +#include +#endif + +namespace cvcore { namespace profiler { + +#ifdef NVBENCH_ENABLE +nv::bench::JsonHelper mapProfilerJsonOutputTypeToNvbenchType(ProfilerJsonOutputType jsonType) +{ + nv::bench::JsonHelper nvbenchJsonOutputType = nv::bench::JsonHelper::JSON_OFF; + if (jsonType == ProfilerJsonOutputType::JSON_OFF) + { + nvbenchJsonOutputType = nv::bench::JsonHelper::JSON_OFF; + } + else if (jsonType == ProfilerJsonOutputType::JSON_SEPARATE) + { + nvbenchJsonOutputType = nv::bench::JsonHelper::JSON_SEPARATE; + } + else if (jsonType == ProfilerJsonOutputType::JSON_AGGREGATE) + { + nvbenchJsonOutputType = nv::bench::JsonHelper::JSON_AGGREGATE; + } + return nvbenchJsonOutputType; +} +#endif + +void flush(const std::string& filename, ProfilerJsonOutputType jsonType) +{ +#ifdef NVBENCH_ENABLE + nv::bench::JsonHelper nvbenchJsonOutputType = mapProfilerJsonOutputTypeToNvbenchType(jsonType); + if (!filename.empty()) + { + nv::bench::Pool::instance().flushToFile(filename.c_str(), -1, INT_MAX, nvbenchJsonOutputType); + } + else + { + nv::bench::Pool::instance().flush(std::clog, -1, INT_MAX, nvbenchJsonOutputType); + } +#else + return; +#endif + +} + +void flush(std::ostream& output, ProfilerJsonOutputType jsonType) +{ +#ifdef NVBENCH_ENABLE + nv::bench::JsonHelper nvbenchJsonOutputType = mapProfilerJsonOutputTypeToNvbenchType(jsonType); + nv::bench::Pool::instance().flush(output, -1, INT_MAX, nvbenchJsonOutputType); +#else + return; +#endif +} + +void flush(ProfilerJsonOutputType jsonType) +{ +#ifdef NVBENCH_ENABLE + nv::bench::JsonHelper nvbenchJsonOutputType = mapProfilerJsonOutputTypeToNvbenchType(jsonType); + nv::bench::Pool::instance().flush(std::clog, -1, INT_MAX, nvbenchJsonOutputType); +#else + return; +#endif +} + +void clear() +{ +#ifdef NVBENCH_ENABLE + nv::bench::Pool::instance().clear(); +#else + return; +#endif +} + +}} diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Memory.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Memory.cpp new file mode 100644 index 0000000..e75a614 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/Memory.cpp @@ -0,0 +1,124 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
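Since every entry point above degrades to a no-op without NVBENCH_ENABLE, callers can flush unconditionally. A sketch, with an illustrative output filename:

```cpp
#include "cv/core/Instrumentation.h"

void ProfilerFlushSketch()
{
    using namespace cvcore::profiler;

    // ... run an instrumented workload here ...

    // With NVBENCH_ENABLE this writes aggregated timings as JSON to the file;
    // without it, both calls compile to no-ops.
    flush("ess_profile.json", ProfilerJsonOutputType::JSON_AGGREGATE);
    clear(); // drop accumulated samples
}
```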
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/core/Memory.h" + +#include + +#include +#include +#include + +namespace cvcore { + +namespace { + +// Copy 2D CPU pitch linear tensors +void Memcpy2DCPU(void *dst, size_t dstPitch, const void *src, size_t srcPitch, size_t widthInBytes, size_t height) +{ + uint8_t *dstPt = reinterpret_cast(dst); + const uint8_t *srcPt = reinterpret_cast(src); + for (size_t i = 0; i < height; i++) + { + memcpy(dstPt, srcPt, widthInBytes); + dstPt += dstPitch; + srcPt += srcPitch; + } +} + +} // anonymous namespace + +void TensorBaseCopy(TensorBase &dst, const TensorBase &src, cudaStream_t stream) +{ + if (dst.getDataSize() != src.getDataSize()) + { + throw std::runtime_error("Tensor stride mismatch!"); + } + assert(dst.getDimCount() == src.getDimCount()); + int dimCount = src.getDimCount(); + for (int i = 0; i < dimCount - 1; i++) + { + if (src.getStride(i) != src.getStride(i + 1) * src.getSize(i + 1) || + dst.getStride(i) != dst.getStride(i + 1) * dst.getSize(i + 1)) + { + throw std::runtime_error("Tensor is not contiguous in memory!"); + } + } + if (dst.isCPU() && src.isCPU()) + { + memcpy(dst.getData(), src.getData(), src.getDataSize()); + return; + } + cudaError_t error; + if (!dst.isCPU() && src.isCPU()) + { + error = cudaMemcpyAsync(dst.getData(), src.getData(), src.getDataSize(), cudaMemcpyHostToDevice, stream); + } + else if (dst.isCPU() && !src.isCPU()) + { + error = cudaMemcpyAsync(dst.getData(), src.getData(), src.getDataSize(), cudaMemcpyDeviceToHost, stream); + } + else + { + error = cudaMemcpyAsync(dst.getData(), src.getData(), src.getDataSize(), cudaMemcpyDeviceToDevice, stream); + } + if (error != cudaSuccess) + { + throw std::runtime_error("CUDA memcpy failed!"); + } +} + +void TensorBaseCopy2D(TensorBase &dst, const TensorBase &src, int dstPitch, int srcPitch, int widthInBytes, int height, + cudaStream_t stream) +{ + assert(dst.getDimCount() == src.getDimCount()); + int dimCount = src.getDimCount(); + for (int i = 0; i < dimCount; i++) + { + if (dst.getSize(i) != src.getSize(i)) + { + throw std::runtime_error("Tensor size mismatch!"); + } + } + if (dst.isCPU() && src.isCPU()) + { + Memcpy2DCPU(dst.getData(), dstPitch, src.getData(), srcPitch, widthInBytes, height); + return; + } + cudaError_t error; + if (!dst.isCPU() && src.isCPU()) + { + error = cudaMemcpy2DAsync(dst.getData(), dstPitch, src.getData(), srcPitch, widthInBytes, height, + cudaMemcpyHostToDevice, stream); + } + else if (dst.isCPU() && !src.isCPU()) + { + error = cudaMemcpy2DAsync(dst.getData(), dstPitch, src.getData(), srcPitch, widthInBytes, height, + cudaMemcpyDeviceToHost, stream); + } + else + { + error = cudaMemcpy2DAsync(dst.getData(), dstPitch, src.getData(), srcPitch, widthInBytes, height, + cudaMemcpyDeviceToDevice, stream); + } + if (error != cudaSuccess) + { + throw std::runtime_error("CUDA memcpy failed!"); + } +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/ProfileUtils.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/ProfileUtils.cpp new file mode 100644 index 0000000..233ae19 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/core/utility/ProfileUtils.cpp @@ -0,0 +1,127 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
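Both helpers above enqueue asynchronous CUDA copies on the GPU paths, so synchronization is the caller's job. A device-to-host readback sketch, again assuming DimData is the nested {size, stride} pair from cv/core/Tensor.h:

```cpp
#include <cuda_runtime.h>
#include "cv/core/Memory.h"
#include "cv/core/Tensor.h"

void ReadbackSketch()
{
    // Contiguous 480x640 F32 tensors, one on the GPU and one on the CPU.
    cvcore::TensorBase device(cvcore::F32, {{480, 640}, {640, 1}}, /*isCPU=*/false);
    cvcore::TensorBase host(cvcore::F32, {{480, 640}, {640, 1}}, /*isCPU=*/true);

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Enqueues cudaMemcpyAsync(DeviceToHost); throws if the sizes mismatch
    // or either tensor is non-contiguous.
    cvcore::TensorBaseCopy(host, device, stream);

    cudaStreamSynchronize(stream); // host data is valid only after this
    cudaStreamDestroy(stream);
}
```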
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include +#include +#include + +#if defined(_MSC_VER) || defined(__WIN32) +# include +# include +# include +#endif + +using json = nlohmann::json; + +namespace cvcore { + +namespace { + +#if defined(_MSC_VER) || defined(__WIN32) +std::string GetCPUName() +{ + // https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2008/hskdteyh(v=vs.90)?redirectedfrom=MSDN + char CPUBrandString[0x40]; + int CPUInfo[4] = {-1}; + + // Calling __cpuid with 0x80000000 as the InfoType argument + // gets the number of valid extended IDs. + __cpuid(CPUInfo, 0x80000000); + unsigned i, nExIds = CPUInfo[0]; + memset(CPUBrandString, 0, sizeof(CPUBrandString)); + + // Get the information associated with each extended ID. + for (i=0x80000000; i<=nExIds; ++i) + { + __cpuid(CPUInfo, i); + + // Interpret CPU brand string and cache information. + if (i == 0x80000002) + memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo)); + else if (i == 0x80000003) + memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo)); + else if (i == 0x80000004) + memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo)); + } + return CPUBrandString; +} +#else +std::string GetCPUName() +{ + std::ifstream cpuInfo("/proc/cpuinfo"); + if (!cpuInfo.good()) + { + throw std::runtime_error("unable to retrieve cpu info"); + } + std::string line; + while (std::getline(cpuInfo, line)) + { + int delimiterPos = line.find(':'); + if (delimiterPos != std::string::npos) + { + std::string key = line.substr(0, delimiterPos); + if (key.find("model name") != std::string::npos) + { + std::string info = line.substr(delimiterPos + 1); + info.erase(0, info.find_first_not_of(' ')); + return info; + } + } + } + return "CPU"; // default name if no cpu model name retrieved +} +#endif + +std::string GetGPUName() +{ + int deviceId; + cudaGetDevice(&deviceId); + cudaDeviceProp prop; + cudaError_t error = cudaGetDeviceProperties(&prop, deviceId); + if (error != 0) + { + throw std::runtime_error("unable to retrieve cuda device info"); + } + return std::string(prop.name); +} + +} // anonymous namespace + +void ExportToJson(const std::string outputPath, const std::string taskName, float tMin, float tMax, float tAvg, + bool isCPU, int iterations = 100) +{ + std::ifstream in(outputPath); + json jsonHandler; + if (in.good()) + { + in >> jsonHandler; + } + in.close(); + + const std::string platform = isCPU ? "CPU: " + GetCPUName() : "GPU: " + GetGPUName(); + jsonHandler[platform][taskName] = {{"iter", iterations}, {"min", tMin}, {"max", tMax}, {"avg", tAvg}}; + + std::ofstream out(outputPath); + out << std::setw(4) << jsonHandler << std::endl; + out.close(); +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Errors.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Errors.cpp new file mode 100644 index 0000000..f6135a9 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Errors.cpp @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
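ExportToJson merges into any existing file before writing, so repeated benchmark runs accumulate under one platform key. A sketch with illustrative timings, assuming the declaration lives in cv/core/ProfileUtils.h:

```cpp
#include "cv/core/ProfileUtils.h"

void ProfileExportSketch()
{
    // Appends {"GPU: <device name>": {"ESSInference": {iter, min, max, avg}}}
    // to ess_benchmark.json, creating the file if needed.
    cvcore::ExportToJson("ess_benchmark.json", "ESSInference",
                         11.2f, 14.8f, 12.3f, /*isCPU=*/false, /*iterations=*/100);
}
```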
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/inferencer/Errors.h" + +#ifndef __cpp_lib_to_underlying +namespace std { +template +constexpr underlying_type_t to_underlying(Enum e) noexcept +{ + return static_cast>(e); +} +}; // namespace std +#endif // __cpp_lib_to_underlying + +namespace cvcore { namespace inferencer { + +namespace detail { +struct InferencerErrorCategory : std::error_category +{ + virtual const char *name() const noexcept override final + { + return "cvcore-inferencer-error"; + } + + virtual std::string message(int value) const override final + { + std::string result; + + switch (value) + { + case std::to_underlying(InferencerErrorCode::SUCCESS): + result = "(SUCCESS) No errors detected"; + break; + case std::to_underlying(InferencerErrorCode::INVALID_ARGUMENT): + result = "(INVALID_ARGUMENT) Invalid config parameter or input argument"; + break; + case std::to_underlying(InferencerErrorCode::INVALID_OPERATION): + result = "(INVALID_OPERATION) Invalid operation performed"; + break; + case std::to_underlying(InferencerErrorCode::TRITON_SERVER_NOT_READY): + result = "(TRITON_SERVER_NOT_READY) Triton server is not live or the serverUrl is incorrect"; + break; + case std::to_underlying(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR): + result = "(TRITON_CUDA_SHARED_MEMORY_ERROR) Unable to map/unmap cuda shared memory for triton server"; + break; + case std::to_underlying(InferencerErrorCode::TRITON_INFERENCE_ERROR): + result = "(TRITON_INFERENCE_ERROR) Error during inference using triton API"; + break; + case std::to_underlying(InferencerErrorCode::TRITON_REGISTER_LAYER_ERROR): + result = "(TRITON_REGISTER_LAYER_ERROR) Error when setting input or output layers"; + break; + case std::to_underlying(InferencerErrorCode::TENSORRT_INFERENCE_ERROR): + result = "(TENSORRT_INFERENCE_ERROR) Error when running TensorRT enqueue/execute"; + break; + default: + result = "(Unrecognized Condition) Value " + std::to_string(value) + + " does not map to known error code literal " + + " defined by cvcore::inferencer::InferencerErrorCode"; + break; + } + + return result; + } + + virtual std::error_condition default_error_condition(int code) const noexcept override final + { + std::error_condition result; + + switch (code) + { + case std::to_underlying(InferencerErrorCode::SUCCESS): + result = ErrorCode::SUCCESS; + break; + case std::to_underlying(InferencerErrorCode::INVALID_ARGUMENT): + result = ErrorCode::INVALID_ARGUMENT; + break; + case std::to_underlying(InferencerErrorCode::INVALID_OPERATION): + result = ErrorCode::INVALID_OPERATION; + break; + case std::to_underlying(InferencerErrorCode::NOT_IMPLEMENTED): + result = ErrorCode::NOT_IMPLEMENTED; + break; + case std::to_underlying(InferencerErrorCode::TRITON_SERVER_NOT_READY): + result = ErrorCode::NOT_READY; + break; + case std::to_underlying(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR): + result = ErrorCode::DEVICE_ERROR; + break; + case 
std::to_underlying(InferencerErrorCode::TRITON_INFERENCE_ERROR): + result = ErrorCode::INVALID_OPERATION; + break; + case std::to_underlying(InferencerErrorCode::TENSORRT_INFERENCE_ERROR): + result = ErrorCode::INVALID_OPERATION; + break; + case std::to_underlying(InferencerErrorCode::TRITON_REGISTER_LAYER_ERROR): + result = ErrorCode::INVALID_OPERATION; + break; + default: + result = ErrorCode::NOT_IMPLEMENTED; + break; + } + + return result; + } +}; +} // namespace detail + +const detail::InferencerErrorCategory errorCategory{}; + +std::error_code make_error_code(InferencerErrorCode ec) noexcept +{ + return {std::to_underlying(ec), errorCategory}; +} +}} // namespace cvcore::inferencer diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Inferencer.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Inferencer.cpp new file mode 100644 index 0000000..2ce6a2d --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/Inferencer.cpp @@ -0,0 +1,130 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/inferencer/Inferencer.h" + +#include + +#include "cv/inferencer/Errors.h" +#include "cv/inferencer/IInferenceBackend.h" +#include "tensorrt/TensorRTInferencer.h" +#include "triton/TritonGrpcInferencer.h" + +namespace cvcore { namespace inferencer { +std::mutex InferenceBackendFactory::inferenceMutex; + +#ifdef ENABLE_TRITON +std::unordered_map> + InferenceBackendFactory::tritonRemoteMap; + +std::error_code InferenceBackendFactory::CreateTritonRemoteInferenceBackendClient( + InferenceBackendClient &client, const TritonRemoteInferenceParams ¶ms) +{ + std::lock_guard instanceLock(inferenceMutex); + + if (params.protocolType == BackendProtocol::HTTP) + { + return ErrorCode::NOT_IMPLEMENTED; + } + std::error_code result = ErrorCode::SUCCESS; + std::string hashString = params.serverUrl + params.modelName + params.modelVersion; + + try + { + if (tritonRemoteMap.find(hashString) != tritonRemoteMap.end()) + { + client = tritonRemoteMap[hashString].second; + tritonRemoteMap[hashString].first++; + } + else + { + tritonRemoteMap[hashString] = + std::make_pair(1, new TritonGrpcInferencer(params)); + } + } + catch (std::error_code &e) + { + result = e; + } + catch (...) 
+ { + result = ErrorCode::INVALID_ARGUMENT; + } + client = tritonRemoteMap[hashString].second; + return result; +} + +std::error_code InferenceBackendFactory::DestroyTritonRemoteInferenceBackendClient(InferenceBackendClient &client) +{ + std::lock_guard instanceLock(inferenceMutex); + for (auto &it : tritonRemoteMap) + { + if (it.second.second == client) + { + it.second.first--; + if (it.second.first == 0) + { + tritonRemoteMap.erase(it.first); + client->unregister(); + delete client; + client = nullptr; + } + break; + } + } + client = nullptr; + return ErrorCode::SUCCESS; +} +#endif + +std::error_code InferenceBackendFactory::CreateTensorRTInferenceBackendClient(InferenceBackendClient &client, + const TensorRTInferenceParams ¶ms) +{ + + std::lock_guard instanceLock(inferenceMutex); + std::error_code result = ErrorCode::SUCCESS; + try + { + client = new TensorRTInferencer(params); + } + catch (std::error_code &e) + { + result = e; + } + catch (...) + { + result = ErrorCode::INVALID_ARGUMENT; + } + return result; +} + +std::error_code InferenceBackendFactory::DestroyTensorRTInferenceBackendClient(InferenceBackendClient &client) +{ + + std::lock_guard instanceLock(inferenceMutex); + if (client != nullptr) + { + client->unregister(); + delete client; + client = nullptr; + } + client = nullptr; + + return ErrorCode::SUCCESS; +} + +}} // namespace cvcore::inferencer diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.cpp new file mode 100644 index 0000000..db5a0d0 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.cpp @@ -0,0 +1,275 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#include "TensorRTInferencer.h" +#include +#include +#include "TensorRTUtils.h" + +#include "cv/inferencer/Errors.h" +#include "cv/inferencer/IInferenceBackend.h" +#include "cv/inferencer/Inferencer.h" + +namespace cvcore { namespace inferencer { + +namespace { +size_t getDataSize(const std::vector &shape, cvcore::ChannelType dataType) +{ + size_t layerShape = 1; + for (size_t k = 0; k < shape.size(); k++) + layerShape *= shape[k] <= 0 ? 
1 : shape[k]; + + return layerShape * GetChannelSize(dataType); +} +} // namespace + +std::error_code TensorRTInferencer::getLayerInfo(LayerInfo &layer, std::string layerName) +{ + layer.name = layerName; + layer.index = m_inferenceEngine->getBindingIndex(layerName.c_str()); + auto dim = m_inferenceEngine->getBindingDimensions(layer.index); + nvinfer1::TensorFormat tensorFormat = m_inferenceEngine->getBindingFormat(layer.index); + + std::error_code err; + err = getCVCoreChannelLayoutFromTensorRT(layer.layout, tensorFormat); + if (err != cvcore::make_error_code(ErrorCode::SUCCESS)) + { + return ErrorCode::INVALID_ARGUMENT; + } + + for (size_t cnt = 0; cnt < dim.nbDims; cnt++) + { + layer.shape.push_back(dim.d[cnt]); + } + + err = getCVCoreChannelTypeFromTensorRT(layer.dataType, m_inferenceEngine->getBindingDataType(layer.index)); + layer.layerSize = getDataSize(layer.shape, layer.dataType); + if (err != cvcore::make_error_code(ErrorCode::SUCCESS)) + { + return ErrorCode::INVALID_ARGUMENT; + } + + return ErrorCode::SUCCESS; +} + +std::error_code TensorRTInferencer::ParseTRTModel() +{ + + m_modelInfo.modelName = m_inferenceEngine->getName(); + m_modelInfo.modelVersion = ""; + m_modelInfo.maxBatchSize = m_maxBatchSize; + std::error_code err; + for (size_t i = 0; i < m_inputLayers.size(); i++) + { + LayerInfo layer; + err = getLayerInfo(layer, m_inputLayers[i]); + if (err != cvcore::make_error_code(cvcore::ErrorCode::SUCCESS)) + { + return err; + } + m_modelInfo.inputLayers[layer.name] = layer; + } + for (size_t i = 0; i < m_outputLayers.size(); i++) + { + LayerInfo layer; + err = getLayerInfo(layer, m_outputLayers[i]); + if (err != cvcore::make_error_code(cvcore::ErrorCode::SUCCESS)) + { + return err; + } + m_modelInfo.outputLayers[layer.name] = layer; + } + + return ErrorCode::SUCCESS; +} + +TensorRTInferencer::TensorRTInferencer(const TensorRTInferenceParams ¶ms) + : m_logger(new TRTLogger()) + , m_maxBatchSize(params.maxBatchSize) + , m_inputLayers(params.inputLayerNames) + , m_outputLayers(params.outputLayerNames) + , m_cudaStream(0) + , m_inferenceEngine(nullptr) +{ + + if (params.inferType == TRTInferenceType::TRT_ENGINE) + { + std::ifstream trtModelFStream(params.engineFilePath, std::ios::binary); + std::unique_ptr trtModelContent; + size_t trtModelContentSize = 0; + + if (!trtModelFStream.good()) + { + throw ErrorCode::INVALID_ARGUMENT; + } + else + { + trtModelFStream.seekg(0, trtModelFStream.end); + trtModelContentSize = trtModelFStream.tellg(); + trtModelFStream.seekg(0, trtModelFStream.beg); + trtModelContent.reset(new char[trtModelContentSize]); + trtModelFStream.read(trtModelContent.get(), trtModelContentSize); + trtModelFStream.close(); + } + + m_inferenceRuntime.reset(nvinfer1::createInferRuntime(*(m_logger.get()))); + if (params.dlaID != -1 && params.dlaID < m_inferenceRuntime->getNbDLACores()) + { + m_inferenceRuntime->setDLACore(params.dlaID); + } + m_inferenceEngine = m_inferenceRuntime->deserializeCudaEngine(trtModelContent.get(), trtModelContentSize); + m_ownedInferenceEngine.reset(m_inferenceEngine); + m_inferenceContext.reset(m_inferenceEngine->createExecutionContext()); + m_inferenceContext->setOptimizationProfileAsync(0, m_cudaStream); + } + else + { + if (params.engine == nullptr) + { + throw ErrorCode::INVALID_ARGUMENT; + } + m_inferenceEngine = params.engine; + m_inferenceContext.reset(m_inferenceEngine->createExecutionContext()); + } + + if (m_inferenceEngine == nullptr || m_inferenceContext == nullptr) + { + throw ErrorCode::INVALID_ARGUMENT; + } + + 
m_hasImplicitBatch = m_inferenceEngine->hasImplicitBatchDimension(); + m_bindingsCount = m_inferenceEngine->getNbBindings(); + if (!m_hasImplicitBatch) + { + for (size_t i = 0; i < m_bindingsCount; i++) + { + if (m_inferenceEngine->bindingIsInput(i)) + { + nvinfer1::Dims dims_i(m_inferenceEngine->getBindingDimensions(i)); + nvinfer1::Dims4 inputDims{1, dims_i.d[1], dims_i.d[2], dims_i.d[3]}; + m_inferenceContext->setBindingDimensions(i, inputDims); + } + } + } + std::error_code err; + err = ParseTRTModel(); + if (err != cvcore::make_error_code(ErrorCode::SUCCESS)) + { + throw err; + } + m_buffers.resize(m_bindingsCount); +} + +// Set input layer tensor +std::error_code TensorRTInferencer::setInput(const cvcore::TensorBase &trtInputBuffer, std::string inputLayerName) +{ + if (m_modelInfo.inputLayers.find(inputLayerName) == m_modelInfo.inputLayers.end()) + { + return ErrorCode::INVALID_ARGUMENT; + } + LayerInfo layer = m_modelInfo.inputLayers[inputLayerName]; + m_buffers[layer.index] = trtInputBuffer.getData(); + return ErrorCode::SUCCESS; +} + +// Sets output layer tensor +std::error_code TensorRTInferencer::setOutput(cvcore::TensorBase &trtOutputBuffer, std::string outputLayerName) +{ + if (m_modelInfo.outputLayers.find(outputLayerName) == m_modelInfo.outputLayers.end()) + { + return ErrorCode::INVALID_ARGUMENT; + } + LayerInfo layer = m_modelInfo.outputLayers[outputLayerName]; + m_buffers[layer.index] = trtOutputBuffer.getData(); + return ErrorCode::SUCCESS; +} + +// Get the model metadata parsed based on the model file +// This would be done in initialize call itself. User can access the modelMetaData created using this API. +ModelMetaData TensorRTInferencer::getModelMetaData() const +{ + return m_modelInfo; +} + +std::error_code TensorRTInferencer::infer(size_t batchSize) +{ + bool err = true; + if (!m_hasImplicitBatch) + { + size_t bindingsCount = m_inferenceEngine->getNbBindings(); + for (size_t i = 0; i < bindingsCount; i++) + { + if (m_inferenceEngine->bindingIsInput(i)) + { + nvinfer1::Dims dims_i(m_inferenceEngine->getBindingDimensions(i)); + nvinfer1::Dims4 inputDims{static_cast(batchSize), dims_i.d[1], dims_i.d[2], dims_i.d[3]}; + m_inferenceContext->setBindingDimensions(i, inputDims); + } + } + err = m_inferenceContext->enqueueV2(&m_buffers[0], m_cudaStream, nullptr); + } + else + { + err = m_inferenceContext->enqueue(m_maxBatchSize, &m_buffers[0], m_cudaStream, nullptr); + } + if (!err) + { + return InferencerErrorCode::TENSORRT_INFERENCE_ERROR; + } + return ErrorCode::SUCCESS; +} + +// Applicable only for Native TRT +std::error_code TensorRTInferencer::setCudaStream(cudaStream_t cudaStream) // Only in TRT +{ + m_cudaStream = cudaStream; + return ErrorCode::SUCCESS; +} + +std::error_code TensorRTInferencer::unregister(std::string layerName) +{ + size_t index; + if (m_modelInfo.outputLayers.find(layerName) != m_modelInfo.outputLayers.end()) + { + index = m_modelInfo.outputLayers[layerName].index; + } + else if (m_modelInfo.inputLayers.find(layerName) != m_modelInfo.inputLayers.end()) + { + index = m_modelInfo.inputLayers[layerName].index; + } + else + { + return ErrorCode::INVALID_ARGUMENT; + } + m_buffers[index] = nullptr; + return ErrorCode::SUCCESS; +} + +std::error_code TensorRTInferencer::unregister() +{ + for (size_t i = 0; i < m_buffers.size(); i++) + { + m_buffers[i] = nullptr; + } + return ErrorCode::SUCCESS; +} + +TensorRTInferencer::~TensorRTInferencer() +{ + m_buffers.clear(); +} + +}} // namespace cvcore::inferencer diff --git 
a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.h b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.h new file mode 100644 index 0000000..b2a6535 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTInferencer.h @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef TENSORRT_INFERENCER_H +#define TENSORRT_INFERENCER_H + +#include + +#include +#include "cv/inferencer/Errors.h" +#include "cv/inferencer/IInferenceBackend.h" +#include "cv/inferencer/Inferencer.h" + +namespace cvcore { namespace inferencer { + +class TensorRTInferencer : public IInferenceBackendClient +{ +public: + TensorRTInferencer(const TensorRTInferenceParams ¶ms); + + // Set input layer tensor + virtual std::error_code setInput(const cvcore::TensorBase &trtInputBuffer, std::string inputLayerName) override; + + // Sets output layer tensor + virtual std::error_code setOutput(cvcore::TensorBase &trtOutputBuffer, std::string outputLayerName) override; + + // Get the model metadata parsed based on the model file + // This would be done in initialize call itself. User can access the modelMetaData created using this API. + virtual ModelMetaData getModelMetaData() const override; + + // TensorRT will use infer and TensorRT would use enqueueV2 + virtual std::error_code infer(size_t batchSize = 1) override; + + // Applicable only for Native TRT + virtual std::error_code setCudaStream(cudaStream_t) override; // Only in TRT + + // Unregister shared memory for layer + virtual std::error_code unregister(std::string layerName) override; + + // Unregister all shared memory + virtual std::error_code unregister() override; + +private: + ~TensorRTInferencer(); + std::unique_ptr m_logger; + std::unique_ptr m_inferenceRuntime; + size_t m_maxBatchSize; + std::vector m_inputLayers; + std::vector m_outputLayers; + cudaStream_t m_cudaStream; + nvinfer1::ICudaEngine *m_inferenceEngine; + std::unique_ptr m_ownedInferenceEngine; + std::unique_ptr m_inferenceContext; + size_t m_bindingsCount; + ModelMetaData m_modelInfo; + std::vector m_buffers; + bool m_hasImplicitBatch; + + std::error_code ParseTRTModel(); + std::error_code getLayerInfo(LayerInfo &layer, std::string layerName); +}; + +}} // namespace cvcore::inferencer +#endif diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.cpp new file mode 100644 index 0000000..3a1696a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
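Taken together with the factory in Inferencer.cpp, the intended TensorRT flow is roughly the sketch below. The engine path and layer names are placeholders, the parameter fields are the ones consumed by the constructor above, and the input/output tensors are assumed to already hold GPU data:

```cpp
#include "cv/inferencer/IInferenceBackend.h"
#include "cv/inferencer/Inferencer.h"

void TensorRTFlowSketch(const cvcore::TensorBase &inputDev, cvcore::TensorBase &outputDev)
{
    using namespace cvcore::inferencer;

    TensorRTInferenceParams params;
    params.inferType        = TRTInferenceType::TRT_ENGINE;
    params.engineFilePath   = "/tmp/ess.engine";  // placeholder
    params.maxBatchSize     = 1;
    params.inputLayerNames  = {"input_left"};     // placeholder names
    params.outputLayerNames = {"output_disparity"};
    params.dlaID            = -1;                 // stay on the GPU

    InferenceBackendClient client = nullptr;
    if (InferenceBackendFactory::CreateTensorRTInferenceBackendClient(client, params))
    {
        return; // a non-zero error_code means construction failed
    }

    client->setInput(inputDev, "input_left");     // bind device buffers
    client->setOutput(outputDev, "output_disparity");
    client->infer(/*batchSize=*/1);

    InferenceBackendFactory::DestroyTensorRTInferenceBackendClient(client);
}
```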
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "TensorRTUtils.h" +#include + +namespace cvcore { namespace inferencer { + +std::error_code getCVCoreChannelTypeFromTensorRT(cvcore::ChannelType &channelType, nvinfer1::DataType dtype) +{ + if (dtype == nvinfer1::DataType::kINT8) + { + channelType = cvcore::ChannelType::U8; + } + else if (dtype == nvinfer1::DataType::kHALF) + { + channelType = cvcore::ChannelType::F16; + } + else if (dtype == nvinfer1::DataType::kFLOAT) + { + channelType = cvcore::ChannelType::F32; + } + else + { + return ErrorCode::INVALID_OPERATION; + } + + return ErrorCode::SUCCESS; +} + +std::error_code getCVCoreChannelLayoutFromTensorRT(cvcore::TensorLayout &channelLayout, + nvinfer1::TensorFormat tensorFormat) +{ + if (tensorFormat == nvinfer1::TensorFormat::kLINEAR) + { + channelLayout = cvcore::TensorLayout::NCHW; + } + else if (tensorFormat == nvinfer1::TensorFormat::kHWC) + { + channelLayout = cvcore::TensorLayout::HWC; + } + else + { + return ErrorCode::INVALID_OPERATION; + } + + return ErrorCode::SUCCESS; +} + +}} // namespace cvcore::inferencer diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.h b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.h new file mode 100644 index 0000000..290ea70 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/tensorrt/TensorRTUtils.h @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef TENSORRT_UTILS_H +#define TENSORRT_UTILS_H + +#include "NvInferRuntime.h" +#include "cv/core/Tensor.h" +#include "cv/inferencer/Errors.h" + +namespace cvcore { namespace inferencer { + +/* + * Maps tensorrt datatype to cvcore Channel type. + * @param channelType cvcore channel type. + * @param dtype tensorrt datatype + * return error code + */ +std::error_code getCVCoreChannelTypeFromTensorRT(cvcore::ChannelType &channelType, nvinfer1::DataType dtype); + +/* + * Maps tensorrt datatype to cvcore Channel type. + * @param channelLayout cvcore channel type. 
+ * @param tensorFormat tensorrt tensor format
+ * return error code
+ */
+std::error_code getCVCoreChannelLayoutFromTensorRT(cvcore::TensorLayout &channelLayout,
+                                                   nvinfer1::TensorFormat tensorFormat);
+
+}} // namespace cvcore::inferencer
+
+#endif
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.cpp
new file mode 100644
index 0000000..f30e8c1
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.cpp
@@ -0,0 +1,342 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifdef ENABLE_TRITON
+#include "TritonGrpcInferencer.h"
+#include <cuda_runtime.h>
+#include <grpc_client.h> // Triton gRPC client API (header name assumed)
+#include "TritonUtils.h"
+#include "cv/inferencer/Errors.h"
+#include "cv/inferencer/IInferenceBackend.h"
+#include "cv/inferencer/Inferencer.h"
+namespace cvcore { namespace inferencer {
+namespace tc = triton::client;
+
+namespace {
+size_t getDataSize(const std::vector<int64_t> &shape, cvcore::ChannelType dataType)
+{
+    size_t layerShape = 1;
+    for (size_t k = 0; k < shape.size(); k++)
+        layerShape *= shape[k] <= 0 ? 1 : shape[k];
+
+    return layerShape * GetChannelSize(dataType);
+}
+} // namespace
+
+std::error_code TritonGrpcInferencer::ParseGrpcModel()
+{
+    inference::ModelMetadataResponse tritonModelMetadata;
+    inference::ModelConfigResponse modelConfig;
+
+    tc::Error err;
+    modelInfo.modelName    = modelName;
+    modelInfo.modelVersion = modelVersion;
+    err = client->ModelMetadata(&tritonModelMetadata, modelName, modelVersion);
+    err = client->ModelConfig(&modelConfig, modelName, modelVersion);
+    modelInfo.maxBatchSize = modelConfig.config().max_batch_size();
+    bool inputBatchDim     = modelInfo.maxBatchSize > 0;
+    for (int i = 0; i < tritonModelMetadata.inputs().size(); i++)
+    {
+        LayerInfo layer;
+        layer.name  = tritonModelMetadata.inputs(i).name();
+        layer.index = i;
+        bool parseStatus = getCVCoreChannelType(layer.dataType, tritonModelMetadata.inputs(i).datatype());
+        if (!parseStatus)
+        {
+            return ErrorCode::INVALID_OPERATION;
+        }
+
+        size_t cnt = modelInfo.maxBatchSize == 0 ? 0 : 1;
+        if (modelInfo.maxBatchSize != 0)
+            layer.shape.push_back(modelInfo.maxBatchSize);
+        for (; cnt < tritonModelMetadata.inputs(i).shape().size(); cnt++)
+        {
+            layer.shape.push_back(tritonModelMetadata.inputs(i).shape(cnt));
+        }
+        layer.layerSize = getDataSize(layer.shape, layer.dataType);
+        modelInfo.inputLayers[layer.name] = layer;
+    }
+    for (int i = 0; i < tritonModelMetadata.outputs().size(); i++)
+    {
+        LayerInfo layer;
+        layer.name  = tritonModelMetadata.outputs(i).name();
+        layer.index = i;
+        bool parseStatus = getCVCoreChannelType(layer.dataType, tritonModelMetadata.outputs(i).datatype());
+        if (!parseStatus)
+        {
+            return ErrorCode::INVALID_OPERATION;
+        }
+        layer.layout = TensorLayout::NHWC;
+        size_t cnt = modelInfo.maxBatchSize == 0 ? 0 : 1;
+        if (modelInfo.maxBatchSize != 0)
+            layer.shape.push_back(modelInfo.maxBatchSize);
+        for (; cnt < tritonModelMetadata.outputs(i).shape().size(); cnt++)
+        {
+            layer.shape.push_back(tritonModelMetadata.outputs(i).shape(cnt));
+        }
+        layer.layerSize = getDataSize(layer.shape, layer.dataType);
+        modelInfo.outputLayers[layer.name] = layer;
+    }
+    return ErrorCode::SUCCESS;
+}
+
+TritonGrpcInferencer::TritonGrpcInferencer(const TritonRemoteInferenceParams &params)
+    : modelVersion(params.modelVersion)
+    , modelName(params.modelName)
+{
+
+    tc::Error err = tc::InferenceServerGrpcClient::Create(&client, params.serverUrl, params.verbose);
+
+    if (!err.IsOk())
+    {
+        throw make_error_code(InferencerErrorCode::TRITON_SERVER_NOT_READY);
+    }
+
+    // Unregistering all shared memory regions for a clean
+    // start.
+    err = client->UnregisterSystemSharedMemory();
+    if (!err.IsOk())
+    {
+        throw make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+    }
+    err = client->UnregisterCudaSharedMemory();
+    if (!err.IsOk())
+    {
+        throw make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+    }
+
+    ParseGrpcModel();
+
+    // Include the batch dimension if required
+    inputRequests.resize(modelInfo.inputLayers.size());
+    inputMap.resize(modelInfo.inputLayers.size());
+    inputMapHistory.resize(modelInfo.inputLayers.size());
+    outputRequests.resize(modelInfo.outputLayers.size());
+    outputMap.resize(modelInfo.outputLayers.size());
+    outputMapHistory.resize(modelInfo.outputLayers.size());
+    for (auto &it : modelInfo.inputLayers)
+    {
+        tc::InferInput *inferInputVal;
+        std::string tritonDataType;
+        bool parseStatus = getTritonChannelType(tritonDataType, it.second.dataType);
+        if (!parseStatus)
+        {
+            throw make_error_code(InferencerErrorCode::TRITON_REGISTER_LAYER_ERROR);
+        }
+        err = tc::InferInput::Create(&inferInputVal, it.second.name, it.second.shape, tritonDataType);
+        if (!err.IsOk())
+        {
+            throw make_error_code(InferencerErrorCode::TRITON_REGISTER_LAYER_ERROR);
+        }
+        inputRequests[it.second.index].reset(inferInputVal);
+    }
+    for (auto &it : modelInfo.outputLayers)
+    {
+        tc::InferRequestedOutput *output;
+        err = tc::InferRequestedOutput::Create(&output, it.second.name);
+        if (!err.IsOk())
+        {
+            throw make_error_code(InferencerErrorCode::TRITON_REGISTER_LAYER_ERROR);
+        }
+        outputRequests[it.second.index].reset(output);
+    }
+}
+cudaError_t CreateCUDAIPCHandle(cudaIpcMemHandle_t *cuda_handle, void *input_d_ptr, int deviceId = 0)
+{
+    // Set the GPU device to the desired GPU
+    cudaError_t err;
+    err = cudaSetDevice(deviceId);
+    if (err != cudaSuccess)
+    {
+        return err;
+    }
+
+    // Create IPC handle for data on the gpu
+    err = cudaIpcGetMemHandle(cuda_handle, input_d_ptr);
+    if (err != cudaSuccess)
+    {
+        return err;
+    }
+    return cudaSuccess;
+}
+
+// Set input layer tensor
+std::error_code TritonGrpcInferencer::setInput(const cvcore::TensorBase &trtInputBuffer, std::string inputLayerName)
+{
+    if (trtInputBuffer.isCPU())
+    {
+        return ErrorCode::INVALID_ARGUMENT;
+    }
+
+    size_t index = modelInfo.inputLayers[inputLayerName].index;
+    if (inputMapHistory[index] != (void *)trtInputBuffer.getData())
+    {
+        inputMapHistory[index] = trtInputBuffer.getData();
+        unregister(inputLayerName);
+        cudaIpcMemHandle_t input_cuda_handle;
+        cudaError_t cudaStatus = CreateCUDAIPCHandle(&input_cuda_handle, (void *)trtInputBuffer.getData());
+        if (cudaStatus != cudaSuccess)
+        {
+            return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+        }
+
+        tc::Error err;
+        err = client->RegisterCudaSharedMemory(inputLayerName.c_str(), input_cuda_handle, 0,
+                                               trtInputBuffer.getDataSize());
+        if (!err.IsOk())
+        {
+            return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+        }
+
+        err = inputRequests[index]->SetSharedMemory(inputLayerName.c_str(), trtInputBuffer.getDataSize(), 0);
+        inputMap[index] = inputRequests[index].get();
+        if (!err.IsOk())
+        {
+            return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+        }
+    }
+    return ErrorCode::SUCCESS;
+}
+
+// Sets output layer tensor
+std::error_code TritonGrpcInferencer::setOutput(cvcore::TensorBase &trtOutputBuffer, std::string outputLayerName)
+{
+    if (trtOutputBuffer.isCPU())
+    {
+        return ErrorCode::INVALID_ARGUMENT;
+    }
+
+    size_t index = modelInfo.outputLayers[outputLayerName].index;
+    if (outputMapHistory[index] != (void *)trtOutputBuffer.getData())
+    {
+        outputMapHistory[index] = trtOutputBuffer.getData();
+        unregister(outputLayerName);
+        cudaIpcMemHandle_t outputCudaHandle;
+        CreateCUDAIPCHandle(&outputCudaHandle, (void *)trtOutputBuffer.getData());
+        tc::Error err;
+        err = client->RegisterCudaSharedMemory(outputLayerName.c_str(), outputCudaHandle, 0 /* deviceId */,
+                                               trtOutputBuffer.getDataSize());
+        if (!err.IsOk())
+        {
+            return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+        }
+
+        err = outputRequests[index]->SetSharedMemory(outputLayerName.c_str(), trtOutputBuffer.getDataSize(),
+                                                     0 /* offset */);
+        if (!err.IsOk())
+        {
+            return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR);
+        }
+        outputMap[index] = outputRequests[index].get();
+    }
+    return ErrorCode::SUCCESS;
+}
+
+// Get the model metadata parsed based on the model file
+// This would be done in initialize call itself. User can access the modelMetaData created using this API.
+ModelMetaData TritonGrpcInferencer::getModelMetaData() const +{ + return modelInfo; +} + +// Triton will use infer and TensorRT would use enqueueV2 +std::error_code TritonGrpcInferencer::infer(size_t batchSize) +{ + tc::InferResult *results; + tc::Headers httpHeaders; + tc::InferOptions options(modelInfo.modelName); + options.model_version_ = modelInfo.modelVersion; + for (auto &inputLayer : modelInfo.inputLayers) + { + LayerInfo inputLayerInfo = inputLayer.second; + size_t index = inputLayerInfo.index; + tc::Error err; + err = + inputRequests[index]->SetSharedMemory(inputLayerInfo.name.c_str(), inputLayerInfo.layerSize * batchSize, 0); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + } + for (auto &outputLayer : modelInfo.outputLayers) + { + LayerInfo outputLayerInfo = outputLayer.second; + size_t index = outputLayerInfo.index; + tc::Error err; + err = outputRequests[index]->SetSharedMemory(outputLayerInfo.name.c_str(), + outputLayerInfo.layerSize * batchSize, 0); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + } + tc::Error err = client->Infer(&results, options, inputMap, outputMap, httpHeaders); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_INFERENCE_ERROR); + } + + return ErrorCode::SUCCESS; +} + +// Applicable only for Native TRT +std::error_code TritonGrpcInferencer::setCudaStream(cudaStream_t) // Only in TRT +{ + return ErrorCode::INVALID_OPERATION; +} + +std::error_code TritonGrpcInferencer::unregister(std::string layerName) +{ + tc::Error err; + inference::CudaSharedMemoryStatusResponse status; + err = client->CudaSharedMemoryStatus(&status); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + err = client->UnregisterCudaSharedMemory(layerName.c_str()); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + return ErrorCode::SUCCESS; +} + +std::error_code TritonGrpcInferencer::unregister() +{ + tc::Error err; + inference::CudaSharedMemoryStatusResponse status; + err = client->CudaSharedMemoryStatus(&status); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + err = client->UnregisterCudaSharedMemory(); + if (!err.IsOk()) + { + return make_error_code(InferencerErrorCode::TRITON_CUDA_SHARED_MEMORY_ERROR); + } + return ErrorCode::SUCCESS; +} +}} // namespace cvcore::inferencer +#endif diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.h b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.h new file mode 100644 index 0000000..d02b3e2 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonGrpcInferencer.h @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifdef ENABLE_TRITON
+#ifndef TRITONGRPC_INFERENCER_H
+#define TRITONGRPC_INFERENCER_H
+
+#include <grpc_client.h>
+
+#include <memory>
+
+#include "cv/inferencer/Errors.h"
+#include "cv/inferencer/IInferenceBackend.h"
+#include "cv/inferencer/Inferencer.h"
+
+namespace cvcore { namespace inferencer {
+namespace tc = triton::client;
+
+class TritonGrpcInferencer : public IInferenceBackendClient
+{
+public:
+    TritonGrpcInferencer(const TritonRemoteInferenceParams &params);
+
+    // Set input layer tensor
+    virtual std::error_code setInput(const cvcore::TensorBase &trtInputBuffer, std::string inputLayerName) override;
+    // Sets output layer tensor
+    virtual std::error_code setOutput(cvcore::TensorBase &trtOutputBuffer, std::string outputLayerName) override;
+
+    // Get the model metadata parsed from the model file.
+    // Parsing happens during initialization; users can access the resulting modelMetaData through this API.
+    virtual ModelMetaData getModelMetaData() const override;
+
+    // Triton uses infer; TensorRT uses enqueueV2
+    virtual std::error_code infer(size_t batchSize = 1) override;
+
+    // Applicable only for Native TRT
+    virtual std::error_code setCudaStream(cudaStream_t) override; // Only in TRT
+
+    // Unregister shared memory for layer
+    virtual std::error_code unregister(std::string layerName) override;
+
+    // Unregister all shared memory
+    virtual std::error_code unregister() override;
+
+private:
+    // Parse grpc model
+    std::error_code ParseGrpcModel();
+    std::unique_ptr<tc::InferenceServerGrpcClient> client;
+    ModelMetaData modelInfo;
+    std::vector<std::shared_ptr<tc::InferInput>> inputRequests;
+    std::vector<std::shared_ptr<tc::InferRequestedOutput>> outputRequests;
+    std::vector<tc::InferInput *> inputMap;
+    std::vector<void *> inputMapHistory;
+    std::vector<void *> outputMapHistory;
+    std::vector<const tc::InferRequestedOutput *> outputMap;
+    std::string modelVersion, modelName;
+};
+
+}} // namespace cvcore::inferencer
+#endif
+#endif
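Against this interface, the expected call sequence is small. A hedged sketch (the `backend` pointer, tensors, and layer names below are assumptions for illustration; production code would obtain the backend through the factory declared in cv/inferencer/Inferencer.h):

    // Assumes: IInferenceBackendClient *backend, and GPU-resident cvcore::TensorBase
    // tensors inputTensor / outputTensor; "input_left" and "disparity" are hypothetical layer names.
    std::error_code status;
    status = backend->setInput(inputTensor, "input_left");   // registers the input buffer with Triton
    status = backend->setOutput(outputTensor, "disparity");  // registers the output buffer
    status = backend->infer(1);                              // run one batch on the server
    backend->unregister();                                   // release all CUDA shared-memory regions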
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.cpp
new file mode 100644
index 0000000..47b3653
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.cpp
@@ -0,0 +1,84 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifdef ENABLE_TRITON
+#include "TritonUtils.h"
+#include <string>
+
+namespace cvcore { namespace inferencer {
+
+bool getCVCoreChannelType(cvcore::ChannelType &channelType, std::string dtype)
+{
+    if (dtype.compare("UINT8") == 0)
+    {
+        channelType = cvcore::ChannelType::U8;
+    }
+    else if (dtype.compare("UINT16") == 0)
+    {
+        channelType = cvcore::ChannelType::U16;
+    }
+    else if (dtype.compare("FP16") == 0)
+    {
+        channelType = cvcore::ChannelType::F16;
+    }
+    else if (dtype.compare("FP32") == 0)
+    {
+        channelType = cvcore::ChannelType::F32;
+    }
+    else if (dtype.compare("FP64") == 0)
+    {
+        channelType = cvcore::ChannelType::F64;
+    }
+    else
+    {
+        return false;
+    }
+
+    return true;
+}
+
+bool getTritonChannelType(std::string &dtype, cvcore::ChannelType channelType)
+{
+    if (channelType == cvcore::ChannelType::U8)
+    {
+        dtype = "UINT8";
+    }
+    else if (channelType == cvcore::ChannelType::U16)
+    {
+        dtype = "UINT16";
+    }
+    else if (channelType == cvcore::ChannelType::F16)
+    {
+        dtype = "FP16";
+    }
+    else if (channelType == cvcore::ChannelType::F32)
+    {
+        dtype = "FP32";
+    }
+    else if (channelType == cvcore::ChannelType::F64)
+    {
+        dtype = "FP64";
+    }
+    else
+    {
+        return false;
+    }
+
+    return true;
+}
+
+}} // namespace cvcore::inferencer
+#endif
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.h b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.h
new file mode 100644
index 0000000..bb13553
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/inferencer/triton/TritonUtils.h
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifdef ENABLE_TRITON
+
+#ifndef TRITON_UTILS_H
+#define TRITON_UTILS_H
+
+#include <string>
+#include "cv/core/Tensor.h"
+#include "cv/inferencer/Errors.h"
+
+namespace cvcore { namespace inferencer {
+
+/*
+ * Maps a Triton datatype string to the cvcore channel type.
+ * @param channelType cvcore channel type.
+ * @param dtype String representing triton datatype
+ * @return bool returns false if mapping was not successful.
+ */
+bool getCVCoreChannelType(cvcore::ChannelType &channelType, std::string dtype);
+
+/*
+ * Maps a cvcore channel type to the Triton datatype string.
+ * @param dtype String representing triton datatype
+ * @param channelType cvcore channel type.
+ * @return bool returns false if mapping was not successful.
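 *
 * A minimal round-trip sketch (hypothetical values; assumes an ENABLE_TRITON build):
 *   std::string dtype;
 *   if (getTritonChannelType(dtype, cvcore::ChannelType::F32)) {
 *       // dtype is now "FP32", ready to hand to tc::InferInput::Create().
 *   }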
+ */ +bool getTritonChannelType(std::string &dtype, cvcore::ChannelType channelType); + +}} // namespace cvcore::inferencer +#endif // TRITON_UTILS_H + +#endif // ENABLE_TRITON diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ArithmeticOperations.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ArithmeticOperations.cpp new file mode 100644 index 0000000..85d5e2f --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ArithmeticOperations.cpp @@ -0,0 +1,329 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "NppUtils.h" + +#include "cv/tensor_ops/ImageUtils.h" + +#include "cv/core/Memory.h" + +#include +#include + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +namespace { + +static void NormalizeTensorC3F32Inplace(Tensor &src, const float scale[3], const float offset[3], + NppStreamContext streamContext) +{ + const int srcW = src.getWidth(); + const int srcH = src.getHeight(); + const NppiSize srcSize = {srcW, srcH}; + + const Npp32f offsets[3] = {static_cast(offset[0]), static_cast(offset[1]), + static_cast(offset[2])}; + NppStatus status = + nppiAddC_32f_C3IR_Ctx(offsets, static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), srcSize, streamContext); + assert(status == NPP_SUCCESS); + + const Npp32f scales[3] = {static_cast(scale[0]), static_cast(scale[1]), + static_cast(scale[2])}; + status = nppiMulC_32f_C3IR_Ctx(scales, static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), srcSize, streamContext); + assert(status == NPP_SUCCESS); +} + +template +static void NormalizeTensorC1F32Inplace(Tensor &src, const float scale, const float offset, + NppStreamContext streamContext) +{ + const int srcW = src.getWidth(); + const int srcH = src.getHeight(); + const NppiSize srcSize = {srcW, srcH}; + + NppStatus status = + nppiAddC_32f_C1IR_Ctx(static_cast(offset), static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), srcSize, streamContext); + assert(status == NPP_SUCCESS); + + status = nppiMulC_32f_C1IR_Ctx(static_cast(scale), static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), srcSize, streamContext); + assert(status == NPP_SUCCESS); +} + +template +void NormalizeC1U8Impl(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStreamContext streamContext = GetNppStreamContext(stream); + + NppStatus status = nppiConvert_8u32f_C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + 
{int(src.getWidth()), int(src.getHeight())}, streamContext); + assert(status == NPP_SUCCESS); + + NormalizeTensorC1F32Inplace(dst, scale, offset, streamContext); +} + +template +void NormalizeC1U16Impl(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStreamContext streamContext = GetNppStreamContext(stream); + + NppStatus status = nppiConvert_16u32f_C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, streamContext); + assert(status == NPP_SUCCESS); + + NormalizeTensorC1F32Inplace(dst, scale, offset, streamContext); +} + +template +void NormalizeC1F32Impl(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + Copy(dst, src, stream); + NormalizeTensorC1F32Inplace(dst, scale, offset, GetNppStreamContext(stream)); +} + +template +void NormalizeC3Batch(Tensor &dst, Tensor &src, const float scale[3], + const float offset[3], cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t shiftSrc = i * src.getStride(TensorDimension::DEPTH); + size_t shiftDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + shiftSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(F32), + dst.getData() + shiftDst, false); + Normalize(dstTmp, srcTmp, scale, offset, stream); + } +} + +template +void NormalizeC1Batch(Tensor &dst, Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t shiftSrc = i * src.getStride(TensorDimension::DEPTH); + size_t shiftDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + shiftSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(F32), + dst.getData() + shiftDst, false); + Normalize(dstTmp, srcTmp, scale, offset, stream); + } +} + +template +void NormalizeC1Batch(Tensor &dst, Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t shiftSrc = i * src.getStride(TensorDimension::DEPTH); + size_t shiftDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + shiftSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * 
GetChannelSize(F32), + dst.getData() + shiftDst, false); + Normalize(dstTmp, srcTmp, scale, offset, stream); + } +} + +} // anonymous namespace + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3], + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStreamContext streamContext = GetNppStreamContext(stream); + + NppStatus status = nppiConvert_8u32f_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, streamContext); + assert(status == NPP_SUCCESS); + + NormalizeTensorC3F32Inplace(dst, scale, offset, streamContext); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3], + cudaStream_t stream) +{ + NormalizeC3Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3], + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStreamContext streamContext = GetNppStreamContext(stream); + + NppStatus status = nppiConvert_16u32f_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, streamContext); + assert(status == NPP_SUCCESS); + + NormalizeTensorC3F32Inplace(dst, scale, offset, streamContext); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], + const float offset[3], cudaStream_t stream) +{ + NormalizeC3Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], const float offset[3], + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + Copy(dst, src, stream); + NormalizeTensorC3F32Inplace(dst, scale, offset, GetNppStreamContext(stream)); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale[3], + const float offset[3], cudaStream_t stream) +{ + NormalizeC3Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1U8Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1U16Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1F32Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float 
scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1U8Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1U16Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1F32Impl(dst, src, scale, offset, stream); +} + +void Normalize(Tensor &dst, const Tensor &src, const float scale, const float offset, + cudaStream_t stream) +{ + NormalizeC1Batch(dst, const_cast &>(src), scale, offset, stream); +} + +}} // namespace cvcore::tensor_ops \ No newline at end of file diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/BBoxUtils.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/BBoxUtils.cpp new file mode 100644 index 0000000..4559a35 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/BBoxUtils.cpp @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/tensor_ops/BBoxUtils.h" + +#include +#include + +namespace cvcore { namespace tensor_ops { + +namespace { + +bool IsValid(const BBox &box) +{ + return box.xmin >= 0 && box.ymin >= 0 && box.xmin < box.xmax && box.ymin < box.ymax; +} + +} // anonymous namespace + +float GetArea(const BBox &box) +{ + if (box.xmax < box.xmin || box.ymax < box.ymin) + { + return 0.f; + } + return static_cast((box.xmax - box.xmin) * (box.ymax - box.ymin)); +} + +float GetIntersection(const BBox &a, const BBox &b) +{ + const int lowerX = std::max(a.xmin, b.xmin); + const int upperX = std::min(a.xmax, b.xmax); + const int lowerY = std::max(a.ymin, b.ymin); + const int upperY = std::min(a.ymax, b.ymax); + const int diffX = lowerX < upperX ? upperX - lowerX : 0; + const int diffY = lowerY < upperY ? upperY - lowerY : 0; + return static_cast(diffX * diffY); +} + +float GetUnion(const BBox &a, const BBox &b) +{ + return GetArea(a) + GetArea(b) - GetIntersection(a, b); +} + +float GetIoU(const BBox &a, const BBox &b) +{ + return GetIntersection(a, b) / GetUnion(a, b); +} + +BBox MergeBoxes(const BBox &a, const BBox &b) +{ + if (!IsValid(a) || !IsValid(b)) + { + return IsValid(a) ? 
a : b; + } + BBox res; + res.xmin = std::min(a.xmin, b.xmin); + res.xmax = std::max(a.xmax, b.xmax); + res.ymin = std::min(a.ymin, b.ymin); + res.ymax = std::max(a.ymax, b.ymax); + return res; +} + +BBox ClampBox(const BBox &a, const BBox &b) +{ + return {std::max(a.xmin, b.xmin), std::max(a.ymin, b.ymin), std::min(a.xmax, b.xmax), std::min(a.ymax, b.ymax)}; +} + +BBox InterpolateBoxes(float currLeft, float currRight, float currBottom, float currTop, float xScaler, float yScaler, + int currColumn, int currRow, BBoxInterpolationType type, float bboxNorm) +{ + BBox currBoxInfo; + if (type == CONST_INTERPOLATION) + { + float centerX = ((currColumn * xScaler + 0.5) / bboxNorm); + float centerY = ((currRow * yScaler + 0.5) / bboxNorm); + float left = (currLeft - centerX); + float right = (currRight + centerX); + float top = (currTop - centerY); + float bottom = (currBottom + centerY); + currBoxInfo.xmin = left * -bboxNorm; + currBoxInfo.xmax = right * bboxNorm; + currBoxInfo.ymin = top * -bboxNorm; + currBoxInfo.ymax = bottom * bboxNorm; + } + else if (type == IMAGE_INTERPOLATION) + { + int centerX = (int)((currColumn + 0.5f) * xScaler); + int centerY = (int)((currRow + 0.5f) * yScaler); + int left = (int)(currLeft * xScaler); + int right = (int)(currRight * xScaler); + int top = (int)(currTop * yScaler); + int bottom = (int)(currBottom * yScaler); + currBoxInfo.xmin = centerX - left; + currBoxInfo.xmax = centerX + right; + currBoxInfo.ymin = centerY - top; + currBoxInfo.ymax = centerY + bottom; + } + else + { + throw std::runtime_error("invalid bbox interpolation type"); + } + return currBoxInfo; +} + +BBox ScaleBox(const BBox &bbox, float xScaler, float yScaler, BBoxScaleType type) +{ + BBox output; + if (type == NORMAL) + { + int xMin = (int)(bbox.xmin * xScaler + 0.5f); + int yMin = (int)(bbox.ymin * yScaler + 0.5f); + int xMax = (int)(bbox.xmax * xScaler + 0.5f); + int yMax = (int)(bbox.ymax * yScaler + 0.5f); + output = {xMin, yMin, xMax, yMax}; + } + else if (type == CENTER) + { + float xCenter = (bbox.xmax + bbox.xmin) / 2.0f; + float yCenter = (bbox.ymax + bbox.ymin) / 2.0f; + + float width = (bbox.xmax - bbox.xmin) * xScaler; + float height = (bbox.ymax - bbox.ymin) * yScaler; + + output = {int(xCenter - width / 2 + 0.5f), int(yCenter - height / 2 + 0.5f), int(xCenter + width / 2 + 0.5f), + int(yCenter + height / 2 + 0.5f)}; + } + else + { + throw std::runtime_error("invalid bbox scaling type"); + } + return output; +} + +BBox TransformBox(const BBox &bbox, float xScaler, float yScaler, float xOffset, float yOffset) +{ + int xMin = (int)((bbox.xmin + xOffset) * xScaler + 0.5f); + int yMin = (int)((bbox.ymin + yOffset) * yScaler + 0.5f); + int xMax = (int)((bbox.xmax + xOffset) * xScaler + 0.5f); + int yMax = (int)((bbox.ymax + yOffset) * yScaler + 0.5f); + return {xMin, yMin, xMax, yMax}; +} + +BBox SquarifyBox(const BBox &box, const BBox &boundary, float scale) +{ + BBox output = ClampBox(box, boundary); + float updateWH = scale * std::max(output.xmax - output.xmin, output.ymax - output.ymin); + float scaleW = updateWH / float(output.xmax - output.xmin); + float scaleH = updateWH / float(output.ymax - output.ymin); + output = ScaleBox(output, scaleW, scaleH, CENTER); + + output = ClampBox(output, boundary); + int xmin = output.xmin; + int ymin = output.ymin; + int l = std::min(output.xmax - output.xmin, output.ymax - output.ymin); + return {xmin, ymin, xmin + l, ymin + l}; +} + +}} // namespace cvcore::tensor_ops diff --git 
a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ColorConversions.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ColorConversions.cpp new file mode 100644 index 0000000..f02d3a9 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/ColorConversions.cpp @@ -0,0 +1,447 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "NppUtils.h" + +#include "cv/tensor_ops/ImageUtils.h" + +#include "cv/core/Memory.h" + +#include +#include +#include + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +const float BGR2GRAY_COEFFS[3] = {0.114f, 0.587f, 0.299f}; +const float RGB2GRAY_COEFFS[3] = {0.299f, 0.587f, 0.114f}; + +namespace { + +template +void ConvertColorFormatBatch(Tensor &dst, Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t offsetSrc = i * src.getStride(TensorDimension::DEPTH); + size_t offsetDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + offsetSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + dst.getData() + offsetDst, false); + ConvertColorFormat(dstTmp, srcTmp, type, stream); + } +} + +template +void InterleavedToPlanarBatch(Tensor &dst, Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t offsetSrc = i * src.getStride(TensorDimension::DEPTH); + size_t offsetDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + offsetSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + dst.getData() + offsetDst, false); + InterleavedToPlanar(dstTmp, srcTmp, stream); + } +} + +} // anonymous namespace + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2RGB || type == RGB2BGR) + { + const int order[3] = {2, 1, 0}; + NppStatus status = nppiSwapChannels_8u_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(src.getWidth()), int(src.getHeight())}, order, 
GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + ConvertColorFormatBatch(dst, const_cast &>(src), type, stream); +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2RGB || type == RGB2BGR) + { + const int order[3] = {2, 1, 0}; + NppStatus status = nppiSwapChannels_16u_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(src.getWidth()), int(src.getHeight())}, order, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + ConvertColorFormatBatch(dst, const_cast &>(src), type, stream); +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2RGB || type == RGB2BGR) + { + const int order[3] = {2, 1, 0}; + NppStatus status = nppiSwapChannels_32f_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, order, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + ConvertColorFormatBatch(dst, const_cast &>(src), type, stream); +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2GRAY || type == RGB2GRAY) + { + NppStatus status = nppiColorToGray_8u_C3C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(src.getWidth()), int(src.getHeight())}, type == BGR2GRAY ? 
BGR2GRAY_COEFFS : RGB2GRAY_COEFFS, + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2GRAY || type == RGB2GRAY) + { + NppStatus status = nppiColorToGray_16u_C3C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(src.getWidth()), int(src.getHeight())}, type == BGR2GRAY ? BGR2GRAY_COEFFS : RGB2GRAY_COEFFS, + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == BGR2GRAY || type == RGB2GRAY) + { + NppStatus status = nppiColorToGray_32f_C3C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, type == BGR2GRAY ? BGR2GRAY_COEFFS : RGB2GRAY_COEFFS, + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == GRAY2BGR || type == GRAY2RGB) + { + NppStatus status = nppiDup_8u_C1C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(src.getWidth()), int(src.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == GRAY2BGR || type == GRAY2RGB) + { + NppStatus status = nppiDup_16u_C1C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(src.getWidth()), int(src.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertColorFormat(Tensor &dst, const Tensor &src, ColorConversionType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + if (type == GRAY2BGR || type 
== GRAY2RGB) + { + NppStatus status = nppiDup_32f_C1C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); + } + else + { + throw std::runtime_error("invalid color conversion type"); + } +} + +void ConvertBitDepth(Tensor &dst, Tensor &src, const float scale, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + const NppiSize srcSize = {src.getWidth(), src.getHeight()}; + + NppStreamContext streamContext = GetNppStreamContext(stream); + + NppStatus status = + nppiMulC_32f_C1IR_Ctx(static_cast(scale), static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), srcSize, streamContext); + assert(status == NPP_SUCCESS); + + status = nppiConvert_32f8u_C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(src.getWidth()), int(src.getHeight())}, NPP_RND_FINANCIAL, streamContext); + assert(status == NPP_SUCCESS); +} + +void ConvertBitDepth(Tensor &dst, Tensor &src, const float scale, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + Tensor srcTmp(src.getWidth(), src.getDepth() * src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(F32), src.getData(), false); + Tensor dstTmp(dst.getWidth(), dst.getDepth() * dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(U8), dst.getData(), false); + ConvertBitDepth(dstTmp, srcTmp, scale, stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStatus status; + NppStreamContext streamContext = GetNppStreamContext(stream); + + const size_t offset = dst.getStride(TensorDimension::HEIGHT) * dst.getHeight(); + Npp8u *const dstBuffer[3] = {dst.getData(), dst.getData() + offset, dst.getData() + 2 * offset}; + status = nppiCopy_8u_C3P3R_Ctx(static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), dstBuffer, + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(src.getWidth()), int(src.getHeight())}, streamContext); + assert(status == NPP_SUCCESS); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStatus status; + NppStreamContext streamContext = GetNppStreamContext(stream); + + const size_t offset = dst.getStride(TensorDimension::HEIGHT) * dst.getHeight(); + Npp16u *const dstBuffer[3] = {dst.getData(), dst.getData() + offset, dst.getData() + 2 * offset}; + status = nppiCopy_16u_C3P3R_Ctx(static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), dstBuffer, + 
dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(src.getWidth()), int(src.getHeight())}, streamContext); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + NppStatus status; + NppStreamContext streamContext = GetNppStreamContext(stream); + + const size_t offset = dst.getStride(TensorDimension::HEIGHT) * dst.getHeight(); + Npp32f *const dstBuffer[3] = {dst.getData(), dst.getData() + offset, dst.getData() + 2 * offset}; + status = nppiCopy_32f_C3P3R_Ctx(static_cast(src.getData()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), dstBuffer, + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(src.getWidth()), int(src.getHeight())}, streamContext); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + Tensor tmp(dst.getWidth(), dst.getHeight(), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + dst.getData(), false); + Copy(tmp, src, stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + Tensor tmp(dst.getWidth(), dst.getHeight(), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + dst.getData(), false); + Copy(tmp, src, stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert((src.getWidth() == dst.getWidth()) && (src.getHeight() == dst.getHeight())); + + Tensor tmp(dst.getWidth(), dst.getHeight(), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + dst.getData(), false); + Copy(tmp, src, stream); +} + +void InterleavedToPlanar(Tensor &dst, const Tensor &src, cudaStream_t stream) +{ + InterleavedToPlanarBatch(dst, const_cast &>(src), stream); +} + +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/DBScan.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/DBScan.cpp new file mode 100644 index 0000000..f877154 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/DBScan.cpp @@ -0,0 +1,214 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/tensor_ops/DBScan.h"
+#include "cv/tensor_ops/BBoxUtils.h"
+
+#include <stdexcept>
+#include <vector>
+
+namespace cvcore { namespace tensor_ops {
+
+constexpr int kUnclassified = -1;
+constexpr int kCorePoint = 1;
+constexpr int kBorderPoint = 2;
+constexpr int kNoise = -2;
+
+namespace {
+
+float CalculateDistance(const BBox &lhs, const BBox &rhs)
+{
+    const float iou = GetIoU(lhs, rhs);
+    return 1.0f - iou;
+}
+
+void MergeMaximumBBoxes(Array<BBox> &input, Array<int> &clusters, Array<BBox> &output)
+{
+    BBox tempBox = {-1, -1, -1, -1};
+    // Initialize each cluster box with a placeholder that belongs to no cluster
+    for (int i = 0; i < output.getSize(); i++)
+    {
+        // BBox is a plain struct, so these assignments copy by value
+        output[i] = tempBox;
+    }
+
+    for (int i = 0; i < input.getSize(); i++)
+    {
+        int clusterId = clusters[i];
+        if (clusterId >= 0)
+        {
+            // Box merging is associative & commutative
+            output[clusterId] = MergeBoxes(input[i], output[clusterId]);
+        }
+    }
+}
+
+void MergeWeightedBBoxes(Array<BBox> &input, Array<int> &clusters, Array<float> &weights, Array<BBox> &output)
+{
+    int numClusters = output.getSize();
+    // centos has gcc 4.8.5 which complains about initializing variable sized arrays with {}.
+    // Use std::vector for variable sized array.
+    std::vector<float> xmins(numClusters, 0);
+    std::vector<float> ymins(numClusters, 0);
+    std::vector<float> xmaxs(numClusters, 0);
+    std::vector<float> ymaxs(numClusters, 0);
+    std::vector<float> scales(numClusters, 0);
+
+    for (int i = 0; i < input.getSize(); i++)
+    {
+        int clusterId = clusters[i];
+        if (clusterId >= 0)
+        {
+            xmins[clusterId] += input[i].xmin * weights[i];
+            ymins[clusterId] += input[i].ymin * weights[i];
+            xmaxs[clusterId] += input[i].xmax * weights[i];
+            ymaxs[clusterId] += input[i].ymax * weights[i];
+            scales[clusterId] += weights[i];
+        }
+    }
+
+    for (int i = 0; i < numClusters; i++)
+    {
+        output[i] = {int(xmins[i] / scales[i] + 0.5f), int(ymins[i] / scales[i] + 0.5f),
+                     int(xmaxs[i] / scales[i] + 0.5f), int(ymaxs[i] / scales[i] + 0.5f)};
+    }
+}
+
+} // anonymous namespace
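Because CalculateDistance is defined as 1 - IoU, the epsilon passed to DBScan is effectively an overlap threshold on boxes rather than a pixel radius. A quick worked check (hypothetical boxes, not from the patch):

    BBox a{0, 0, 10, 10};   // 10x10 box
    BBox b{5, 0, 15, 10};   // same size, shifted right by 5
    // intersection = 50, union = 150, IoU = 1/3, so the DBScan distance is
    // 1 - 1/3 ~= 0.667; the two boxes share a cluster only when epsilon >= 0.667.
    float d = 1.0f - GetIoU(a, b);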
+DBScan::DBScan(int pointsSize, int minPoints, float epsilon)
+    : m_pointsSize(pointsSize)
+    , m_numClusters(0)
+    , m_minPoints(minPoints)
+    , m_epsilon(epsilon)
+    , m_clusterStates(pointsSize, true)
+{
+    m_clusterStates.setSize(pointsSize);
+}
+
+void DBScan::doCluster(Array<BBox> &input, Array<int> &clusters)
+{
+    // Reset all cluster ids
+    for (int i = 0; i < m_pointsSize; i++)
+    {
+        clusters[i] = -1;
+        m_clusterStates[i] = kUnclassified;
+    }
+    int nextClusterId = 0;
+    for (int cIndex = 0; cIndex < m_pointsSize; cIndex++)
+    {
+        std::vector<int> neighbors;
+        for (int neighborIndex = 0; neighborIndex < m_pointsSize; neighborIndex++)
+        {
+            if (neighborIndex == cIndex)
+            {
+                continue; // A point is not its own neighbor
+            }
+            if (CalculateDistance(input[cIndex], input[neighborIndex]) <= m_epsilon)
+            {
+                // neighborIndex is in our neighborhood
+                neighbors.push_back(neighborIndex);
+
+                if (m_clusterStates[neighborIndex] == kCorePoint)
+                {
+                    // We are in the neighborhood of a core point, so we are at least a border point
+                    m_clusterStates[cIndex] = kBorderPoint;
+                    // Adopt the first cluster id we encounter
+                    if (clusters[cIndex] == -1)
+                    {
+                        clusters[cIndex] = clusters[neighborIndex];
+                    }
+                }
+            }
+        }
+        if (neighbors.size() >= m_minPoints - 1)
+        {
+            m_clusterStates[cIndex] = kCorePoint;
+            if (clusters[cIndex] == -1)
+            {
+                // We're not in the neighborhood of other core points,
+                // so we're the core of a new cluster
+                clusters[cIndex] = nextClusterId;
+                nextClusterId++;
+            }
+
+            // Set all neighbors that came before us to be border points
+            for (int neighborListIndex = 0;
+                 neighborListIndex < neighbors.size() && neighbors[neighborListIndex] < cIndex; neighborListIndex++)
+            {
+                if (m_clusterStates[neighbors[neighborListIndex]] == kNoise)
+                {
+                    // If it was noise, now it's a border point in our cluster
+                    m_clusterStates[neighbors[neighborListIndex]] = kBorderPoint;
+                    // Make sure everything that's in our neighborhood is our cluster id
+                    clusters[neighbors[neighborListIndex]] = clusters[cIndex];
+                }
+            }
+        }
+        else
+        {
+            // We are a border point, or a noise point
+            if (m_clusterStates[cIndex] == kUnclassified)
+            {
+                m_clusterStates[cIndex] = kNoise;
+                clusters[cIndex] = -1;
+            }
+        }
+    }
+
+    m_numClusters = nextClusterId; // Number of clusters
+}
+
+void DBScan::doClusterAndMerge(Array<BBox> &input, Array<BBox> &output, BBoxMergeType type)
+{
+    Array<int> clusters(m_pointsSize, true);
+    clusters.setSize(m_pointsSize);
+    doCluster(input, clusters);
+    output.setSize(m_numClusters);
+
+    // merge bboxes based on different modes
+    if (type == MAXIMUM)
+    {
+        MergeMaximumBBoxes(input, clusters, output);
+    }
+    else
+    {
+        throw std::runtime_error("Unsupported bbox merge type.");
+    }
+}
+
+void DBScan::doClusterAndMerge(Array<BBox> &input, Array<float> &weights, Array<BBox> &output, BBoxMergeType type)
+{
+    Array<int> clusters(m_pointsSize, true);
+    clusters.setSize(m_pointsSize);
+    doCluster(input, clusters);
+    output.setSize(m_numClusters);
+
+    // merge type must be WEIGHTED
+    if (type != WEIGHTED)
+    {
+        throw std::runtime_error("Bbox merge type must be WEIGHTED.");
+    }
+    MergeWeightedBBoxes(input, clusters, weights, output);
+}
+
+int DBScan::getNumClusters() const
+{
+    return m_numClusters;
+}
+
+}} // namespace cvcore::tensor_ops
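For orientation, a minimal sketch of how this clustering entry point is meant to be driven (hypothetical sizes and thresholds; Array is assumed to use the capacity/isCPU constructor seen above):

    Array<BBox> boxes(4, true);        // four candidate detections, CPU-backed
    boxes.setSize(4);
    // ... fill boxes[0..3] ...
    DBScan dbscan(4 /* pointsSize */, 2 /* minPoints */, 0.5f /* epsilon */);
    Array<BBox> merged(4, true);       // capacity for up to four clusters
    dbscan.doClusterAndMerge(boxes, merged, MAXIMUM);
    // merged now holds dbscan.getNumClusters() merged boxes.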
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Errors.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Errors.cpp
new file mode 100644
index 0000000..d29a1ae
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Errors.cpp
@@ -0,0 +1,104 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/tensor_ops/Errors.h"
+
+#ifndef __cpp_lib_to_underlying
+// Backport of the C++23 std::to_underlying utility into std
+namespace std
+{
+    template<typename Enum>
+    constexpr underlying_type_t<Enum> to_underlying(Enum e) noexcept
+    {
+        return static_cast<underlying_type_t<Enum>>(e);
+    }
+}
+#endif // __cpp_lib_to_underlying
+
+namespace cvcore { namespace tensor_ops {
+
+namespace detail
+{
+    struct TensorOpsErrorCategory : std::error_category
+    {
+        virtual const char * name() const noexcept override final
+        {
+            return "cvcore-tensor-ops-error";
+        }
+
+        virtual std::string message(int value) const override final
+        {
+            std::string result;
+
+            switch(value)
+            {
+                case std::to_underlying(TensorOpsErrorCode::SUCCESS):
+                    result = "(SUCCESS) No errors detected";
+                    break;
+                case std::to_underlying(TensorOpsErrorCode::COMPUTE_ENGINE_UNSUPPORTED_BY_CONTEXT):
+                    result = "(COMPUTE_ENGINE_UNSUPPORTED_BY_CONTEXT) The selected compute "
+                             "engine defined by cvcore::ComputeEngine is not available in the "
+                             "requested context defined by cvcore::tensor_ops::TensorBackend";
+                    break;
+                case std::to_underlying(TensorOpsErrorCode::CAMERA_DISTORTION_MODEL_UNSUPPORTED):
+                    result = "(CAMERA_DISTORTION_MODEL_UNSUPPORTED) The selected camera "
+                             "distortion model defined by cvcore::CameraDistortionType is "
+                             "currently unsupported";
+                    break;
+                default:
+                    result = "(Unrecognized Condition) Value " + std::to_string(value) +
+                             " does not map to a known error code literal defined by "
+                             "cvcore::tensor_ops::TensorOpsErrorCode";
+                    break;
+            }
+
+            return result;
+        }
+
+        virtual std::error_condition default_error_condition(int code) const noexcept override final
+        {
+            std::error_condition result;
+
+            switch(code)
+            {
+                case std::to_underlying(TensorOpsErrorCode::SUCCESS):
+                    result = ErrorCode::SUCCESS;
+                    break;
+                case std::to_underlying(TensorOpsErrorCode::COMPUTE_ENGINE_UNSUPPORTED_BY_CONTEXT):
+                    result = ErrorCode::INVALID_ENGINE_TYPE;
+                    break;
+                case std::to_underlying(TensorOpsErrorCode::CAMERA_DISTORTION_MODEL_UNSUPPORTED):
+                    result = ErrorCode::INVALID_ARGUMENT;
+                    break;
+                default:
+                    result = ErrorCode::NOT_IMPLEMENTED;
+                    break;
+            }
+
+            return result;
+        }
+    };
+} // namespace detail
+
+const detail::TensorOpsErrorCategory errorCategory{};
+
+std::error_code make_error_code(TensorOpsErrorCode ec) noexcept
+{
+    return {std::to_underlying(ec), errorCategory};
+}
+
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.cpp
new file mode 100644
index 0000000..d8bd75c
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.cpp
@@ -0,0 +1,112 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "Filters.h" +#include "NppUtils.h" + +#include "cv/core/MathTypes.h" +#include "cv/core/Memory.h" + +#include + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +void BoxFilter(Tensor &dst, const Tensor &src, const Vector2i &maskSize, + const Vector2i &anchor, cudaStream_t stream) +{ + assert(!src.isCPU() && !dst.isCPU()); + NppStatus status = nppiFilterBoxBorder_8u_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {0, 0}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {maskSize.x, maskSize.y}, + {anchor.x, anchor.y}, + NPP_BORDER_REPLICATE, //Only Npp Replicate is supported!!! + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void BoxFilter(Tensor &dst, const Tensor &src, const Vector2i &maskSize, + const Vector2i &anchor, cudaStream_t stream) +{ + assert(!src.isCPU() && !dst.isCPU()); + NppStatus status = nppiFilterBoxBorder_8u_C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {0, 0}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {maskSize.x, maskSize.y}, + {anchor.x, anchor.y}, NPP_BORDER_REPLICATE, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void BoxFilter(Tensor &dst, const Tensor &src, const Vector2i &maskSize, + const Vector2i &anchor, cudaStream_t stream) +{ + assert(!src.isCPU() && !dst.isCPU()); + NppStatus status = nppiFilterBoxBorder_16u_C1R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {0, 0}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {maskSize.x, maskSize.y}, + {anchor.x, anchor.y}, NPP_BORDER_REPLICATE, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void BoxFilter(Tensor &dst, const Tensor &src, const Vector2i &maskSize, + const Vector2i &anchor, cudaStream_t stream) +{ + assert(!src.isCPU() && !dst.isCPU()); + NppStatus status = nppiFilterBoxBorder_16u_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {0, 0}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {maskSize.x, maskSize.y}, + {anchor.x, anchor.y}, NPP_BORDER_REPLICATE, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void BoxFilter(Tensor &dst, const Tensor &src, const Vector2i &maskSize, + const Vector2i &anchor, cudaStream_t stream) +{ + assert(!src.isCPU() && !dst.isCPU()); + NppStatus status = nppiFilterBoxBorder_32f_C3R_Ctx( + static_cast(src.getData()), src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {0, 0}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {static_cast(src.getWidth()), static_cast(src.getHeight())}, {maskSize.x, maskSize.y}, + {anchor.x, anchor.y}, NPP_BORDER_REPLICATE, 
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.h
new file mode 100644
index 0000000..f764b0b
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/Filters.h
@@ -0,0 +1,105 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_FILTERS_H
+#define CVCORE_FILTERS_H
+
+#include "cv/core/Tensor.h"
+#include "cv/core/MathTypes.h"
+
+#include <cuda_runtime.h>
+
+namespace cvcore { namespace tensor_ops {
+
+/**
+ * Box type filtering for three channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+
+void BoxFilter(Tensor<HWC, C3, U8> &dst, const Tensor<HWC, C3, U8> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+/**
+ * Box type filtering for three channel HWC format uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+void BoxFilter(Tensor<HWC, C3, U16> &dst, const Tensor<HWC, C3, U16> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+/**
+ * Box type filtering for three channel HWC format float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+void BoxFilter(Tensor<HWC, C3, F32> &dst, const Tensor<HWC, C3, F32> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+
+/**
+ * Box type filtering for one channel HWC format uint_8 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+void BoxFilter(Tensor<HWC, C1, U8> &dst, const Tensor<HWC, C1, U8> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+/**
+ * Box type filtering for one channel HWC format uint_16 type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+void BoxFilter(Tensor<HWC, C1, U16> &dst, const Tensor<HWC, C1, U16> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+/**
+ * Box type filtering for one channel HWC format float type Tensor.
+ * @param dst destination tensor.
+ * @param src source tensor.
+ * @param maskSize Size of mask which determines number of pixels to be averaged.
+ * @param anchor Offset of mask relative to current pixel index.
+ *               {0, 0} aligns the mask with the starting pixel;
+ *               {mask size/2, mask size/2} centers the mask on the current pixel.
+ * @param stream specified cuda stream.
+ */
+void BoxFilter(Tensor<HWC, C1, F32> &dst, const Tensor<HWC, C1, F32> &src, const Vector2i &maskSize,
+               const Vector2i &anchor, cudaStream_t stream = 0);
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_FILTERS_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/FusedOperations.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/FusedOperations.cpp
new file mode 100644
index 0000000..4ae0c3b
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/FusedOperations.cpp
@@ -0,0 +1,261 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "NppUtils.h" + +#include "cv/tensor_ops/ImageUtils.h" + +#include "cv/core/Memory.h" + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +template +struct ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperatorImpl +{ + int m_width; + int m_height; + int m_depth; + std::unique_ptr> m_resizedTensor; + std::unique_ptr> m_normalizedTensor; + + template::type * = nullptr> + ImageToNormalizedPlanarTensorOperatorImpl(int width, int height) + : m_width(width) + , m_height(height) + , m_depth(1) + { + m_resizedTensor.reset(new Tensor(width, height, false)); + m_normalizedTensor.reset(new Tensor(width, height, false)); + } + + template::type * = nullptr> + ImageToNormalizedPlanarTensorOperatorImpl(int width, int height, int depth) + : m_width(width) + , m_height(height) + , m_depth(depth) + { + m_resizedTensor.reset(new Tensor(width, height, depth, false)); + m_normalizedTensor.reset(new Tensor(width, height, depth, false)); + } + + template::type * = nullptr> + void execute(Tensor &dst, const Tensor &src, const float scale[3], + const float offset[3], bool swapRB, bool keep_aspect_ratio, cudaStream_t stream) + { + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + // dst image width/height must match width/height of class + if ((dst.getWidth() != m_width) || (dst.getHeight() != m_height)) + { + throw std::runtime_error("invalid input width/height"); + } + + // first do the resizing + Resize(*m_resizedTensor, src, keep_aspect_ratio, INTERP_LINEAR, stream); + + // swap channels if needed + if (swapRB) + { + ConvertColorFormat(*m_resizedTensor, *m_resizedTensor, BGR2RGB, stream); + } + + // do the normalization + Normalize(*m_normalizedTensor, *m_resizedTensor, scale, offset, stream); + + // convert interleave to planar tensor + InterleavedToPlanar(dst, *m_normalizedTensor, stream); + } + + template::type * = nullptr> + void execute(Tensor &dst, const Tensor &src, const float scale[3], + const float offset[3], bool swapRB, bool keep_aspect_ratio, cudaStream_t stream) + { + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + // dst image width/height must match width/height of class + if ((dst.getWidth() != m_width) || (dst.getHeight() != m_height)) + { + throw std::runtime_error("invalid input width/height"); + } + + // dst image depth must be equal to src image depth and no bigger than m_depth + if ((dst.getDepth() != src.getDepth()) || (dst.getDepth() > m_depth)) + { + throw std::runtime_error("invalid input depth"); + } + + // wrap the batch tensor with non-owning tensor + Tensor resizedTensor(m_width, m_height, dst.getDepth(), m_resizedTensor->getData(), false); + Tensor normalizedTensor(m_width, m_height, dst.getDepth(), m_normalizedTensor->getData(), + false); + + // first do the resizing + Resize(resizedTensor, src, keep_aspect_ratio, INTERP_LINEAR, stream); + + // swap channels if needed + if (swapRB) + { + ConvertColorFormat(resizedTensor, resizedTensor, BGR2RGB, stream); + } + + // do the normalization + Normalize(normalizedTensor, resizedTensor, scale, offset, stream); + + // convert interleave to planar tensor + InterleavedToPlanar(dst, normalizedTensor, stream); + } + + template::type * = nullptr> + void execute(Tensor &dst, const Tensor &src, float scale, float offset, + bool keep_aspect_ratio, cudaStream_t stream) + { + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + // dst image width/height must 
match width/height of class + if ((dst.getWidth() != m_width) || (dst.getHeight() != m_height)) + { + throw std::runtime_error("invalid input width/height"); + } + + // first do the resizing + Resize(*m_resizedTensor, src, keep_aspect_ratio, INTERP_LINEAR, stream); + + // do the normalization and map to destination tensor directly + Tensor output(m_width, m_height, dst.getData(), false); + Normalize(output, *m_resizedTensor, scale, offset, stream); + } + + template::type * = nullptr> + void execute(Tensor &dst, const Tensor &src, float scale, float offset, + bool keep_aspect_ratio, cudaStream_t stream) + { + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + // dst image width/height must match width/height of class + if ((dst.getWidth() != m_width) || (dst.getHeight() != m_height)) + { + throw std::runtime_error("invalid input width/height"); + } + + // dst image depth must be equal to src image depth and no bigger than m_depth + if ((dst.getDepth() != src.getDepth()) || (dst.getDepth() > m_depth)) + { + throw std::runtime_error("invalid input depth"); + } + + // wrap the batch tensor with non-owning tensor + Tensor resizedTensor(m_width, m_height, dst.getDepth(), m_resizedTensor->getData(), false); + + // first do the resizing + Resize(resizedTensor, src, keep_aspect_ratio, INTERP_LINEAR, stream); + + // do the normalization and map to destination tensor directly + Tensor output(m_width, m_height, dst.getDepth(), dst.getData(), false); + Normalize(output, resizedTensor, scale, offset, stream); + } +}; + +template +template::type *> +ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int width, + int height) + : m_pImpl(new ImageToNormalizedPlanarTensorOperatorImpl(width, height)) +{ + static_assert(TL_IN == HWC && TL_OUT == CHW, "Tensor Layout is different"); + static_assert(CC == C1 || CC == C3, "Channel count is different"); +} + +template +template::type *> +ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int width, + int height, + int depth) + : m_pImpl(new ImageToNormalizedPlanarTensorOperatorImpl(width, height, depth)) +{ + static_assert(TL_IN == NHWC && TL_OUT == NCHW, "Tensor Layout is different"); + static_assert(CC == C1 || CC == C3, "Channel count is different"); +} + +template +ImageToNormalizedPlanarTensorOperator::~ImageToNormalizedPlanarTensorOperator() +{ +} + +template +template::type *> +void ImageToNormalizedPlanarTensorOperator::operator()( + Tensor &dst, const Tensor &src, const float scale[3], const float offset[3], + bool swapRB, bool keep_aspect_ratio, cudaStream_t stream) +{ + m_pImpl->execute(dst, src, scale, offset, swapRB, keep_aspect_ratio, stream); +} + +template +template::type *> +void ImageToNormalizedPlanarTensorOperator::operator()(Tensor &dst, + const Tensor &src, + float scale, float offset, + bool keep_aspect_ratio, + cudaStream_t stream) +{ + m_pImpl->execute(dst, src, scale, offset, keep_aspect_ratio, stream); +} + +// explicit instantiations +template class ImageToNormalizedPlanarTensorOperator; +template void ImageToNormalizedPlanarTensorOperator::operator()(Tensor &, + const Tensor &, + const float [], const float [], + bool, bool, cudaStream_t); +template ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int, int); + +template class ImageToNormalizedPlanarTensorOperator; +template void ImageToNormalizedPlanarTensorOperator::operator()(Tensor &, + const Tensor &, + float, float, bool, cudaStream_t); +template 
ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int, int); + +template class ImageToNormalizedPlanarTensorOperator; +template void ImageToNormalizedPlanarTensorOperator::operator()(Tensor &, + const Tensor &, + const float [], const float [], + bool, bool, cudaStream_t); +template ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int, + int, + int); + +template class ImageToNormalizedPlanarTensorOperator; +template void ImageToNormalizedPlanarTensorOperator::operator()(Tensor &, + const Tensor &, + float, float, bool, + cudaStream_t); +template ImageToNormalizedPlanarTensorOperator::ImageToNormalizedPlanarTensorOperator(int, + int, + int); +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/GeometryTransforms.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/GeometryTransforms.cpp new file mode 100644 index 0000000..238eac1 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/GeometryTransforms.cpp @@ -0,0 +1,754 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "NppUtils.h" + +#include "cv/tensor_ops/ImageUtils.h" + +#include "cv/core/Memory.h" + +#include +#include + +#include +#include +#include + +namespace cvcore { namespace tensor_ops { + +namespace { + +static NppiInterpolationMode GetNppiInterpolationMode(InterpolationType type) +{ + if (type == INTERP_NEAREST) + { + return NPPI_INTER_NN; + } + else if (type == INTERP_LINEAR) + { + return NPPI_INTER_LINEAR; + } + else if (type == INTERP_CUBIC_BSPLINE) + { + return NPPI_INTER_CUBIC2P_BSPLINE; + } + else if (type == INTERP_CUBIC_CATMULLROM) + { + return NPPI_INTER_CUBIC2P_CATMULLROM; + } + else + { + throw std::runtime_error("invalid resizing interpolation mode"); + } +} + +static BBox GetScaledROI(int srcW, int srcH, int dstW, int dstH) +{ + if (srcW * dstH >= dstW * srcH) + { + int bboxH = static_cast((static_cast(srcH) / srcW) * dstW); + int offsetH = (dstH - bboxH) / 2; + return {0, offsetH, dstW, offsetH + bboxH}; + } + else + { + int bboxW = static_cast((static_cast(srcW) / srcH) * dstH); + int offsetW = (dstW - bboxW) / 2; + return {offsetW, 0, offsetW + bboxW, dstH}; + } +} + +static void AssertValidROI(const BBox &roi, int width, int height) +{ + assert(roi.xmin >= 0 && roi.xmin < roi.xmax); + assert(roi.ymin >= 0 && roi.ymin < roi.ymax); + assert(roi.ymax <= height); + assert(roi.xmax <= width); +} + +template +void FillBufferC1U8Impl(Tensor &dst, const Npp8u value, cudaStream_t stream) +{ + assert(!dst.isCPU()); + NppStatus status = nppiSet_8u_C1R_Ctx(value, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void FillBufferC1U16Impl(Tensor &dst, const 
Npp16u value, cudaStream_t stream) +{ + assert(!dst.isCPU()); + NppStatus status = nppiSet_16u_C1R_Ctx(value, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void FillBufferC1F32Impl(Tensor &dst, const Npp32f value, cudaStream_t stream) +{ + assert(!dst.isCPU()); + NppStatus status = nppiSet_32f_C1R_Ctx(value, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +static void FillBuffer(Tensor &dst, const Npp8u value, cudaStream_t stream = 0) +{ + FillBufferC1U8Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp16u value, cudaStream_t stream = 0) +{ + FillBufferC1U16Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp32f value, cudaStream_t stream = 0) +{ + FillBufferC1F32Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp8u value, cudaStream_t stream = 0) +{ + FillBufferC1U8Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp16u value, cudaStream_t stream = 0) +{ + FillBufferC1U16Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp32f value, cudaStream_t stream = 0) +{ + FillBufferC1F32Impl(dst, value, stream); +} + +static void FillBuffer(Tensor &dst, const Npp8u value, cudaStream_t stream = 0) +{ + assert(!dst.isCPU()); + const Npp8u padding[3] = {value, value, value}; + NppStatus status = nppiSet_8u_C3R_Ctx(padding, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +static void FillBuffer(Tensor &dst, const Npp16u value, cudaStream_t stream = 0) +{ + assert(!dst.isCPU()); + const Npp16u padding[3] = {value, value, value}; + NppStatus status = nppiSet_16u_C3R_Ctx(padding, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +static void FillBuffer(Tensor &dst, const Npp32f value, cudaStream_t stream = 0) +{ + assert(!dst.isCPU()); + const Npp32f padding[3] = {value, value, value}; + NppStatus status = nppiSet_32f_C3R_Ctx(padding, static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {int(dst.getWidth()), int(dst.getHeight())}, GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void CropAndResizeC1U8Impl(Tensor &dst, const Tensor &src, const BBox &dstROI, + const BBox &srcROI, InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_8u_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * dst.getStride(TensorDimension::HEIGHT) + dstROI.xmin), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 
0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void CropAndResizeC1U16Impl(Tensor &dst, const Tensor &src, const BBox &dstROI, + const BBox &srcROI, InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_16u_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * dst.getStride(TensorDimension::HEIGHT) + dstROI.xmin), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void CropAndResizeC1F32Impl(Tensor &dst, const Tensor &src, const BBox &dstROI, + const BBox &srcROI, InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_32f_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * dst.getStride(TensorDimension::HEIGHT) + dstROI.xmin), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +template +void ResizeImpl(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + const BBox dstROI = keep_aspect_ratio + ? 
GetScaledROI(src.getWidth(), src.getHeight(), dst.getWidth(), dst.getHeight()) + : BBox{0, 0, int(dst.getWidth()), int(dst.getHeight())}; + if (keep_aspect_ratio) + { + FillBuffer(dst, 0, stream); + } + CropAndResize(dst, src, dstROI, {0, 0, int(src.getWidth()), int(src.getHeight())}, type, stream); +} + +template +void CropC1U8Impl(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_8u_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +template +void CropC1U16Impl(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_16u_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +template +void CropC1F32Impl(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_32f_C1R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + srcROI.xmin), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +template +void ResizeBatch(Tensor &dst, Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + assert(src.getDepth() == dst.getDepth()); + + for (int i = 0; i < src.getDepth(); i++) + { + size_t offsetSrc = i * src.getStride(TensorDimension::DEPTH); + size_t offsetDst = i * dst.getStride(TensorDimension::DEPTH); + Tensor srcTmp(src.getWidth(), src.getHeight(), + src.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + src.getData() + offsetSrc, false); + Tensor dstTmp(dst.getWidth(), dst.getHeight(), + dst.getStride(TensorDimension::HEIGHT) * GetChannelSize(CT), + dst.getData() + offsetDst, false); + Resize(dstTmp, srcTmp, keep_aspect_ratio, type, stream); + } +} + +} // anonymous namespace + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, 
keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1U8Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, InterpolationType type, + cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, + InterpolationType type, cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1U16Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, + InterpolationType type, cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1F32Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1U8Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, InterpolationType type, + cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1U16Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + 
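For reference, the keep_aspect_ratio path above letterboxes: ResizeImpl() clears the destination with FillBuffer() and then resizes into the centered ROI computed by GetScaledROI(). A worked sketch of that ROI arithmetic (editorial, not part of the patch), using hypothetical 1920x1080 source and 960x960 destination sizes:

// Editorial worked example of the GetScaledROI() letterbox arithmetic.
#include <cstdio>

int main()
{
    const int srcW = 1920, srcH = 1080, dstW = 960, dstH = 960;
    // srcW * dstH >= dstW * srcH  ->  1843200 >= 1036800, so the source is
    // relatively wider: the full destination width is used, height is padded.
    const int bboxH   = static_cast<int>((static_cast<float>(srcH) / srcW) * dstW); // 540
    const int offsetH = (dstH - bboxH) / 2;                                         // 210
    std::printf("dst ROI = {0, %d, %d, %d}\n", offsetH, dstW, offsetH + bboxH);
    // -> {0, 210, 960, 750}; rows [0, 210) and [750, 960) keep the fill value.
    return 0;
}
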
+void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResizeC1F32Impl(dst, src, dstROI, srcROI, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_8u_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * dst.getStride(TensorDimension::HEIGHT) + + dstROI.xmin * dst.getChannelCount()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, InterpolationType type, + cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, + InterpolationType type, cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_16u_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * 
dst.getStride(TensorDimension::HEIGHT) + + dstROI.xmin * dst.getChannelCount()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, InterpolationType type, + cudaStream_t stream) +{ + ResizeImpl(dst, src, keep_aspect_ratio, type, stream); +} + +void Resize(Tensor &dst, const Tensor &src, bool keep_aspect_ratio, + InterpolationType type, cudaStream_t stream) +{ + ResizeBatch(dst, const_cast &>(src), keep_aspect_ratio, type, stream); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &dstROI, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(dstROI, dst.getWidth(), dst.getHeight()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + + NppStatus status = nppiResizeSqrPixel_32f_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + static_cast(dst.getData() + dstROI.ymin * dst.getStride(TensorDimension::HEIGHT) + + dstROI.xmin * dst.getChannelCount()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, dstROI.xmax - dstROI.xmin, dstROI.ymax - dstROI.ymin}, + double(dstROI.xmax - dstROI.xmin) / double(srcROI.xmax - srcROI.xmin), + double(dstROI.ymax - dstROI.ymin) / double(srcROI.ymax - srcROI.ymin), 0.0, 0.0, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + assert(status == NPP_SUCCESS); +} + +void CropAndResize(Tensor &dst, const Tensor &src, const BBox &srcROI, + InterpolationType type, cudaStream_t stream) +{ + CropAndResize(dst, src, {0, 0, int(dst.getWidth()), int(dst.getHeight())}, srcROI, type, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1U8Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1U16Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1F32Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1U8Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1U16Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + CropC1F32Impl(dst, src, srcROI, stream); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - 
srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_8u_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_16u_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void Crop(Tensor &dst, const Tensor &src, const BBox &srcROI, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + AssertValidROI(srcROI, src.getWidth(), src.getHeight()); + assert(srcROI.xmax - srcROI.xmin == dst.getWidth() && srcROI.ymax - srcROI.ymin == dst.getHeight()); + + NppStatus status = nppiCopy_32f_C3R_Ctx( + static_cast(src.getData() + srcROI.ymin * src.getStride(TensorDimension::HEIGHT) + + srcROI.xmin * src.getChannelCount()), + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), static_cast(dst.getData()), + dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), {srcROI.xmax - srcROI.xmin, srcROI.ymax - srcROI.ymin}, + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_8u_C1R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_16u_C1R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + 
InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_32f_C1R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_8u_C3R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp8u), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_16u_C3R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp16u), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +void WarpPerspective(Tensor &dst, const Tensor &src, const double coeffs[3][3], + InterpolationType type, cudaStream_t stream) +{ + // src and dst must be GPU tensors + assert(!src.isCPU() && !dst.isCPU()); + + NppStatus status = nppiWarpPerspective_32f_C3R_Ctx( + static_cast(src.getData()), {int(src.getWidth()), int(src.getHeight())}, + src.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), {0, 0, int(src.getWidth()), int(src.getHeight())}, + static_cast(dst.getData()), dst.getStride(TensorDimension::HEIGHT) * sizeof(Npp32f), + {0, 0, int(dst.getWidth()), int(dst.getHeight())}, coeffs, GetNppiInterpolationMode(type), + GetNppStreamContext(stream)); + + assert(status == NPP_SUCCESS); +} + +}} // namespace cvcore::tensor_ops \ No newline at end of file diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/IImageWarp.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/IImageWarp.cpp new file mode 100644 index 0000000..c96b07e --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/IImageWarp.cpp @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/tensor_ops/IImageWarp.h"
+
+namespace cvcore { namespace tensor_ops {
+
+IImageWarp::~IImageWarp() {}
+
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.cpp
new file mode 100644
index 0000000..4c7cf7f
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.cpp
@@ -0,0 +1,116 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "NppUtils.h"
+
+#include <cuda_runtime.h>
+#include <array>
+#include <mutex>
+#include <stdexcept>
+
+namespace cvcore { namespace tensor_ops {
+
+constexpr size_t CACHE_SIZE = 20;
+static size_t timestamp = 0;
+std::mutex lock;
+
+namespace {
+
+// This function involves a GPU query and can be really slow
+void SetupNppStreamContext(NppStreamContext &context, cudaStream_t stream)
+{
+    context.hStream = stream;
+    cudaError_t error = cudaGetDevice(&context.nCudaDeviceId);
+    if (error != cudaSuccess)
+    {
+        throw std::runtime_error("no devices supporting CUDA");
+    }
+    error = cudaStreamGetFlags(context.hStream, &context.nStreamFlags);
+    if (error != cudaSuccess)
+    {
+        throw std::runtime_error("failed to get cuda stream flags");
+    }
+
+    cudaDeviceProp deviceProp;
+    error = cudaGetDeviceProperties(&deviceProp, context.nCudaDeviceId);
+    if (error != cudaSuccess)
+    {
+        throw std::runtime_error("no device properties");
+    }
+
+    context.nSharedMemPerBlock = deviceProp.sharedMemPerBlock;
+    context.nMaxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
+    context.nMultiProcessorCount = deviceProp.multiProcessorCount;
+    context.nMaxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
+
+    // Refer - https://gitlab-master.nvidia.com/cv/core-modules/tensor_ops/-/merge_requests/48#note_6602087
+    context.nReserved0 = 0;
+
+    error = cudaDeviceGetAttribute(&(context.nCudaDevAttrComputeCapabilityMajor), cudaDevAttrComputeCapabilityMajor,
+                                   context.nCudaDeviceId);
+    if (error != cudaSuccess)
+    {
+        throw std::runtime_error("no device attribute - nCudaDevAttrComputeCapabilityMajor");
+    }
+
+    error = cudaDeviceGetAttribute(&(context.nCudaDevAttrComputeCapabilityMinor), cudaDevAttrComputeCapabilityMinor,
+                                   context.nCudaDeviceId);
+    if (error != cudaSuccess)
+    {
+        throw std::runtime_error("no device attribute - nCudaDevAttrComputeCapabilityMinor");
+    }
+}
+
+} // anonymous namespace
+
+struct Context
+{
+    NppStreamContext nppContext;
+    size_t time = 0;
+};
+
+NppStreamContext GetNppStreamContext(cudaStream_t stream)
+{
+    // Create a memory cache; all timestamps are initialized to 0 automatically
+    static std::array<Context, CACHE_SIZE> contextCache = {};
+
+    // Serialize access to the cache across threads
+    std::lock_guard<std::mutex> guard(lock);
+
+    size_t minTimestamp = contextCache[0].time;
+    size_t minIdx = 0;
+    for (size_t i = 0; i < CACHE_SIZE; i++)
+    {
+        auto &it = contextCache[i];
+        if (it.time > 0 && it.nppContext.hStream == stream)
+        {
+            it.time = ++timestamp;
+            return it.nppContext;
+        }
+        if (it.time < minTimestamp)
+        {
+            minTimestamp = it.time;
+            minIdx = i;
+        }
+    }
+    auto &it = contextCache[minIdx];
+    SetupNppStreamContext(it.nppContext, stream);
+    it.time = ++timestamp;
+    return it.nppContext;
+}
+
+}} // namespace cvcore::tensor_ops
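
For reference, GetNppStreamContext() above amortizes the slow device-property query with a small least-recently-used cache keyed by stream; a usage sketch (editorial, not part of the patch):

// Editorial usage sketch, not part of the patch: repeated lookups on the same
// stream are cheap because cudaGetDeviceProperties() only runs on a cache miss.
#include <cuda_runtime.h>
#include <nppdefs.h>
#include "NppUtils.h"

void SubmitTwice()
{
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    using cvcore::tensor_ops::GetNppStreamContext;
    NppStreamContext ctx1 = GetNppStreamContext(stream); // miss: queries the device
    NppStreamContext ctx2 = GetNppStreamContext(stream); // hit: served from the cache
    // Both contexts target the same stream; pass them to nppi*_Ctx() calls.
    (void)ctx1;
    (void)ctx2;
    cudaStreamDestroy(stream);
}
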
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.h
new file mode 100644
index 0000000..398ef8f
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/NppUtils.h
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_NPP_UTILS_H
+#define CVCORE_NPP_UTILS_H
+
+#include <cuda_runtime.h>
+
+#include <nppdefs.h>
+
+namespace cvcore { namespace tensor_ops {
+
+NppStreamContext GetNppStreamContext(cudaStream_t stream);
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_NPP_UTILS_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/OneEuroFilter.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/OneEuroFilter.cpp
new file mode 100644
index 0000000..35041b8
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/OneEuroFilter.cpp
@@ -0,0 +1,288 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/tensor_ops/OneEuroFilter.h" +#include "cv/core/MathTypes.h" +#include "cv/core/Traits.h" + +#include +#include +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +namespace cvcore { namespace tensor_ops { + +namespace { + +// 1/(2*PI) +constexpr float kOneOver2Pi = 0.15915494309189533577f; + +// Utilities to get template type from another template type +template +struct deduceDataType; +template +struct deduceDataType::value || std::is_same::value || + std::is_same::value>::type> +{ + typedef float U; +}; +template +struct deduceDataType::value || std::is_same::value || + std::is_same::value>::type> +{ + typedef double U; +}; + +} // namespace + +/* Low pass filter to apply exponential smoothing*/ +template +class LowPassfilter +{ +public: + LowPassfilter() + { + m_firstIteration = true; + } + + void resetState() + { + m_firstIteration = true; + } + + bool isInitialized() const + { + return !m_firstIteration; + } + + T getPreviousValue() const + { + return m_prevfilteredValue; + } + + std::error_code filter(T &outValue, T inValue, float alpha) + { +#ifdef NVBENCH_ENABLE + std::string funcName = "LowPassFilter_"; + std::string tag = funcName + typeid(T).name(); + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + if (m_firstIteration) + { + outValue = inValue; + m_firstIteration = false; + } + else + { + outValue = m_prevfilteredValue + (inValue - m_prevfilteredValue) * alpha; + } + m_prevRawValue = inValue; + m_prevfilteredValue = outValue; + return ErrorCode::SUCCESS; + } + +private: + bool m_firstIteration; + T m_prevRawValue; + T m_prevfilteredValue; +}; + +template +struct OneEuroFilterState +{ + // Computes alpha value for the filter + float getAlpha(float dataUpdateRate, float cutOffFreq) const + { + float alpha = cutOffFreq / (dataUpdateRate * kOneOver2Pi + cutOffFreq); + return alpha; + } + + // Resets the parameters and state of the filter + std::error_code resetParams(const OneEuroFilterParams &filterParams) + { + if (filterParams.dataUpdateRate <= 0.0f || filterParams.minCutoffFreq <= 0 || filterParams.derivCutoffFreq <= 0) + { + return ErrorCode::INVALID_ARGUMENT; + } + m_freq = filterParams.dataUpdateRate; + m_mincutoff = filterParams.minCutoffFreq; + m_cutOffSlope = filterParams.cutoffSlope; + m_derivCutOff = filterParams.derivCutoffFreq; + m_alphadxFilt = getAlpha(m_freq, filterParams.derivCutoffFreq); + + xFilt->resetState(); + dxFilt->resetState(); + + m_currfilteredValue = 0.0f; + m_prevfilteredValue = m_currfilteredValue; + return ErrorCode::SUCCESS; + } + + // Constructor for each filter state + OneEuroFilterState(const OneEuroFilterParams &filterParams) + { + xFilt.reset(new LowPassfilter()); + dxFilt.reset(new LowPassfilter()); + auto err = resetParams(filterParams); + if (err != make_error_code(ErrorCode::SUCCESS)) + { + throw err; + } + } + + std::error_code filter(U &outValue, U value) + { +#ifdef NVBENCH_ENABLE + std::string funcName = "OneEuroFilterState_"; + std::string tag = funcName + typeid(U).name(); + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + m_prevfilteredValue = m_currfilteredValue; + U dxValue = xFilt->isInitialized() ? 
(value - xFilt->getPreviousValue()) * m_freq : 0.0f; + U edxValue; + auto err = dxFilt->filter(edxValue, dxValue, m_alphadxFilt); + if (err != make_error_code(ErrorCode::SUCCESS)) + { + return err; + } + // Update the new cutoff frequency + U newCutoff = m_mincutoff + m_cutOffSlope * fabsf(edxValue); + float newAlpha = getAlpha(m_freq, newCutoff); + err = xFilt->filter(m_currfilteredValue, value, newAlpha); + if (err != make_error_code(ErrorCode::SUCCESS)) + { + return err; + } + + outValue = m_currfilteredValue; + return ErrorCode::SUCCESS; + } + std::unique_ptr> xFilt; + std::unique_ptr> dxFilt; + float m_alphadxFilt; + float m_freq; + float m_mincutoff; + float m_cutOffSlope; + float m_derivCutOff; + U m_prevfilteredValue; + U m_currfilteredValue; +}; + +template +struct OneEuroFilter::OneEuroFilterImpl +{ + typedef typename deduceDataType::U DT; + OneEuroFilterImpl(const OneEuroFilterParams &filterParams) + { + size_t numStates = traits::get_dim::value; + m_states.resize(numStates); + for (size_t i = 0; i < m_states.size(); i++) + { + m_states[i].reset(new OneEuroFilterState
(filterParams)); + } + } + + std::error_code resetParams(const OneEuroFilterParams &filterParams) + { + std::error_code err = ErrorCode::SUCCESS; + for (size_t i = 0; i < m_states.size(); i++) + { + err = m_states[i]->resetParams(filterParams); + if (err != make_error_code(ErrorCode::SUCCESS)) + { + return err; + } + } + return ErrorCode::SUCCESS; + } + + ~OneEuroFilterImpl() {} + + template::value == 1>::type * = nullptr> + std::error_code filter(U &outValue, U value) + { + if (m_states.size() != 1) + { + return ErrorCode::INVALID_OPERATION; + } + std::error_code err = m_states[0]->filter(outValue, value); + return err; + } + + template::value != 1>::type * = nullptr> + std::error_code filter(U &outValue, U value) + { + if (m_states.size() <= 1) + { + return ErrorCode::INVALID_OPERATION; + } + std::error_code err = ErrorCode::SUCCESS; + for (size_t i = 0; i < m_states.size(); i++) + { + err = m_states[i]->filter(outValue[i], value[i]); + if (err != make_error_code(ErrorCode::SUCCESS)) + { + return err; + } + } + + return err; + } + + std::vector>> m_states; +}; + +template +OneEuroFilter::OneEuroFilter(const OneEuroFilterParams &filterParams) + : m_pImpl(new OneEuroFilterImpl(filterParams)) +{ +} + +template +OneEuroFilter::~OneEuroFilter() +{ +} + +template +std::error_code OneEuroFilter::resetParams(const OneEuroFilterParams &filterParams) +{ + auto err = m_pImpl->resetParams(filterParams); + return err; +} + +template +std::error_code OneEuroFilter::execute(T &filteredValue, T inValue) +{ +#ifdef NVBENCH_ENABLE + std::string funcName = "OneEuroFilter_"; + std::string tag = funcName + typeid(T).name(); + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + auto err = m_pImpl->filter(filteredValue, inValue); + return err; +} + +template class OneEuroFilter; +template class OneEuroFilter; +template class OneEuroFilter; +template class OneEuroFilter; +template class OneEuroFilter; +template class OneEuroFilter; +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/TensorOperators.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/TensorOperators.cpp new file mode 100644 index 0000000..02d34ca --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/TensorOperators.cpp @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/tensor_ops/TensorOperators.h"
+
+#include
+
+#ifdef ENABLE_VPI
+#include "vpi/VPITensorOperators.h"
+#endif
+
+namespace cvcore { namespace tensor_ops {
+
+typename TensorContextFactory::MultitonType TensorContextFactory::instances;
+std::mutex TensorContextFactory::instanceMutex;
+
+std::error_code TensorContextFactory::CreateContext(TensorOperatorContext &tensorContext, TensorBackend backend)
+{
+    using PairType = typename TensorContextFactory::MultitonType::mapped_type;
+    using CounterType = typename PairType::first_type;
+    using ValuePtrType = typename PairType::second_type;
+
+    std::lock_guard instanceLock(instanceMutex);
+
+    std::error_code result = ErrorCode::SUCCESS;
+
+    tensorContext = nullptr;
+
+    auto contextItr = instances.find(backend);
+    if (contextItr == instances.end() && IsBackendSupported(backend))
+    {
+        switch (backend)
+        {
+        case TensorBackend::VPI:
+#ifdef ENABLE_VPI
+            try
+            {
+                instances[backend] = std::make_pair(1, ValuePtrType(new VPITensorContext{}));
+            }
+            catch (std::error_code &e)
+            {
+                result = e;
+            }
+            catch (...)
+            {
+                result = ErrorCode::INVALID_OPERATION;
+            }
+#else // ENABLE_VPI
+            result = ErrorCode::NOT_IMPLEMENTED;
+#endif // ENABLE_VPI
+            break;
+        default:
+            result = ErrorCode::NOT_IMPLEMENTED;
+            break;
+        }
+        // Only hand out the context if creation actually succeeded; otherwise a
+        // null entry would be inserted into the map and returned to the caller.
+        if (result == make_error_code(ErrorCode::SUCCESS))
+        {
+            tensorContext = instances[backend].second.get();
+        }
+    }
+    else if (contextItr != instances.end())
+    {
+        contextItr->second.first++;
+        tensorContext = contextItr->second.second.get();
+    }
+    else
+    {
+        result = ErrorCode::NOT_IMPLEMENTED;
+    }
+
+    return result;
+}
+
+std::error_code TensorContextFactory::DestroyContext(TensorOperatorContext &context)
+{
+    std::lock_guard instanceLock(instanceMutex);
+
+    auto backend = context->Backend();
+    context = nullptr;
+    auto contextItr = instances.find(backend);
+    if (contextItr != instances.end())
+    {
+        contextItr->second.first--;
+        if (contextItr->second.first == 0)
+        {
+            instances.erase(backend);
+        }
+    }
+    return ErrorCode::SUCCESS;
+}
+
+bool TensorContextFactory::IsBackendSupported(TensorBackend backend)
+{
+    bool result = false;
+
+    switch (backend)
+    {
+    case TensorBackend::VPI:
+#ifdef ENABLE_VPI
+        result = true;
+#endif // ENABLE_VPI
+        break;
+    default:
+        break;
+    }
+
+    return result;
+}
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.cpp
new file mode 100644
index 0000000..693ca5c
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.cpp
@@ -0,0 +1,135 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
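TensorContextFactory in the file above implements a reference-counted multiton: at most one context per backend, shared across callers, and destroyed only when the final DestroyContext drops the count to zero. Below is a reduced, self-contained sketch of that pattern; the class and method names are generic stand-ins, not the patch API.

#include <map>
#include <memory>
#include <mutex>
#include <utility>

template <typename Key, typename Value>
class CountedMultiton
{
public:
    // Returns the shared instance for key, creating it on first use.
    Value *acquire(const Key &key)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        auto it = m_instances.find(key);
        if (it == m_instances.end())
        {
            // First user of this key: create the shared instance with count 0...
            it = m_instances.emplace(key, std::make_pair(0, std::make_unique<Value>())).first;
        }
        it->second.first++; // ...and count this caller as a user.
        return it->second.second.get();
    }

    // Drops one reference; destroys the instance when nobody uses it anymore.
    void release(const Key &key)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        auto it = m_instances.find(key);
        if (it != m_instances.end() && --it->second.first == 0)
        {
            m_instances.erase(it); // last user gone: destroy the instance
        }
    }

private:
    std::map<Key, std::pair<int, std::unique_ptr<Value>>> m_instances;
    std::mutex m_mutex;
};

Keeping the counter next to the unique_ptr inside the mutex-guarded map is the important design point: lookup and lifetime are serialized together, so a release racing an acquire can never destroy an instance that is about to be handed out.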
+// +// SPDX-License-Identifier: Apache-2.0 +#include +#include +#include +#include + +#include "cv/core/CameraModel.h" +#include "cv/core/Image.h" +#include "cv/core/Memory.h" +#include "cv/core/ProfileUtils.h" +#include "cv/core/Tensor.h" +#include "cv/tensor_ops/ImageUtils.h" + +#include "VPIColorConvertImpl.h" +#include "VPIEnumMapping.h" +#include "VPIStatusMapping.h" + +// VPI includes +#include +#include +#include +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +namespace cvcore { namespace tensor_ops { + +VPITensorStream::VPIColorConvertImpl::VPIColorConvertImpl() + : m_inputImage(nullptr) + , m_outputImage(nullptr) +{ + std::memset(reinterpret_cast(&m_inputImageData), 0, sizeof(VPIImageData)); + std::memset(reinterpret_cast(&m_outputImageData), 0, sizeof(VPIImageData)); +} + +template +std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &outputImage, const Image &inputImage, + VPIStream &stream, VPIBackend backend) +{ + std::error_code errCode = make_error_code(VPIStatus::VPI_SUCCESS); + + bool paramsChanged = m_inputImage == nullptr || m_outputImage == nullptr || + CheckParamsChanged(m_inputImageData, inputImage) || + CheckParamsChanged(m_outputImageData, outputImage); + if (paramsChanged) + { + DestroyVPIImageWrapper(m_inputImage, m_inputImageData); + DestroyVPIImageWrapper(m_outputImage, m_outputImageData); + errCode = CreateVPIImageWrapper(m_inputImage, m_inputImageData, inputImage, backend); + if (errCode == make_error_code(VPI_SUCCESS)) + { + errCode = CreateVPIImageWrapper(m_outputImage, m_outputImageData, outputImage, backend); + } + } + + if (errCode == make_error_code(VPIStatus::VPI_SUCCESS)) + { + errCode = UpdateImage(m_inputImage, m_inputImageData, inputImage); + } + if (errCode == make_error_code(VPIStatus::VPI_SUCCESS)) + { + errCode = UpdateImage(m_outputImage, m_outputImageData, outputImage); + } + + if (errCode == make_error_code(VPIStatus::VPI_SUCCESS)) + { +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_" + GetMemoryTypeAsString(inputImage.isCPU()) +"Input_" + GetMemoryTypeAsString(outputImage.isCPU()) +"Output_" + getVPIBackendString(backend) + "Backend"; + nv::bench::Timer timerFunc = + nv::bench::VPI(tag.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + errCode = make_error_code(vpiSubmitConvertImageFormat(stream, backend, m_inputImage, m_outputImage, nullptr)); + } + + if (errCode == make_error_code(VPIStatus::VPI_SUCCESS)) + { + errCode = make_error_code(vpiStreamSync(stream)); + } + + if (errCode != make_error_code(VPIStatus::VPI_SUCCESS)) + { + return errCode; + } + + return make_error_code(ErrorCode::SUCCESS); +} + +VPITensorStream::VPIColorConvertImpl::~VPIColorConvertImpl() +{ + // Destroy Input VPIImage + DestroyVPIImageWrapper(m_inputImage, m_inputImageData); + + // Destroy Output VPIImage + DestroyVPIImageWrapper(m_outputImage, m_outputImageData); +} + +template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &, + VPIStream &, VPIBackend); +template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &, + VPIStream &, VPIBackend); +template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &, + VPIStream &, VPIBackend); +template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &, + VPIStream &, VPIBackend); +template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &, + VPIStream &, VPIBackend); +template std::error_code 
VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &,
+                                              VPIStream &, VPIBackend);
+template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &,
+                                                                       VPIStream &, VPIBackend);
+template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &,
+                                                                       VPIStream &, VPIBackend);
+template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &,
+                                                                       VPIStream &, VPIBackend);
+template std::error_code VPITensorStream::VPIColorConvertImpl::execute(Image &, const Image &,
+                                                                       VPIStream &, VPIBackend);
+
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.h
new file mode 100644
index 0000000..0e71266
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIColorConvertImpl.h
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_VPI_COLOR_CONVERT_IMPL_H
+#define CVCORE_VPI_COLOR_CONVERT_IMPL_H
+
+#include "VPITensorOperators.h"
+#include "cv/tensor_ops/ITensorOperatorStream.h"
+#include "cv/tensor_ops/ImageUtils.h"
+
+#include
+
+namespace cvcore { namespace tensor_ops {
+
+/**
+ * Color convert implementation for VPI backend.
+ */
+class VPITensorStream::VPIColorConvertImpl
+{
+public:
+    /**
+     * Image color conversion constructor.
+     */
+    VPIColorConvertImpl();
+
+    /**
+     * Converts the color format of a given image type.
+     * @param outputImage Output image.
+     * @param inputImage Input image.
+     * @param stream Specified VPI stream.
+     * @param backend Specified VPI backend.
+     */
+    template
+    std::error_code execute(Image &outputImage, const Image &inputImage, VPIStream &stream,
+                            VPIBackend backend);
+
+    /**
+     * Image color conversion destructor; deallocates resources.
+     */
+    ~VPIColorConvertImpl();
+
+private:
+    VPIImage m_inputImage;
+    VPIImage m_outputImage;
+    VPIImageData m_inputImageData;
+    VPIImageData m_outputImageData;
+};
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_VPI_COLOR_CONVERT_IMPL_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIEnumMapping.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIEnumMapping.h
new file mode 100644
index 0000000..d611167
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIEnumMapping.h
@@ -0,0 +1,196 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_VPIENUMMAPPING_H +#define CVCORE_VPIENUMMAPPING_H + +#include + +#include "VPITensorOperators.h" + +namespace cvcore { namespace tensor_ops { + +constexpr VPIBackend ToVpiBackendType(const ComputeEngine &computeEngine) +{ + switch (computeEngine) + { + case ComputeEngine::CPU: + return VPIBackend::VPI_BACKEND_CPU; + case ComputeEngine::PVA: + return VPIBackend::VPI_BACKEND_PVA; + case ComputeEngine::GPU: + return VPIBackend::VPI_BACKEND_CUDA; + case ComputeEngine::VIC: + return VPIBackend::VPI_BACKEND_VIC; + case ComputeEngine::NVENC: + return VPIBackend::VPI_BACKEND_NVENC; + default: + return VPIBackend::VPI_BACKEND_INVALID; + } +} + +constexpr VPIInterpolationType ToVpiInterpolationType(InterpolationType value) +{ + VPIInterpolationType result = VPI_INTERP_NEAREST; + + switch (value) + { + case INTERP_NEAREST: + result = VPI_INTERP_NEAREST; + break; + case INTERP_LINEAR: + result = VPI_INTERP_LINEAR; + break; + case INTERP_CUBIC_CATMULLROM: + result = VPI_INTERP_CATMULL_ROM; + break; + default: + break; + } + + return result; +} + +constexpr VPIBorderExtension ToVpiBorderType(BorderType value) +{ + VPIBorderExtension result = VPI_BORDER_ZERO; + + switch (value) + { + case BORDER_ZERO: + result = VPI_BORDER_ZERO; + break; + case BORDER_REPEAT: + result = VPI_BORDER_CLAMP; + break; + case BORDER_REVERSE: + result = VPI_BORDER_REFLECT; + break; + case BORDER_MIRROR: + result = VPI_BORDER_MIRROR; + break; + default: + break; + } + + return result; +} + +constexpr VPIImageFormat ToVpiImageFormat(ImageType value) +{ + VPIImageFormat result = VPI_IMAGE_FORMAT_Y8_ER; + + switch (value) + { + case Y_U8: + result = VPI_IMAGE_FORMAT_Y8_ER; + break; + case Y_U16: + result = VPI_IMAGE_FORMAT_Y16_ER; + break; + case Y_S8: + result = VPI_IMAGE_FORMAT_S8; + break; + case Y_S16: + result = VPI_IMAGE_FORMAT_S16; + break; + case Y_F32: + result = VPI_IMAGE_FORMAT_F32; + break; + case RGB_U8: + result = VPI_IMAGE_FORMAT_RGB8; + break; + case BGR_U8: + result = VPI_IMAGE_FORMAT_BGR8; + break; + case RGBA_U8: + result = VPI_IMAGE_FORMAT_RGBA8; + break; + case NV12: + result = VPI_IMAGE_FORMAT_NV12_ER; + break; + case NV24: + result = VPI_IMAGE_FORMAT_NV24_ER; + break; + default: + break; + } + + return result; +} + +constexpr VPIPixelType ToVpiPixelType(ImageType value) +{ + VPIPixelType result = VPI_PIXEL_TYPE_U8; + + switch (value) + { + case Y_U8: + result = VPI_PIXEL_TYPE_U8; + break; + case Y_U16: + result = VPI_PIXEL_TYPE_U16; + break; + case Y_S8: + result = VPI_PIXEL_TYPE_S8; + break; + case Y_S16: + result = VPI_PIXEL_TYPE_S16; + break; + case Y_F32: + result = VPI_PIXEL_TYPE_F32; + break; + case RGB_U8: + result = VPI_PIXEL_TYPE_3U8; + break; + case BGR_U8: + result = VPI_PIXEL_TYPE_3U8; + break; + case RGBA_U8: + result = VPI_PIXEL_TYPE_4U8; + break; + default: + break; + } + + return result; +} + +static inline std::string getVPIBackendString(VPIBackend vpiBackend) +{ + switch (vpiBackend) + { + case VPIBackend::VPI_BACKEND_CPU: + return "CPU"; + case VPIBackend::VPI_BACKEND_CUDA: + return "GPU"; + case 
VPIBackend::VPI_BACKEND_VIC: + return "VIC"; + case VPIBackend::VPI_BACKEND_PVA: + return "PVA"; + case VPIBackend::VPI_BACKEND_NVENC: + return "NVENC"; + case VPIBackend::VPI_BACKEND_INVALID: + return "INVALID"; + default: + return "INVALID"; + } +} + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_VPIENUMMAPPING_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIImageWarp.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIImageWarp.h new file mode 100644 index 0000000..71d098b --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIImageWarp.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef CVCORE_VPIIMAGEWARP_H +#define CVCORE_VPIIMAGEWARP_H +#include +#include + +#include "cv/tensor_ops/IImageWarp.h" +#include "cv/tensor_ops/Errors.h" +#include + +namespace cvcore { namespace tensor_ops { + +struct VPIImageWarp : public IImageWarp +{ + ~VPIImageWarp() = default; + + VPIPayload payload; +}; + +}} // namespace cvcore::tensor_ops + +#endif // CVCORE_VPIIMAGEWARP_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.cpp new file mode 100644 index 0000000..cb544fd --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.cpp @@ -0,0 +1,160 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
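Since the ToVpi* helpers in VPIEnumMapping.h are constexpr switches, a mapping can be evaluated at compile time and the VPI_BACKEND_INVALID sentinel can be rejected before any VPI call is made, which is how IsComputeEngineCompatible() later uses ToVpiBackendType(). A self-contained sketch of the same pattern with stand-in enums (not the patch's types):

#include <cstdio>

enum class Engine { CPU, GPU, VIC, Invalid };
enum class Backend { Cpu, Cuda, Vic, Invalid };

// constexpr switch mapping, the same shape as ToVpiBackendType() above.
constexpr Backend ToBackend(Engine e)
{
    switch (e)
    {
        case Engine::CPU: return Backend::Cpu;
        case Engine::GPU: return Backend::Cuda;
        case Engine::VIC: return Backend::Vic;
        default:          return Backend::Invalid;
    }
}

// The mapping is usable in constant expressions, so a wrong pairing can be
// rejected at compile time.
static_assert(ToBackend(Engine::GPU) == Backend::Cuda, "GPU maps to the CUDA backend");

int main()
{
    // Runtime use mirrors the sentinel check: reject Invalid before any work.
    const Backend b = ToBackend(Engine::VIC);
    std::printf("%s\n", b == Backend::Invalid ? "unsupported engine" : "engine supported");
    return 0;
}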
+// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "VPIRemapImpl.h" +#include "VPIEnumMapping.h" +#include "VPIStatusMapping.h" + +#include "cv/core/CameraModel.h" +#include "cv/core/Image.h" + +#include +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +namespace cvcore { namespace tensor_ops { + +VPITensorStream::VPIRemapImpl::VPIRemapImpl() + : m_inputImage(nullptr) + , m_outputImage(nullptr) +{ + std::memset(reinterpret_cast(&m_inputImageData), 0, sizeof(VPIImageData)); + std::memset(reinterpret_cast(&m_outputImageData), 0, sizeof(VPIImageData)); +} + +template +std::error_code VPITensorStream::VPIRemapImpl::initialize(Image & outImage, + const Image & inImage, + VPIBackend backend) +{ + std::error_code status; + status = CreateVPIImageWrapper(m_inputImage, m_inputImageData, inImage, backend); + if(status == make_error_code(VPI_SUCCESS)) + { + status = CreateVPIImageWrapper(m_outputImage, m_outputImageData, outImage, backend); + } + + return status; +} +template std::error_code VPITensorStream::VPIRemapImpl::initialize(Image & outImage, + const Image & inImage, VPIBackend); +template std::error_code VPITensorStream::VPIRemapImpl::initialize(Image & outImage, + const Image & inImage, VPIBackend); +template std::error_code VPITensorStream::VPIRemapImpl::initialize(Image & outImage, + const Image & inImage, VPIBackend); +template std::error_code VPITensorStream::VPIRemapImpl::initialize(Image & outImage, + const Image & inImage, VPIBackend); + +// ----------------------------------------------------------------------------- + +template +std::error_code VPITensorStream::VPIRemapImpl::execute(Image & outImage, + const Image & inImage, + const VPIImageWarp * warp, + InterpolationType interpolation, + BorderType border, + VPIStream & stream, + VPIBackend backend) +{ + std::error_code status = make_error_code(VPI_SUCCESS); + VPIInterpolationType vpiInterpolationType = ToVpiInterpolationType(interpolation); + VPIBorderExtension vpiBorderExt = ToVpiBorderType(border); + + bool paramsChanged = m_inputImage == nullptr || m_outputImage == nullptr || + CheckParamsChanged(m_inputImageData, inImage) || + CheckParamsChanged(m_outputImageData, outImage); + + if(paramsChanged) + { + DestroyVPIImageWrapper(m_inputImage, m_inputImageData); + DestroyVPIImageWrapper(m_outputImage, m_outputImageData); + status = initialize(outImage, inImage, backend); + } + + if(status == make_error_code(VPI_SUCCESS)) + { + status = UpdateImage(m_inputImage, m_inputImageData, inImage); + } + + if(status == make_error_code(VPI_SUCCESS)) + { + status = UpdateImage(m_outputImage, m_outputImageData, outImage); + } + + if(status == make_error_code(VPI_SUCCESS)) + { +#ifdef NVBENCH_ENABLE + std::string tag = "VPISubmitRemap_" + GetMemoryTypeAsString(inImage.isCPU()) +"Input_" + GetMemoryTypeAsString(outImage.isCPU()) +"Output_" + getVPIBackendString(backend) + "Backend"; + nv::bench::Timer timerFunc = + nv::bench::VPI(tag.c_str(), nv::bench::Flag::DEFAULT, stream); +#endif + // Submit remap task for Lens Distortion Correction + status = make_error_code(vpiSubmitRemap(stream, backend, warp->payload, + m_inputImage, m_outputImage, vpiInterpolationType, vpiBorderExt, 0)); + } + + if(status == make_error_code(VPI_SUCCESS)) + { + // Wait for remap to complete + status = make_error_code(vpiStreamSync(stream)); + } + return status; +} +template std::error_code VPITensorStream::VPIRemapImpl::execute(Image & outImage, + const Image & inImage, + const VPIImageWarp * warp, + InterpolationType interpolation, + 
BorderType border,
+                                                                VPIStream & stream,
+                                                                VPIBackend backend);
+template std::error_code VPITensorStream::VPIRemapImpl::execute(Image & outImage,
+                                                                const Image & inImage,
+                                                                const VPIImageWarp * warp,
+                                                                InterpolationType interpolation,
+                                                                BorderType border,
+                                                                VPIStream & stream,
+                                                                VPIBackend backend);
+template std::error_code VPITensorStream::VPIRemapImpl::execute(Image & outImage,
+                                                                const Image & inImage,
+                                                                const VPIImageWarp * warp,
+                                                                InterpolationType interpolation,
+                                                                BorderType border,
+                                                                VPIStream & stream,
+                                                                VPIBackend backend);
+template std::error_code VPITensorStream::VPIRemapImpl::execute(Image & outImage,
+                                                                const Image & inImage,
+                                                                const VPIImageWarp * warp,
+                                                                InterpolationType interpolation,
+                                                                BorderType border,
+                                                                VPIStream & stream,
+                                                                VPIBackend backend);
+// -----------------------------------------------------------------------------
+
+VPITensorStream::VPIRemapImpl::~VPIRemapImpl()
+{
+    DestroyVPIImageWrapper(m_inputImage, m_inputImageData);
+    DestroyVPIImageWrapper(m_outputImage, m_outputImageData);
+}
+// -----------------------------------------------------------------------------
+
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.h
new file mode 100644
index 0000000..e129f04
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIRemapImpl.h
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_VPIREMAPIMPL_H
+#define CVCORE_VPIREMAPIMPL_H
+
+#include
+#include
+
+#include "VPITensorOperators.h"
+#include "VPIImageWarp.h"
+
+namespace cvcore { namespace tensor_ops {
+/**
+ * Remap implementation used for lens distortion correction.
+ */
+class VPITensorStream::VPIRemapImpl
+{
+  public:
+    /* VPIRemapImpl constructor */
+    VPIRemapImpl();
+
+    /**
+     * Remap initialization.
+     * @param outImage Remap output image of Type
+     * @param inImage Remap input image of Type
+     * @param backend Compute backend
+     * @return Success if initialization is done successfully, otherwise error is returned
+     */
+    template
+    std::error_code initialize(Image & outImage,
+                               const Image & inImage,
+                               VPIBackend backend);
+
+    /**
+     * Remap execution function (non-blocking).
+     * The application is required to call Sync() before accessing the generated image.
+ * @param outImage Remap output image of type NV12 + * @param inImage Remap input image of type NV12 + * @param warp Remap warp pointer + * @param interpolation Interpolation type used for remap + * @param border Border type used for remap + * @param stream VPI stream used for remap + * @param backend VPI backend used for remap + * @return Success if remap is submitted successfully, otherwise error is returned + */ + template + std::error_code execute(Image & outImage, + const Image & inImage, + const VPIImageWarp * warp, + InterpolationType interpolation, + BorderType border, + VPIStream & stream, + VPIBackend backend); + + /* VPIRemapImpl destructor to release resources */ + ~VPIRemapImpl(); + + private: + VPIImage m_inputImage; + VPIImage m_outputImage; + VPIImageData m_inputImageData; + VPIImageData m_outputImageData; +}; + +}} // namespace cvcore::tensor_ops + +#endif //CVCORE_VPIREMAPIMPL_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.cpp new file mode 100644 index 0000000..1adffa3 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.cpp @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
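VPIRemapImpl::execute() above, like the other Impl classes in this extension, reuses its VPIImage wrappers across frames: CheckParamsChanged() triggers a full destroy-and-recreate only when the image geometry or memory type changed, and otherwise UpdateImage() merely re-points the existing wrappers at the caller's buffers before submission. A stripped-down sketch of that caching decision follows; every type and helper in it is an illustrative stand-in, not the patch API.

#include <system_error>

struct ImageDesc
{
    int width = 0, height = 0;
    bool onCpu = true;
    void *data = nullptr;
};

struct WrappedImage
{
    ImageDesc desc;
    bool valid = false;
};

static bool paramsChanged(const WrappedImage &w, const ImageDesc &img)
{
    return !w.valid || w.desc.width != img.width || w.desc.height != img.height ||
           w.desc.onCpu != img.onCpu;
}

static void rewrap(WrappedImage &w, const ImageDesc &img)
{
    // Full re-creation path: geometry or memory type changed
    // (mirrors DestroyVPIImageWrapper + CreateVPIImageWrapper).
    w.desc = img;
    w.valid = true;
}

static void repoint(WrappedImage &w, const ImageDesc &img)
{
    // Cheap path: only the buffer pointer moved
    // (mirrors UpdateImage / vpiImageSetWrapper).
    w.desc.data = img.data;
}

std::error_code execute(WrappedImage &in, WrappedImage &out,
                        const ImageDesc &inImg, const ImageDesc &outImg)
{
    if (paramsChanged(in, inImg) || paramsChanged(out, outImg))
    {
        rewrap(in, inImg);
        rewrap(out, outImg);
    }
    else
    {
        repoint(in, inImg);
        repoint(out, outImg);
    }
    // ...submit the algorithm on the stream and sync here...
    return std::error_code{};
}

The cheap path is the common case per frame; the full re-creation path runs only on the first call and on format changes, which is why each Impl keeps the VPIImage handles and VPIImageData descriptors as members.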
+//
+// SPDX-License-Identifier: Apache-2.0
+#include "VPIResizeImpl.h"
+
+#include
+#include
+#include
+#include
+
+// VPI includes
+#include
+#include
+#include
+#include
+#include
+
+#include "VPIEnumMapping.h"
+#include "VPIStatusMapping.h"
+#include "cv/core/CameraModel.h"
+#include "cv/core/Image.h"
+
+#ifdef NVBENCH_ENABLE
+#include
+#endif
+
+namespace cvcore { namespace tensor_ops {
+
+template
+std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, const Image &inputImage,
+                                                        InterpolationType interpolation, BorderType border,
+                                                        VPIStream &stream, VPIBackend backend)
+{
+    std::error_code errCode = ErrorCode::SUCCESS;
+    VPIStatus status = VPIStatus::VPI_SUCCESS;
+    VPIInterpolationType interpType;
+    VPIBorderExtension borderExt;
+    interpType = ToVpiInterpolationType(interpolation);
+    borderExt = ToVpiBorderType(border);
+
+    bool paramsChanged = m_inputImage == nullptr || m_outputImage == nullptr ||
+                         CheckParamsChanged(m_inputImageData, inputImage) ||
+                         CheckParamsChanged(m_outputImageData, outputImage);
+    if (paramsChanged)
+    {
+        DestroyVPIImageWrapper(m_inputImage, m_inputImageData);
+        DestroyVPIImageWrapper(m_outputImage, m_outputImageData);
+        errCode = CreateVPIImageWrapper(m_inputImage, m_inputImageData, inputImage, backend);
+        if (errCode == make_error_code(VPI_SUCCESS))
+        {
+            errCode = CreateVPIImageWrapper(m_outputImage, m_outputImageData, outputImage, backend);
+        }
+    }
+    else
+    {
+        errCode = UpdateImage(m_inputImage, m_inputImageData, inputImage);
+        if (errCode == make_error_code(VPIStatus::VPI_SUCCESS))
+        {
+            errCode = UpdateImage(m_outputImage, m_outputImageData, outputImage);
+        }
+    }
+
+    // Bail out if wrapping or updating the VPI images failed; otherwise the
+    // failed errCode would be ignored and the rescale submitted anyway.
+    if (errCode != make_error_code(VPIStatus::VPI_SUCCESS))
+    {
+        return errCode;
+    }
+
+    if (status == VPIStatus::VPI_SUCCESS)
+    {
+#ifdef NVBENCH_ENABLE
+        std::string tag = "VPISubmitRescale_" + GetMemoryTypeAsString(inputImage.isCPU()) + "Input_" + GetMemoryTypeAsString(outputImage.isCPU()) + "Output_" + getVPIBackendString(backend) + "Backend";
+        nv::bench::Timer timerFunc =
+            nv::bench::VPI(tag.c_str(), nv::bench::Flag::DEFAULT, stream);
+#endif
+        // Resize
+        status = vpiSubmitRescale(stream, backend, m_inputImage, m_outputImage, interpType, borderExt, 0);
+    }
+
+    if (status == VPIStatus::VPI_SUCCESS)
+    {
+        status = vpiStreamSync(stream);
+    }
+
+    if (status != VPIStatus::VPI_SUCCESS)
+    {
+        return make_error_code(status);
+    }
+    return make_error_code(ErrorCode::SUCCESS);
+}
+
+VPITensorStream::VPIResizeImpl::VPIResizeImpl()
+    : m_inputImage(nullptr)
+    , m_outputImage(nullptr)
+{
+    std::memset(reinterpret_cast(&m_inputImageData), 0, sizeof(VPIImageData));
+    std::memset(reinterpret_cast(&m_outputImageData), 0, sizeof(VPIImageData));
+}
+
+/**
+* Image resize destructor; deallocates resources.
+*/ +VPITensorStream::VPIResizeImpl::~VPIResizeImpl() +{ + // Destroy Input VPIImage + DestroyVPIImageWrapper(m_inputImage, m_inputImageData); + // Destroy Output VPIImage + DestroyVPIImageWrapper(m_outputImage, m_outputImageData); +} + +template std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, + const Image &inputImage, + InterpolationType interpolation, BorderType border, + VPIStream &stream, VPIBackend backend); +template std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, + const Image &inputImage, + InterpolationType interpolation, BorderType border, + VPIStream &stream, VPIBackend backend); +template std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, + const Image &inputImage, + InterpolationType interpolation, BorderType border, + VPIStream &stream, VPIBackend backend); +template std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, + const Image &inputImage, + InterpolationType interpolation, BorderType border, + VPIStream &stream, VPIBackend backend); +template std::error_code VPITensorStream::VPIResizeImpl::execute(Image &outputImage, + const Image &inputImage, + InterpolationType interpolation, BorderType border, + VPIStream &stream, VPIBackend backend); +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.h new file mode 100644 index 0000000..ec20a45 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIResizeImpl.h @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_VPIRESIZEIMPL_H +#define CVCORE_VPIRESIZEIMPL_H + +#include "VPITensorOperators.h" + +#include + +#include "cv/tensor_ops/ITensorOperatorStream.h" +#include "cv/tensor_ops/ImageUtils.h" + +namespace cvcore { namespace tensor_ops { + +/** + * Remap implementation used for Lens Distortion. + */ +class VPITensorStream::VPIResizeImpl +{ +public: + /** + * Initialization for Image resizing. + */ + VPIResizeImpl(); + + /** + * Image resizing a given image type. + * @param outputImage Resize output image of type .RGB_U8 + * @param inputImage Resize input image of type RGB_U8. + * @param type Interpolation type used for resize. + * @param border Image border extension used for resize + */ + template + std::error_code execute(Image &outputImage, const Image &inputImage, InterpolationType interpolation, + BorderType border, VPIStream &stream, VPIBackend backend); + + /** + * Image resizing destroy function to deallocate resources. 
+ */ + ~VPIResizeImpl(); + +private: + VPIImage m_inputImage; + VPIImage m_outputImage; + VPIImageData m_inputImageData; + VPIImageData m_outputImageData; +}; + +}} // namespace cvcore::tensor_ops + +#endif //CVCORE_VPIRESIZEIMPL_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.cpp new file mode 100644 index 0000000..a43431d --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.cpp @@ -0,0 +1,122 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "cv/tensor_ops/Errors.h" +#include "vpi/Status.h" + +#ifndef __cpp_lib_to_underlying +// Using a C++23 feature by hacking std +namespace std +{ + template + constexpr underlying_type_t to_underlying(Enum e) noexcept + { + return static_cast>(e); + } +}; +#endif // __cpp_lib_to_underlying + +namespace cvcore { namespace tensor_ops { + +namespace detail +{ + struct VPIStatusCategory : std::error_category + { + virtual const char * name() const noexcept override final + { + return "vpi-status"; + } + + virtual std::string message(int value) const override final + { + std::string result = "VPI Status"; + + return result; + } + + virtual std::error_condition default_error_condition(int code) const noexcept override final + { + std::error_condition result; + + switch(code) + { + case VPI_SUCCESS: + result = ErrorCode::SUCCESS; + break; + + case VPI_ERROR_INVALID_ARGUMENT: + result = ErrorCode::INVALID_ARGUMENT; + break; + + case VPI_ERROR_INVALID_IMAGE_FORMAT: + result = ErrorCode::INVALID_IMAGE_FORMAT; + break; + + case VPI_ERROR_INVALID_ARRAY_TYPE: + result = ErrorCode::INVALID_STORAGE_TYPE; + break; + + case VPI_ERROR_INVALID_PAYLOAD_TYPE: + result = ErrorCode::INVALID_STORAGE_TYPE; + break; + + case VPI_ERROR_INVALID_OPERATION: + result = ErrorCode::INVALID_OPERATION; + break; + + case VPI_ERROR_INVALID_CONTEXT: + result = ErrorCode::INVALID_ENGINE_TYPE; + break; + + case VPI_ERROR_DEVICE: + result = ErrorCode::DEVICE_ERROR; + break; + + case VPI_ERROR_NOT_READY: + result = ErrorCode::NOT_READY; + break; + + case VPI_ERROR_BUFFER_LOCKED: + result = ErrorCode::SYSTEM_ERROR; + break; + + case VPI_ERROR_OUT_OF_MEMORY: + result = ErrorCode::OUT_OF_MEMORY; + break; + + case VPI_ERROR_INTERNAL: + result = ErrorCode::SYSTEM_ERROR; + break; + + default: + result = ErrorCode::NOT_IMPLEMENTED; + break; + } + + return result; + } + }; +} // namespace detail + +const detail::VPIStatusCategory errorCategory{}; + +std::error_code make_error_code(VPIStatus ec) noexcept +{ + return {std::to_underlying(ec), errorCategory}; +} + +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.h new file mode 100644 
index 0000000..961d4fc --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStatusMapping.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_VPISTATUSMAPPING_H +#define CVCORE_VPISTATUSMAPPING_H + +#include "cv/core/CVError.h" +#include "vpi/Status.h" + +// WARNING: Extending base C++ namespace to cover cvcore error codes +namespace std { + +template <> +struct is_error_code_enum : true_type {}; + +} // namespace std + +namespace cvcore { namespace tensor_ops { + +std::error_code make_error_code(VPIStatus) noexcept; + +}} // namespace cvcore::tensor_ops + +#endif //CVCORE_VPISTATUSMAPPING_H diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.cpp new file mode 100644 index 0000000..c4e96e4 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.cpp @@ -0,0 +1,211 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "VPIStereoDisparityEstimatorImpl.h" +#include "VPIEnumMapping.h" +#include "VPIStatusMapping.h" + +#include "cv/core/CameraModel.h" +#include "cv/core/Image.h" + +#include + +#ifdef NVBENCH_ENABLE +#include +#endif + +namespace cvcore { namespace tensor_ops { + +VPITensorStream::VPIStereoDisparityEstimatorImpl::VPIStereoDisparityEstimatorImpl() + : m_inputLeftImage(nullptr) + , m_inputRightImage(nullptr) + , m_outputImage(nullptr) + , m_tempImage(nullptr) + , m_payload(nullptr) + , m_stereoParams() +{ + std::memset(reinterpret_cast(&m_inputLeftImageData), 0, sizeof(VPIImageData)); + std::memset(reinterpret_cast(&m_inputRightImageData), 0, sizeof(VPIImageData)); + std::memset(reinterpret_cast(&m_outputImageData), 0, sizeof(VPIImageData)); + // Disparity values returned from VPI are in Q10.5 format, i.e., signed fixed point with 5 fractional bits. Divide it by 32.0f to convert it to floating point. 
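+    // Worked example of the Q10.5 encoding (illustrative numbers): a raw
+    // disparity sample of 72 stored as Q10.5 means 72 / 2^5 = 2.25 pixels,
+    // so setting m_cvtParams.scale = 1/32 below makes the format conversion
+    // emit the floating-point value 2.25f directly.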
+ vpiInitConvertImageFormatParams(&m_cvtParams); + m_cvtParams.scale = 1.0f / 32; +} + +template +std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::initialize(Image &outImage, + const Image &leftImage, + const Image &rightImage, + VPIBackend backend) +{ + std::error_code status; + const std::error_code success = make_error_code(VPI_SUCCESS); + status = CreateVPIImageWrapper(m_inputLeftImage, m_inputLeftImageData, leftImage, backend); + if (status == success) + { + status = CreateVPIImageWrapper(m_inputRightImage, m_inputRightImageData, rightImage, backend); + } + if (status == success) + { + status = CreateVPIImageWrapper(m_outputImage, m_outputImageData, outImage, backend); + } + if (status == success) + { + status = make_error_code( + vpiImageCreate(outImage.getWidth(), outImage.getHeight(), VPI_IMAGE_FORMAT_S16, 0, &m_tempImage)); + } + if (status == success) + { + status = make_error_code(vpiCreateStereoDisparityEstimator(backend, outImage.getWidth(), outImage.getHeight(), + ToVpiImageFormat(T_IN), NULL, &m_payload)); + } + return status; +} + +template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::initialize(Image &outImage, + const Image &leftImage, + const Image &rightImage, + VPIBackend backend); +template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::initialize(Image &outImage, + const Image &leftImage, + const Image &rightImage, + VPIBackend backend); +template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::initialize(Image &outImage, + const Image &leftImage, + const Image &rightImage, + VPIBackend backend); + +// ----------------------------------------------------------------------------- + +template +std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::execute(Image &outImage, + const Image &leftImage, + const Image &rightImage, + size_t windowSize, size_t maxDisparity, + VPIStream &stream, VPIBackend backend) +{ + std::error_code status = make_error_code(VPI_SUCCESS); + m_stereoParams.windowSize = static_cast(windowSize); + m_stereoParams.maxDisparity = static_cast(maxDisparity); + + bool paramsChanged = m_inputLeftImage == nullptr || m_inputRightImage == nullptr || m_outputImage == nullptr || + CheckParamsChanged(m_inputLeftImageData, leftImage) || + CheckParamsChanged(m_inputRightImageData, rightImage) || + CheckParamsChanged(m_outputImageData, outImage); + + if (paramsChanged) + { + if (m_payload != nullptr) + { + vpiPayloadDestroy(m_payload); + } + if (m_tempImage != nullptr) + { + vpiImageDestroy(m_tempImage); + } + DestroyVPIImageWrapper(m_inputLeftImage, m_inputLeftImageData); + DestroyVPIImageWrapper(m_inputRightImage, m_inputRightImageData); + DestroyVPIImageWrapper(m_outputImage, m_outputImageData); + + status = initialize(outImage, leftImage, rightImage, backend); + } + + if (status == make_error_code(VPI_SUCCESS)) + { + status = UpdateImage(m_inputLeftImage, m_inputLeftImageData, leftImage); + } + + if (status == make_error_code(VPI_SUCCESS)) + { + status = UpdateImage(m_inputRightImage, m_inputRightImageData, rightImage); + } + + if (status == make_error_code(VPI_SUCCESS)) + { + status = UpdateImage(m_outputImage, m_outputImageData, outImage); + } + + if (status == make_error_code(VPI_SUCCESS)) + { +#ifdef NVBENCH_ENABLE + std::string tag = "VPISubmitStereoDisparityEstimator_" + GetMemoryTypeAsString(leftImage.isCPU()) + "Input_" + + GetMemoryTypeAsString(outImage.isCPU()) + "Output_" + getVPIBackendString(backend) + + "Backend"; + nv::bench::Timer timerFunc = 
nv::bench::VPI(tag.c_str(), nv::bench::Flag::DEFAULT, stream);
+#endif
+        // Submit SGM task for Stereo Disparity Estimator
+        status = make_error_code(vpiSubmitStereoDisparityEstimator(
+            stream, backend, m_payload, m_inputLeftImage, m_inputRightImage, m_tempImage, NULL, &m_stereoParams));
+    }
+
+    if (status == make_error_code(VPI_SUCCESS))
+    {
+#ifdef NVBENCH_ENABLE
+        std::string tag = "VPISubmitConvertImageFormat_" + GetMemoryTypeAsString(leftImage.isCPU()) + "Input_" +
+                          GetMemoryTypeAsString(outImage.isCPU()) + "Output_" + getVPIBackendString(backend) +
+                          "Backend";
+        nv::bench::Timer timerFunc = nv::bench::VPI(tag.c_str(), nv::bench::Flag::DEFAULT, stream);
+#endif
+        // Convert the Q10.5 disparity output to the final image format
+        status =
+            make_error_code(vpiSubmitConvertImageFormat(stream, backend, m_tempImage, m_outputImage, &m_cvtParams));
+    }
+
+    if (status == make_error_code(VPI_SUCCESS))
+    {
+        // Wait for stereo disparity estimator to complete
+        status = make_error_code(vpiStreamSync(stream));
+    }
+
+    if (status != make_error_code(VPI_SUCCESS))
+    {
+        return status;
+    }
+    return make_error_code(ErrorCode::SUCCESS);
+}
+
+template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::execute(
+    Image &outImage, const Image &leftImage, const Image &rightImage, size_t windowSize,
+    size_t maxDisparity, VPIStream &stream, VPIBackend backend);
+template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::execute(
+    Image &outImage, const Image &leftImage, const Image &rightImage, size_t windowSize,
+    size_t maxDisparity, VPIStream &stream, VPIBackend backend);
+template std::error_code VPITensorStream::VPIStereoDisparityEstimatorImpl::execute(
+    Image &outImage, const Image &leftImage, const Image &rightImage, size_t windowSize,
+    size_t maxDisparity, VPIStream &stream, VPIBackend backend);
+// -----------------------------------------------------------------------------
+
+VPITensorStream::VPIStereoDisparityEstimatorImpl::~VPIStereoDisparityEstimatorImpl()
+{
+    if (m_payload != nullptr)
+    {
+        vpiPayloadDestroy(m_payload);
+    }
+    if (m_tempImage != nullptr)
+    {
+        vpiImageDestroy(m_tempImage);
+    }
+    DestroyVPIImageWrapper(m_inputLeftImage, m_inputLeftImageData);
+    DestroyVPIImageWrapper(m_inputRightImage, m_inputRightImageData);
+    DestroyVPIImageWrapper(m_outputImage, m_outputImageData);
+}
+// -----------------------------------------------------------------------------
+
+}} // namespace cvcore::tensor_ops
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.h
new file mode 100644
index 0000000..53a5c63
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPIStereoDisparityEstimatorImpl.h
@@ -0,0 +1,83 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
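Taken together, one disparity request in the implementation above queues three operations on the same VPI stream: SGM into the temporary S16 image, a format conversion that applies the 1/32 scale, and a sync. The sketch below mirrors that call order with plain VPI calls; the stream, payload, parameter structs, and images are assumed to have been created already, and error checking is elided.

#include <vpi/Image.h>
#include <vpi/Stream.h>
#include <vpi/algo/ConvertImageFormat.h>
#include <vpi/algo/StereoDisparity.h>

void runStereo(VPIStream stream, VPIBackend backend, VPIPayload payload,
               VPIImage left, VPIImage right, VPIImage temp, VPIImage out,
               VPIStereoDisparityEstimatorParams *stereoParams,
               VPIConvertImageFormatParams *cvtParams)
{
    // 1. SGM writes a Q10.5 fixed-point disparity into the temporary S16 image.
    vpiSubmitStereoDisparityEstimator(stream, backend, payload, left, right,
                                      temp, NULL, stereoParams);
    // 2. Format conversion, with cvtParams->scale = 1/32 undoing the Q10.5 encoding.
    vpiSubmitConvertImageFormat(stream, backend, temp, out, cvtParams);
    // 3. Both submissions are asynchronous; sync before reading 'out'.
    vpiStreamSync(stream);
}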
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CVCORE_VPISTEREODISPARITYESTIMATORIMPL_H
+#define CVCORE_VPISTEREODISPARITYESTIMATORIMPL_H
+
+#include
+#include
+#include
+
+#include "VPITensorOperators.h"
+
+namespace cvcore { namespace tensor_ops {
+/**
+ * StereoDisparityEstimator implementation used for stereo disparity estimation.
+ */
+class VPITensorStream::VPIStereoDisparityEstimatorImpl
+{
+public:
+    /* VPIStereoDisparityEstimatorImpl constructor */
+    VPIStereoDisparityEstimatorImpl();
+
+    /**
+     * StereoDisparityEstimator initialization.
+     * @param outImage StereoDisparityEstimator output image
+     * @param leftImage StereoDisparityEstimator input left image
+     * @param rightImage StereoDisparityEstimator input right image
+     * @param backend VPI backend used for StereoDisparityEstimator
+     * @return Success if initialization is done successfully, otherwise error is returned
+     */
+    template
+    std::error_code initialize(Image &outImage, const Image &leftImage, const Image &rightImage,
+                               VPIBackend backend);
+
+    /**
+     * StereoDisparityEstimator execution function (non-blocking).
+     * The application is required to call Sync() before accessing the generated image.
+     * @param outImage StereoDisparityEstimator output image
+     * @param leftImage StereoDisparityEstimator input left image
+     * @param rightImage StereoDisparityEstimator input right image
+     * @param windowSize Represents the median filter size (on the PVA+NVENC+VIC backend) or census transform window size (other backends) used in the algorithm
+     * @param maxDisparity Maximum disparity for matching search
+     * @param stream VPI stream used for StereoDisparityEstimator
+     * @param backend VPI backend used for StereoDisparityEstimator
+     * @return Success if StereoDisparityEstimator is submitted successfully, otherwise error is returned
+     */
+    template
+    std::error_code execute(Image &outImage, const Image &leftImage, const Image &rightImage,
+                            size_t windowSize, size_t maxDisparity, VPIStream &stream, VPIBackend backend);
+
+    /* VPIStereoDisparityEstimatorImpl destructor to release resources */
+    ~VPIStereoDisparityEstimatorImpl();
+
+private:
+    VPIImage m_inputLeftImage;
+    VPIImage m_inputRightImage;
+    VPIImage m_outputImage;
+    VPIImage m_tempImage;
+    VPIImageData m_inputLeftImageData;
+    VPIImageData m_inputRightImageData;
+    VPIImageData m_outputImageData;
+    VPIPayload m_payload;
+    VPIStereoDisparityEstimatorParams m_stereoParams;
+    VPIConvertImageFormatParams m_cvtParams;
+};
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_VPISTEREODISPARITYESTIMATORIMPL_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.cpp
new file mode 100644
index 0000000..83c5dff
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.cpp
@@ -0,0 +1,709 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "cv/core/CameraModel.h" +#include "cv/core/Image.h" + +#include "VPIColorConvertImpl.h" +#include "VPIEnumMapping.h" +#include "VPIImageWarp.h" +#include "VPIRemapImpl.h" +#include "VPIResizeImpl.h" +#include "VPIStatusMapping.h" +#include "VPIStereoDisparityEstimatorImpl.h" +#include "VPITensorOperators.h" + +#ifdef NVBENCH_ENABLE +#include +#endif + +namespace cvcore { namespace tensor_ops { + +namespace detail { + +// helper function to wrap VPI image for NV12 / NV24 image types +template::value>::type * = nullptr> +std::error_code CreateVPIImageWrapperImpl(VPIImage &vpiImg, VPIImageData &imgdata, const Image &cvcoreImage, VPIBackend backend) +{ +#ifdef NVBENCH_ENABLE + std::string tag = "CreateVPIImageWrapper_NV12/NV24"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::memset(reinterpret_cast(&imgdata), 0, sizeof(VPIImageData)); + + imgdata.bufferType = cvcoreImage.isCPU() ? VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR : VPI_IMAGE_BUFFER_CUDA_PITCH_LINEAR; + imgdata.buffer.pitch.format = ToVpiImageFormat(T); + imgdata.buffer.pitch.numPlanes = 2; + imgdata.buffer.pitch.planes[0].data = const_cast(cvcoreImage.getLumaData()); + imgdata.buffer.pitch.planes[0].height = cvcoreImage.getLumaHeight(); + imgdata.buffer.pitch.planes[0].width = cvcoreImage.getLumaWidth(); + imgdata.buffer.pitch.planes[0].pixelType = VPI_PIXEL_TYPE_U8; + imgdata.buffer.pitch.planes[0].pitchBytes = cvcoreImage.getLumaStride(TensorDimension::HEIGHT) * sizeof(uint8_t); + imgdata.buffer.pitch.planes[1].data = const_cast(cvcoreImage.getChromaData()); + imgdata.buffer.pitch.planes[1].height = cvcoreImage.getChromaHeight(); + imgdata.buffer.pitch.planes[1].width = cvcoreImage.getChromaWidth(); + imgdata.buffer.pitch.planes[1].pixelType = VPI_PIXEL_TYPE_2U8; + imgdata.buffer.pitch.planes[1].pitchBytes = cvcoreImage.getChromaStride(TensorDimension::HEIGHT) * sizeof(uint8_t); + VPIStatus vpiStatus; + vpiStatus = vpiImageCreateWrapper(&imgdata, nullptr, backend, &vpiImg); + + return make_error_code(vpiStatus); +} + +// helper function to wrap VPI image for interleaved image types +template::value>::type * = nullptr> +std::error_code CreateVPIImageWrapperImpl(VPIImage &vpiImg, VPIImageData &imgdata, const Image &cvcoreImage, VPIBackend backend) +{ +#ifdef NVBENCH_ENABLE + std::string tag = "CreateVPIImageWrapper_Interleaved"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::memset(reinterpret_cast(&imgdata), 0, sizeof(VPIImageData)); + + using D = typename Image::DataType; + imgdata.bufferType = cvcoreImage.isCPU() ? 
VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR : VPI_IMAGE_BUFFER_CUDA_PITCH_LINEAR; + imgdata.buffer.pitch.format = ToVpiImageFormat(T); + imgdata.buffer.pitch.numPlanes = 1; + imgdata.buffer.pitch.planes[0].data = const_cast(cvcoreImage.getData()); + imgdata.buffer.pitch.planes[0].height = cvcoreImage.getHeight(); + imgdata.buffer.pitch.planes[0].width = cvcoreImage.getWidth(); + imgdata.buffer.pitch.planes[0].pixelType = ToVpiPixelType(T); + imgdata.buffer.pitch.planes[0].pitchBytes = cvcoreImage.getStride(TensorDimension::HEIGHT) * GetImageElementSize(T); + VPIStatus vpiStatus; + vpiStatus = vpiImageCreateWrapper(&imgdata, nullptr, backend, &vpiImg); + + return make_error_code(vpiStatus); +} + +// helper function to wrap VPI image for planar image types +template::value>::type * = nullptr> +std::error_code CreateVPIImageWrapperImpl(VPIImage &vpiImg, VPIImageData &imgdata, const Image &cvcoreImage, VPIBackend backend) +{ + return make_error_code(VPI_ERROR_INVALID_IMAGE_FORMAT); +} + +} // namespace detail + +std::error_code VPITensorContext::CreateStream(TensorOperatorStream &tensorStream, const ComputeEngine &computeEngine) +{ + tensorStream = nullptr; + + if (!IsComputeEngineCompatible(computeEngine)) + { + return ErrorCode::INVALID_ENGINE_TYPE; + } + + try + { + tensorStream = new VPITensorStream(computeEngine); + } + catch (std::error_code &e) + { + return e; + } + catch (...) + { + return ErrorCode::INVALID_OPERATION; + } + + return ErrorCode::SUCCESS; +} + +VPITensorStream::VPITensorStream(const ComputeEngine &computeEngine) + : m_resizer(new VPIResizeImpl()) + , m_remapper(new VPIRemapImpl()) + , m_colorConverter(new VPIColorConvertImpl()) + , m_stereoDisparityEstimator(new VPIStereoDisparityEstimatorImpl()) +{ + VPIBackend backend = ToVpiBackendType(computeEngine); + VPIStatus status = vpiStreamCreate(backend, &m_stream); + if (status != VPI_SUCCESS) + { + throw make_error_code(status); + } + m_backend = backend; +} + +VPITensorStream::~VPITensorStream() +{ + vpiStreamDestroy(m_stream); +} + +std::error_code VPITensorContext::DestroyStream(TensorOperatorStream &inputStream) +{ + if (inputStream != nullptr) + { + delete inputStream; + inputStream = nullptr; + } + return ErrorCode::SUCCESS; +} + +bool VPITensorContext::IsComputeEngineCompatible(const ComputeEngine &computeEngine) const noexcept +{ + VPIBackend vpibackend = ToVpiBackendType(computeEngine); + if (vpibackend == VPIBackend::VPI_BACKEND_INVALID) + { + return false; + } + return true; +} + +template +std::error_code CreateVPIImageWrapper(VPIImage &vpiImg, VPIImageData &imgdata, const Image &cvcoreImage, VPIBackend backend) +{ + return detail::CreateVPIImageWrapperImpl(vpiImg, imgdata, cvcoreImage, backend); +} + +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); 
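+// Layout produced by the NV12 wrapper above, using an assumed tightly packed
+// 1920x1080 image as an example: plane 0 is 1920x1080 8-bit luma with
+// pitchBytes = 1920; plane 1 is 960x540 interleaved U/V pairs (2U8), also
+// pitchBytes = 1920 (960 pairs x 2 bytes). For NV24 the chroma plane stays at
+// the full 1920x1080 resolution.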
+template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); +template std::error_code CreateVPIImageWrapper(VPIImage &, VPIImageData &, const Image &, VPIBackend); + +std::error_code UpdateVPIImageWrapper(VPIImage &image, VPIImageData &imageWrap, bool isCPU) +{ +#ifdef NVBENCH_ENABLE + std::string tag = "UpdateVPIImageWrapper"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + VPIStatus status = VPI_SUCCESS; + status = vpiImageSetWrapper(image, &imageWrap); + + return make_error_code(status); +} + +std::error_code DestroyVPIImageWrapper(VPIImage &image, VPIImageData &imageWrap) +{ +#ifdef NVBENCH_ENABLE + std::string tag = "DestroyVPIImageWrapper"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::memset(reinterpret_cast(&imageWrap), 0, sizeof(VPIImageData)); + if (image != nullptr) + { + vpiImageDestroy(image); + } + + image = nullptr; + + return ErrorCode::SUCCESS; +} +std::error_code VPITensorStream::Status() noexcept +{ + return ErrorCode::SUCCESS; +} + +std::error_code VPITensorStream::Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIResize_RGB_U8_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::error_code err_code; + err_code = m_resizer->execute(outputImage, inputImage, interpolation, border, m_stream, m_backend); + return err_code; +} + +std::error_code VPITensorStream::Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIResize_NV12_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + + "X" + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + ; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::error_code err_code; + err_code = m_resizer->execute(outputImage, inputImage, interpolation, border, m_stream, m_backend); + return err_code; +} + +std::error_code VPITensorStream::Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIResize_RGBA_U8_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::error_code err_code; + err_code = m_resizer->execute(outputImage, inputImage, interpolation, border, m_stream, m_backend); + return err_code; +} + +std::error_code 
VPITensorStream::Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIResize_BGR_U8_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::error_code err_code; + err_code = m_resizer->execute(outputImage, inputImage, interpolation, border, m_stream, m_backend); + return err_code; +} + +std::error_code VPITensorStream::Resize(Image &outputImage, const Image &inputImage, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIResize_NV24_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + + "X" + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + std::error_code err_code; + err_code = m_resizer->execute(outputImage, inputImage, interpolation, border, m_stream, m_backend); + return err_code; +} + +std::error_code VPITensorStream::Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIRemap_RGB_U8_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_remapper->execute(outputImage, inputImage, reinterpret_cast(warp), interpolation, border, + m_stream, m_backend); +} + +std::error_code VPITensorStream::Remap(Image &outputImage, const Image &inputImage, + const ImageWarp warp, InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIRemap_BGR_U8_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_remapper->execute(outputImage, inputImage, reinterpret_cast(warp), interpolation, border, + m_stream, m_backend); +} + +std::error_code VPITensorStream::Remap(Image &outputImage, const Image &inputImage, const ImageWarp warp, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIRemap_NV12_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + 
std::to_string(outputImage.getLumaWidth()) + + "X" + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_remapper->execute(outputImage, inputImage, reinterpret_cast(warp), interpolation, border, + m_stream, m_backend); +} + +std::error_code VPITensorStream::Remap(Image &outputImage, const Image &inputImage, const ImageWarp warp, + InterpolationType interpolation, BorderType border) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIRemap_NV24_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + + "X" + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_remapper->execute(outputImage, inputImage, reinterpret_cast(warp), interpolation, border, + m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_BGR_RGB_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_RGB_BGR_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_NV12_BGR_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + "X" + + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_NV24_BGR_" + std::to_string(inputImage.getWidth()) + "X" + + 
std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + "X" + + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_BGR_NV12_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_BGR_NV24_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_NV12_RGB_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + "X" + + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_NV24_RGB_" + std::to_string(inputImage.getWidth()) + "X" + + std::to_string(inputImage.getHeight()) + "_" + std::to_string(outputImage.getLumaWidth()) + "X" + + std::to_string(outputImage.getLumaHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_RGB_NV12_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) 
+ "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::ColorConvert(Image &outputImage, const Image &inputImage) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "VPIColorConvert_RGB_NV24_" + std::to_string(inputImage.getLumaWidth()) + "X" + + std::to_string(inputImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + return m_colorConverter->execute(outputImage, inputImage, m_stream, m_backend); +} + +std::error_code VPITensorStream::StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "StereoDisparityEstimator_Y_F32_Y_U8_" + std::to_string(inputLeftImage.getWidth()) + "X" + + std::to_string(inputLeftImage.getHeight()) + "_" + std::to_string(outputImage.getWidth()) + "X" + + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + "Backend_" + + GetMemoryTypeAsString(inputLeftImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + return m_stereoDisparityEstimator->execute(outputImage, inputLeftImage, inputRightImage, windowSize, maxDisparity, + m_stream, m_backend); +} + +std::error_code VPITensorStream::StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "StereoDisparityEstimator_Y_F32_NV12_" + std::to_string(inputLeftImage.getLumaWidth()) + "X" + + std::to_string(inputLeftImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) + + "X" + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputLeftImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + return m_stereoDisparityEstimator->execute(outputImage, inputLeftImage, inputRightImage, windowSize, maxDisparity, + m_stream, m_backend); +} + +std::error_code VPITensorStream::StereoDisparityEstimator(Image &outputImage, const Image &inputLeftImage, + const Image &inputRightImage, size_t windowSize, + size_t maxDisparity) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "StereoDisparityEstimator_Y_F32_NV24_" + std::to_string(inputLeftImage.getLumaWidth()) + "X" + + std::to_string(inputLeftImage.getLumaHeight()) + "_" + std::to_string(outputImage.getWidth()) + + "X" + std::to_string(outputImage.getHeight()) + "_" + getVPIBackendString(m_backend) + + "Backend_" + GetMemoryTypeAsString(inputLeftImage.isCPU()) + "Input"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + return m_stereoDisparityEstimator->execute(outputImage, inputLeftImage, 
inputRightImage, windowSize, maxDisparity, + m_stream, m_backend); +} + +TensorBackend VPITensorContext::Backend() const noexcept +{ + return TensorBackend::VPI; +} + +std::error_code VPITensorStream::GenerateWarpFromCameraModel(ImageWarp &warp, const ImageGrid &grid, + const CameraModel &source, const CameraIntrinsics &target) +{ + std::unique_lock scopedLock{m_fence}; +#ifdef NVBENCH_ENABLE + std::string tag = "GenerateWarpFromCameraModel"; + nv::bench::Timer timerFunc = nv::bench::CPU(tag.c_str(), nv::bench::Flag::DEFAULT); +#endif + + VPIStatus status = VPI_SUCCESS; + + VPIWarpMap map = {0}; + map.grid.numHorizRegions = grid.numHorizRegions; + for (std::size_t i = 0; i < static_cast(grid.numHorizRegions); i++) + { + map.grid.regionWidth[i] = grid.regionWidth[i]; + map.grid.horizInterval[i] = grid.horizInterval[i]; + } + map.grid.numVertRegions = grid.numVertRegions; + for (std::size_t i = 0; i < static_cast(grid.numVertRegions); i++) + { + map.grid.regionHeight[i] = grid.regionHeight[i]; + map.grid.vertInterval[i] = grid.vertInterval[i]; + } + status = vpiWarpMapAllocData(&map); + + if ((status == VPI_SUCCESS) && (map.keypoints)) + { + switch (source.distortion.type) + { + case CameraDistortionType::Polynomial: + { + VPIPolynomialLensDistortionModel distortion; + distortion.k1 = source.distortion.k1; + distortion.k2 = source.distortion.k2; + distortion.k3 = source.distortion.k3; + distortion.k4 = source.distortion.k4; + distortion.k5 = source.distortion.k5; + distortion.k6 = source.distortion.k6; + distortion.p1 = source.distortion.p1; + distortion.p2 = source.distortion.p2; + status = vpiWarpMapGenerateFromPolynomialLensDistortionModel( + source.intrinsic.m_intrinsics, source.extrinsic.m_extrinsics, target.m_intrinsics, &distortion, &map); + break; + } + case CameraDistortionType::FisheyeEquidistant: + { + VPIFisheyeLensDistortionModel distortion; + distortion.k1 = source.distortion.k1; + distortion.k2 = source.distortion.k2; + distortion.k3 = source.distortion.k3; + distortion.k4 = source.distortion.k4; + distortion.mapping = VPI_FISHEYE_EQUIDISTANT; + status = vpiWarpMapGenerateFromFisheyeLensDistortionModel( + source.intrinsic.m_intrinsics, source.extrinsic.m_extrinsics, target.m_intrinsics, &distortion, &map); + break; + } + case CameraDistortionType::FisheyeEquisolid: + { + VPIFisheyeLensDistortionModel distortion; + distortion.k1 = source.distortion.k1; + distortion.k2 = source.distortion.k2; + distortion.k3 = source.distortion.k3; + distortion.k4 = source.distortion.k4; + distortion.mapping = VPI_FISHEYE_EQUISOLID; + status = vpiWarpMapGenerateFromFisheyeLensDistortionModel( + source.intrinsic.m_intrinsics, source.extrinsic.m_extrinsics, target.m_intrinsics, &distortion, &map); + break; + } + case CameraDistortionType::FisheyeOrthoGraphic: + { + VPIFisheyeLensDistortionModel distortion; + distortion.k1 = source.distortion.k1; + distortion.k2 = source.distortion.k2; + distortion.k3 = source.distortion.k3; + distortion.k4 = source.distortion.k4; + distortion.mapping = VPI_FISHEYE_ORTHOGRAPHIC; + status = vpiWarpMapGenerateFromFisheyeLensDistortionModel( + source.intrinsic.m_intrinsics, source.extrinsic.m_extrinsics, target.m_intrinsics, &distortion, &map); + break; + } + case CameraDistortionType::FisheyeStereographic: + { + VPIFisheyeLensDistortionModel distortion; + distortion.k1 = source.distortion.k1; + distortion.k2 = source.distortion.k2; + distortion.k3 = source.distortion.k3; + distortion.k4 = source.distortion.k4; + distortion.mapping = VPI_FISHEYE_STEREOGRAPHIC; + 
status = vpiWarpMapGenerateFromFisheyeLensDistortionModel( + source.intrinsic.m_intrinsics, source.extrinsic.m_extrinsics, target.m_intrinsics, &distortion, &map); + break; + } + default: + status = VPI_ERROR_INVALID_ARGUMENT; + break; + } + } + + if ((status == VPI_SUCCESS) && (map.keypoints)) + { + if (warp != nullptr) + { + vpiPayloadDestroy(reinterpret_cast(warp)->payload); + delete warp; + } + warp = new VPIImageWarp; + status = vpiCreateRemap(m_backend, &map, &(reinterpret_cast(warp)->payload)); + } + + // Delete map after payload is generated + vpiWarpMapFreeData(&map); + + return make_error_code(status); +} + +std::error_code VPITensorStream::DestroyWarp(ImageWarp &warp) noexcept +{ + std::unique_lock scopedLock{m_fence}; + if (warp != nullptr) + { + try + { + vpiPayloadDestroy(reinterpret_cast(warp)->payload); + } + catch (std::error_code &e) + { + return e; + } + + delete reinterpret_cast(warp); + warp = nullptr; + } + return make_error_code(ErrorCode::SUCCESS); +} + +}} // namespace cvcore::tensor_ops diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.h b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.h new file mode 100644 index 0000000..e01ed11 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/cvcore/src/tensor_ops/vpi/VPITensorOperators.h @@ -0,0 +1,272 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef CVCORE_VPITENSOROPERATORS_H +#define CVCORE_VPITENSOROPERATORS_H + +#include +#include + +#include +#include +#include +#include + +#include "cv/core/CameraModel.h" +#include "cv/tensor_ops/ITensorOperatorContext.h" +#include "cv/tensor_ops/ITensorOperatorStream.h" + +#include "VPIEnumMapping.h" + +namespace cvcore { namespace tensor_ops { + +/** + * Returns the corresponding VPI backend given the cvcore compute engine. + * @param computeEngine Compute engine used. + * @return Returns the VPIbackend. 
+ */
+VPIBackend getVPIBackend(const ComputeEngine &computeEngine);
+
+/**
+ * Wraps a CVCore image into a VPIImage.
+ * @param vpiImg VPIImage
+ * @param imgdata VPIImage data
+ * @param cvcoreImage CVCore image
+ * @param backend Compute backend
+ * @return error code
+ */
+template<ImageType T>
+std::error_code CreateVPIImageWrapper(VPIImage &vpiImg, VPIImageData &imgdata, const Image<T> &cvcoreImage, VPIBackend backend);
+
+/**
+ * Updates the data pointer of a wrapped VPI image.
+ * @param image VPIImage
+ * @param imageWrap VPIImage data
+ * @param isCPU whether the data lives on the CPU or the GPU
+ * @return error code
+ */
+std::error_code UpdateVPIImageWrapper(VPIImage &image, VPIImageData &imageWrap, bool isCPU);
+
+/**
+ * Destroys a wrapped VPI image.
+ * @param image VPIImage
+ * @param imageWrap VPIImage data
+ * @return error code
+ */
+std::error_code DestroyVPIImageWrapper(VPIImage &image, VPIImageData &imageWrap);
+
+/**
+ * Updates a VPI image from the given CVCORE image.
+ * @param vpiImage VPIImage
+ * @param vpiImageData VPIImage data
+ * @param image CVCORE image
+ * @return error code
+ */
+template<ImageType T, typename std::enable_if<!IsCompositeImage<T>::value>::type * = nullptr>
+std::error_code UpdateImage(VPIImage &vpiImage, VPIImageData &vpiImageData, const Image<T> &image)
+{
+    using D = typename Image<T>::DataType;
+    vpiImageData.buffer.pitch.planes[0].data = const_cast<D *>(image.getData());
+    return UpdateVPIImageWrapper(vpiImage, vpiImageData, image.isCPU());
+}
+
+/**
+ * Updates a VPI image from the given CVCORE image (composite/NV formats).
+ * @param vpiImage VPIImage
+ * @param vpiImageData VPIImage data
+ * @param image CVCORE image
+ * @return error code
+ */
+template<ImageType T, typename std::enable_if<IsCompositeImage<T>::value>::type * = nullptr>
+std::error_code UpdateImage(VPIImage &vpiImage, VPIImageData &vpiImageData, const Image<T> &image)
+{
+    using D = typename Image<T>::DataType;
+    vpiImageData.buffer.pitch.planes[0].data = const_cast<D *>(image.getLumaData());
+    vpiImageData.buffer.pitch.planes[1].data = const_cast<D *>(image.getChromaData());
+    return UpdateVPIImageWrapper(vpiImage, vpiImageData, image.isCPU());
+}
+
+/**
+ * Checks whether the params of the VPIImageData are consistent with the given CVCORE image.
+ * @param vpiImageData VPIImage data
+ * @param image CVCORE image
+ * @return whether the params changed
+ */
+template<ImageType T, typename std::enable_if<!IsCompositeImage<T>::value && !IsPlanarImage<T>::value>::type * = nullptr>
+bool CheckParamsChanged(VPIImageData &vpiImageData, const Image<T> &image)
+{
+    bool paramsChanged = false;
+    // Did the format change
+    paramsChanged = paramsChanged || vpiImageData.buffer.pitch.format != ToVpiImageFormat(T);
+    // Did the image dimensions change
+    paramsChanged =
+        paramsChanged || (vpiImageData.buffer.pitch.planes[0].height != static_cast<int32_t>(image.getHeight()) ||
+                          vpiImageData.buffer.pitch.planes[0].width != static_cast<int32_t>(image.getWidth()));
+    return paramsChanged;
+}
+
+/**
+ * Checks whether the params of the VPIImageData are consistent with the given CVCORE image (composite/NV formats).
+ * @param vpiImageData VPIImage data
+ * @param image CVCORE image
+ * @return whether the params changed
+ */
+template<ImageType T, typename std::enable_if<IsCompositeImage<T>::value>::type * = nullptr>
+bool CheckParamsChanged(VPIImageData &vpiImageData, const Image<T> &image)
+{
+    bool paramsChanged = false;
+
+    // Did the format change
+    paramsChanged = paramsChanged || vpiImageData.buffer.pitch.format != ToVpiImageFormat(T);
+
+    // Did the image dimensions change
+    paramsChanged = paramsChanged ||
+                    (vpiImageData.buffer.pitch.planes[0].height != static_cast<int32_t>(image.getLumaHeight()) ||
+                     vpiImageData.buffer.pitch.planes[0].width != static_cast<int32_t>(image.getLumaWidth()) ||
+                     vpiImageData.buffer.pitch.planes[1].height != static_cast<int32_t>(image.getChromaHeight()) ||
+                     vpiImageData.buffer.pitch.planes[1].width != static_cast<int32_t>(image.getChromaWidth()));
+    return paramsChanged;
+}
+
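A minimal usage sketch of the helpers above, assuming an image type covered by the explicit instantiations; `RefreshWrapper` is a hypothetical helper written for illustration and is not part of these sources:

    // Recreate the VPI wrapper only when format or dimensions changed;
    // otherwise just repoint the plane data at the current buffer.
    template<cvcore::ImageType T>
    std::error_code RefreshWrapper(VPIImage &vpiImage, VPIImageData &vpiData,
                                   const cvcore::Image<T> &image, VPIBackend backend)
    {
        using namespace cvcore::tensor_ops;
        if (CheckParamsChanged(vpiData, image))
        {
            DestroyVPIImageWrapper(vpiImage, vpiData);
            return CreateVPIImageWrapper(vpiImage, vpiData, image, backend);
        }
        return UpdateImage(vpiImage, vpiData, image);
    }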
+/**
+ * Implementation of VPITensorContext
+ */
+class VPITensorContext : public ITensorOperatorContext
+{
+public:
+    /**
+     * Default constructor for the VPI context.
+     */
+    VPITensorContext() = default;
+
+    /**
+     * Default destructor for the VPI context.
+     */
+    ~VPITensorContext() = default;
+
+    /**
+     * Creates a stream for the given compute engine.
+     * @param computeEngine CVCore compute engine
+     * @return Error code
+     */
+    virtual std::error_code CreateStream(TensorOperatorStream &, const ComputeEngine &computeEngine) override;
+
+    /**
+     * Destroys a created stream.
+     * @param inputStream Input stream to be deleted
+     * @return Error code
+     */
+    virtual std::error_code DestroyStream(TensorOperatorStream &inputStream) override;
+
+    /**
+     * Checks if the stream type is supported for a given backend.
+     * @param computeEngine CVCore compute engine
+     * @return true if the stream type is available.
+     */
+    virtual bool IsComputeEngineCompatible(const ComputeEngine &computeEngine) const noexcept override;
+
+    /**
+     * Returns the backend type.
+     */
+    virtual TensorBackend Backend() const noexcept override;
+
+private:
+};
+
+/**
+ * Implementation of VPITensorStream
+ */
+class VPITensorStream : public ITensorOperatorStream
+{
+public:
+    virtual std::error_code Status() noexcept override;
+
+    virtual std::error_code GenerateWarpFromCameraModel(ImageWarp &warp, const ImageGrid &grid,
+                                                        const CameraModel &source,
+                                                        const CameraIntrinsics &target) override;
+    virtual std::error_code DestroyWarp(ImageWarp &warp) noexcept override;
+
+    virtual std::error_code Remap(Image<RGB_U8> &outputImage, const Image<RGB_U8> &inputImage, const ImageWarp warp,
+                                  InterpolationType interpolation, BorderType border) override;
+
+    virtual std::error_code Remap(Image<BGR_U8> &outputImage, const Image<BGR_U8> &inputImage, const ImageWarp warp,
+                                  InterpolationType interpolation, BorderType border) override;
+
+    virtual std::error_code Remap(Image<NV12> &outputImage, const Image<NV12> &inputImage, const ImageWarp warp,
+                                  InterpolationType interpolation, BorderType border) override;
+
+    virtual std::error_code Remap(Image<NV24> &outputImage, const Image<NV24> &inputImage, const ImageWarp warp,
+                                  InterpolationType interpolation, BorderType border) override;
+
+    virtual std::error_code Resize(Image<RGB_U8> &outputImage, const Image<RGB_U8> &inputImage,
+                                   InterpolationType interpolation, BorderType border) override;
+    virtual std::error_code Resize(Image<NV12> &outputImage, const Image<NV12> &inputImage,
+                                   InterpolationType interpolation, BorderType border) override;
+    virtual std::error_code Resize(Image<RGBA_U8> &outputImage, const Image<RGBA_U8> &inputImage,
+                                   InterpolationType interpolation, BorderType border) override;
+    virtual std::error_code Resize(Image<BGR_U8> &outputImage, const Image<BGR_U8> &inputImage,
+                                   InterpolationType interpolation, BorderType border) override;
+    virtual std::error_code Resize(Image<NV24> &outputImage, const Image<NV24> &inputImage,
+                                   InterpolationType interpolation, BorderType border) override;
+
+    virtual std::error_code ColorConvert(Image<BGR_U8> &outputImage, const Image<RGB_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<RGB_U8> &outputImage, const Image<BGR_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<NV12> &outputImage, const Image<BGR_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<NV24> &outputImage, const Image<BGR_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<BGR_U8> &outputImage, const Image<NV12> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<BGR_U8> &outputImage, const Image<NV24> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<NV12> &outputImage, const Image<RGB_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<NV24> &outputImage, const Image<RGB_U8> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<RGB_U8> &outputImage, const Image<NV12> &inputImage) override;
+    virtual std::error_code ColorConvert(Image<RGB_U8> &outputImage, const Image<NV24> &inputImage) override;
+    virtual std::error_code StereoDisparityEstimator(Image<Y_F32> &outputImage, const Image<Y_U8> &inputLeftImage,
+                                                     const Image<Y_U8> &inputRightImage, size_t windowSize,
+                                                     size_t maxDisparity) override;
+    virtual std::error_code StereoDisparityEstimator(Image<Y_F32> &outputImage, const Image<NV12> &inputLeftImage,
+                                                     const Image<NV12> &inputRightImage, size_t windowSize,
+                                                     size_t maxDisparity) override;
+    virtual std::error_code StereoDisparityEstimator(Image<Y_F32> &outputImage, const Image<NV24> &inputLeftImage,
+                                                     const Image<NV24> &inputRightImage, size_t windowSize,
+                                                     size_t maxDisparity) override;
+
+protected:
+    friend class VPITensorContext;
+
+    VPITensorStream(const ComputeEngine &computeEngine);
+    ~VPITensorStream();
+
+private:
+    class VPIResizeImpl;
+    class VPIRemapImpl;
+    class VPIColorConvertImpl;
+    class VPIStereoDisparityEstimatorImpl;
+
+    mutable std::mutex m_fence;
+
+    std::unique_ptr<VPIResizeImpl> m_resizer;
+    std::unique_ptr<VPIRemapImpl> m_remapper;
+    std::unique_ptr<VPIColorConvertImpl> m_colorConverter;
+    std::unique_ptr<VPIStereoDisparityEstimatorImpl> m_stereoDisparityEstimator;
+
+    VPIStream m_stream;
+    VPIBackend m_backend;
+};
+
+}} // namespace cvcore::tensor_ops
+
+#endif // CVCORE_VPITENSOROPERATORS_H
diff --git a/isaac_ros_ess/gxf/ess/cvcore/src/trtbackend/TRTBackend.cpp b/isaac_ros_ess/gxf/ess/cvcore/src/trtbackend/TRTBackend.cpp
new file mode 100644
index 0000000..32a80d6
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/cvcore/src/trtbackend/TRTBackend.cpp
@@ -0,0 +1,632 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "cv/trtbackend/TRTBackend.h"
+
+#include "NvInfer.h"
+#include "NvOnnxConfig.h"
+#include "NvOnnxParser.h"
+#include "NvUffParser.h"
+#include "NvUtils.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+
+namespace cvcore {
+
+namespace {
+
+
+void WriteSerializedEngineToFile(const char *data, size_t engineSize, std::string &outputFile)
+{
+    std::ofstream outFile(outputFile.c_str(), std::ios::binary);
+    if (!outFile.is_open())
+    {
+        throw std::runtime_error("Cannot open file to write serialized Engine. Permissions?
"); + } + else + { + outFile.write(data, engineSize); + outFile.close(); + } +} + +nvinfer1::Dims4 GetTRTBlobDimension(int batch, int channels, int height, int width, TRTBackendBlobLayout layout) +{ + nvinfer1::Dims4 dims; + switch (layout) + { + case TRTBackendBlobLayout::PLANAR: + { + dims = {batch, channels, height, width}; + break; + } + case TRTBackendBlobLayout::INTERLEAVED: + { + dims = {batch, height, width, channels}; + break; + } + default: + { + throw std::runtime_error("Only PLANAR and INTERLEAVED types allowed"); + } + } + return dims; +} + +nvinfer1::Dims3 GetTRTBlobDimension(int channels, int height, int width, TRTBackendBlobLayout layout) +{ + nvinfer1::Dims3 dims; + switch (layout) + { + case TRTBackendBlobLayout::PLANAR: + { + dims = {channels, height, width}; + break; + } + case TRTBackendBlobLayout::INTERLEAVED: + { + dims = {height, width, channels}; + break; + } + default: + { + throw std::runtime_error("Only PLANAR and INTERLEAVED types allowed"); + } + } + return dims; +} + +bool SetupProfile(nvinfer1::IOptimizationProfile *profile, nvinfer1::INetworkDefinition *network, + TRTBackendParams ¶ms) +{ + // This shouldn't be hard-coded rather should be set by the user. + int kMINBatchSize = 1; + int kMAXBatchSize = 32; + + if (kMAXBatchSize < params.batchSize) + { + throw std::runtime_error("Max batch size is hard-coded to 32."); + } + + bool hasDynamicShape = false; + for (int i = 0; i < network->getNbInputs(); i++) + { + auto input = network->getInput(i); + nvinfer1::Dims dims = input->getDimensions(); + const bool isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int dim) { return dim == -1; }); + if (isDynamicInput) + { + hasDynamicShape = true; + auto it = std::find(params.inputLayers.begin(), params.inputLayers.end(), std::string(input->getName())); + if (it == params.inputLayers.end()) + { + throw std::runtime_error("Undefined dynamic input shape"); + } + int pos = it - params.inputLayers.begin(); + auto inputDim = params.inputDims[pos]; + profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, + GetTRTBlobDimension(kMINBatchSize, inputDim.channels, inputDim.height, + inputDim.width, params.inputLayout)); + profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, + GetTRTBlobDimension(params.batchSize, inputDim.channels, inputDim.height, + inputDim.width, params.inputLayout)); + profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, + GetTRTBlobDimension(kMAXBatchSize, inputDim.channels, inputDim.height, + inputDim.width, params.inputLayout)); + } + } + return hasDynamicShape; +} + +nvinfer1::DataType GetTRTDataType(TRTBackendPrecision precision) +{ + nvinfer1::DataType dataType; + switch (precision) + { + case TRTBackendPrecision::INT8: + { + dataType = nvinfer1::DataType::kINT8; + break; + } + case TRTBackendPrecision::FP16: + { + dataType = nvinfer1::DataType::kHALF; + break; + } + case TRTBackendPrecision::FP32: + { + dataType = nvinfer1::DataType::kFLOAT; + break; + } + default: + { + dataType = nvinfer1::DataType::kFLOAT; + break; + } + } + return dataType; +} + +nvuffparser::UffInputOrder GetTRTInputOrder(TRTBackendBlobLayout layout) +{ + nvuffparser::UffInputOrder order; + switch (layout) + { + case TRTBackendBlobLayout::PLANAR: + { + order = nvuffparser::UffInputOrder::kNCHW; + break; + } + case TRTBackendBlobLayout::INTERLEAVED: + { + order = nvuffparser::UffInputOrder::kNHWC; + break; + } + default: + { + throw std::runtime_error("Only PLANAR and INTERLEAVED types allowed"); + } 
+ } + return order; +} + +} // anonymous namespace + +class TRTLogger : public nvinfer1::ILogger +{ + +public: + nvinfer1::ILogger &getLogger() + { + return *this; + } + + void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override + { + switch (severity) + { + case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: + { + std::cout << msg << std::endl; + break; + } + case nvinfer1::ILogger::Severity::kERROR: + { + std::cout << msg << std::endl; + break; + } + case nvinfer1::ILogger::Severity::kWARNING: + { + std::cout << msg << std::endl; + break; + } + case nvinfer1::ILogger::Severity::kINFO: + { + std::cout << msg << std::endl; + break; + } + default: + { + std::cout << msg << std::endl; + break; + } + } + } +}; + +struct TRTImpl +{ + TRTImpl() + : m_logger(new TRTLogger()) + , m_TRTRuntime(nullptr, [](nvinfer1::IRuntime *runtime) { runtime->destroy(); }) + , m_inferenceEngine(nullptr) + , m_ownedInferenceEngine(nullptr, [](nvinfer1::ICudaEngine *eng) { eng->destroy(); }) + , m_inferContext(nullptr, [](nvinfer1::IExecutionContext *ectx) { ectx->destroy(); }) + , m_cudaStream(0) + { + } + + std::unique_ptr m_logger; + std::unique_ptr m_TRTRuntime; + nvinfer1::ICudaEngine *m_inferenceEngine; + std::unique_ptr m_ownedInferenceEngine; + std::unique_ptr m_inferContext; + + cudaStream_t m_cudaStream; + int m_bindingsCount = 0; + int m_batchSize = 1; + bool m_explicitBatch = false; + TRTBackendPrecision m_precision = TRTBackendPrecision::FP32; + std::unordered_map m_blobMap; + + void loadNetWorkFromFile(const char *modelFilePath); + void loadNetWorkFromUff(TRTBackendParams ¶ms); + void loadNetWorkFromOnnx(TRTBackendParams ¶ms); + void loadFromMemoryPointer(void *engine); + // find the input/output bindings + void setupIO(int batchSize); +}; + +void TRTImpl::loadNetWorkFromFile(const char *modelFilePath) +{ + // Initialize TRT engine and deserialize it from file + std::ifstream trtModelFStream(modelFilePath, std::ios::binary); + std::unique_ptr trtModelContent; + size_t trtModelContentSize = 0; + if (!trtModelFStream.good()) + { + std::cerr << "Model File: " << modelFilePath << std::endl; + throw std::runtime_error("TensorRT: Model file not found."); + } + else + { + trtModelFStream.seekg(0, trtModelFStream.end); + trtModelContentSize = trtModelFStream.tellg(); + trtModelFStream.seekg(0, trtModelFStream.beg); + trtModelContent.reset(new char[trtModelContentSize]); + trtModelFStream.read(trtModelContent.get(), trtModelContentSize); + trtModelFStream.close(); + std::cout << "Deserializing engine from: " << modelFilePath; + } + m_TRTRuntime.reset(nvinfer1::createInferRuntime(*(m_logger.get()))); + m_inferenceEngine = dynamic_cast( + m_TRTRuntime->deserializeCudaEngine(trtModelContent.get(), trtModelContentSize, nullptr)); + m_ownedInferenceEngine.reset(m_inferenceEngine); + m_inferContext.reset(m_inferenceEngine->createExecutionContext()); + m_inferContext->setOptimizationProfile(0); +} + +void TRTImpl::loadNetWorkFromOnnx(TRTBackendParams ¶ms) +{ + if (!params.explicitBatch) + { + std::cerr << "ONNX model only supports explicit batch size"; + } + std::ifstream f(params.enginePath); + if (f.good()) + { + loadNetWorkFromFile(params.enginePath.c_str()); + return; + } + auto builder = nvinfer1::createInferBuilder(*(m_logger.get())); + auto config = builder->createBuilderConfig(); + auto batchFlag = 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + auto network = builder->createNetworkV2(batchFlag); + auto parser = nvonnxparser::createParser(*network, 
*(m_logger.get())); + + // Force FP32 + if (!builder->platformHasFastFp16()) + { + m_precision = TRTBackendPrecision::FP32; + } + + // Configuration + builder->setMaxBatchSize(params.batchSize); + if (!parser->parseFromFile(params.weightsPath.c_str(), 0)) + { + std::cerr << "Fail to parse"; + } + config->setMaxWorkspaceSize(1 << 30); + if (m_precision == TRTBackendPrecision::FP16) + { + config->setFlag(nvinfer1::BuilderFlag::kFP16); + } + + auto profile = builder->createOptimizationProfile(); + if (SetupProfile(profile, network, params)) + { + config->addOptimizationProfile(profile); + } + + // Build the engine + m_inferenceEngine = builder->buildEngineWithConfig(*network, *config); + if (m_inferenceEngine == nullptr) + { + throw std::runtime_error("TensorRT: unable to create engine"); + } + + m_ownedInferenceEngine.reset(m_inferenceEngine); + m_inferContext.reset(m_inferenceEngine->createExecutionContext()); + + network->destroy(); + builder->destroy(); + config->destroy(); + + auto serializedEngine = m_inferenceEngine->serialize(); + WriteSerializedEngineToFile(static_cast(serializedEngine->data()), serializedEngine->size(), + params.enginePath); +} + +void TRTImpl::loadNetWorkFromUff(TRTBackendParams ¶ms) +{ + auto builder = nvinfer1::createInferBuilder(*(m_logger.get())); + auto config = builder->createBuilderConfig(); + auto batchFlag = params.explicitBatch + ? 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH) + : 0U; + auto network = builder->createNetworkV2(batchFlag); + auto parser = nvuffparser::createUffParser(); + + std::ifstream f(params.enginePath); + if (f.good()) + { + loadNetWorkFromFile(params.enginePath.c_str()); + return; + } + nvinfer1::DataType dataType = GetTRTDataType(params.precision); + + // Force FP32 + if (!builder->platformHasFastFp16()) + { + dataType = nvinfer1::DataType::kFLOAT; + m_precision = TRTBackendPrecision::FP32; + } + + // Register uff input + for (int i = 0; i < params.inputLayers.size(); i++) + { + if (params.explicitBatch) + { + parser->registerInput( + params.inputLayers[i].c_str(), + GetTRTBlobDimension(params.batchSize, params.inputDims[i].channels, params.inputDims[i].height, + params.inputDims[i].width, params.inputLayout), + GetTRTInputOrder(params.inputLayout)); + } + else + { + parser->registerInput(params.inputLayers[i].c_str(), + GetTRTBlobDimension(params.inputDims[i].channels, params.inputDims[i].height, + params.inputDims[i].width, params.inputLayout), + GetTRTInputOrder(params.inputLayout)); + } + } + + // Register uff output + for (int i = 0; i < params.outputLayers.size(); i++) + { + parser->registerOutput(params.outputLayers[i].c_str()); + } + + // Parse uff model + if (!parser->parse(params.weightsPath.c_str(), *network, dataType)) + { + std::cerr << "Fail to parse"; + } + + // Configuration + if (params.explicitBatch) + { + auto profile = builder->createOptimizationProfile(); + if (SetupProfile(profile, network, params)) + { + config->addOptimizationProfile(profile); + } + } + else + { + builder->setMaxBatchSize(params.batchSize); + } + + config->setMaxWorkspaceSize(1 << 30); + if (m_precision == TRTBackendPrecision::FP16) + { + config->setFlag(nvinfer1::BuilderFlag::kFP16); + } + + // Build the engine + m_inferenceEngine = builder->buildEngineWithConfig(*network, *config); + if (m_inferenceEngine == nullptr) + { + throw std::runtime_error("TensorRT: unable to create engine"); + } + + m_ownedInferenceEngine.reset(m_inferenceEngine); + m_inferContext.reset(m_inferenceEngine->createExecutionContext()); + 
+    network->destroy();
+    builder->destroy();
+    config->destroy();
+
+    auto serializedEngine = m_inferenceEngine->serialize();
+    WriteSerializedEngineToFile(static_cast<const char *>(serializedEngine->data()), serializedEngine->size(),
+                                params.enginePath);
+}
+
+void TRTImpl::loadFromMemoryPointer(void *engine)
+{
+    m_inferenceEngine = static_cast<nvinfer1::ICudaEngine *>(engine);
+    m_inferContext.reset(m_inferenceEngine->createExecutionContext());
+}
+
+void TRTImpl::setupIO(int batchSize)
+{
+    m_bindingsCount = m_inferenceEngine->getNbBindings();
+    for (int i = 0; i < m_bindingsCount; i++)
+    {
+        m_blobMap[std::string(m_inferenceEngine->getBindingName(i))] = i;
+        if (m_inferenceEngine->bindingIsInput(i))
+        {
+            nvinfer1::Dims dims_i(m_inferenceEngine->getBindingDimensions(i));
+            nvinfer1::Dims4 inputDims{batchSize, dims_i.d[1], dims_i.d[2], dims_i.d[3]};
+            m_inferContext->setBindingDimensions(i, inputDims);
+        }
+    }
+}
+
+TRTBackend::TRTBackend(const char *modelFilePath, TRTBackendPrecision precision, int batchSize, bool explicitBatch)
+    : m_pImpl(new TRTImpl())
+{
+    m_pImpl->m_precision = precision;
+    m_pImpl->m_batchSize = batchSize;
+    m_pImpl->m_explicitBatch = explicitBatch;
+    m_pImpl->loadNetWorkFromFile(modelFilePath);
+    m_pImpl->setupIO(m_pImpl->m_batchSize);
+}
+
+TRTBackend::TRTBackend(TRTBackendParams &inputParams)
+    : m_pImpl(new TRTImpl())
+{
+    m_pImpl->m_precision = inputParams.precision;
+    m_pImpl->m_batchSize = inputParams.batchSize;
+    m_pImpl->m_explicitBatch = inputParams.explicitBatch;
+    switch (inputParams.modelType)
+    {
+    case ModelType::ONNX:
+    {
+        m_pImpl->loadNetWorkFromOnnx(inputParams);
+        break;
+    }
+    case ModelType::UFF:
+    {
+        m_pImpl->loadNetWorkFromUff(inputParams);
+        break;
+    }
+    case ModelType::TRT_ENGINE:
+    {
+        m_pImpl->loadNetWorkFromFile(inputParams.enginePath.c_str());
+        break;
+    }
+    case ModelType::TRT_ENGINE_IN_MEMORY:
+    {
+        m_pImpl->loadFromMemoryPointer(inputParams.trtEngineInMemory);
+        break;
+    }
+    default:
+    {
+        throw std::runtime_error(
+            "Only Model types ONNX, UFF, TensorRT "
+            "serialized engines and a pointer to deserialized "
+            "ICudaEngine are supported\n");
+    }
+    }
+    m_pImpl->setupIO(m_pImpl->m_batchSize);
+}
+
+TRTBackend::~TRTBackend() {}
+
+void TRTBackend::infer(void **buffer)
+{
+    // Capture the enqueue result so that failures actually surface.
+    bool success = true;
+    if (!m_pImpl->m_inferenceEngine->hasImplicitBatchDimension())
+    {
+        success = m_pImpl->m_inferContext->enqueueV2(buffer, m_pImpl->m_cudaStream, nullptr);
+    }
+    else
+    {
+        success = m_pImpl->m_inferContext->enqueue(m_pImpl->m_batchSize, buffer, m_pImpl->m_cudaStream, nullptr);
+    }
+
+    if (!success)
+    {
+        throw std::runtime_error("TensorRT: Inference failed");
+    }
+}
+
+void TRTBackend::infer(void **buffer, int batchSize, cudaStream_t stream)
+{
+    m_pImpl->setupIO(batchSize);
+
+    // Capture the enqueue result so that failures actually surface.
+    bool success = true;
+    if (!m_pImpl->m_inferenceEngine->hasImplicitBatchDimension())
+    {
+        success = m_pImpl->m_inferContext->enqueueV2(buffer, stream, nullptr);
+    }
+    else
+    {
+        success = m_pImpl->m_inferContext->enqueue(batchSize, buffer, stream, nullptr);
+    }
+
+    if (!success)
+    {
+        throw std::runtime_error("TensorRT: Inference failed");
+    }
+}
+
+cudaStream_t TRTBackend::getCUDAStream() const
+{
+    return m_pImpl->m_cudaStream;
+}
+
+void TRTBackend::setCUDAStream(cudaStream_t stream)
+{
+    m_pImpl->m_cudaStream = stream;
+}
+
+int TRTBackend::getBlobCount() const
+{
+    return m_pImpl->m_bindingsCount;
+}
+
+TRTBackendBlobSize TRTBackend::getTRTBackendBlobSize(int blobIndex) const
+{
+    if (blobIndex >= m_pImpl->m_bindingsCount)
+    {
+        throw std::runtime_error("blobIndex out of range");
+    }
+    auto dim =
m_pImpl->m_inferenceEngine->getBindingDimensions(blobIndex); + TRTBackendBlobSize blobSize; + if (dim.nbDims == 2) + { + blobSize = {1, dim.d[0], dim.d[1]}; + } + else if (dim.nbDims == 3) + { + blobSize = {dim.d[0], dim.d[1], dim.d[2]}; + } + else if (dim.nbDims == 4) + { + blobSize = {dim.d[1], dim.d[2], dim.d[3]}; + } + else + { + throw std::runtime_error("Unknown TensorRT binding dimension!"); + } + return blobSize; +} + +int TRTBackend::getBlobLinearSize(int blobIndex) const +{ + const TRTBackendBlobSize shape = getTRTBackendBlobSize(blobIndex); + nvinfer1::Dims3 dims_val{shape.channels, shape.height, shape.width}; + int blobSize = 1; + for (int i = 0; i < 3; i++) + { + blobSize *= dims_val.d[i] <= 0 ? 1 : dims_val.d[i]; + } + return blobSize; +} + +int TRTBackend::getBlobIndex(const char *blobName) const +{ + auto blobItr = m_pImpl->m_blobMap.find(std::string(blobName)); + if (blobItr == m_pImpl->m_blobMap.end()) + { + throw std::runtime_error("blobName not found"); + } + return blobItr->second; +} + +bool TRTBackend::bindingIsInput(const int index) const +{ + return m_pImpl->m_inferenceEngine->bindingIsInput(index); +} + +} // namespace cvcore diff --git a/isaac_ros_ess/gxf/ess/extensions/ess/ESS.hpp b/isaac_ros_ess/gxf/ess/extensions/ess/ESS.hpp new file mode 100644 index 0000000..a4d32f0 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/ess/ESS.hpp @@ -0,0 +1,119 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef NVIDIA_CVCORE_ESS_HPP +#define NVIDIA_CVCORE_ESS_HPP + +#include +#include +#include + +#include + +#include "extensions/tensor_ops/ImageAdapter.hpp" +#include "gxf/cuda/cuda_stream_pool.hpp" +#include "gxf/std/allocator.hpp" +#include "gxf/std/codelet.hpp" +#include "gxf/std/receiver.hpp" +#include "gxf/std/transmitter.hpp" + +namespace nvidia { +namespace cvcore { + +// CV-Core ESS GXF Codelet +class ESS : public gxf::Codelet { +public: + ESS() = default; + ~ESS() = default; + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + gxf_result_t initialize() override { + return GXF_SUCCESS; + } + gxf_result_t deinitialize() override { + return GXF_SUCCESS; + } + + gxf_result_t start() override; + gxf_result_t tick() override; + gxf_result_t stop() override; + +private: + // cvcore image pre-processing params for ESS + ::cvcore::ImagePreProcessingParams preProcessorParams; + // cvcore model input params for ESS + ::cvcore::ModelInputParams modelInputParams; + // cvcore inference params for ESS + ::cvcore::ModelInferenceParams inferenceParams; + // extra params for ESS + ::cvcore::ess::ESSPreProcessorParams extraParams; + // cvcore ESS object + std::unique_ptr<::cvcore::ess::ESS> objESS; + + // The name of the input left image tensor + gxf::Parameter left_image_name_; + // The name of the input right image tensor + gxf::Parameter right_image_name_; + // The name of the output tensor + gxf::Parameter output_name_; + // The Cuda Stream pool for allocate cuda stream + gxf::Parameter> stream_pool_; + // Data allocator to create a tensor + gxf::Parameter> pool_; + // Data receiver to get left image data + gxf::Parameter> left_image_receiver_; + // Data receiver to get right image data + gxf::Parameter> right_image_receiver_; + // Data transmitter to send the data + gxf::Parameter> output_transmitter_; + // Image adapter for output image + gxf::Parameter> output_adapter_; + + // Pre-processing params for ESS + gxf::Parameter image_type_; + gxf::Parameter> pixel_mean_; + gxf::Parameter> normalization_; + gxf::Parameter> standard_deviation_; + + // Model input params for ESS + gxf::Parameter max_batch_size_; + gxf::Parameter input_layer_width_; + gxf::Parameter input_layer_height_; + gxf::Parameter model_input_type_; + + // Inference params for ESS + gxf::Parameter engine_file_path_; + gxf::Parameter> input_layers_name_; + gxf::Parameter> output_layers_name_; + + // Extra Pre-process param + gxf::Parameter preprocess_type_; + + // Output params + gxf::Parameter output_width_; + gxf::Parameter output_height_; + + // Decide which timestamp to pass down + gxf::Parameter timestamp_policy_; + + gxf::Handle cuda_stream_ = nullptr; +}; + +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.cpp new file mode 100644 index 0000000..5dce98e --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.cpp @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "CameraModel.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +gxf::Expected<::cvcore::CameraDistortionType> GetCameraDistortionType(const std::string& type) { + if (type == "Perspective") { + return ::cvcore::CameraDistortionType::NONE; + } else if (type == "Polynomial") { + return ::cvcore::CameraDistortionType::Polynomial; + } else if (type == "FisheyeEquidistant") { + return ::cvcore::CameraDistortionType::FisheyeEquidistant; + } else if (type == "FisheyeEquisolid") { + return ::cvcore::CameraDistortionType::FisheyeEquisolid; + } else if (type == "FisheyeOrthoGraphic") { + return ::cvcore::CameraDistortionType::FisheyeOrthoGraphic; + } else if (type == "FisheyeStereographic") { + return ::cvcore::CameraDistortionType::FisheyeStereographic; + } else { + return gxf::Unexpected{GXF_FAILURE}; + } +} + +} // namespace detail + +gxf_result_t CameraModel::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(distortion_type_, "distortion_type"); + result &= registrar->parameter(distortion_coefficients_, "distortion_coefficients"); + result &= registrar->parameter(focal_length_, "focal_length"); + result &= registrar->parameter(principle_point_, "principle_point"); + result &= registrar->parameter(skew_value_, "skew_value"); + + return gxf::ToResultCode(result); +} + +gxf_result_t CameraModel::initialize() { + // Construct distortion model + auto type = detail::GetCameraDistortionType(distortion_type_.get()); + if (!type) { + return GXF_FAILURE; + } + if (distortion_coefficients_.get().size() != 8) { + GXF_LOG_ERROR("size of distortion coefficients must be 8."); + return GXF_FAILURE; + } + for (size_t i = 0; i < 8; i++) { + distortions_.coefficients[i] = distortion_coefficients_.get()[i]; + } + distortions_.type = type.value(); + + // Construct intrinsic model + if (focal_length_.get().size() != 2 || principle_point_.get().size() != 2) { + GXF_LOG_ERROR("focal length and principle point must be 2-element array."); + return GXF_FAILURE; + } + intrinsics_ = ::cvcore::CameraIntrinsics(focal_length_.get()[0], focal_length_.get()[1], principle_point_.get()[0], + principle_point_.get()[1], skew_value_.get()); + + return GXF_SUCCESS; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.hpp new file mode 100644 index 0000000..e68c67f --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CameraModel.hpp @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_CAMERA_MODEL_HPP +#define NVIDIA_CVCORE_CAMERA_MODEL_HPP + +#include "gxf/core/component.hpp" +#include "gxf/std/parameter_parser_std.hpp" + +#include "cv/core/CameraModel.h" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Wrapper of CameraModel compatible with CVCORE +class CameraModel : public gxf::Component { +public: + virtual ~CameraModel() = default; + CameraModel() = default; + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + gxf_result_t initialize() override; + + ::cvcore::CameraDistortionModel getDistortionModel() const { + return distortions_; + } + ::cvcore::CameraIntrinsics getCameraIntrinsics() const { + return intrinsics_; + } + +private: + gxf::Parameter distortion_type_; + gxf::Parameter> distortion_coefficients_; + gxf::Parameter> focal_length_; + gxf::Parameter> principle_point_; + gxf::Parameter skew_value_; + + ::cvcore::CameraDistortionModel distortions_; + ::cvcore::CameraIntrinsics intrinsics_; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.cpp new file mode 100644 index 0000000..074e013 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.cpp @@ -0,0 +1,214 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 +#include "ConvertColorFormat.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +template<::cvcore::ImageType T_IN, ::cvcore::ImageType T_OUT> +gxf_result_t ConvertColorFormatImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle output_adapter, gxf::Handle input_adapter, + gxf::Handle allocator, + ::cvcore::tensor_ops::ColorConversionType type, cudaStream_t stream) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + + ::cvcore::tensor_ops::ConvertColorFormat(output_image.value(), input_image.value(), type, stream); + return GXF_SUCCESS; +} + +template<::cvcore::ImageType T_IN, ::cvcore::ImageType T_OUT> +gxf_result_t ConvertColorFormatStreamImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle stream, gxf::Handle output_adapter, + gxf::Handle input_adapter, + gxf::Handle allocator) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + + auto err_code = stream->getStream()->ColorConvert(output_image.value(), input_image.value()); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("color conversion operation failed."); + return GXF_FAILURE; + } + + return GXF_SUCCESS; +} + +} // namespace detail + +template +gxf_result_t ConvertColorFormatBase::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(output_type_, "output_type"); + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(stream_, "stream", "tensor stream", "tensor stream object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +template +gxf::Expected 
+gxf::Expected<ImageInfo> ConvertColorFormatBase<USE_TENSOR_STREAM>::doInferOutputInfo(gxf::Entity& input) {
+  // Set output type
+  auto output_type = GetImageTypeFromString(output_type_);
+  if (!output_type) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  // Check if no-op is needed
+  no_op_ = output_type.value() == input_info_.type;
+  return ImageInfo{output_type.value(), input_info_.width, input_info_.height, input_info_.is_cpu};
+}
+
+template<bool USE_TENSOR_STREAM>
+gxf_result_t ConvertColorFormatBase<USE_TENSOR_STREAM>::doUpdateCameraMessage(gxf::Handle<gxf::CameraModel>& output,
+                                                                              gxf::Handle<gxf::CameraModel>& input) {
+  *output = *input;
+  return GXF_SUCCESS;
+}
+
+#define DEFINE_CONVERT_COLOR_FORMAT(INPUT_TYPE, OUTPUT_TYPE, CONVERSION_TYPE)                                         \
+  if (input_info_.type == INPUT_TYPE && output_info_.type == OUTPUT_TYPE) {                                           \
+    return detail::ConvertColorFormatImpl<INPUT_TYPE, OUTPUT_TYPE>(                                                   \
+      output, input, output_info_, input_info_, output_name, input_name, output_adapter_.get(), input_adapter_.get(), \
+      pool_.get(), CONVERSION_TYPE, stream);                                                                           \
+  }
+
+#define DEFINE_STREAM_CONVERT_COLOR_FORMAT(INPUT_TYPE, OUTPUT_TYPE)                                                   \
+  if (input_info_.type == INPUT_TYPE && output_info_.type == OUTPUT_TYPE) {                                           \
+    return detail::ConvertColorFormatStreamImpl<INPUT_TYPE, OUTPUT_TYPE>(                                             \
+      output, input, output_info_, input_info_, output_name, input_name, stream_.try_get().value(),                   \
+      output_adapter_.get(), input_adapter_.get(), pool_.get());                                                       \
+  }
+
+template<>
+gxf_result_t ConvertColorFormatBase<true>::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream,
+                                                     const char* output_name, const char* input_name) {
+  GXF_LOG_INFO("execute convert color format");
+
+  // Run the color conversion operation
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::BGR_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::RGB_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::NV12, ::cvcore::ImageType::BGR_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::NV12);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::NV12, ::cvcore::ImageType::RGB_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::NV12);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::NV24, ::cvcore::ImageType::BGR_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::NV24);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::NV24, ::cvcore::ImageType::RGB_U8);
+  DEFINE_STREAM_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::NV24);
+
+  // Return error code for unsupported type
+  GXF_LOG_ERROR("invalid input/output type for image color conversion.");
+  return GXF_FAILURE;
+}
+
+template<>
+gxf_result_t ConvertColorFormatBase<false>::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream,
+                                                      const char* output_name, const char* input_name) {
+  GXF_LOG_INFO("execute convert color format");
+
+  // Run the color conversion operation
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::BGR_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2BGR);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U16, ::cvcore::ImageType::BGR_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2BGR);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_F32, ::cvcore::ImageType::BGR_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2BGR);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::RGB_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U16, ::cvcore::ImageType::RGB_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_F32, ::cvcore::ImageType::RGB_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::Y_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_U16, ::cvcore::ImageType::Y_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::RGB_F32, ::cvcore::ImageType::Y_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::RGB2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::Y_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_U16, ::cvcore::ImageType::Y_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::BGR_F32, ::cvcore::ImageType::Y_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::BGR2GRAY);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_U8, ::cvcore::ImageType::RGB_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_U16, ::cvcore::ImageType::RGB_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_F32, ::cvcore::ImageType::RGB_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2RGB);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_U8, ::cvcore::ImageType::BGR_U8,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2BGR);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_U16, ::cvcore::ImageType::BGR_U16,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2BGR);
+  DEFINE_CONVERT_COLOR_FORMAT(::cvcore::ImageType::Y_F32, ::cvcore::ImageType::BGR_F32,
+                              ::cvcore::tensor_ops::ColorConversionType::GRAY2BGR);
+
+  // Return error code for unsupported type
+  GXF_LOG_ERROR("invalid input/output type for image color conversion.");
+  return GXF_FAILURE;
+}
+
+template class ConvertColorFormatBase<true>;
+template class ConvertColorFormatBase<false>;
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.hpp
new file mode 100644
index 0000000..f78786b
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ConvertColorFormat.hpp
@@ -0,0 +1,51 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifndef NVIDIA_CVCORE_CONVERT_COLOR_FORMAT_HPP
+#define NVIDIA_CVCORE_CONVERT_COLOR_FORMAT_HPP
+
+#include "TensorOperator.hpp"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
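Before the operator declaration below, a note on what the ColorConversionType table in the .cpp above encodes: each entry names a fixed per-pixel mapping. A standalone sketch of the grayscale direction, using the common BT.601 luma weights (RgbToGray is an illustrative name; whether cvcore uses exactly these coefficients is an assumption, not something this patch shows):

#include <cstdint>

// Illustrative RGB -> gray mapping of the kind RGB2GRAY denotes.
// Weights are BT.601 (0.299, 0.587, 0.114); the +0.5f rounds to nearest.
inline uint8_t RgbToGray(uint8_t r, uint8_t g, uint8_t b) {
  return static_cast<uint8_t>(0.299f * r + 0.587f * g + 0.114f * b + 0.5f);
}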
+// ConvertColorFormat operator.
+template<bool USE_TENSOR_STREAM>
+class ConvertColorFormatBase : public TensorOperator {
+public:
+  virtual ~ConvertColorFormatBase() {}
+
+  gxf_result_t registerInterface(gxf::Registrar* registrar) override final;
+
+private:
+  gxf::Expected<ImageInfo> doInferOutputInfo(gxf::Entity& input) override final;
+  gxf_result_t doUpdateCameraMessage(gxf::Handle<gxf::CameraModel>& output,
+                                     gxf::Handle<gxf::CameraModel>& input) override final;
+  gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name,
+                         const char* input_name) override final;
+
+  gxf::Parameter<std::string> output_type_;
+};
+
+class ConvertColorFormat : public ConvertColorFormatBase<false> {};
+class StreamConvertColorFormat : public ConvertColorFormatBase<true> {};
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
+
+#endif
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.cpp
new file mode 100644
index 0000000..d0c2872
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.cpp
@@ -0,0 +1,161 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#include "CropAndResize.hpp"
+#include "Resize.hpp"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+namespace detail {
+
+template<::cvcore::ImageType T>
+gxf_result_t CropAndResizeImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info,
+                               const ImageInfo& input_info, const char* output_name, const char* input_name,
+                               gxf::Handle<ImageAdapter> output_adapter, gxf::Handle<ImageAdapter> input_adapter,
+                               gxf::Handle<gxf::Allocator> allocator, const std::vector<::cvcore::BBox>& src_rois,
+                               ::cvcore::tensor_ops::InterpolationType interp_type, cudaStream_t stream) {
+  auto input_image = input_adapter->WrapImageFromMessage<T>(input, input_name);
+  if (!input_image) {
+    return GXF_FAILURE;
+  }
+
+  const size_t num_output = src_rois.size();
+
+  for (size_t i = 0; i < num_output; i++) {
+    const std::string output_name_i = std::string(output_name) + "_" + std::to_string(i);
+    auto error = output_adapter->AddImageToMessage<T>(output, output_info.width, output_info.height, allocator,
+                                                      output_info.is_cpu, output_name_i.c_str());
+    if (error != GXF_SUCCESS) {
+      return GXF_FAILURE;
+    }
+    auto output_image = output_adapter->WrapImageFromMessage<T>(output, output_name_i.c_str());
+    if (!output_image) {
+      return GXF_FAILURE;
+    }
+    ::cvcore::tensor_ops::CropAndResize(output_image.value(), input_image.value(), src_rois[i], interp_type, stream);
+  }
+
+  return GXF_SUCCESS;
+}
+
+} // namespace detail
+
+gxf_result_t CropAndResize::registerInterface(gxf::Registrar* registrar) {
+  gxf::Expected<void> result;
+
+  result &= registrar->parameter(output_width_, "output_width");
+  result &= registrar->parameter(output_height_, "output_height");
+  result &= registrar->parameter(interp_type_, "interp_type");
+  result &= registrar->parameter(keep_aspect_ratio_, "keep_aspect_ratio");
+  result &= registrar->parameter(receiver_bbox_, "receiver_bbox");
+  result &= registrar->parameter(receiver_, "receiver");
+  result &= registrar->parameter(transmitter_, "transmitter");
+  result &= registrar->parameter(pool_, "pool");
+  result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(input_adapter_, "input_adapter");
+  result &= registrar->parameter(output_adapter_, "output_adapter");
+  result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+
+  return gxf::ToResultCode(result);
+}
+
+gxf::Expected<ImageInfo> CropAndResize::doInferOutputInfo(gxf::Entity& input) {
+  // Set crop regions
+  auto input_bbox_message = receiver_bbox_->receive();
+  if (!input_bbox_message) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  auto bbox_tensor = input_bbox_message.value().get<gxf::Tensor>();
+  if (!bbox_tensor) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  const gxf::Shape bbox_shape = bbox_tensor.value()->shape();
+  if (bbox_shape.rank() != 2 || bbox_shape.dimension(1) != 4) {
+    GXF_LOG_ERROR("invalid input bbox dimension.");
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  const size_t num_bbox = bbox_shape.dimension(0);
+  auto bbox_pointer = bbox_tensor.value()->data<int>();
+  if (!bbox_pointer) {
+    GXF_LOG_ERROR("empty bbox input.");
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  std::vector<::cvcore::BBox> rois;
+  for (size_t i = 0; i < num_bbox; i++) {
+    const int index = i * 4;
+    rois.push_back({bbox_pointer.value()[index], bbox_pointer.value()[index + 1], bbox_pointer.value()[index + 2],
+                    bbox_pointer.value()[index + 3]});
+  }
+  input_rois_ = std::move(rois);
+  // Check if no-op is needed
+  no_op_ = input_rois_.size() == 1 && input_rois_[0].xmin == 0 &&
+           input_rois_[0].xmax == static_cast<int>(input_info_.width) && input_rois_[0].ymin == 0 &&
+           input_rois_[0].ymax == static_cast<int>(input_info_.height);
+
+  return ImageInfo{input_info_.type, output_width_.get(), output_height_.get(), input_info_.is_cpu};
+}
+
+gxf_result_t CropAndResize::doUpdateCameraMessage(gxf::Handle<gxf::CameraModel>& output,
+                                                  gxf::Handle<gxf::CameraModel>& input) {
+  auto crop_result = GetCroppedCameraModel(*input, input_rois_[0]);
+  if (!crop_result) {
+    return GXF_FAILURE;
+  }
+  *output = GetScaledCameraModel(crop_result.value(), output_info_.width, output_info_.height, false).value();
+  return GXF_SUCCESS;
+}
+
+#define DEFINE_CROP_AND_RESIZE(INPUT_TYPE)                                                                \
+  if (input_info_.type == INPUT_TYPE) {                                                                   \
+    return detail::CropAndResizeImpl<INPUT_TYPE>(output, input, output_info_, input_info_, output_name,   \
+                                                 input_name, output_adapter_.get(), input_adapter_.get(), \
+                                                 pool_.get(), input_rois_, interp.value(), stream);       \
+  }
+
+gxf_result_t CropAndResize::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream,
+                                      const char* output_name, const char* input_name) {
+  GXF_LOG_INFO("execute crop_and_resize.");
+  // Check if interpolation type is valid
+  auto interp = GetInterpolationType(interp_type_);
+  if (!interp) {
+    return interp.error();
+  }
+
+  // Run the image resizing operation
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::Y_U8);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::Y_U16);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::Y_F32);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::RGB_U8);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::RGB_U16);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::RGB_F32);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::BGR_U8);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::BGR_U16);
+  DEFINE_CROP_AND_RESIZE(::cvcore::ImageType::BGR_F32);
+
+  // Return error code for unsupported type
+  GXF_LOG_ERROR("invalid input/output type for image crop_and_resize.");
+  return GXF_FAILURE;
+}
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.hpp
new file mode 100644
index 0000000..2bde6fb
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/CropAndResize.hpp
@@ -0,0 +1,53 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifndef NVIDIA_CVCORE_CROP_AND_RESIZE_HPP
+#define NVIDIA_CVCORE_CROP_AND_RESIZE_HPP
+
+#include "TensorOperator.hpp"
+#include "cv/core/BBox.h"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+// CropAndResize operator.
+class CropAndResize : public TensorOperator {
+public:
+  virtual ~CropAndResize() {}
+
+  gxf_result_t registerInterface(gxf::Registrar* registrar) override;
+
+private:
+  gxf::Expected<ImageInfo> doInferOutputInfo(gxf::Entity& input) override final;
+  gxf_result_t doUpdateCameraMessage(gxf::Handle<gxf::CameraModel>& output,
+                                     gxf::Handle<gxf::CameraModel>& input) override final;
+  gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name,
+                         const char* input_name) override final;
+
+  gxf::Parameter<size_t> output_width_;
+  gxf::Parameter<size_t> output_height_;
+  gxf::Parameter<std::string> interp_type_;
+  gxf::Parameter<bool> keep_aspect_ratio_;
+  gxf::Parameter<gxf::Handle<gxf::Receiver>> receiver_bbox_;
+  std::vector<::cvcore::BBox> input_rois_;
+};
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
+
+#endif
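An aside on the CropAndResize operator above: its ROI handling reduces to unpacking the N x 4 bbox tensor and testing for the full-frame no-op. A standalone sketch (Box, UnpackRois and IsFullFrame are illustrative stand-ins for ::cvcore::BBox and the logic in doInferOutputInfo; the {xmin, ymin, xmax, ymax} row order mirrors the field order used there):

#include <cstddef>
#include <vector>

struct Box { int xmin, ymin, xmax, ymax; };  // stand-in for ::cvcore::BBox

// Unpack a flat N x 4 buffer of {xmin, ymin, xmax, ymax} rows into ROIs,
// mirroring doInferOutputInfo above.
std::vector<Box> UnpackRois(const int* data, size_t num_boxes) {
  std::vector<Box> rois;
  rois.reserve(num_boxes);
  for (size_t i = 0; i < num_boxes; i++) {
    rois.push_back({data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]});
  }
  return rois;
}

// The operator becomes a no-op only when a single ROI covers the whole frame.
bool IsFullFrame(const Box& roi, int width, int height) {
  return roi.xmin == 0 && roi.ymin == 0 && roi.xmax == width && roi.ymax == height;
}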
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.cpp
new file mode 100644
index 0000000..c28829f
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.cpp
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "Frame3D.hpp"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+gxf_result_t Frame3D::registerInterface(gxf::Registrar* registrar) {
+  gxf::Expected<void> result;
+
+  result &= registrar->parameter(rotation_, "rotation");
+  result &= registrar->parameter(translation_, "translation");
+
+  return gxf::ToResultCode(result);
+}
+
+gxf_result_t Frame3D::initialize() {
+  // Construct extrinsic model
+  if (rotation_.get().size() != 9) {
+    GXF_LOG_ERROR("size of rotation matrix must be 9");
+    return GXF_FAILURE;
+  }
+  if (translation_.get().size() != 3) {
+    GXF_LOG_ERROR("size of translation vector must be 3");
+    return GXF_FAILURE;
+  }
+  float raw_matrix[3][4];
+  for (size_t i = 0; i < 9; i++) {
+    raw_matrix[i / 3][i % 3] = rotation_.get()[i];
+  }
+  for (size_t i = 0; i < 3; i++) {
+    raw_matrix[i][3] = translation_.get()[i];
+  }
+  extrinsics_ = ::cvcore::CameraExtrinsics(raw_matrix);
+  return GXF_SUCCESS;
+}
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.hpp
new file mode 100644
index 0000000..aec8cf5
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Frame3D.hpp
@@ -0,0 +1,53 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifndef NVIDIA_CVCORE_FRAME3D_HPP
+#define NVIDIA_CVCORE_FRAME3D_HPP
+
+#include "gxf/core/component.hpp"
+#include "gxf/std/parameter_parser_std.hpp"
+
+#include "cv/core/CameraModel.h"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+// Wrapper of CameraExtrinsics compatible with CVCORE
+class Frame3D : public gxf::Component {
+public:
+  virtual ~Frame3D() = default;
+  Frame3D() = default;
+
+  gxf_result_t registerInterface(gxf::Registrar* registrar) override;
+  gxf_result_t initialize() override;
+
+  ::cvcore::CameraExtrinsics getCameraExtrinsics() const {
+    return extrinsics_;
+  }
+
+private:
+  gxf::Parameter<std::vector<float>> rotation_;
+  gxf::Parameter<std::vector<float>> translation_;
+
+  ::cvcore::CameraExtrinsics extrinsics_;
+};
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
+
+#endif
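An aside on Frame3D above: the raw_matrix it assembles is a 3x4 [R | t] extrinsics block, rotation in the first three columns and translation in the fourth. A standalone sketch of what applying that layout to a point means (Vec3 and TransformPoint are illustrative names, not cvcore types):

// Columns 0-2 of each row hold the rotation R, column 3 the translation t;
// applying the frame to a point p computes R * p + t.
struct Vec3 { float x, y, z; };

Vec3 TransformPoint(const float m[3][4], const Vec3& p) {
  return {m[0][0] * p.x + m[0][1] * p.y + m[0][2] * p.z + m[0][3],
          m[1][0] * p.x + m[1][1] * p.y + m[1][2] * p.z + m[1][3],
          m[2][0] * p.x + m[2][1] * p.y + m[2][2] * p.z + m[2][3]};
}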
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.cpp
new file mode 100644
index 0000000..12e7fe7
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.cpp
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#include "ImageAdapter.hpp"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+gxf_result_t ImageAdapter::registerInterface(gxf::Registrar* registrar) {
+  gxf::Expected<void> result;
+
+  result &= registrar->parameter(message_type_param_, "message_type");
+  result &= registrar->parameter(image_type_param_, "image_type", "image type", "optional image type",
+                                 gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL);
+  result &= registrar->parameter(allocate_pitch_linear_, "allocate_pitch_linear",
+                                 "if true, allocate output buffers as padded pitch linear surfaces", "", false);
+
+  return gxf::ToResultCode(result);
+}
+
+gxf_result_t ImageAdapter::initialize() {
+  if (message_type_param_.get() == "Tensor") {
+    message_type_ = BufferType::TENSOR;
+  } else if (message_type_param_.get() == "VideoBuffer") {
+    message_type_ = BufferType::VIDEO_BUFFER;
+  } else {
+    GXF_LOG_ERROR("unknown buffer type.");
+    return GXF_FAILURE;
+  }
+
+  const auto& image_type_param = image_type_param_.try_get();
+  if (message_type_ == BufferType::TENSOR && !image_type_param) {
+    GXF_LOG_INFO("image type must be specified for gxf::Tensor.");
+    return GXF_FAILURE;
+  }
+  if (image_type_param) {
+    const auto image_type = GetImageTypeFromString(image_type_param.value());
+    if (!image_type) {
+      return GXF_FAILURE;
+    }
+    image_type_ = image_type.value();
+  }
+  return GXF_SUCCESS;
+}
+
+gxf::Expected<ImageInfo> ImageAdapter::GetImageInfo(const gxf::Entity& message, const char* name) {
+  if (message_type_ == BufferType::TENSOR) {
+    auto tensor = message.get<gxf::Tensor>(name);
+    if (!tensor) {
+      return gxf::Unexpected{GXF_FAILURE};
+    }
+    return detail::GetTensorInfo(tensor.value(), image_type_);
+  } else {
+    auto video_buffer = message.get<gxf::VideoBuffer>(name);
+    if (!video_buffer) {
+      return gxf::Unexpected{GXF_FAILURE};
+    }
+    return detail::GetVideoBufferInfo(video_buffer.value());
+  }
+}
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.hpp
new file mode 100644
index 0000000..3b41391
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageAdapter.hpp
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifndef NVIDIA_CVCORE_IMAGE_ADAPTER_HPP
+#define NVIDIA_CVCORE_IMAGE_ADAPTER_HPP
+
+#include "ImageUtils.hpp"
+#include "detail/ImageAdapterTensorImpl.hpp"
+#include "detail/ImageAdapterVideoBufferImpl.hpp"
+
+#include "gxf/core/component.hpp"
+#include "gxf/multimedia/video.hpp"
+#include "gxf/std/allocator.hpp"
+#include "gxf/std/parameter_parser_std.hpp"
+#include "gxf/std/tensor.hpp"
+
+#include "cv/core/Image.h"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+
+// Enum class: gxf::Tensor and gxf::VideoBuffer
+enum class BufferType {
+  TENSOR,
+  VIDEO_BUFFER,
+};
+
+// Utility component for conversion between message and cvcore image type
+class ImageAdapter : public gxf::Component {
+public:
+  virtual ~ImageAdapter() = default;
+  ImageAdapter() = default;
+
+  gxf_result_t registerInterface(gxf::Registrar* registrar) override;
+  gxf_result_t initialize() override;
+
+  gxf::Expected<ImageInfo> GetImageInfo(const gxf::Entity& message, const char* name = nullptr);
+
+  template<::cvcore::ImageType T>
+  gxf::Expected<::cvcore::Image<T>> WrapImageFromMessage(const gxf::Entity& message, const char* name = nullptr) {
+    if (message_type_ == BufferType::TENSOR) {
+      auto tensor = message.get<gxf::Tensor>(name);
+      if (!tensor) {
+        return gxf::Unexpected{GXF_FAILURE};
+      }
+      return detail::WrapImageFromTensor<T>(tensor.value());
+    } else {
+      auto video_buffer = message.get<gxf::VideoBuffer>(name);
+      if (!video_buffer) {
+        return gxf::Unexpected{GXF_FAILURE};
+      }
+      return detail::WrapImageFromVideoBuffer<T>(video_buffer.value());
+    }
+  }
+
+  template<::cvcore::ImageType T>
+  gxf_result_t AddImageToMessage(gxf::Entity& message, size_t width, size_t height,
+                                 gxf::Handle<gxf::Allocator> allocator, bool is_cpu, const char* name = nullptr) {
+    if (message_type_ == BufferType::TENSOR) {
+      auto tensor = message.add<gxf::Tensor>(name);
+      if (!tensor) {
+        return GXF_FAILURE;
+      }
+      return detail::AllocateTensor<T>(tensor.value(), width, height, allocator, is_cpu, allocate_pitch_linear_.get());
+    } else {
+      auto video_buffer = message.add<gxf::VideoBuffer>(name);
+      if (!video_buffer) {
+        return GXF_FAILURE;
+      }
+      return detail::AllocateVideoBuffer<T>(video_buffer.value(), width, height, allocator, is_cpu,
+                                            allocate_pitch_linear_.get());
+    }
+  }
+
+private:
+  gxf::Parameter<std::string> message_type_param_;
+  gxf::Parameter<std::string> image_type_param_;
+  gxf::Parameter<bool> allocate_pitch_linear_;
+
+  ::cvcore::ImageType image_type_;
+  BufferType message_type_;
+};
+
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
+
+#endif
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.cpp
new file mode 100644
index 0000000..ffb5bcc
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.cpp
@@ -0,0 +1,175 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
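One note on the allocate_pitch_linear parameter threaded through ImageAdapter::AddImageToMessage above: pitch-linear surfaces pad each row out to an alignment boundary, so the row stride can exceed width * bytes_per_pixel. A sketch of the arithmetic (AlignedPitch is an illustrative name, and the 256-byte alignment is a typical GPU pitch, assumed here rather than stated anywhere in this patch):

#include <cstddef>

constexpr size_t kPitchAlignment = 256;  // assumed, illustrative only

// Row stride of a padded pitch-linear surface: round the packed row size
// up to the next multiple of the alignment.
size_t AlignedPitch(size_t width, size_t bytes_per_pixel) {
  const size_t row_bytes = width * bytes_per_pixel;
  return (row_bytes + kPitchAlignment - 1) / kPitchAlignment * kPitchAlignment;
}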
+// +// SPDX-License-Identifier: Apache-2.0 +#include "ImageUtils.hpp" + +#include +#include + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// helper function to match input image type string to cvcore::ImageType +gxf::Expected<::cvcore::ImageType> GetImageTypeFromString(const std::string& type) { + if (type == "Y_U8") { + return ::cvcore::ImageType::Y_U8; + } else if (type == "Y_U16") { + return ::cvcore::ImageType::Y_U16; + } else if (type == "Y_F32") { + return ::cvcore::ImageType::Y_F32; + } else if (type == "RGB_U8") { + return ::cvcore::ImageType::RGB_U8; + } else if (type == "RGB_U16") { + return ::cvcore::ImageType::RGB_U16; + } else if (type == "RGB_F32") { + return ::cvcore::ImageType::RGB_F32; + } else if (type == "BGR_U8") { + return ::cvcore::ImageType::BGR_U8; + } else if (type == "BGR_U16") { + return ::cvcore::ImageType::BGR_U16; + } else if (type == "BGR_F32") { + return ::cvcore::ImageType::BGR_F32; + } else if (type == "PLANAR_RGB_U8") { + return ::cvcore::ImageType::PLANAR_RGB_U8; + } else if (type == "PLANAR_RGB_U16") { + return ::cvcore::ImageType::PLANAR_RGB_U16; + } else if (type == "PLANAR_RGB_F32") { + return ::cvcore::ImageType::PLANAR_RGB_F32; + } else if (type == "PLANAR_BGR_U8") { + return ::cvcore::ImageType::PLANAR_BGR_U8; + } else if (type == "PLANAR_BGR_U16") { + return ::cvcore::ImageType::PLANAR_BGR_U16; + } else if (type == "PLANAR_BGR_F32") { + return ::cvcore::ImageType::PLANAR_BGR_F32; + } else if (type == "NV12") { + return ::cvcore::ImageType::NV12; + } else if (type == "NV24") { + return ::cvcore::ImageType::NV24; + } else { + GXF_LOG_ERROR("invalid image type."); + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected<::cvcore::tensor_ops::InterpolationType> GetInterpolationType(const std::string& type) { + if (type == "nearest") { + return ::cvcore::tensor_ops::InterpolationType::INTERP_NEAREST; + } else if (type == "linear") { + return ::cvcore::tensor_ops::InterpolationType::INTERP_LINEAR; + } else if (type == "cubic_bspline") { + return ::cvcore::tensor_ops::InterpolationType::INTERP_CUBIC_BSPLINE; + } else if (type == "cubic_catmullrom") { + return ::cvcore::tensor_ops::InterpolationType::INTERP_CUBIC_CATMULLROM; + } else { + GXF_LOG_ERROR("invalid interpolation type."); + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected<::cvcore::tensor_ops::BorderType> GetBorderType(const std::string& type) { + if (type == "zero") { + return ::cvcore::tensor_ops::BorderType::BORDER_ZERO; + } else if (type == "repeat") { + return ::cvcore::tensor_ops::BorderType::BORDER_REPEAT; + } else if (type == "reverse") { + return ::cvcore::tensor_ops::BorderType::BORDER_REVERSE; + } else if (type == "mirror") { + return ::cvcore::tensor_ops::BorderType::BORDER_MIRROR; + } else { + GXF_LOG_ERROR("invalid border type."); + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected<::cvcore::CameraDistortionType> GetCameraDistortionType(gxf::DistortionType type) { + switch (type) { + case gxf::DistortionType::Perspective: + return ::cvcore::CameraDistortionType::NONE; + case gxf::DistortionType::Polynomial: + return ::cvcore::CameraDistortionType::Polynomial; + case gxf::DistortionType::FisheyeEquidistant: + return ::cvcore::CameraDistortionType::FisheyeEquidistant; + case gxf::DistortionType::FisheyeEquisolid: + return ::cvcore::CameraDistortionType::FisheyeEquisolid; + case gxf::DistortionType::FisheyeOrthoGraphic: + return ::cvcore::CameraDistortionType::FisheyeOrthoGraphic; + case gxf::DistortionType::FisheyeStereographic: 
+ return ::cvcore::CameraDistortionType::FisheyeStereographic; + default: + GXF_LOG_ERROR("invalid distortion type."); + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected GetDistortionType(::cvcore::CameraDistortionType type) { + switch (type) { + case ::cvcore::CameraDistortionType::Polynomial: + return gxf::DistortionType::Polynomial; + case ::cvcore::CameraDistortionType::FisheyeEquidistant: + return gxf::DistortionType::FisheyeEquidistant; + case ::cvcore::CameraDistortionType::FisheyeEquisolid: + return gxf::DistortionType::FisheyeEquisolid; + case ::cvcore::CameraDistortionType::FisheyeOrthoGraphic: + return gxf::DistortionType::FisheyeOrthoGraphic; + case ::cvcore::CameraDistortionType::FisheyeStereographic: + return gxf::DistortionType::FisheyeStereographic; + default: + GXF_LOG_ERROR("invalid distortion type."); + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected GetCroppedCameraModel(const gxf::CameraModel& input, const ::cvcore::BBox& roi) { + if (!roi.isValid()) { + return gxf::Unexpected{GXF_FAILURE}; + } + gxf::CameraModel camera; + const size_t output_width = roi.xmax - roi.xmin; + const size_t output_height = roi.ymax - roi.ymin; + camera.dimensions = {static_cast(output_width), static_cast(output_height)}; + camera.focal_length = input.focal_length; + // We will keep the relative principal point location unchanged for cropping; + camera.principal_point = {input.principal_point.x / input.dimensions.x * output_width, + input.principal_point.y / input.dimensions.y * output_height}, + camera.skew_value = input.skew_value; + camera.distortion_type = input.distortion_type; + std::copy(std::begin(input.distortion_coefficients), std::end(input.distortion_coefficients), + std::begin(camera.distortion_coefficients)); + return camera; +} + +gxf::Expected GetScaledCameraModel(const gxf::CameraModel& input, size_t output_width, + size_t output_height, bool keep_aspect_ratio) { + gxf::CameraModel camera; + const float scaler_x = static_cast(output_width) / input.dimensions.x; + const float scaler_y = static_cast(output_height) / input.dimensions.y; + const float min_scaler = std::min(scaler_x, scaler_y); + camera.dimensions = {static_cast(output_width), static_cast(output_height)}; + camera.focal_length = keep_aspect_ratio + ? nvidia::gxf::Vector2f{min_scaler * input.focal_length.x, min_scaler * input.focal_length.y} + : nvidia::gxf::Vector2f{scaler_x * input.focal_length.x, scaler_y * input.focal_length.y}; + camera.principal_point = {scaler_x * input.principal_point.x, scaler_y * input.principal_point.y}, + camera.skew_value = input.skew_value; + camera.distortion_type = input.distortion_type; + std::copy(std::begin(input.distortion_coefficients), std::end(input.distortion_coefficients), + std::begin(camera.distortion_coefficients)); + return camera; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.hpp new file mode 100644 index 0000000..d052827 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/ImageUtils.hpp @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_IMAGE_UTILS_HPP +#define NVIDIA_CVCORE_IMAGE_UTILS_HPP + +#include "cv/core/BBox.h" +#include "cv/core/CameraModel.h" +#include "cv/core/Image.h" +#include "cv/tensor_ops/ImageUtils.h" +#include "gxf/core/expected.hpp" +#include "gxf/multimedia/camera.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Description of Image +struct ImageInfo { + ::cvcore::ImageType type; + size_t width; + size_t height; + bool is_cpu; +}; + +// helper function to match input image type string to cvcore::ImageType +gxf::Expected<::cvcore::ImageType> GetImageTypeFromString(const std::string& type); + +// Helper function to get the interpolation type +gxf::Expected<::cvcore::tensor_ops::InterpolationType> GetInterpolationType(const std::string& type); + +// Helper function to get the border type +gxf::Expected<::cvcore::tensor_ops::BorderType> GetBorderType(const std::string& type); + +// Helper function to get the cvcore camera distortion type +gxf::Expected<::cvcore::CameraDistortionType> GetCameraDistortionType(gxf::DistortionType type); + +// Helper function to get the gxf distortion type +gxf::Expected GetDistortionType(::cvcore::CameraDistortionType type); + +// Helper function to get the new camera model after applying crop operation +gxf::Expected GetCroppedCameraModel(const gxf::CameraModel& input, const ::cvcore::BBox& roi); + +// Helper function to get the new camera model after applying scale operation +gxf::Expected GetScaledCameraModel(const gxf::CameraModel& input, size_t output_width, + size_t output_height, bool keep_aspect_ratio); + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.cpp new file mode 100644 index 0000000..7b63825 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.cpp @@ -0,0 +1,146 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
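To make the camera-model helpers declared above concrete: GetScaledCameraModel (implemented earlier in this patch) scales focal length and principal point by the resize factors, substituting min(sx, sy) for the focal-length factor when keep_aspect_ratio is set. The same arithmetic as a standalone sketch (Intrinsics and ScaleIntrinsics are illustrative names):

#include <algorithm>

struct Intrinsics { float fx, fy, cx, cy; };

// Resizing from (in_w, in_h) to (out_w, out_h) scales fx/cx by sx and fy/cy
// by sy; keep_aspect_ratio swaps in min(sx, sy) for the focal-length scale,
// while the principal point always uses sx/sy, matching the helper above.
Intrinsics ScaleIntrinsics(const Intrinsics& in, float in_w, float in_h,
                           float out_w, float out_h, bool keep_aspect_ratio) {
  const float sx = out_w / in_w;
  const float sy = out_h / in_h;
  const float s = std::min(sx, sy);
  return {keep_aspect_ratio ? s * in.fx : sx * in.fx,
          keep_aspect_ratio ? s * in.fy : sy * in.fy,
          sx * in.cx, sy * in.cy};
}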
+// +// SPDX-License-Identifier: Apache-2.0 +#include "InterleavedToPlanar.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +template<::cvcore::ImageType T_IN, ::cvcore::ImageType T_OUT> +gxf_result_t InterleavedToPlanarImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle output_adapter, gxf::Handle input_adapter, + gxf::Handle allocator, cudaStream_t stream) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + ::cvcore::tensor_ops::InterleavedToPlanar(output_image.value(), input_image.value(), stream); + return GXF_SUCCESS; +} + +} // namespace detail + +gxf_result_t InterleavedToPlanar::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +gxf::Expected InterleavedToPlanar::doInferOutputInfo(gxf::Entity& input) { + // Output type is planar + ::cvcore::ImageType output_type; + switch (input_info_.type) { + case ::cvcore::ImageType::RGB_U8: { + output_type = ::cvcore::ImageType::PLANAR_RGB_U8; + break; + } + case ::cvcore::ImageType::RGB_U16: { + output_type = ::cvcore::ImageType::PLANAR_RGB_U16; + break; + } + case ::cvcore::ImageType::RGB_F32: { + output_type = ::cvcore::ImageType::PLANAR_RGB_F32; + break; + } + case ::cvcore::ImageType::BGR_U8: { + output_type = ::cvcore::ImageType::PLANAR_BGR_U8; + break; + } + case ::cvcore::ImageType::BGR_U16: { + output_type = ::cvcore::ImageType::PLANAR_BGR_U16; + break; + } + case ::cvcore::ImageType::BGR_F32: { + output_type = ::cvcore::ImageType::PLANAR_BGR_F32; + break; + } + case ::cvcore::ImageType::PLANAR_RGB_U8: + case ::cvcore::ImageType::PLANAR_RGB_U16: + case ::cvcore::ImageType::PLANAR_RGB_F32: + case ::cvcore::ImageType::PLANAR_BGR_U8: + case ::cvcore::ImageType::PLANAR_BGR_U16: + case ::cvcore::ImageType::PLANAR_BGR_F32: { + output_type = input_info_.type; + no_op_ = true; + break; + } + default: { + GXF_LOG_ERROR("invalid input type for interleaved to planar conversion."); + return gxf::Unexpected{GXF_FAILURE}; + } + } + return ImageInfo{output_type, input_info_.width, input_info_.height, input_info_.is_cpu}; +} + +gxf_result_t InterleavedToPlanar::doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& 
input) { + *output = *input; + return GXF_SUCCESS; +} + +#define DEFINE_INTERLEAVED_TO_PLANAR(INPUT_TYPE, OUTPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE && output_info_.type == OUTPUT_TYPE) { \ + return detail::InterleavedToPlanarImpl(output, input, output_info_, input_info_, \ + output_name, input_name, output_adapter_.get(), \ + input_adapter_.get(), pool_.get(), stream); \ + } + +gxf_result_t InterleavedToPlanar::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, + const char* output_name, const char* input_name) { + GXF_LOG_INFO("execute interleaved_to_planar conversion"); + // Run the interleaved to planar operation + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::PLANAR_RGB_U8); + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::RGB_U16, ::cvcore::ImageType::PLANAR_RGB_U16); + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::RGB_F32, ::cvcore::ImageType::PLANAR_RGB_F32); + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::PLANAR_BGR_U8); + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::BGR_U16, ::cvcore::ImageType::PLANAR_BGR_U16); + DEFINE_INTERLEAVED_TO_PLANAR(::cvcore::ImageType::BGR_F32, ::cvcore::ImageType::PLANAR_BGR_F32); + + // Return error code for unsupported type + GXF_LOG_ERROR("invalid input/output type for image interleaved to planar conversion."); + return GXF_FAILURE; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.hpp new file mode 100644 index 0000000..2e1efbe --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/InterleavedToPlanar.hpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_INTERLEAVED_TO_PLANAR_HPP +#define NVIDIA_CVCORE_INTERLEAVED_TO_PLANAR_HPP + +#include "TensorOperator.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// InterleavedToPlanar operator. 
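Before the class declaration that follows, a sketch of the memory transform this operator performs: interleaved HWC storage (RGBRGB...) becomes planar CHW storage (RR..GG..BB..). The reference loop below is a plain CPU restatement with illustrative names; the operator itself delegates to ::cvcore::tensor_ops::InterleavedToPlanar on a CUDA stream:

#include <cstddef>
#include <cstdint>

// Reference interleaved-to-planar copy: channel c of pixel (x, y) moves from
// src[(y*W + x)*C + c] to plane c of dst at dst[c*H*W + y*W + x].
void InterleavedToPlanarCopy(const uint8_t* src, uint8_t* dst,
                             size_t width, size_t height, size_t channels) {
  for (size_t y = 0; y < height; y++) {
    for (size_t x = 0; x < width; x++) {
      for (size_t c = 0; c < channels; c++) {
        dst[c * height * width + y * width + x] = src[(y * width + x) * channels + c];
      }
    }
  }
}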
+class InterleavedToPlanar : public TensorOperator { +public: + virtual ~InterleavedToPlanar() {} + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + +private: + gxf::Expected doInferOutputInfo(gxf::Entity& input) override final; + gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) override final; + gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) override final; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.cpp new file mode 100644 index 0000000..2429abb --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.cpp @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#include "Normalize.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +template<::cvcore::ImageType T_IN, ::cvcore::ImageType T_OUT> +gxf_result_t NormalizeC1Impl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle output_adapter, gxf::Handle input_adapter, + gxf::Handle allocator, const std::vector& scales, + const std::vector& offsets, cudaStream_t stream) { + if (scales.size() != 1 || offsets.size() != 1) { + GXF_LOG_ERROR("invalid scales/offsets dimension"); + return GXF_FAILURE; + } + + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + ::cvcore::tensor_ops::Normalize(output_image.value(), input_image.value(), scales[0], offsets[0], stream); + return GXF_SUCCESS; +} + +template<::cvcore::ImageType T_IN, ::cvcore::ImageType T_OUT> +gxf_result_t NormalizeC3Impl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle output_adapter, gxf::Handle input_adapter, + gxf::Handle allocator, const std::vector& scales, + const std::vector& offsets, cudaStream_t stream) { + if (scales.size() != 3 || offsets.size() != 3) { + GXF_LOG_ERROR("invalid scales/offsets dimension"); + return GXF_FAILURE; + } + + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = 
output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + const float scales_value[3] = {scales[0], scales[1], scales[2]}; + const float offsets_value[3] = {offsets[0], offsets[1], offsets[2]}; + ::cvcore::tensor_ops::Normalize(output_image.value(), input_image.value(), scales_value, offsets_value, stream); + return GXF_SUCCESS; +} + +} // namespace detail + +gxf_result_t Normalize::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(scales_, "scales"); + result &= registrar->parameter(offsets_, "offsets"); + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +gxf::Expected Normalize::doInferOutputInfo(gxf::Entity& input) { + // Output type is F32 + ::cvcore::ImageType output_type; + switch (input_info_.type) { + case ::cvcore::ImageType::Y_U8: + case ::cvcore::ImageType::Y_U16: + case ::cvcore::ImageType::Y_F32: { + output_type = ::cvcore::ImageType::Y_F32; + break; + } + case ::cvcore::ImageType::RGB_U8: + case ::cvcore::ImageType::RGB_U16: + case ::cvcore::ImageType::RGB_F32: { + output_type = ::cvcore::ImageType::RGB_F32; + break; + } + case ::cvcore::ImageType::BGR_U8: + case ::cvcore::ImageType::BGR_U16: + case ::cvcore::ImageType::BGR_F32: { + output_type = ::cvcore::ImageType::BGR_F32; + break; + } + default: { + GXF_LOG_ERROR("invalid input type for normalize."); + return gxf::Unexpected{GXF_FAILURE}; + } + } + // Operation must be performed under any condition + no_op_ = false; + return ImageInfo{output_type, input_info_.width, input_info_.height, input_info_.is_cpu}; +} + +gxf_result_t Normalize::doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) { + *output = *input; + return GXF_SUCCESS; +} + +#define DEFINE_NORMALIZE_C1(INPUT_TYPE, OUTPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE && output_info_.type == OUTPUT_TYPE) { \ + return detail::NormalizeC1Impl(output, input, output_info_, input_info_, output_name, \ + input_name, output_adapter_.get(), input_adapter_.get(), \ + pool_.get(), scales_.get(), offsets_.get(), stream); \ + } + +#define DEFINE_NORMALIZE_C3(INPUT_TYPE, OUTPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE && output_info_.type == OUTPUT_TYPE) { \ + return detail::NormalizeC3Impl(output, input, output_info_, input_info_, output_name, \ + input_name, output_adapter_.get(), input_adapter_.get(), \ + pool_.get(), scales_.get(), offsets_.get(), stream); \ + } + +gxf_result_t Normalize::doExecute(gxf::Entity& output, gxf::Entity& input, 
cudaStream_t stream, const char* output_name, + const char* input_name) { + GXF_LOG_INFO("execute normalize"); + + // Run the image normalization operation + DEFINE_NORMALIZE_C1(::cvcore::ImageType::Y_U8, ::cvcore::ImageType::Y_F32); + DEFINE_NORMALIZE_C1(::cvcore::ImageType::Y_U16, ::cvcore::ImageType::Y_F32); + DEFINE_NORMALIZE_C1(::cvcore::ImageType::Y_F32, ::cvcore::ImageType::Y_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::RGB_U8, ::cvcore::ImageType::RGB_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::RGB_U16, ::cvcore::ImageType::RGB_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::RGB_F32, ::cvcore::ImageType::RGB_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::BGR_U8, ::cvcore::ImageType::BGR_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::BGR_U16, ::cvcore::ImageType::BGR_F32); + DEFINE_NORMALIZE_C3(::cvcore::ImageType::BGR_F32, ::cvcore::ImageType::BGR_F32); + + // Return error code for unsupported type + GXF_LOG_ERROR("invalid input/output type for image normalize."); + return GXF_FAILURE; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.hpp new file mode 100644 index 0000000..efb735a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Normalize.hpp @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_NORMALIZE_HPP +#define NVIDIA_CVCORE_NORMALIZE_HPP + +#include "TensorOperator.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Normalization operator. +class Normalize : public TensorOperator { +public: + virtual ~Normalize() {} + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + +private: + gxf::Expected doInferOutputInfo(gxf::Entity& input) override final; + gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) override final; + gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) override final; + + gxf::Parameter> scales_; + gxf::Parameter> offsets_; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.cpp new file mode 100644 index 0000000..322b843 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.cpp @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
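An aside on the Normalize operator completed above: per element it applies the scales/offsets pair and always promotes the result to F32. Assuming the conventional out = in * scale + offset composition (the order of operations is not visible in this patch), the per-element math is:

// Per-element normalization under the assumed out = in * scale + offset
// convention; e.g. scale = 1/255 and offset = 0 maps U8 data into [0, 1].
inline float NormalizeValue(float in, float scale, float offset) {
  return in * scale + offset;
}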
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#include "Reshape.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +gxf_result_t Reshape::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(output_shape_, "output_shape"); + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +gxf_result_t Reshape::doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) { + *output = *input; + return GXF_SUCCESS; +} + +gxf_result_t Reshape::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) { + GXF_LOG_INFO("execute reshape."); + + auto input_tensor = input.get(input_name); + if (!input_tensor) { + GXF_LOG_ERROR("input message does not contain Tensor"); + return input_tensor.error(); + } + + auto output_tensor = output.add(output_name); + if (!output_tensor) { + GXF_LOG_ERROR("unable to add output Tensor"); + return output_tensor.error(); + } + + const auto& input_shape = input_tensor.value()->shape(); + const std::vector& output_shape_arr = output_shape_; + std::array dims = {}; + std::copy(output_shape_arr.begin(), output_shape_arr.end(), dims.begin()); + const auto output_shape = gxf::Shape(dims, output_shape_arr.size()); + + if (output_shape.size() != input_shape.size()) { + GXF_LOG_ERROR("reshape size mismatch."); + return GXF_FAILURE; + } + + auto result = output_tensor.value()->reshapeCustom( + output_shape, input_tensor.value()->element_type(), gxf::PrimitiveTypeSize(input_tensor.value()->element_type()), + gxf::Unexpected{GXF_UNINITIALIZED_VALUE}, input_tensor.value()->storage_type(), pool_.get()); + + if (!result) { + GXF_LOG_ERROR("reshape tensor failed."); + return result.error(); + } + + // Simply copy the memory + if (input_tensor.value()->storage_type() == gxf::MemoryStorageType::kDevice) { + cudaError_t error = cudaMemcpyAsync(output_tensor.value()->pointer(), input_tensor.value()->pointer(), + input_tensor.value()->size(), cudaMemcpyDeviceToDevice, stream); + if (error != cudaSuccess) { + GXF_LOG_ERROR("cudaMemcpyAsync returned error code"); + return GXF_FAILURE; + } + } else { + memcpy(output_tensor.value()->pointer(), input_tensor.value()->pointer(), input_tensor.value()->size()); + } + return GXF_SUCCESS; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.hpp 
b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.hpp new file mode 100644 index 0000000..c6f1c6f --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Reshape.hpp @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_RESHAPE_HPP +#define NVIDIA_CVCORE_RESHAPE_HPP + +#include "TensorOperator.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Reshaping operator (only valid for gxf::Tensor). +class Reshape : public TensorOperator { +public: + virtual ~Reshape() {} + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + +private: + gxf::Expected doInferOutputInfo(gxf::Entity& input) override final { + no_op_ = false; + return ImageInfo{}; + }; + gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) override final; + gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) override final; + + gxf::Parameter> output_shape_; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.cpp new file mode 100644 index 0000000..943ac5a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.cpp @@ -0,0 +1,194 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
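Reshape's doExecute (above) enforces that the requested shape holds exactly as many elements as the input (gxf::Shape::size() is the element count) and then moves the payload with a flat copy; only the shape metadata changes. A minimal sketch of that contract with plain C++ stand-ins, no GXF types involved:

```cpp
// Reshape = same element count, different dims, raw byte copy.
#include <cstring>
#include <functional>
#include <numeric>
#include <vector>

bool Reshape(const std::vector<int>& in_dims, const std::vector<int>& out_dims,
             const float* src, float* dst) {
  auto count = [](const std::vector<int>& d) {
    return std::accumulate(d.begin(), d.end(), 1L, std::multiplies<long>());
  };
  if (count(in_dims) != count(out_dims)) {
    return false;  // "reshape size mismatch."
  }
  // Equivalent of the cudaMemcpyAsync / memcpy branch above.
  std::memcpy(dst, src, count(in_dims) * sizeof(float));
  return true;
}

int main() {
  std::vector<float> a(6, 1.f), b(6);
  return Reshape({2, 3}, {3, 2}, a.data(), b.data()) ? 0 : 1;
}
```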
+// +// SPDX-License-Identifier: Apache-2.0 +#include "Resize.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +template<::cvcore::ImageType T> +gxf_result_t ResizeImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle output_adapter, gxf::Handle input_adapter, + gxf::Handle allocator, bool keep_aspect_ratio, + ::cvcore::tensor_ops::InterpolationType interp_type, cudaStream_t stream) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + ::cvcore::tensor_ops::Resize(output_image.value(), input_image.value(), keep_aspect_ratio, interp_type, stream); + return GXF_SUCCESS; +} + +template<::cvcore::ImageType T> +gxf_result_t ResizeStreamImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle stream, gxf::Handle output_adapter, + gxf::Handle input_adapter, gxf::Handle allocator, + ::cvcore::tensor_ops::InterpolationType interp_type, + ::cvcore::tensor_ops::BorderType border_type) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + + auto err_code = stream->getStream()->Resize(output_image.value(), input_image.value(), interp_type, border_type); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("resize operation failed."); + return GXF_FAILURE; + } + + return GXF_SUCCESS; +} + +} // namespace detail + +template +gxf_result_t ResizeBase::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(output_width_, "output_width"); + result &= registrar->parameter(output_height_, "output_height"); + result &= registrar->parameter(interp_type_, "interp_type"); + result &= registrar->parameter(border_type_, "border_type"); + result &= registrar->parameter(keep_aspect_ratio_, "keep_aspect_ratio"); + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(stream_, "stream", "tensor stream", "tensor stream object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + 
gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +template +gxf::Expected ResizeBase::doInferOutputInfo(gxf::Entity& input) { + // Check if no-op is needed + no_op_ = output_width_.get() == input_info_.width && output_height_.get() == input_info_.height; + return ImageInfo{input_info_.type, output_width_.get(), output_height_.get(), input_info_.is_cpu}; +} + +template +gxf_result_t ResizeBase::doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) { + *output = GetScaledCameraModel(*input, output_info_.width, output_info_.height, keep_aspect_ratio_.get()).value(); + return GXF_SUCCESS; +} + +#define DEFINE_RESIZE(INPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE) { \ + return detail::ResizeImpl(output, input, output_info_, input_info_, output_name, input_name, \ + output_adapter_.get(), input_adapter_.get(), pool_.get(), \ + keep_aspect_ratio_.get(), interp.value(), stream); \ + } + +#define DEFINE_STREAM_RESIZE(INPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE) { \ + return detail::ResizeStreamImpl(output, input, output_info_, input_info_, output_name, input_name, \ + stream_.try_get().value(), output_adapter_.get(), \ + input_adapter_.get(), pool_.get(), interp.value(), border.value()); \ + } + +template<> +gxf_result_t ResizeBase::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, + const char* output_name, const char* input_name) { + GXF_LOG_INFO("execute resize."); + // Check if interpolation type is valid + auto interp = GetInterpolationType(interp_type_); + if (!interp) { + return interp.error(); + } + auto border = GetBorderType(border_type_); + if (!border) { + return border.error(); + } + + // Run the image resizing operation + DEFINE_STREAM_RESIZE(::cvcore::ImageType::RGB_U8); + DEFINE_STREAM_RESIZE(::cvcore::ImageType::BGR_U8); + DEFINE_STREAM_RESIZE(::cvcore::ImageType::NV12); + DEFINE_STREAM_RESIZE(::cvcore::ImageType::NV24); + + // Return error code for unsupported type + GXF_LOG_ERROR("invalid input/output type for image resize."); + return GXF_FAILURE; +} + +template<> +gxf_result_t ResizeBase::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, + const char* output_name, const char* input_name) { + GXF_LOG_INFO("execute resize."); + // Check if interpolation type is valid + auto interp = GetInterpolationType(interp_type_); + if (!interp) { + return interp.error(); + } + + // Run the image resizing operation + DEFINE_RESIZE(::cvcore::ImageType::Y_U8); + DEFINE_RESIZE(::cvcore::ImageType::Y_U16); + DEFINE_RESIZE(::cvcore::ImageType::Y_F32); + DEFINE_RESIZE(::cvcore::ImageType::RGB_U8); + DEFINE_RESIZE(::cvcore::ImageType::RGB_U16); + DEFINE_RESIZE(::cvcore::ImageType::RGB_F32); + DEFINE_RESIZE(::cvcore::ImageType::BGR_U8); + DEFINE_RESIZE(::cvcore::ImageType::BGR_U16); + DEFINE_RESIZE(::cvcore::ImageType::BGR_F32); + + // Return error code for unsupported type + GXF_LOG_ERROR("invalid input/output type for image resize."); + return GXF_FAILURE; +} + +template class ResizeBase; +template class ResizeBase; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.hpp new file mode 100644 index 0000000..6771e6e --- /dev/null +++ 
b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Resize.hpp @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_RESIZE_HPP +#define NVIDIA_CVCORE_RESIZE_HPP + +#include "TensorOperator.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Resizing operator. +template +class ResizeBase : public TensorOperator { +public: + virtual ~ResizeBase() {} + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + +private: + gxf::Expected doInferOutputInfo(gxf::Entity& input) override final; + gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) override final; + gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) override final; + + gxf::Parameter output_width_; + gxf::Parameter output_height_; + gxf::Parameter interp_type_; + gxf::Parameter border_type_; + gxf::Parameter keep_aspect_ratio_; +}; + +class Resize : public ResizeBase {}; +class StreamResize : public ResizeBase {}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.cpp new file mode 100644 index 0000000..294fb06 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.cpp @@ -0,0 +1,235 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
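ResizeBase is explicitly instantiated twice at the bottom of Resize.cpp; the template arguments are stripped in this rendering of the patch, but the two doExecute specializations suggest a boolean parameter selecting between the tensor-stream (VPI) path and the direct NPP/CUDA-stream path. A sketch under that assumption, with dummy types in place of the GXF machinery:

```cpp
// Compile-time backend selection via a bool template parameter (assumed).
#include <cstdio>

template <bool USE_TENSOR_STREAM>
class ResizeBase {
 public:
  int doExecute();
};

template <>
int ResizeBase<true>::doExecute() {
  std::printf("stream (VPI) resize path\n");
  return 0;
}

template <>
int ResizeBase<false>::doExecute() {
  std::printf("direct (NPP/CUDA) resize path\n");
  return 0;
}

class Resize : public ResizeBase<false> {};
class StreamResize : public ResizeBase<true> {};

// Explicit instantiation keeps both definitions in this translation unit.
template class ResizeBase<true>;
template class ResizeBase<false>;

int main() {
  Resize r;
  StreamResize s;
  return r.doExecute() + s.doExecute();
}
```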
+// +// SPDX-License-Identifier: Apache-2.0 +#include "TensorOperator.hpp" + +#include "gxf/std/timestamp.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +// Function to bind a cuda stream with cid into downstream message +gxf_result_t BindCudaStream(gxf::Entity& message, gxf_uid_t cid) { + if (cid == kNullUid) { + GXF_LOG_ERROR("stream_cid is null"); + return GXF_FAILURE; + } + auto output_stream_id = message.add("stream"); + if (!output_stream_id) { + GXF_LOG_ERROR("failed to add cudastreamid."); + return GXF_FAILURE; + } + output_stream_id.value()->stream_cid = cid; + return GXF_SUCCESS; +} + +// Function to record a new cuda event +gxf_result_t RecordCudaEvent(gxf::Entity& message, gxf::Handle& stream) { + // Create a new event + cudaEvent_t cuda_event; + cudaEventCreateWithFlags(&cuda_event, 0); + gxf::CudaEvent event; + auto ret = event.initWithEvent(cuda_event, stream->dev_id(), [](auto) {}); + if (!ret) { + GXF_LOG_ERROR("failed to init cuda event"); + return GXF_FAILURE; + } + // Record the event + // Can define []() { GXF_LOG_DEBUG("tensorops event synced"); } as callback func for debug purpose + ret = stream->record(event.event().value(), + [event = cuda_event, entity = message.clone().value()](auto) { cudaEventDestroy(event); }); + if (!ret) { + GXF_LOG_ERROR("record event failed"); + return ret.error(); + } + return GXF_SUCCESS; +} + +template +gxf_result_t RerouteMessage(gxf::Entity& output, gxf::Entity& input, + std::function, gxf::Handle)> func, + const char* name = nullptr) { + auto maybe_component = input.get(); + if (maybe_component) { + auto output_component = output.add(name != nullptr ? name : maybe_component.value().name()); + if (!output_component) { + GXF_LOG_ERROR("add output component failed."); + return output_component.error(); + } + return func(output_component.value(), maybe_component.value()); + } + return GXF_SUCCESS; +} + +} // namespace detail + +gxf_result_t TensorOperator::inferOutputInfo(gxf::Entity& input) { + const char* input_name = input_name_.try_get() ? input_name_.try_get().value().c_str() : nullptr; + auto input_info = input_adapter_.get()->GetImageInfo(input, input_name); + if (!input_info) { + return input_info.error(); + } + input_info_ = input_info.value(); + auto output_info = doInferOutputInfo(input); + if (!output_info) { + return output_info.error(); + } + output_info_ = output_info.value(); + return GXF_SUCCESS; +} + +gxf_result_t TensorOperator::updateCameraMessage(gxf::Handle& output, + gxf::Handle& input) { + return doUpdateCameraMessage(output, input); +} + +gxf_result_t TensorOperator::execute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream) { + const char* output_name = output_name_.try_get() ? output_name_.try_get().value().c_str() : nullptr; + const char* input_name = input_name_.try_get() ? 
input_name_.try_get().value().c_str() : nullptr; + return doExecute(output, input, stream, output_name, input_name); +} + +gxf_result_t TensorOperator::start() { + // Allocate cuda stream using stream pool if necessary + if (stream_pool_.try_get()) { + auto stream = stream_pool_.try_get().value()->allocateStream(); + if (!stream) { + GXF_LOG_ERROR("allocating stream failed."); + return GXF_FAILURE; + } + cuda_stream_ptr_ = std::move(stream.value()); + if (!cuda_stream_ptr_->stream()) { + GXF_LOG_ERROR("allocated stream is not initialized."); + return GXF_FAILURE; + } + } + return GXF_SUCCESS; +} + +gxf_result_t TensorOperator::tick() { + // Receive the data + auto input_message = receiver_->receive(); + // Check received message for errors + if (!input_message) { + return input_message.error(); + } + // Infer the output ImageInfo and whether this is a no-op + auto error = inferOutputInfo(input_message.value()); + if (error != GXF_SUCCESS) { + return error; + } + // Redirect the input message if this is a no-op + if (no_op_) { + transmitter_->publish(input_message.value()); + return GXF_SUCCESS; + } + // Create output message + gxf::Expected output_message = gxf::Entity::New(context()); + if (!output_message) { + return output_message.error(); + } + // Pass through the timestamp if present in the input message + error = + detail::RerouteMessage(output_message.value(), input_message.value(), + [](gxf::Handle output, gxf::Handle input) { + *output = *input; + return GXF_SUCCESS; + }); + if (error != GXF_SUCCESS) { + return error; + } + // Pass through cudaStreamId or create a new cuda stream for the NPP backend only + cudaStream_t cuda_stream = 0; // default stream + if (!stream_.try_get()) { + // Allocate new CudaStream if StreamPool attached + if (stream_pool_.try_get()) { + cuda_stream = cuda_stream_ptr_->stream().value(); + if (detail::BindCudaStream(output_message.value(), cuda_stream_ptr_.cid()) != GXF_SUCCESS) { + return GXF_FAILURE; + } + } + auto input_stream_id = input_message.value().get(); + if (input_stream_id) { + auto stream = + gxf::Handle::Create(input_stream_id.value().context(), input_stream_id.value()->stream_cid); + if (!stream) { + GXF_LOG_ERROR("create cudastream from cid failed."); + return GXF_FAILURE; + } + if (stream_pool_.try_get()) { + // sync the upstream input cuda stream + if (!stream.value()->syncStream()) { + GXF_LOG_ERROR("sync stream failed."); + return GXF_FAILURE; + } + } else { + cuda_stream = stream.value()->stream().value(); + if (detail::BindCudaStream(output_message.value(), stream.value().cid()) != GXF_SUCCESS) { + return GXF_FAILURE; + } + cuda_stream_ptr_ = stream.value(); + } + } + } + // Execute the operation + error = execute(output_message.value(), input_message.value(), cuda_stream); + if (error != GXF_SUCCESS) { + GXF_LOG_ERROR("operation failed."); + return GXF_FAILURE; + } + // Record the cuda event if necessary + if (!cuda_stream_ptr_.is_null()) { + // record on both input/output streams + if (detail::RecordCudaEvent(input_message.value(), cuda_stream_ptr_) != GXF_SUCCESS) { + return GXF_FAILURE; + } + if (detail::RecordCudaEvent(output_message.value(), cuda_stream_ptr_) != GXF_SUCCESS) { + return GXF_FAILURE; + } + } + // Update output camera message if necessary + error = detail::RerouteMessage( + output_message.value(), input_message.value(), + [this](gxf::Handle output, gxf::Handle input) { + return updateCameraMessage(output, input); + }, + "camera"); + if (error != GXF_SUCCESS) { + return error; + } + // Pass through pose3d message if necessary + error =
detail::RerouteMessage( + output_message.value(), input_message.value(), + [](gxf::Handle output, gxf::Handle input) { + *output = *input; + return GXF_SUCCESS; + }, + "pose"); + if (error != GXF_SUCCESS) { + return error; + } + // Send the processed data + transmitter_->publish(output_message.value()); + + return GXF_SUCCESS; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.hpp new file mode 100644 index 0000000..4c65d47 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOperator.hpp @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_TENSOR_OPERATOR_HPP +#define NVIDIA_CVCORE_TENSOR_OPERATOR_HPP + +#include "ImageAdapter.hpp" +#include "ImageUtils.hpp" +#include "TensorStream.hpp" + +#include "gxf/cuda/cuda_stream.hpp" +#include "gxf/cuda/cuda_stream_id.hpp" +#include "gxf/cuda/cuda_stream_pool.hpp" +#include "gxf/std/allocator.hpp" +#include "gxf/std/codelet.hpp" +#include "gxf/std/parameter_parser_std.hpp" +#include "gxf/std/receiver.hpp" +#include "gxf/std/tensor.hpp" +#include "gxf/std/transmitter.hpp" + +#include "cv/core/Image.h" +#include "cv/core/Tensor.h" +#include "cv/tensor_ops/ImageUtils.h" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Base class for all tensor_ops operators +class TensorOperator : public gxf::Codelet {public: + virtual ~TensorOperator() = default; + + gxf_result_t inferOutputInfo(gxf::Entity& input); + + gxf_result_t updateCameraMessage(gxf::Handle& output, gxf::Handle& input); + + gxf_result_t execute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream); + + gxf_result_t start() override; + + gxf_result_t tick() override; + + virtual gxf_result_t stop() override { + return GXF_SUCCESS; + } + +protected: + gxf::Parameter> receiver_; + gxf::Parameter> transmitter_; + gxf::Parameter> pool_; + gxf::Parameter> stream_; + gxf::Parameter> stream_pool_; + gxf::Parameter> input_adapter_; + gxf::Parameter> output_adapter_; + gxf::Parameter input_name_; + gxf::Parameter output_name_; + + // Input image info + ImageInfo input_info_; + // Output image info + ImageInfo output_info_; + // Whether to skip the operation (false by default) + bool no_op_ = false; + +private: + virtual gxf::Expected doInferOutputInfo(gxf::Entity& input) = 0; + + virtual gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) = 0; + + virtual gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) = 0; + + gxf::Handle cuda_stream_ptr_ = nullptr; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git
a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOps.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOps.cpp new file mode 100644 index 0000000..1694d95 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorOps.cpp @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#include "CameraModel.hpp" +#include "ConvertColorFormat.hpp" +#include "CropAndResize.hpp" +#include "Frame3D.hpp" +#include "ImageAdapter.hpp" +#include "InterleavedToPlanar.hpp" +#include "Normalize.hpp" +#include "Reshape.hpp" +#include "Resize.hpp" +#include "TensorOperator.hpp" +#include "TensorStream.hpp" +#include "Undistort.hpp" + +#include "gxf/std/extension_factory_helper.hpp" + +GXF_EXT_FACTORY_BEGIN() +GXF_EXT_FACTORY_SET_INFO(0x6eae64ff97a94d9b, 0xb324f85e6a98a75a, "NvCvTensorOpsExtension", + "Generic CVCORE tensor_ops interfaces", "Nvidia_Gxf", "3.1.0", "LICENSE"); + +GXF_EXT_FACTORY_ADD(0xd073a92344ba4b81, 0xbd0f18f4996048e9, nvidia::cvcore::tensor_ops::CameraModel, + nvidia::gxf::Component, + "Construct camera distortion model / camera intrinsics compatible with CVCORE"); + +GXF_EXT_FACTORY_ADD(0x6c9419223e4b4c2b, 0x899a4d65279c6507, nvidia::cvcore::tensor_ops::Frame3D, nvidia::gxf::Component, + "Construct camera extrinsics compatible with CVCORE"); + +GXF_EXT_FACTORY_ADD(0xd94385e5b35b4634, 0x9adb0d214a3865f6, nvidia::cvcore::tensor_ops::TensorStream, + nvidia::gxf::Component, "Wrapper of CVCORE ITensorOperatorStream/ITensorOperatorContext"); + +GXF_EXT_FACTORY_ADD(0xd0c4ddad486a4a91, 0xb69c8a5304b205ef, nvidia::cvcore::tensor_ops::ImageAdapter, + nvidia::gxf::Component, "Utility component for conversion between message and cvcore image types"); + +GXF_EXT_FACTORY_ADD(0xadebc792bd0b4a56, 0x99c1405fd2ea0727, nvidia::cvcore::tensor_ops::StreamUndistort, + nvidia::gxf::Codelet, "Codelet for stream image undistortion in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0xa58141ac7eca4ea5, 0x9b545446fe379a11, nvidia::cvcore::tensor_ops::Resize, nvidia::gxf::Codelet, + "Codelet for image resizing in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0xeb8b5f5b36d44b48, 0x81f959fd28e6f677, nvidia::cvcore::tensor_ops::StreamResize, + nvidia::gxf::Codelet, "Codelet for stream image resizing in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0x4a7ff422de3841bc, 0x9e743ac10d9294b6, nvidia::cvcore::tensor_ops::CropAndResize, + nvidia::gxf::Codelet, "Codelet for crop-and-resize operations in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0x7018f0b9034c462b, 0xa9fbaf7ee012974f, nvidia::cvcore::tensor_ops::Normalize, nvidia::gxf::Codelet, + "Codelet for image normalization in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0x269d4237f3c3479d, 0xbcca9ecc44c71a70, nvidia::cvcore::tensor_ops::InterleavedToPlanar, + nvidia::gxf::Codelet, "Codelet for converting interleaved images to planar images in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0xfc4d7b4d8fcc4daa,
0xa286056e0fcafa78, nvidia::cvcore::tensor_ops::ConvertColorFormat, + nvidia::gxf::Codelet, "Codelet for image color conversion in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0x5ab4a4d8f7a34552, 0xa90be52660b076fd, nvidia::cvcore::tensor_ops::StreamConvertColorFormat, + nvidia::gxf::Codelet, "Codelet for stream image color conversion in tensor_ops"); + +GXF_EXT_FACTORY_ADD(0x26789b7d5a8d4e84, 0x86b845ec5f4cd12a, nvidia::cvcore::tensor_ops::Reshape, nvidia::gxf::Codelet, + "Codelet for image reshape in tensor_ops"); +GXF_EXT_FACTORY_END() diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.cpp new file mode 100644 index 0000000..2f825cf --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.cpp @@ -0,0 +1,124 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "TensorStream.hpp" + +#include "cv/core/ComputeEngine.h" +#include "cv/tensor_ops/TensorOperators.h" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +gxf::Expected<::cvcore::tensor_ops::TensorBackend> GetContextType(const std::string& type) { + if (type == "NPP") { + return ::cvcore::tensor_ops::TensorBackend::NPP; + } else if (type == "VPI") { + return ::cvcore::tensor_ops::TensorBackend::VPI; + } else if (type == "DALI") { + return ::cvcore::tensor_ops::TensorBackend::DALI; + } else { + return gxf::Unexpected{GXF_FAILURE}; + } +} + +gxf::Expected<::cvcore::ComputeEngine> GetComputeEngineType(const std::string& type) { + if (type == "UNKNOWN") { + return ::cvcore::ComputeEngine::UNKNOWN; + } else if (type == "CPU") { + return ::cvcore::ComputeEngine::CPU; + } else if (type == "PVA") { + return ::cvcore::ComputeEngine::PVA; + } else if (type == "VIC") { + return ::cvcore::ComputeEngine::VIC; + } else if (type == "NVENC") { + return ::cvcore::ComputeEngine::NVENC; + } else if (type == "GPU") { + return ::cvcore::ComputeEngine::GPU; + } else if (type == "DLA") { + return ::cvcore::ComputeEngine::DLA; + } else if (type == "COMPUTE_FAULT") { + return ::cvcore::ComputeEngine::COMPUTE_FAULT; + } else { + return gxf::Unexpected{GXF_FAILURE}; + } +} + +} // namespace detail + +gxf_result_t TensorStream::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(backend_type_, "backend_type"); + result &= registrar->parameter(engine_type_, "engine_type"); + + return gxf::ToResultCode(result); +} + +gxf_result_t TensorStream::initialize() { + // Construct context + auto backend_type = detail::GetContextType(backend_type_.get()); + if (!backend_type) { + GXF_LOG_ERROR("unknown backend type."); + return GXF_FAILURE; + } + if (!::cvcore::tensor_ops::TensorContextFactory::IsBackendSupported(backend_type.value())) { + GXF_LOG_ERROR("unsupported context type."); + 
return GXF_FAILURE; + } + auto err_code = ::cvcore::tensor_ops::TensorContextFactory::CreateContext(context_, backend_type.value()); + if (err_code != ::cvcore::make_error_code(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("tensor context creation failed."); + return GXF_FAILURE; + } + // Construct stream + auto engine_type = detail::GetComputeEngineType(engine_type_.get()); + if (!engine_type) { + return GXF_FAILURE; + } + + if (!context_->IsComputeEngineCompatible(engine_type.value())) { + GXF_LOG_ERROR("invalid compute engine type."); + return GXF_FAILURE; + } + err_code = context_->CreateStream(stream_, engine_type.value()); + if (err_code != ::cvcore::make_error_code(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("tensor stream creation failed."); + return GXF_FAILURE; + } + return GXF_SUCCESS; +} + +gxf_result_t TensorStream::deinitialize() { + auto err_code = context_->DestroyStream(stream_); + if (err_code != ::cvcore::make_error_code(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("tensor stream destroy failed."); + return GXF_FAILURE; + } + err_code = ::cvcore::tensor_ops::TensorContextFactory::DestroyContext(context_); + if (err_code != ::cvcore::make_error_code(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("tensor context destroy failed."); + return GXF_FAILURE; + } + return GXF_SUCCESS; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.hpp new file mode 100644 index 0000000..710e11f --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/TensorStream.hpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
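GetContextType and GetComputeEngineType above are plain string-to-enum maps that surface failure through the return type rather than a sentinel enum value. A minimal stand-in using std::optional in place of gxf::Expected:

```cpp
// String-to-enum mapping with failure in the return type.
#include <optional>
#include <string>

enum class TensorBackend { NPP, VPI, DALI };

std::optional<TensorBackend> GetContextType(const std::string& type) {
  if (type == "NPP") return TensorBackend::NPP;
  if (type == "VPI") return TensorBackend::VPI;
  if (type == "DALI") return TensorBackend::DALI;
  return std::nullopt;  // the codelet maps this to gxf::Unexpected{GXF_FAILURE}
}

int main() { return GetContextType("VPI").has_value() ? 0 : 1; }
```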
+// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_TENSOR_STREAM_HPP +#define NVIDIA_CVCORE_TENSOR_STREAM_HPP + +#include "gxf/core/component.hpp" +#include "gxf/std/parameter_parser_std.hpp" + +#include "cv/tensor_ops/ITensorOperatorContext.h" +#include "cv/tensor_ops/ITensorOperatorStream.h" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Wrapper of CVCORE ITensorOperatorStream/ITensorOperatorContext +class TensorStream : public gxf::Component { +public: + virtual ~TensorStream() = default; + TensorStream() = default; + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + gxf_result_t initialize() override; + gxf_result_t deinitialize() override; + + ::cvcore::tensor_ops::TensorOperatorContext getContext() const { + return context_; + } + ::cvcore::tensor_ops::TensorOperatorStream getStream() const { + return stream_; + } + +private: + gxf::Parameter backend_type_; + gxf::Parameter engine_type_; + + ::cvcore::tensor_ops::TensorOperatorContext context_; + ::cvcore::tensor_ops::TensorOperatorStream stream_; +}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.cpp new file mode 100644 index 0000000..b1eed4e --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.cpp @@ -0,0 +1,285 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
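TensorStream's initialize()/deinitialize() pair (above) builds a context, then a stream on that context, and tears them down in reverse order. A sketch of that lifecycle with dummy factory types standing in for the cvcore ones:

```cpp
// Paired create/destroy lifecycle: context first in, last out.
#include <memory>

struct Stream {};
struct Context {
  std::unique_ptr<Stream> CreateStream() { return std::make_unique<Stream>(); }
};

class TensorStream {
 public:
  bool initialize() {
    context_ = std::make_unique<Context>();  // TensorContextFactory::CreateContext
    stream_ = context_->CreateStream();      // context_->CreateStream
    return context_ && stream_;
  }
  void deinitialize() {
    stream_.reset();   // DestroyStream before DestroyContext
    context_.reset();
  }

 private:
  std::unique_ptr<Context> context_;
  std::unique_ptr<Stream> stream_;
};

int main() {
  TensorStream ts;
  const bool ok = ts.initialize();
  ts.deinitialize();
  return ok ? 0 : 1;
}
```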
+// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "ImageUtils.hpp" +#include "Undistort.hpp" +#include "gxf/multimedia/camera.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +namespace detail { + +template<::cvcore::ImageType T> +gxf_result_t UndistortImpl(gxf::Entity& output, gxf::Entity& input, const ImageInfo& output_info, + const ImageInfo& input_info, const char* output_name, const char* input_name, + gxf::Handle stream, gxf::Handle output_adapter, + gxf::Handle input_adapter, gxf::Handle allocator, + ::cvcore::tensor_ops::ImageWarp warp, ::cvcore::tensor_ops::InterpolationType interp_type, + ::cvcore::tensor_ops::BorderType border_type) { + auto input_image = input_adapter->WrapImageFromMessage(input, input_name); + if (!input_image) { + return GXF_FAILURE; + } + + auto error = output_adapter->AddImageToMessage(output, output_info.width, output_info.height, allocator, + output_info.is_cpu, output_name); + if (error != GXF_SUCCESS) { + return GXF_FAILURE; + } + + auto output_image = output_adapter->WrapImageFromMessage(output, output_name); + if (!output_image) { + return GXF_FAILURE; + } + + auto err_code = stream->getStream()->Remap(output_image.value(), input_image.value(), warp, interp_type, border_type); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("undistort operation failed."); + return GXF_FAILURE; + } + + return GXF_SUCCESS; +} + +gxf::Expected<::cvcore::CameraIntrinsics> GetIntrinsicsFromMessage(gxf::Handle& camera_model) { + return ::cvcore::CameraIntrinsics(camera_model->focal_length.x, camera_model->focal_length.y, + camera_model->principal_point.x, camera_model->principal_point.y, + camera_model->skew_value); +} + +gxf::Expected<::cvcore::CameraExtrinsics> GetExtrinsicsFromMessage(gxf::Handle& pose) { + float raw_matrix[3][4]; + for (size_t i = 0; i < 9; i++) { + raw_matrix[i / 3][i % 3] = pose->rotation[i]; + } + for (size_t i = 0; i < 3; i++) { + raw_matrix[i][3] = pose->translation[i]; + } + return ::cvcore::CameraExtrinsics(raw_matrix); +} + +gxf::Expected<::cvcore::CameraDistortionModel> GetDistortionsFromMessage(gxf::Handle& camera_model) { + auto distortion_type = GetCameraDistortionType(camera_model->distortion_type); + if (!distortion_type) { + return gxf::Unexpected{GXF_FAILURE}; + } + auto distortion_model = ::cvcore::CameraDistortionModel(); + for (size_t i = 0; i < 8; i++) { + distortion_model.coefficients[i] = camera_model->distortion_coefficients[i]; + } + distortion_model.type = distortion_type.value(); + return distortion_model; +} + +} // namespace detail + +gxf_result_t UndistortBase::start() { + // Load grid object + image_grid_.numHorizRegions = regions_width_.get().size(); + image_grid_.numVertRegions = regions_height_.get().size(); + if (regions_width_.get().size() != horizontal_intervals_.get().size() || + regions_height_.get().size() != vertical_intervals_.get().size()) { + GXF_LOG_ERROR("invalid image grid."); + return GXF_FAILURE; + } + std::copy(regions_width_.get().begin(), regions_width_.get().end(), image_grid_.regionWidth.begin()); + std::copy(regions_height_.get().begin(), regions_height_.get().end(), image_grid_.regionHeight.begin()); + std::copy(horizontal_intervals_.get().begin(), horizontal_intervals_.get().end(), image_grid_.horizInterval.begin()); + std::copy(vertical_intervals_.get().begin(), vertical_intervals_.get().end(), image_grid_.vertInterval.begin()); + output_shape_.x = static_cast( + 
std::accumulate(image_grid_.regionWidth.begin(), image_grid_.regionWidth.end(), 0)); + output_shape_.y = static_cast( + std::accumulate(image_grid_.regionHeight.begin(), image_grid_.regionHeight.end(), 0)); + + // Generate Image Warp if possible + if (input_camera_model_.try_get() && reference_frame_.try_get()) { + input_camera_info_ = {input_camera_model_.try_get().value()->getCameraIntrinsics(), + reference_frame_.try_get().value()->getCameraExtrinsics(), + input_camera_model_.try_get().value()->getDistortionModel()}; + + output_camera_intrinsics_ = output_camera_model_.try_get() + ? output_camera_model_.try_get().value()->getCameraIntrinsics() + : input_camera_info_.intrinsic; + + auto err_code = stream_->getStream()->GenerateWarpFromCameraModel(image_warp_, image_grid_, input_camera_info_, + output_camera_intrinsics_); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("image warp creation failed."); + return GXF_FAILURE; + } + } + + return GXF_SUCCESS; +} + +gxf_result_t UndistortBase::stop() { + auto err_code = stream_->getStream()->DestroyWarp(image_warp_); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("image warp de-allocation failed."); + return GXF_FAILURE; + } + return GXF_SUCCESS; +} + +gxf_result_t UndistortBase::registerInterface(gxf::Registrar* registrar) { + gxf::Expected result; + + result &= registrar->parameter(input_camera_model_, "input_camera_model", "", "", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(reference_frame_, "reference_frame", "", "", gxf::Registrar::NoDefaultParameter(), + GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_camera_model_, "output_camera_model", "", "", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(regions_width_, "regions_width"); + result &= registrar->parameter(regions_height_, "regions_height"); + result &= registrar->parameter(horizontal_intervals_, "horizontal_intervals"); + result &= registrar->parameter(vertical_intervals_, "vertical_intervals"); + result &= registrar->parameter(interp_type_, "interp_type"); + result &= registrar->parameter(border_type_, "border_type"); + result &= registrar->parameter(receiver_, "receiver"); + result &= registrar->parameter(transmitter_, "transmitter"); + result &= registrar->parameter(pool_, "pool"); + result &= registrar->parameter(stream_, "stream"); + result &= registrar->parameter(stream_pool_, "stream_pool", "cuda stream pool", "cuda stream pool object", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(input_adapter_, "input_adapter"); + result &= registrar->parameter(output_adapter_, "output_adapter"); + result &= registrar->parameter(input_name_, "input_name", "input name", "input tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + result &= registrar->parameter(output_name_, "output_name", "output name", "output tensor name", + gxf::Registrar::NoDefaultParameter(), GXF_PARAMETER_FLAGS_OPTIONAL); + + return gxf::ToResultCode(result); +} + +gxf::Expected UndistortBase::doInferOutputInfo(gxf::Entity& input) { + // Check if the input distortion type is Perspective + auto maybe_camera_message = input.get(); + if (maybe_camera_message) { + no_op_ = maybe_camera_message.value()->distortion_type == gxf::DistortionType::Perspective; + } + // Output size may vary, but the format
must be the same + return ImageInfo{input_info_.type, static_cast(output_shape_.x), static_cast(output_shape_.y), + input_info_.is_cpu}; +} + +gxf_result_t UndistortBase::doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) { + *output = *input; + (*output).distortion_type = gxf::DistortionType::Perspective; + for (size_t i = 0; i < gxf::CameraModel::kMaxDistortionCoefficients; i++) { + (*output).distortion_coefficients[i] = 0.; + } + (*output).dimensions = output_shape_; + (*output).focal_length.x = output_camera_intrinsics_.fx(); + (*output).focal_length.y = output_camera_intrinsics_.fy(); + (*output).principal_point.x = output_camera_intrinsics_.cx(); + (*output).principal_point.y = output_camera_intrinsics_.cy(); + (*output).skew_value = output_camera_intrinsics_.skew(); + return GXF_SUCCESS; +} + +#define DEFINE_UNDISTORT(INPUT_TYPE) \ + if (input_info_.type == INPUT_TYPE) { \ + return detail::UndistortImpl(output, input, output_info_, input_info_, output_name, input_name, \ + stream_.get(), output_adapter_.get(), input_adapter_.get(), pool_.get(), \ + image_warp_, interp.value(), border.value()); \ + } + +gxf_result_t UndistortBase::doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, + const char* output_name, const char* input_name) { + GXF_LOG_INFO("execute undistort."); + auto maybe_camera_message = input.get(); + auto maybe_pose3d_message = input.get(); + if (!maybe_camera_message) { + if (image_warp_ == nullptr) { + GXF_LOG_ERROR("no camera information found."); + return GXF_FAILURE; + } + } else { + auto maybe_intrinsics = detail::GetIntrinsicsFromMessage(maybe_camera_message.value()); + auto maybe_distortions = detail::GetDistortionsFromMessage(maybe_camera_message.value()); + if (!maybe_intrinsics || !maybe_distortions) { + return GXF_FAILURE; + } + const auto& new_intrinsics = maybe_intrinsics.value(); + const auto& new_distortions = maybe_distortions.value(); + + auto new_extrinsics = maybe_pose3d_message ? detail::GetExtrinsicsFromMessage(maybe_pose3d_message.value()).value() + : ::cvcore::CameraExtrinsics(); + + const bool reset = image_warp_ == nullptr || new_intrinsics != input_camera_info_.intrinsic || + new_distortions != input_camera_info_.distortion || + new_extrinsics != input_camera_info_.extrinsic; + + if (reset) { + auto new_width = static_cast(output_shape_.x); + auto new_height = static_cast(output_shape_.y); + + // These two parameters (width_scale and height_scale) can be + // used to determine a crop or pad regime depending on which dimension to + // preserve in the case of keep_aspect_ratio. In this case, we assume + // always_crop=True, or that we will always use the largest dimension + // change.
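+ // Worked example (illustrative numbers, not from this patch): a 1280x720 + // input mapped to a 960x640 output grid gives width_scale = 0.75 and + // height_scale ~= 0.889; always-crop takes the larger factor, so the + // intrinsics are scaled by 0.889 and the scaled width (~1138) overflows + // 960 and is cropped rather than padded.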
+ auto width_scale = new_width / static_cast(input_info_.width); + auto height_scale = new_height / static_cast(input_info_.height); + auto scale = std::max({width_scale, height_scale}); // Always crop + + input_camera_info_ = {new_intrinsics, new_extrinsics, new_distortions}; + output_camera_intrinsics_ = new_intrinsics; + output_camera_intrinsics_.m_intrinsics[0][0] = scale * new_intrinsics.m_intrinsics[0][0]; + output_camera_intrinsics_.m_intrinsics[0][1] = scale * new_intrinsics.m_intrinsics[0][1]; + output_camera_intrinsics_.m_intrinsics[1][1] = scale * new_intrinsics.m_intrinsics[1][1]; + output_camera_intrinsics_.m_intrinsics[0][2] = scale * new_intrinsics.m_intrinsics[0][2]; + output_camera_intrinsics_.m_intrinsics[1][2] = scale * new_intrinsics.m_intrinsics[1][2]; + + auto err_code = stream_->getStream()->GenerateWarpFromCameraModel(image_warp_, image_grid_, input_camera_info_, + output_camera_intrinsics_); + if (err_code != ::cvcore::make_error_condition(::cvcore::ErrorCode::SUCCESS)) { + GXF_LOG_ERROR("image warp creation failed."); + return GXF_FAILURE; + } + } + } + + auto interp = GetInterpolationType(interp_type_); + if (!interp) { + return interp.error(); + } + auto border = GetBorderType(border_type_); + if (!border) { + return border.error(); + } + + // Run the image undistortion operation + DEFINE_UNDISTORT(::cvcore::ImageType::BGR_U8); + DEFINE_UNDISTORT(::cvcore::ImageType::RGB_U8); + DEFINE_UNDISTORT(::cvcore::ImageType::NV12); + DEFINE_UNDISTORT(::cvcore::ImageType::NV24); + + // Return error code for unsupported type + GXF_LOG_ERROR("invalid input/output type for image undistort."); + return GXF_FAILURE; +} + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.hpp new file mode 100644 index 0000000..377f621 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/Undistort.hpp @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_UNDISTORT_HPP +#define NVIDIA_CVCORE_UNDISTORT_HPP + +#include "CameraModel.hpp" +#include "Frame3D.hpp" +#include "TensorOperator.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { + +// Undistort operator. 
+class UndistortBase : public TensorOperator { +public: + virtual ~UndistortBase() {} + + gxf_result_t start() override final; + gxf_result_t stop() override final; + + gxf_result_t registerInterface(gxf::Registrar* registrar) override; + +private: + gxf::Expected doInferOutputInfo(gxf::Entity& input) override final; + gxf_result_t doUpdateCameraMessage(gxf::Handle& output, + gxf::Handle& input) override final; + gxf_result_t doExecute(gxf::Entity& output, gxf::Entity& input, cudaStream_t stream, const char* output_name, + const char* input_name) override final; + + gxf::Parameter> input_camera_model_; + gxf::Parameter> reference_frame_; + gxf::Parameter> output_camera_model_; + gxf::Parameter> regions_width_; + gxf::Parameter> regions_height_; + gxf::Parameter> horizontal_intervals_; + gxf::Parameter> vertical_intervals_; + gxf::Parameter interp_type_; + gxf::Parameter border_type_; + + ::cvcore::tensor_ops::ImageGrid image_grid_; + ::cvcore::tensor_ops::ImageWarp image_warp_; + gxf::Vector2u output_shape_; + + ::cvcore::CameraModel input_camera_info_; + ::cvcore::CameraIntrinsics output_camera_intrinsics_; +}; + +class StreamUndistort : public UndistortBase {}; + +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.cpp new file mode 100644 index 0000000..37099fc --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.cpp @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
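UndistortBase::start() above validates that each region list pairs with an interval list of equal length and derives the output shape by summing the per-region extents. A standalone sketch of that bookkeeping, with plain C++ stand-ins for the grid and parameters:

```cpp
// Grid bookkeeping: output shape = sum of region extents; list lengths must pair.
#include <cstdint>
#include <numeric>
#include <vector>

struct GridShape { uint32_t x, y; };

bool ComputeOutputShape(const std::vector<int>& region_widths,
                        const std::vector<int>& horiz_intervals,
                        const std::vector<int>& region_heights,
                        const std::vector<int>& vert_intervals,
                        GridShape* out) {
  if (region_widths.size() != horiz_intervals.size() ||
      region_heights.size() != vert_intervals.size()) {
    return false;  // "invalid image grid."
  }
  out->x = static_cast<uint32_t>(
      std::accumulate(region_widths.begin(), region_widths.end(), 0));
  out->y = static_cast<uint32_t>(
      std::accumulate(region_heights.begin(), region_heights.end(), 0));
  return true;
}

int main() {
  GridShape shape{};
  // e.g. two 480-wide regions and one 720-high region -> 960x720 output
  return ComputeOutputShape({480, 480}, {32, 32}, {720}, {16}, &shape) ? 0 : 1;
}
```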
+// +// SPDX-License-Identifier: Apache-2.0 +#include "ImageAdapterTensorImpl.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { +namespace detail { + +gxf::Expected> GetHWCIndices(const ::cvcore::ImageType type) { + switch (type) { + case ::cvcore::ImageType::Y_U8: + case ::cvcore::ImageType::Y_U16: + case ::cvcore::ImageType::Y_F32: + case ::cvcore::ImageType::RGB_U8: + case ::cvcore::ImageType::BGR_U8: + case ::cvcore::ImageType::RGB_U16: + case ::cvcore::ImageType::BGR_U16: + case ::cvcore::ImageType::RGB_F32: + case ::cvcore::ImageType::BGR_F32: { + return std::make_tuple(0, 1, 2); + } + case ::cvcore::ImageType::PLANAR_RGB_U8: + case ::cvcore::ImageType::PLANAR_BGR_U8: + case ::cvcore::ImageType::PLANAR_RGB_U16: + case ::cvcore::ImageType::PLANAR_BGR_U16: + case ::cvcore::ImageType::PLANAR_RGB_F32: + case ::cvcore::ImageType::PLANAR_BGR_F32: { + return std::make_tuple(1, 2, 0); + } + default: { + GXF_LOG_ERROR("invalid image type."); + return gxf::Unexpected{GXF_FAILURE}; + } + } +} + +gxf::Expected GetPrimitiveType(const ::cvcore::ImageType image_type) { + switch (image_type) { + case ::cvcore::ImageType::Y_U8: + case ::cvcore::ImageType::RGB_U8: + case ::cvcore::ImageType::BGR_U8: + case ::cvcore::ImageType::PLANAR_RGB_U8: + case ::cvcore::ImageType::PLANAR_BGR_U8: { + return gxf::PrimitiveType::kUnsigned8; + } + case ::cvcore::ImageType::Y_U16: + case ::cvcore::ImageType::RGB_U16: + case ::cvcore::ImageType::BGR_U16: + case ::cvcore::ImageType::PLANAR_RGB_U16: + case ::cvcore::ImageType::PLANAR_BGR_U16: { + return gxf::PrimitiveType::kUnsigned16; + } + case ::cvcore::ImageType::Y_F32: + case ::cvcore::ImageType::RGB_F32: + case ::cvcore::ImageType::BGR_F32: + case ::cvcore::ImageType::PLANAR_RGB_F32: + case ::cvcore::ImageType::PLANAR_BGR_F32: { + return gxf::PrimitiveType::kFloat32; + } + default: { + GXF_LOG_ERROR("invalid image type."); + return gxf::Unexpected{GXF_FAILURE}; + } + } +} + +gxf::Expected GetTensorInfo(gxf::Handle tensor, const ::cvcore::ImageType type) { + const auto& shape = tensor->shape(); + const auto rank = tensor->rank(); + const auto storage_type = tensor->storage_type(); + + if (rank != 3) { + GXF_LOG_ERROR("unexpected tensor shape."); + return gxf::Unexpected{GXF_FAILURE}; + } + + const auto indices = GetHWCIndices(type); + if (!indices) { + return gxf::Unexpected{GXF_FAILURE}; + } + const size_t width = shape.dimension(std::get<1>(indices.value())); + const size_t height = shape.dimension(std::get<0>(indices.value())); + + return ImageInfo{type, width, height, storage_type != gxf::MemoryStorageType::kDevice}; +} + +} // namespace detail +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.hpp new file mode 100644 index 0000000..45a060a --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterTensorImpl.hpp @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
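GetHWCIndices above encodes where height, width, and channels live in the 3-D tensor: interleaved layouts store (H, W, C) at dims (0, 1, 2), planar layouts store (C, H, W), so H and W sit at indices 1 and 2. A standalone sketch of the lookup that GetTensorInfo relies on:

```cpp
// Layout-dependent (H, W, C) index lookup for a 3-D image tensor.
#include <array>
#include <cstdio>
#include <tuple>

enum class Layout { Interleaved, Planar };

std::tuple<int, int, int> GetHWCIndices(Layout layout) {
  return layout == Layout::Interleaved ? std::make_tuple(0, 1, 2)
                                       : std::make_tuple(1, 2, 0);
}

int main() {
  const std::array<int, 3> dims = {3, 720, 1280};  // planar RGB: C, H, W
  const auto [h, w, c] = GetHWCIndices(Layout::Planar);
  std::printf("height=%d width=%d channels=%d\n", dims[h], dims[w], dims[c]);
  return 0;
}
```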
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#ifndef NVIDIA_CVCORE_TENSOR_ADAPTER_HPP +#define NVIDIA_CVCORE_TENSOR_ADAPTER_HPP + +#include "../ImageUtils.hpp" + +#include + +#include "cv/core/Image.h" +#include "gxf/std/allocator.hpp" +#include "gxf/std/tensor.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { +namespace detail { + +gxf::Expected> GetHWCIndices(const ::cvcore::ImageType type); + +gxf::Expected GetPrimitiveType(const ::cvcore::ImageType image_type); + +gxf::Expected GetTensorInfo(gxf::Handle tensor, const ::cvcore::ImageType type); + +template<::cvcore::ImageType T, + typename std::enable_if::type* = nullptr> +gxf::Expected<::cvcore::Image> WrapImageFromTensor(gxf::Handle tensor) { + const auto info = GetTensorInfo(tensor, T); + if (!info) { + return gxf::Unexpected{GXF_FAILURE}; + } + using D = typename ::cvcore::detail::ChannelTypeToNative<::cvcore::ImageTraits::CT>::Type; + auto pointer = tensor->data(); + if (!pointer) { + return gxf::Unexpected{GXF_FAILURE}; + } + const size_t stride = tensor->stride(std::get<0>(GetHWCIndices(T).value())); + return ::cvcore::Image(info.value().width, info.value().height, stride, pointer.value(), info.value().is_cpu); +} + +template<::cvcore::ImageType T, + typename std::enable_if::type* = nullptr> +gxf::Expected<::cvcore::Image> WrapImageFromTensor(gxf::Handle tensor) { + GXF_LOG_ERROR("NV12/NV24 not supported for gxf::Tensor"); + return gxf::Unexpected{GXF_FAILURE}; +} + +template<::cvcore::ImageType T, + typename std::enable_if::type* = nullptr> +gxf_result_t AllocateTensor(gxf::Handle tensor, size_t width, size_t height, + gxf::Handle allocator, bool is_cpu, bool allocate_pitch_linear) { + const auto primitive_type = GetPrimitiveType(T); + if (!primitive_type) { + return primitive_type.error(); + } + + const auto indices = GetHWCIndices(T); + if (!indices) { + return GXF_FAILURE; + } + std::array dims; + dims[std::get<0>(indices.value())] = height; + dims[std::get<1>(indices.value())] = width; + dims[std::get<2>(indices.value())] = ::cvcore::detail::ChannelToCount<::cvcore::ImageTraits::CC>(); + const gxf::Shape shape(dims, 3); + + auto result = + tensor->reshapeCustom(shape, primitive_type.value(), gxf::PrimitiveTypeSize(primitive_type.value()), + gxf::Unexpected{GXF_UNINITIALIZED_VALUE}, + is_cpu ? 
gxf::MemoryStorageType::kHost : gxf::MemoryStorageType::kDevice, allocator); + if (!result) { + GXF_LOG_ERROR("reshape tensor failed."); + return GXF_FAILURE; + } + return GXF_SUCCESS; +} + +template<::cvcore::ImageType T, + typename std::enable_if::type* = nullptr> +gxf_result_t AllocateTensor(gxf::Handle tensor, size_t width, size_t height, + gxf::Handle allocator, bool is_cpu, bool allocate_pitch_linear) { + GXF_LOG_ERROR("NV12/NV24 not supported for gxf::Tensor"); + return GXF_FAILURE; +} + +} // namespace detail +} // namespace tensor_ops +} // namespace cvcore +} // namespace nvidia + +#endif diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.cpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.cpp new file mode 100644 index 0000000..ac257b4 --- /dev/null +++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.cpp @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 +#include "ImageAdapterVideoBufferImpl.hpp" + +namespace nvidia { +namespace cvcore { +namespace tensor_ops { +namespace detail { + +gxf::Expected<::cvcore::ImageType> GetImageTypeFromVideoFormat(const gxf::VideoFormat format) { + switch (format) { + case gxf::VideoFormat::GXF_VIDEO_FORMAT_NV12: + case gxf::VideoFormat::GXF_VIDEO_FORMAT_NV12_ER: { + return ::cvcore::ImageType::NV12; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_NV24: + case gxf::VideoFormat::GXF_VIDEO_FORMAT_NV24_ER: { + return ::cvcore::ImageType::NV24; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_RGBA: { + return ::cvcore::ImageType::RGBA_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB: { + return ::cvcore::ImageType::RGB_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB32: { + return ::cvcore::ImageType::RGB_F32; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_BGR: { + return ::cvcore::ImageType::BGR_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_BGR32: { + return ::cvcore::ImageType::BGR_F32; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_R8_G8_B8: { + return ::cvcore::ImageType::PLANAR_RGB_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_R32_G32_B32: { + return ::cvcore::ImageType::PLANAR_RGB_F32; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_B8_G8_R8: { + return ::cvcore::ImageType::PLANAR_BGR_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_B32_G32_R32: { + return ::cvcore::ImageType::PLANAR_BGR_F32; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_GRAY: { + return ::cvcore::ImageType::Y_U8; + } + case gxf::VideoFormat::GXF_VIDEO_FORMAT_GRAY32: { + return ::cvcore::ImageType::Y_F32; + } + default: { + GXF_LOG_ERROR("invalid video format."); + return gxf::Unexpected{GXF_FAILURE}; + } + } +} + +gxf::Expected GetVideoBufferInfo(gxf::Handle video_buffer) { + const auto buffer_info = video_buffer->video_frame_info(); + const auto 
+  const auto storage_type = video_buffer->storage_type();
+  auto image_type = GetImageTypeFromVideoFormat(buffer_info.color_format);
+  if (!image_type) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  return ImageInfo{image_type.value(), buffer_info.width, buffer_info.height,
+                   storage_type != gxf::MemoryStorageType::kDevice};
+}
+
+} // namespace detail
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
diff --git a/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.hpp b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.hpp
new file mode 100644
index 0000000..6664640
--- /dev/null
+++ b/isaac_ros_ess/gxf/ess/extensions/tensor_ops/detail/ImageAdapterVideoBufferImpl.hpp
@@ -0,0 +1,294 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+#ifndef NVIDIA_CVCORE_VIDEO_BUFFER_ADAPTER_HPP
+#define NVIDIA_CVCORE_VIDEO_BUFFER_ADAPTER_HPP
+
+#include "../ImageUtils.hpp"
+
+#include "gxf/multimedia/video.hpp"
+#include "gxf/std/allocator.hpp"
+
+#include "cv/core/Image.h"
+
+namespace nvidia {
+namespace cvcore {
+namespace tensor_ops {
+namespace detail {
+
+gxf::Expected<::cvcore::ImageType> GetImageTypeFromVideoFormat(const gxf::VideoFormat format);
+
+gxf::Expected<ImageInfo> GetVideoBufferInfo(gxf::Handle<gxf::VideoBuffer> video_buffer);
+
+template<::cvcore::ImageType T,
+         typename std::enable_if<T != ::cvcore::ImageType::NV12 && T != ::cvcore::ImageType::NV24>::type* = nullptr>
+gxf::Expected<::cvcore::Image<T>> WrapImageFromVideoBuffer(gxf::Handle<gxf::VideoBuffer> video_buffer) {
+  const auto info = GetVideoBufferInfo(video_buffer);
+  if (!info) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  using D = typename ::cvcore::detail::ChannelTypeToNative<::cvcore::ImageTraits<T, 3>::CT>::Type;
+  auto pointer = reinterpret_cast<D*>(video_buffer->pointer());
+  if (!pointer) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  const auto& color_planes = video_buffer->video_frame_info().color_planes;
+  return ::cvcore::Image<T>(info.value().width, info.value().height, color_planes[0].stride, pointer,
+                            info.value().is_cpu);
+}
+
+template<::cvcore::ImageType T,
+         typename std::enable_if<T == ::cvcore::ImageType::NV12 || T == ::cvcore::ImageType::NV24>::type* = nullptr>
+gxf::Expected<::cvcore::Image<T>> WrapImageFromVideoBuffer(gxf::Handle<gxf::VideoBuffer> video_buffer) {
+  const auto info = GetVideoBufferInfo(video_buffer);
+  if (!info) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  // Note only U8 is supported in NV12/NV24
+  auto pointer = reinterpret_cast<uint8_t*>(video_buffer->pointer());
+  if (!pointer) {
+    return gxf::Unexpected{GXF_FAILURE};
+  }
+  const auto& color_planes = video_buffer->video_frame_info().color_planes;
+  return ::cvcore::Image<T>(info.value().width, info.value().height, color_planes[0].stride, color_planes[1].stride,
+                            pointer, pointer + color_planes[1].offset, info.value().is_cpu);
+}
+
+template<::cvcore::ImageType T>
+struct ImageTypeToVideoFormat {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_CUSTOM;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::NV12> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_NV12_ER;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::NV24> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_NV24_ER;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::RGBA_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_RGBA;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::RGB_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::RGB_F32> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB32;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::BGR_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_BGR;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::BGR_F32> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_BGR32;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::PLANAR_RGB_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_R8_G8_B8;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::PLANAR_RGB_F32> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_R32_G32_B32;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::PLANAR_BGR_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_B8_G8_R8;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::PLANAR_BGR_F32> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_B32_G32_R32;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::Y_U8> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_GRAY;
+};
+
+template<>
+struct ImageTypeToVideoFormat<::cvcore::ImageType::Y_F32> {
+  static constexpr gxf::VideoFormat format = gxf::VideoFormat::GXF_VIDEO_FORMAT_GRAY32;
+};
+
+template<::cvcore::ImageType T>
+struct DefaultNoPaddingColorPlanes {};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::NV12> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("Y", 1, width), gxf::ColorPlane("UV", 2, width)}) {}
+  std::array<gxf::ColorPlane, 2> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::NV24> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("Y", 1, width), gxf::ColorPlane("UV", 2, width * 2)}) {}
+  std::array<gxf::ColorPlane, 2> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::RGBA_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("RGBA", 4, width * 4)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::RGB_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("RGB", 3, width * 3)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::RGB_F32> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("RGB", 12, width * 12)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::BGR_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("BGR", 3, width * 3)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::BGR_F32> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("BGR", 12, width * 12)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::PLANAR_RGB_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("R", 1, width), gxf::ColorPlane("G", 1, width), gxf::ColorPlane("B", 1, width)}) {}
+  std::array<gxf::ColorPlane, 3> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::PLANAR_RGB_F32> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes(
+        {gxf::ColorPlane("R", 4, width * 4), gxf::ColorPlane("G", 4, width * 4), gxf::ColorPlane("B", 4, width * 4)}) {}
+  std::array<gxf::ColorPlane, 3> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::PLANAR_BGR_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("B", 1, width), gxf::ColorPlane("G", 1, width), gxf::ColorPlane("R", 1, width)}) {}
+  std::array<gxf::ColorPlane, 3> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::PLANAR_BGR_F32> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes(
+        {gxf::ColorPlane("B", 4, width * 4), gxf::ColorPlane("G", 4, width * 4), gxf::ColorPlane("R", 4, width * 4)}) {}
+  std::array<gxf::ColorPlane, 3> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::Y_U8> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("gray", 1, width)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+template<>
+struct DefaultNoPaddingColorPlanes<::cvcore::ImageType::Y_F32> {
+  DefaultNoPaddingColorPlanes(size_t width)
+    : planes({gxf::ColorPlane("gray", 4, width * 4)}) {}
+  std::array<gxf::ColorPlane, 1> planes;
+};
+
+// This is the list of image types that GXF supports so far.
+constexpr bool IsValidGXFImageType(const ::cvcore::ImageType type) {
+  return type == ::cvcore::ImageType::NV12 || type == ::cvcore::ImageType::NV24 ||
+         type == ::cvcore::ImageType::RGBA_U8 || type == ::cvcore::ImageType::RGB_U8 ||
+         type == ::cvcore::ImageType::BGR_U8 || type == ::cvcore::ImageType::RGB_F32 ||
+         type == ::cvcore::ImageType::BGR_F32 || type == ::cvcore::ImageType::PLANAR_RGB_U8 ||
+         type == ::cvcore::ImageType::PLANAR_BGR_U8 || type == ::cvcore::ImageType::PLANAR_RGB_F32 ||
+         type == ::cvcore::ImageType::PLANAR_BGR_F32 || type == ::cvcore::ImageType::Y_U8 ||
+         type == ::cvcore::ImageType::Y_F32;
+}
+
+template<::cvcore::ImageType T, typename std::enable_if<IsValidGXFImageType(T)>::type* = nullptr>
+gxf_result_t AllocateVideoBuffer(gxf::Handle<gxf::VideoBuffer> video_buffer, size_t width, size_t height,
+                                 gxf::Handle<gxf::Allocator> allocator, bool is_cpu, bool allocate_pitch_linear) {
+  if (width % 2 != 0 || height % 2 != 0) {
+    GXF_LOG_ERROR("image width/height must be even for creation of gxf::VideoBuffer");
+    return GXF_FAILURE;
+  }
+  if (allocate_pitch_linear) {
+    auto result = video_buffer->resize<ImageTypeToVideoFormat<T>::format>(
+      static_cast<uint32_t>(width), static_cast<uint32_t>(height), gxf::SurfaceLayout::GXF_SURFACE_LAYOUT_PITCH_LINEAR,
+      is_cpu ? gxf::MemoryStorageType::kHost : gxf::MemoryStorageType::kDevice, allocator);
+
+    if (!result) {
+      GXF_LOG_ERROR("resize VideoBuffer failed.");
+      return GXF_FAILURE;
+    }
+  } else {
+    DefaultNoPaddingColorPlanes<T> planes_trait(width);
+    gxf::VideoFormatSize<ImageTypeToVideoFormat<T>::format> buffer_type_trait;
+    uint64_t size = buffer_type_trait.size(width, height, planes_trait.planes);
+    std::vector<gxf::ColorPlane> planes_filled{planes_trait.planes.begin(), planes_trait.planes.end()};
+    gxf::VideoBufferInfo buffer_info{static_cast<uint32_t>(width), static_cast<uint32_t>(height),
+                                     ImageTypeToVideoFormat<T>::format, planes_filled,
+                                     gxf::SurfaceLayout::GXF_SURFACE_LAYOUT_PITCH_LINEAR};
+    auto result = video_buffer->resizeCustom(
+      buffer_info, size, is_cpu ? gxf::MemoryStorageType::kHost : gxf::MemoryStorageType::kDevice, allocator);
+
+    if (!result) {
+      GXF_LOG_ERROR("custom resize VideoBuffer failed.");
+      return GXF_FAILURE;
+    }
+  }
+  return GXF_SUCCESS;
+}
+
+template<::cvcore::ImageType T, typename std::enable_if<!IsValidGXFImageType(T)>::type* = nullptr>
+gxf_result_t AllocateVideoBuffer(gxf::Handle<gxf::VideoBuffer> video_buffer, size_t width, size_t height,
+                                 gxf::Handle<gxf::Allocator> allocator, bool is_cpu, bool allocate_pitch_linear) {
+  GXF_LOG_ERROR("image type not supported in gxf::VideoBuffer");
+  return GXF_FAILURE;
+}
+
+} // namespace detail
+} // namespace tensor_ops
+} // namespace cvcore
+} // namespace nvidia
+
+#endif
diff --git a/isaac_ros_ess/include/isaac_ros_ess/ess_disparity_node.hpp b/isaac_ros_ess/include/isaac_ros_ess/ess_disparity_node.hpp
index bd73b67..f11f87e 100644
--- a/isaac_ros_ess/include/isaac_ros_ess/ess_disparity_node.hpp
+++ b/isaac_ros_ess/include/isaac_ros_ess/ess_disparity_node.hpp
@@ -1,5 +1,5 @@
 // SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/isaac_ros_ess/launch/isaac_ros_argus_ess.launch.py b/isaac_ros_ess/launch/isaac_ros_argus_ess.launch.py
index 9d45e78..5656258 100644
--- a/isaac_ros_ess/launch/isaac_ros_argus_ess.launch.py
+++ b/isaac_ros_ess/launch/isaac_ros_argus_ess.launch.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/isaac_ros_ess/launch/isaac_ros_ess.launch.py b/isaac_ros_ess/launch/isaac_ros_ess.launch.py
index 7393492..08fc216 100644
--- a/isaac_ros_ess/launch/isaac_ros_ess.launch.py
+++ b/isaac_ros_ess/launch/isaac_ros_ess.launch.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
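The two VideoBuffer adapters above are designed to be used allocate-then-wrap: a codelet first sizes a gxf::VideoBuffer in a GXF-native format, then wraps the same memory as a ::cvcore::Image view for the tensor_ops kernels, with no pixel copy. A minimal sketch of that sequence, assuming the calling codelet already holds handles named frame (gxf::Handle<gxf::VideoBuffer>) and pool (gxf::Handle<gxf::Allocator>); both names are illustrative, not part of this patch:

    // Allocate a 1280x720 device-resident RGB_U8 frame with pitch-linear layout.
    gxf_result_t code = nvidia::cvcore::tensor_ops::detail::AllocateVideoBuffer<::cvcore::ImageType::RGB_U8>(
        frame, 1280, 720, pool, /*is_cpu=*/false, /*allocate_pitch_linear=*/true);
    if (code != GXF_SUCCESS) {
      return code;  // resize of the VideoBuffer failed
    }

    // Wrap the buffer as a zero-copy cvcore image view over the same memory.
    auto image = nvidia::cvcore::tensor_ops::detail::WrapImageFromVideoBuffer<::cvcore::ImageType::RGB_U8>(frame);
    if (!image) {
      return GXF_FAILURE;  // unsupported format or null frame pointer
    }

Note that AllocateVideoBuffer rejects odd widths and heights, and that NV12/NV24 wrapping goes through the two-plane overload, which passes both the Y and UV strides.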
diff --git a/isaac_ros_ess/launch/isaac_ros_ess_isaac_sim.launch.py b/isaac_ros_ess/launch/isaac_ros_ess_isaac_sim.launch.py
index 7562f7c..19dc879 100644
--- a/isaac_ros_ess/launch/isaac_ros_ess_isaac_sim.launch.py
+++ b/isaac_ros_ess/launch/isaac_ros_ess_isaac_sim.launch.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/isaac_ros_ess/launch/isaac_ros_ess_realsense.launch.py b/isaac_ros_ess/launch/isaac_ros_ess_realsense.launch.py
index cb6c18d..27c1390 100644
--- a/isaac_ros_ess/launch/isaac_ros_ess_realsense.launch.py
+++ b/isaac_ros_ess/launch/isaac_ros_ess_realsense.launch.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/isaac_ros_ess/package.xml b/isaac_ros_ess/package.xml
index ffc0d2c..ed3d163 100644
--- a/isaac_ros_ess/package.xml
+++ b/isaac_ros_ess/package.xml
@@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0
 
   <name>isaac_ros_ess</name>
-  <version>0.20.0</version>
+  <version>0.30.0</version>
   <description>DNN Stereo Disparity Network for Isaac ROS</description>
 
   Xutong Ren
@@ -33,12 +33,15 @@ SPDX-License-Identifier: Apache-2.0
   <depend>rclcpp</depend>
   <depend>rclcpp_components</depend>
 
+  <depend>isaac_ros_image_proc</depend>
   <depend>isaac_ros_nitros</depend>
   <depend>isaac_ros_nitros_camera_info_type</depend>
   <depend>isaac_ros_nitros_disparity_image_type</depend>
   <depend>isaac_ros_nitros_image_type</depend>
   <depend>isaac_ros_stereo_image_proc</depend>
 
+  <build_depend>isaac_ros_common</build_depend>
+
   <test_depend>ament_lint_auto</test_depend>
   <test_depend>ament_lint_common</test_depend>
   <test_depend>isaac_ros_test</test_depend>
diff --git a/isaac_ros_ess/scripts/isaac_ros_ess_visualizer.py b/isaac_ros_ess/scripts/isaac_ros_ess_visualizer.py
index c610542..988defb 100755
--- a/isaac_ros_ess/scripts/isaac_ros_ess_visualizer.py
+++ b/isaac_ros_ess/scripts/isaac_ros_ess_visualizer.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/isaac_ros_ess/src/ess_disparity_node.cpp b/isaac_ros_ess/src/ess_disparity_node.cpp
index 7bdfdc8..cc818e0 100644
--- a/isaac_ros_ess/src/ess_disparity_node.cpp
+++ b/isaac_ros_ess/src/ess_disparity_node.cpp
@@ -1,5 +1,5 @@
 // SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -60,14 +60,14 @@ constexpr char APP_YAML_FILENAME[] = "config/ess_inference.yaml";
 constexpr char PACKAGE_NAME[] = "isaac_ros_ess";
 
 const std::vector<std::pair<std::string, std::string>> EXTENSIONS = {
-  {"isaac_ros_nitros", "gxf/std/libgxf_std.so"},
-  {"isaac_ros_nitros", "gxf/multimedia/libgxf_multimedia.so"},
-  {"isaac_ros_nitros", "gxf/cuda/libgxf_cuda.so"},
-  {"isaac_ros_nitros", "gxf/serialization/libgxf_serialization.so"},
-  {"isaac_ros_nitros", "gxf/tensorops/libgxf_tensorops.so"},
-  {"isaac_ros_nitros", "gxf/libgxf_disparity_extension.so"},
-  {"isaac_ros_nitros", "gxf/libgxf_synchronization.so"},
-  {"isaac_ros_nitros", "gxf/ess/libgxf_cvcore_ess.so"}
+  {"isaac_ros_gxf", "gxf/lib/std/libgxf_std.so"},
+  {"isaac_ros_gxf", "gxf/lib/multimedia/libgxf_multimedia.so"},
+  {"isaac_ros_gxf", "gxf/lib/cuda/libgxf_cuda.so"},
+  {"isaac_ros_gxf", "gxf/lib/serialization/libgxf_serialization.so"},
+  {"isaac_ros_gxf", "gxf/lib/libgxf_synchronization.so"},
+  {"isaac_ros_image_proc", "gxf/lib/image_proc/libgxf_tensorops.so"},
+  {"isaac_ros_stereo_image_proc", "gxf/lib/sgm_disparity/libgxf_disparity_extension.so"},
+  {"isaac_ros_ess", "gxf/lib/ess/libgxf_cvcore_ess.so"}
 };
 const std::vector<std::string> PRESET_EXTENSION_SPEC_NAMES = {
   "isaac_ros_ess",
diff --git a/isaac_ros_ess/test/isaac_ros_ess_test.py b/isaac_ros_ess/test/isaac_ros_ess_test.py
index 482593b..adfd239 100644
--- a/isaac_ros_ess/test/isaac_ros_ess_test.py
+++ b/isaac_ros_ess/test/isaac_ros_ess_test.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/resources/Isaac_sim_enable_stereo.png b/resources/Isaac_sim_enable_stereo.png
index 827f8da..69c2126 100644
--- a/resources/Isaac_sim_enable_stereo.png
+++ b/resources/Isaac_sim_enable_stereo.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:602b513e8a85d563f0f87b312677de2c92d04e9c9b8800d31f71d22e6a189c84
-size 245916
+oid sha256:8c9ba1412d085b83a42833d12765e6ea331000047bc48849359858c5e9167beb
+size 109035
diff --git a/resources/Isaac_sim_play.png b/resources/Isaac_sim_play.png
index eb2fcbd..7208c9e 100644
--- a/resources/Isaac_sim_play.png
+++ b/resources/Isaac_sim_play.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c82817945a9e9617dc082ea389b5374050dce911ef5a104ee2c7aef577006547
-size 3756879
+oid sha256:e5c0d786681c57ab859790262f19157b22a18a1f4f0bd254fa7f65df12fbe516
+size 784360
diff --git a/resources/Isaac_sim_set_stereo_offset.png b/resources/Isaac_sim_set_stereo_offset.png
index 653e042..f6ce4c2 100644
--- a/resources/Isaac_sim_set_stereo_offset.png
+++ b/resources/Isaac_sim_set_stereo_offset.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:181371ea36dcb597d3a5df938191954742e6d18812e9b0ad4cb8f25d985ff8e0
-size 113557
+oid sha256:bb562c0a01984e1115ae37811b749b14e2bb1a4429a5e3e2043157ef52d9aa36
+size 112275
diff --git a/resources/isaac_ros_ess_nodegraph.png b/resources/isaac_ros_ess_nodegraph.png
new file mode 100644
index 0000000..7052df3
--- /dev/null
+++ b/resources/isaac_ros_ess_nodegraph.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed1f1826b874f70f1d7591f92eb0cc3c208e953db30e78e3a7d789e801eafb56
+size 34903
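The gxf::Tensor adapter in ImageAdapterTensorImpl.hpp follows the same allocate-then-wrap pattern for the HWC-interleaved image types; NV12/NV24 are rejected there because a single gxf::Tensor cannot describe a two-plane layout. A minimal sketch, assuming handles named tensor (gxf::Handle<gxf::Tensor>) and pool (gxf::Handle<gxf::Allocator>) are in scope; the handle names and dimensions are illustrative:

    // Reshape the tensor to an HWC float32 layout for a 960x576 RGB_F32 image.
    gxf_result_t code = nvidia::cvcore::tensor_ops::detail::AllocateTensor<::cvcore::ImageType::RGB_F32>(
        tensor, 960, 576, pool, /*is_cpu=*/false, /*allocate_pitch_linear=*/false);
    if (code != GXF_SUCCESS) {
      return code;  // reshapeCustom failed
    }

    // Wrap the tensor as a cvcore image view; the row stride is read back from
    // the tensor's own stride along the height dimension.
    auto view = nvidia::cvcore::tensor_ops::detail::WrapImageFromTensor<::cvcore::ImageType::RGB_F32>(tensor);
    if (!view) {
      return GXF_FAILURE;  // null data pointer or mismatched tensor info
    }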