Skip to content

Commit

Permalink
Add IGpuAllocator to MLIR-TensorRT
Browse files Browse the repository at this point in the history
  • Loading branch information
jhalakpatel committed Aug 16, 2024
1 parent 7705c34 commit 7b9a0f7
Show file tree
Hide file tree
Showing 14 changed files with 378 additions and 38 deletions.
4 changes: 2 additions & 2 deletions mlir-tensorrt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ We currently support only building on Linux x86 systems.
We support building several different ways (only via CMake) depending on use-case.

In each case, the LLVM-Project version that we are currently aligned to is
given in `build_tools/cmake/LLVMCommit.cmake`.

Note that currently we provide an LLVM patch which essentially cherry-picks the
bug fixes from [this open MLIR PR](https://github.com/llvm/llvm-project/pull/91524).
Expand Down Expand Up @@ -82,7 +82,7 @@ git clone https://github.com/llvm/llvm-project.git llvm-project
# Checkout the right commit. Of course, you may try
# a newer commit or your own modified LLVM-Project.
cd llvm-project
git checkout $(cat build_tools/cmake/LLVMCommit.cmake | grep -Po '(?<=").*(?=")')
git checkout $(cat ../build_tools/cmake/LLVMCommit.cmake | grep -Po '(?<=").*(?=")')

# Apply patch from llvm-project PR 91524
git apply ../build_tools/llvm-project.patch
Expand Down
34 changes: 33 additions & 1 deletion mlir-tensorrt/executor/include/mlir-executor-c/Runtime/Runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,38 @@ mtrtScalarValueCastToRuntimeValue(MTRT_ScalarValue v);
MLIR_CAPI_EXPORTED MTRT_Status
mtrtScalarValueGetType(MTRT_ScalarValue scalar, MTRT_ScalarTypeCode *code);

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

typedef struct MTRT_GpuAllocator {
void *ptr;
} MTRT_GpuAllocator;

/// Checks nullity of `GpuAllocator`.
MTRT_CAPI_EXPORTED bool mtrtGpuAllocatorIsNull(MTRT_GpuAllocator gpuAllocator);

/// Returns null `GpuAllocator`.
MTRT_CAPI_EXPORTED MTRT_GpuAllocator mtrtGpuAllocatorGetNull();

MTRT_CAPI_EXPORTED MTRT_Status
mtrtGpuAllocatorDestroy(MTRT_GpuAllocator executable);

MTRT_CAPI_EXPORTED MTRT_Status
mtrtGpuAllocatorCreate(MTRT_GpuAllocator *allocator);

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator: allocation / deallocation
//===----------------------------------------------------------------------===//

MTRT_CAPI_EXPORTED MTRT_Status mtrtGpuAllocatorAllocate(
MTRT_GpuAllocator gpuAllocator, uint64_t size, uint64_t alignment,
uint32_t flags, MTRT_Stream stream, void **memory);

MTRT_CAPI_EXPORTED MTRT_Status
mtrtGpuAllocatorDeallocate(MTRT_GpuAllocator gpuAllocator, void *memory,
MTRT_Stream stream, bool *result);

//===----------------------------------------------------------------------===//
// MTRT_RuntimeSessionOptions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -359,7 +391,7 @@ typedef struct MTRT_RuntimeSession {
/// that the session only has a read-only view in to the Executable for code and
/// constant data. Therefore the Executable must outlive the RuntimeSession.
MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeSessionCreate(
MTRT_RuntimeSessionOptions options, MTRT_Executable executable,
MTRT_RuntimeSessionOptions options, MTRT_Executable executable, MTRT_GpuAllocator allocator,
MTRT_RuntimeSession *result);

/// Destroy the session. This does not destroy the associated Executable, which
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,8 @@ class RuntimeSession {
sol::state state,
std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator,
std::unique_ptr<AllocTracker> allocTracker,
std::unique_ptr<ResourceTracker> resourceTracker);
std::unique_ptr<ResourceTracker> resourceTracker,
GpuAllocator* gpuAllocator);

ExecutableView getExecutable() const { return executable; }

Expand All @@ -881,14 +882,16 @@ class RuntimeSession {

ResourceTracker &getResourceTracker() { return *resourceTracker; }

GpuAllocator* getGpuAllocator() { return gpuAllocator; }

private:
RuntimeSessionOptions options;
ExecutableView executable;

std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator;
std::unique_ptr<AllocTracker> allocTracker;
std::unique_ptr<ResourceTracker> resourceTracker;

GpuAllocator* gpuAllocator;
sol::state state;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ void registerLuaRuntimeMethods(lua_State *state,
const RuntimeSessionOptions &options,
PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker,
ResourceTracker *resourceTracker);
ResourceTracker *resourceTracker, GpuAllocator* allocator);

} // namespace mlirtrt::runtime
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace mlirtrt::runtime {
/// `main` function. It is assumed that `main` takes no arguments and returns an
/// integer result (which is returned if the execution is successful).
/// TODO: this should take a handle to a function for streaming output/errors.
StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript);
StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript, GpuAllocator* allocator);

/// Synchronously run a serialized executor Executable one time. An `Executable`
/// is essentially a Lua script packaged with metadata and serialized constants
Expand All @@ -48,12 +48,12 @@ StatusOr<int64_t> runExecutorLuaScript(std::string_view luaScript);
/// execution is successful).
/// TODO: this should take a handle to a function for
/// streaming output/errors.
StatusOr<int64_t> runExecutorExecutable(std::unique_ptr<Executable> executable);
StatusOr<int64_t> runExecutorExecutable(std::unique_ptr<Executable> executable, GpuAllocator* allocator);

/// Create an execution state. This will setup a Lua environment and invoke
/// global initialization.
StatusOr<std::unique_ptr<RuntimeSession>>
createRuntimeSessionWithLuaBackend(ExecutableView executable,
createRuntimeSessionWithLuaBackend(ExecutableView executable, GpuAllocator* allocator,
const RuntimeSessionOptions &options);

/// Set the primary stream for the loaded executable to use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ResourceTracker;
/// Lua state.
void registerExecutorTensorRTModuleLuaRuntimeMethods(
lua_State *luaState, PinnedMemoryAllocator *pinnedMemoryAllocator,
AllocTracker *allocTracker, ResourceTracker *resourceTracker);
AllocTracker *allocTracker, ResourceTracker *resourceTracker, GpuAllocator* allocator);

} // namespace mlirtrt::runtime

Expand Down
73 changes: 73 additions & 0 deletions mlir-tensorrt/executor/include/mlir-executor/Support/Allocators.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,79 @@ namespace mlirtrt {

struct EventPool;

// Abstract allocator to be implemented by consumers.
using AllocatorFlags = uint32_t;

/// Interface for GPU memory allocation used by the runtime. Implementations
/// provide allocate/deallocate/reallocate; each method takes an optional CUDA
/// stream for asynchronous operation and reports failure through StatusOr.
class GpuAllocator {
public:
  GpuAllocator() = default;
  // Virtual dtor: instances are deleted through GpuAllocator* (see the C API).
  virtual ~GpuAllocator() = default;

  /// Resize the allocation at `baseAddr` to `newSize` bytes with the given
  /// `alignment`. NOTE(review): exact semantics (in-place growth vs.
  /// copy-to-new-block) are implementation-defined — confirm with implementers.
  virtual StatusOr<void *> reallocate(void *baseAddr, uint64_t alignment,
                                      uint64_t newSize,
                                      std::optional<cudaStream_t> stream) = 0;

  /// Allocate `size` bytes with `alignment` and implementation-defined
  /// `flags`; returns the new pointer or an error status.
  virtual StatusOr<void *> allocate(uint64_t const size,
                                    uint64_t const alignment,
                                    AllocatorFlags const flags,
                                    std::optional<cudaStream_t> stream) = 0;

  /// Release `memory`; returns whether a deallocation actually took place.
  virtual StatusOr<bool> deallocate(void *const memory,
                                    std::optional<cudaStream_t> stream) = 0;

protected:
  // Non-copyable and non-movable: implementations are shared by pointer.
  GpuAllocator(GpuAllocator const &) = delete;
  GpuAllocator(GpuAllocator &&) = delete;
  GpuAllocator &operator=(GpuAllocator const &) & = delete;
  GpuAllocator &operator=(GpuAllocator &&) & = delete;
};

class StubAllocator : public GpuAllocator {
public:
StubAllocator() = default;
~StubAllocator() = default;

StatusOr<void *> reallocate(void *baseAddr, uint64_t alignment,
uint64_t newSize,
std::optional<cudaStream_t> stream) override {
return getStatusWithMsg(
StatusCode::InternalError,
"[StubAllocator][reallocate]: Must be overriden in Python");
}

StatusOr<void *> allocate(uint64_t const size, uint64_t const alignment,
AllocatorFlags const flags,
std::optional<cudaStream_t> stream) override {
return getStatusWithMsg(
StatusCode::InternalError,
"[StubAllocator][allocate]: Must be overriden in Python");
}

StatusOr<bool> deallocate(void *const memory,
std::optional<cudaStream_t> stream) override {
return getStatusWithMsg(
StatusCode::InternalError,
"[StubAllocator][deallocate]: Must be overriden in Python");
}
};

class CustomTensorRTAllocator : public GpuAllocator {
public:
CustomTensorRTAllocator() = default;
~CustomTensorRTAllocator() = default;

StatusOr<void *> reallocate(void *baseAddr, uint64_t alignment,
uint64_t newSize,
std::optional<cudaStream_t> stream) override;

StatusOr<void *> allocate(uint64_t const size, uint64_t const alignment,
AllocatorFlags const flags,
std::optional<cudaStream_t> stream) override;

StatusOr<bool> deallocate(void *const memory,
std::optional<cudaStream_t> stream) override;
};

//===----------------------------------------------------------------------===//
// PoolTrackedCudaEvent
//===----------------------------------------------------------------------===//
Expand Down
56 changes: 55 additions & 1 deletion mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "mlir-executor/Runtime/API/API.h"
#include "mlir-executor/Runtime/API/ExecutableFlatbuffer.h"
#include "mlir-executor/Runtime/Backend/Lua/LuaRuntime.h"
#include "mlir-executor/Support/Allocators.h"
#include "mlir-executor/Support/Status.h"
#include "mlir/Support/FileUtilities.h"
#include "llvm/ADT/SmallVectorExtras.h"
Expand All @@ -48,6 +49,8 @@ DEFINE_C_API_PTR_METHODS(MTRT_RuntimeSession,
::mlirtrt::runtime::RuntimeSession)
DEFINE_C_API_PTR_METHODS(MTRT_RuntimeSessionOptions,
::mlirtrt::runtime::RuntimeSessionOptions)
DEFINE_C_API_PTR_METHODS(MTRT_GpuAllocator,
::mlirtrt::GpuAllocator)
DEFINE_C_API_PTR_METHODS(MTRT_Executable, ::mlirtrt::runtime::Executable)
DEFINE_C_API_PTR_METHODS(MTRT_Stream, MTRT_StreamImpl)
DEFINE_C_API_PTR_METHODS(MTRT_RuntimeValue, ::mlirtrt::runtime::RuntimeValue)
Expand Down Expand Up @@ -598,6 +601,55 @@ MTRT_ScalarValue mtrtRuntimeValueDynCastToScalar(MTRT_RuntimeValue v) {
return wrap(static_cast<ScalarValue *>(x));
}

//===----------------------------------------------------------------------===//
// MTRT_GpuAllocator
//===----------------------------------------------------------------------===//

/// Return true if the handle wraps no allocator object.
bool mtrtGpuAllocatorIsNull(MTRT_GpuAllocator gpuAllocator) {
  return gpuAllocator.ptr == nullptr;
}

MTRT_GpuAllocator mtrtGpuAllocatorGetNull() { return MTRT_GpuAllocator{nullptr}; }

/// Destroy the allocator owned by the given handle. (Parameter renamed from
/// `executable`, a copy-paste leftover, to `allocator`.)
MTRT_Status mtrtGpuAllocatorDestroy(MTRT_GpuAllocator allocator) {
  delete unwrap(allocator);
  return mtrtStatusGetOk();
}

/// Create a new allocator handle backed by a `StubAllocator` (whose methods
/// all fail until a real implementation replaces them, e.g. from Python).
/// The caller owns the result and must call `mtrtGpuAllocatorDestroy`.
MTRT_Status mtrtGpuAllocatorCreate(MTRT_GpuAllocator *allocator) {
  *allocator = MTRT_GpuAllocator{/*ptr=*/new StubAllocator()};
  return mtrtStatusGetOk();
}

/// Allocate `size` bytes with `alignment`/`flags` through the wrapped
/// allocator, optionally asynchronously on `stream`; stores the pointer in
/// `*memory` on success.
MTRT_Status mtrtGpuAllocatorAllocate(MTRT_GpuAllocator gpuAllocator,
                                     uint64_t size, uint64_t alignment,
                                     uint32_t flags, MTRT_Stream stream,
                                     void **memory) {
  GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);
  StatusOr<void *> status = cppGpuAllocator->allocate(
      size, alignment, flags,
      !mtrtStreamIsNull(stream) ? std::optional(unwrap(stream)->getRawStream())
                                : std::nullopt);
  // Propagate allocation failure to the caller; the original returned OK even
  // on error, leaving `*memory` unset.
  if (!status.isOk())
    return wrap(status.getStatus());
  *memory = *status;
  return mtrtStatusGetOk();
}

/// Deallocate `memory` through the wrapped allocator, optionally
/// asynchronously on `stream`; `*result` receives whether a deallocation was
/// actually performed.
MTRT_Status mtrtGpuAllocatorDeallocate(MTRT_GpuAllocator gpuAllocator,
                                       void *memory, MTRT_Stream stream,
                                       bool *result) {
  GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);
  StatusOr<bool> status = cppGpuAllocator->deallocate(
      memory, !mtrtStreamIsNull(stream)
                  ? std::optional(unwrap(stream)->getRawStream())
                  : std::nullopt);
  // Propagate deallocation failure to the caller; the original returned OK
  // even on error, leaving `*result` unset.
  if (!status.isOk())
    return wrap(status.getStatus());
  *result = *status;
  return mtrtStatusGetOk();
}

//===----------------------------------------------------------------------===//
// MTRT_RuntimeSessionOptions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -625,12 +677,14 @@ mtrtRuntimeSessionOptionsDestroy(MTRT_RuntimeSessionOptions options) {

MTRT_Status mtrtRuntimeSessionCreate(MTRT_RuntimeSessionOptions options,
MTRT_Executable executable,
MTRT_GpuAllocator gpuAllocator,
MTRT_RuntimeSession *result) {
RuntimeSessionOptions *cppOptions = unwrap(options);
Executable *cppExecutable = unwrap(executable);
GpuAllocator *cppGpuAllocator = unwrap(gpuAllocator);

StatusOr<std::unique_ptr<RuntimeSession>> session =
createRuntimeSessionWithLuaBackend(cppExecutable->getView(), *cppOptions);
createRuntimeSessionWithLuaBackend(cppExecutable->getView(), cppGpuAllocator, *cppOptions);
if (session.isError())
return wrap(session.getStatus());

Expand Down
7 changes: 4 additions & 3 deletions mlir-tensorrt/executor/lib/Runtime/API/API.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,16 +349,17 @@ RuntimeSessionOptions::createUsingSingleHostMpi() {
//===----------------------------------------------------------------------===//
// RuntimeSession
//===----------------------------------------------------------------------===//

/// Construct a session. The trackers are owned by the session; `gpuAllocator`
/// is a non-owning raw pointer (may be null) — presumably it must outlive the
/// session; TODO confirm the ownership contract at call sites.
/// (The rendered diff had a stale duplicate of the `resourceTracker`
/// initializer line; only the post-commit initializer list is kept.)
RuntimeSession::RuntimeSession(
    RuntimeSessionOptions options, ExecutableView exe, sol::state state,
    std::unique_ptr<PinnedMemoryAllocator> pinnedMemoryAllocator,
    std::unique_ptr<AllocTracker> allocTracker,
    std::unique_ptr<ResourceTracker> resourceTracker,
    GpuAllocator *gpuAllocator)
    : options(std::move(options)), executable(exe),
      pinnedMemoryAllocator(std::move(pinnedMemoryAllocator)),
      allocTracker(std::move(allocTracker)),
      resourceTracker(std::move(resourceTracker)), gpuAllocator(gpuAllocator),
      state(std::move(state)) {}

//===----------------------------------------------------------------------===//
// AllocTracker
Expand Down
Loading

0 comments on commit 7b9a0f7

Please sign in to comment.