Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,20 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]

```

##### 试验功能 -- 编译marlin相关算子

```shell

# 需要从 GitHub 上克隆 dlpack 以及 tvm-ffi 仓库(建议使用 https 地址),克隆命令参考:
git clone --recursive https://github.com/dmlc/dlpack.git
git clone --recursive https://github.com/apache/tvm-ffi.git

# 设置CPATH
export CPATH=<path-to>/tvm-ffi/include:$CPATH #用来搜索tvm相关头文件
export CPATH=<path-to>/dlpack/include:$CPATH #用来搜索dlpack.h

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

需要指定commit id,尽量使用https的git地址

```

2. 编译安装

默认安装路径为 `$HOME/.infini`。
Expand Down
42 changes: 42 additions & 0 deletions include/infiniop/ops/gptq_marlin_gemm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef __INFINIOP_GPTQ_MARLIN_GEMM_API_H__
#define __INFINIOP_GPTQ_MARLIN_GEMM_API_H__

#include "../operator_descriptor.h"
#include <cstdint>

/** Handle type for GPTQ-Marlin GEMM operator descriptors (a pointer to the common InfiniopDescriptor struct). */
typedef struct InfiniopDescriptor *infiniopGptqMarlinGemmDescriptor_t;

/**
 * @brief Create a GPTQ-Marlin GEMM operator descriptor from the tensor descriptors of its operands.
 *
 * NOTE(review): the shape notes below are taken from the validation in
 * op::gptq_marlin_gemm::GptqMarlinGemmInfo::create; confirm that every backend routes through it.
 *
 * @param handle            Library handle the descriptor is created for.
 * @param desc_ptr          Output location for the new descriptor handle.
 * @param out_desc          Output tensor descriptor; expected 2-D, read as (M, N).
 * @param a_desc            Activation tensor descriptor; expected 2-D, dim(1) is read as K.
 * @param b_desc            Quantized weight tensor descriptor; expected 2-D.
 * @param b_scales_desc     Weight scale tensor descriptor; expected 2-D, read as (num_groups, N).
 * @param global_scale_desc Global scale tensor descriptor (presumably optional -- TODO confirm).
 * @param b_zeros_desc      Weight zero-point tensor descriptor (presumably optional -- TODO confirm).
 * @param g_idx_desc        Group index tensor descriptor (presumably optional -- TODO confirm).
 * @param perm_desc         Permutation tensor descriptor (presumably optional -- TODO confirm).
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopCreateGptqMarlinGemmDescriptor(infiniopHandle_t handle,
infiniopGptqMarlinGemmDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc,
infiniopTensorDescriptor_t b_scales_desc,
infiniopTensorDescriptor_t global_scale_desc,
infiniopTensorDescriptor_t b_zeros_desc,
infiniopTensorDescriptor_t g_idx_desc,
infiniopTensorDescriptor_t perm_desc);

/**
 * @brief Query the workspace size associated with a GPTQ-Marlin GEMM descriptor.
 *
 * @param desc Descriptor to query.
 * @param size Output location for the workspace size (presumably in bytes -- TODO confirm).
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopGetGptqMarlinGemmWorkspaceSize(infiniopGptqMarlinGemmDescriptor_t desc, size_t *size);

/**
 * @brief Run the GPTQ-Marlin GEMM described by @p desc.
 *
 * The data pointers correspond one-to-one to the tensor descriptors passed to
 * infiniopCreateGptqMarlinGemmDescriptor.
 *
 * @param desc            Descriptor created by infiniopCreateGptqMarlinGemmDescriptor.
 * @param workspace       Scratch buffer for the operator.
 * @param workspace_size  Size of @p workspace (see infiniopGetGptqMarlinGemmWorkspaceSize).
 * @param out             Output buffer.
 * @param a               Activation buffer (read-only).
 * @param b               Quantized weight buffer (read-only).
 * @param b_scales        Weight scale buffer.
 * @param global_scale    Global scale buffer (presumably may be null when unused -- TODO confirm).
 * @param b_zeros         Weight zero-point buffer (presumably may be null when unused -- TODO confirm).
 * @param g_idx           Group index buffer (presumably may be null when unused -- TODO confirm).
 * @param perm            Permutation buffer (presumably may be null when unused -- TODO confirm).
 * @param b_q_type_id     Integer identifier of the quantized weight type
 *                        (NOTE(review): encoding is not defined in this header -- confirm against the kernel).
 * @param is_k_full       Kernel option flag, forwarded to the backend implementation.
 * @param use_atomic_add  Kernel option flag, forwarded to the backend implementation.
 * @param use_fp32_reduce Kernel option flag, forwarded to the backend implementation.
 * @param is_zp_float     Kernel option flag, forwarded to the backend implementation.
 * @param stream          Backend stream/queue handle to launch on.
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopGptqMarlinGemm(infiniopGptqMarlinGemmDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
const void *a,
const void *b,
void *b_scales,
void *global_scale,
void *b_zeros,
void *g_idx,
void *perm,
int64_t b_q_type_id,
bool is_k_full,
bool use_atomic_add,
bool use_fp32_reduce,
bool is_zp_float,
void *stream);

/**
 * @brief Destroy a descriptor previously created by infiniopCreateGptqMarlinGemmDescriptor.
 *
 * @param desc Descriptor to destroy.
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyGptqMarlinGemmDescriptor(infiniopGptqMarlinGemmDescriptor_t desc);

#endif
66 changes: 66 additions & 0 deletions src/infiniop/ops/gptq_marlin_gemm/gptq_marlin_gemm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#ifndef __GPTQ_MARLIN_GEMM_H__
#define __GPTQ_MARLIN_GEMM_H__

#include "../../../utils.h"
#include "../../operator.h"
#include "../../tensor.h"
#include "info.h"

/*
 * DESCRIPTOR(NAMESPACE)
 *
 * Declares the class op::gptq_marlin_gemm::<NAMESPACE>::Descriptor, a final
 * subclass of InfiniopDescriptor, so that each backend namespace gets the same
 * class shape from a single definition.
 *
 * The generated class:
 *   - holds a pointer to a forward-declared `Opaque` struct (only declared here;
 *     its definition is expected to come from the backend), a GptqMarlinGemmInfo
 *     value and the workspace size;
 *   - has a private constructor, so instances are obtained through the static
 *     `create()` factory, which reports its result via `*desc_ptr` and an
 *     infiniStatus_t return value;
 *   - exposes `workspaceSize()` (returns the stored size) and a const
 *     `calculate()` whose parameters mirror the public infiniopGptqMarlinGemm
 *     entry point;
 *   - declares, but does not define, the destructor, `create()` and
 *     `calculate()`; their definitions are not part of this header.
 *
 * Only block comments may be used inside the macro body: a `//` comment on a
 * line ending in a backslash would swallow the continued lines.
 */
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::gptq_marlin_gemm::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
GptqMarlinGemmInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
size_t workspace_size_, \
Opaque *opaque, \
GptqMarlinGemmInfo info, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size_) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t out_desc, \
infiniopTensorDescriptor_t a_desc, \
infiniopTensorDescriptor_t b_desc, \
infiniopTensorDescriptor_t b_scales_desc, \
infiniopTensorDescriptor_t global_scale_desc, \
infiniopTensorDescriptor_t b_zeros_desc, \
infiniopTensorDescriptor_t g_idx_desc, \
infiniopTensorDescriptor_t perm_desc); \
\
infiniStatus_t calculate( \
void *workspace, \
size_t workspace_size, \
void *out, \
const void *a, \
const void *b, \
void *b_scales, \
void *global_scale, \
void *b_zeros, \
void *g_idx, \
void *perm, \
int64_t b_q_type_id, \
bool is_k_full, \
bool use_atomic_add, \
bool use_fp32_reduce, \
bool is_zp_float, \
void *stream) const; \
}; \
}

#endif //__GPTQ_MARLIN_GEMM_H__
59 changes: 59 additions & 0 deletions src/infiniop/ops/gptq_marlin_gemm/info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef __GPTQ_MARLIN_GEMM_INFO_H__
#define __GPTQ_MARLIN_GEMM_INFO_H__

#include "../../../utils.h"
#include "../../tensor.h"
#include <vector>

#include <cassert>

namespace op::gptq_marlin_gemm {

/**
 * Shape and stride metadata extracted from the tensor descriptors of a
 * GPTQ-Marlin GEMM call. Instances are produced only through create().
 */
class GptqMarlinGemmInfo {
    GptqMarlinGemmInfo() = default;

public:
    infiniDtype_t dtype;        // dtype of the activation tensor `a`
    size_t M, K, N, b_q_size_1; // out is (M, N); K = a.dim(1); b_q_size_1 = b.dim(1)
    int num_groups;             // b_scales.dim(0)
    ptrdiff_t a_stride_0;       // stride of `a` along dimension 0

    /**
     * Validate the descriptors and collect the GEMM problem sizes.
     *
     * Only out_desc, a_desc, b_desc and b_scales_desc are inspected; the
     * remaining descriptors are accepted for interface symmetry and are not
     * read here.
     *
     * @return A Result holding the populated info on success, or
     *         INFINI_STATUS_NULL_POINTER if any of the four inspected
     *         descriptors is null, or INFINI_STATUS_BAD_TENSOR_SHAPE if any of
     *         them is not 2-D, if b_scales.shape[1] != N, or if the row stride
     *         of `a` is not a multiple of 8.
     */
    static utils::Result<GptqMarlinGemmInfo> create(
        infiniopTensorDescriptor_t out_desc,
        infiniopTensorDescriptor_t a_desc,
        infiniopTensorDescriptor_t b_desc,
        infiniopTensorDescriptor_t b_scales_desc,
        infiniopTensorDescriptor_t global_scale_desc,
        infiniopTensorDescriptor_t b_zeros_desc,
        infiniopTensorDescriptor_t g_idx_desc,
        infiniopTensorDescriptor_t perm_desc) {
        CHECK_OR_RETURN(
            out_desc != nullptr && a_desc != nullptr && b_desc != nullptr && b_scales_desc != nullptr,
            INFINI_STATUS_NULL_POINTER);

        // Validate ranks BEFORE touching dim(1)/strides()[0]: reading those on a
        // tensor with fewer than two dimensions would index out of range.
        auto ndim = out_desc->ndim();
        CHECK_OR_RETURN(ndim == 2
                            && a_desc->ndim() == ndim
                            && b_desc->ndim() == ndim
                            && b_scales_desc->ndim() == ndim,
                        INFINI_STATUS_BAD_TENSOR_SHAPE);

        const infiniDtype_t dtype = a_desc->dtype();
        size_t M = out_desc->dim(0);
        size_t N = out_desc->dim(1);
        size_t K = a_desc->dim(1);
        size_t b_q_size_1 = b_desc->dim(1);
        int num_groups = static_cast<int>(b_scales_desc->dim(0));
        ptrdiff_t a_stride_0 = a_desc->strides()[0];

        // One scale column per output column; the row stride of `a` must be a
        // multiple of 8 elements (presumably a kernel alignment requirement --
        // TODO confirm against the Marlin kernel).
        CHECK_OR_RETURN(b_scales_desc->shape()[1] == N
                            && a_stride_0 % 8 == 0,
                        INFINI_STATUS_BAD_TENSOR_SHAPE);

        return utils::Result<GptqMarlinGemmInfo>(
            GptqMarlinGemmInfo{dtype, M, K, N, b_q_size_1, num_groups, a_stride_0});
    }
};

} // namespace op::gptq_marlin_gemm

#endif // __GPTQ_MARLIN_GEMM_INFO_H__
Loading
Loading