forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy path int_mm_kernel.h
38 lines (33 loc) · 1.08 KB
/
int_mm_kernel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#pragma once
#include <ATen/core/Tensor.h>
#include <ATen/native/DispatchStub.h>
namespace at::native {
// Function-pointer signatures for the quantized ("int mm") CPU matmul
// kernels. Each alias below is paired with a DECLARE_DISPATCH so that
// architecture-specific implementations can be registered through the
// ATen DispatchStub mechanism (see <ATen/native/DispatchStub.h>);
// the definitions/registrations live in the kernel .cpp files.

// Re-packs an int4 weight tensor into the layout the matmul kernels expect.
// NOTE(review): which Tensor& is the input and which the packed output is
// not visible from this header — confirm against the kernel definition.
using weight_to_int4pack_fn = void (*)(const Tensor&, const Tensor&);
// Matmul where one operand is an int4-packed weight.
// NOTE(review): the bare `int` parameter is presumably the quantization
// group size and the trailing Tensor& the scales/zero-points — verify at
// the registration/call site. It is also the only non-int64_t integer
// parameter in this header; confirm the width is intentional.
using int4pack_mm_fn =
void (*)(const Tensor&, const Tensor&, const Tensor&, int, const Tensor&);
// Matmul where one operand is an int8-packed weight.
// NOTE(review): roles of the four tensors (activation, weight, scales,
// output) are not visible here — verify against the kernel definition.
using int8pack_mm_fn =
void (*)(const Tensor&, const Tensor&, const Tensor&, const Tensor&);
// Packs a 4-bit weight (with an optional bias) for the dynamically
// quantized matmul path. The three trailing int64_t parameters are unnamed
// here — presumably sizes/group size; confirm against the kernel.
using dyn_quant_pack_4bit_weight_fn = void (*)(
Tensor&,
const Tensor&,
const Tensor&,
const std::optional<Tensor>& bias,
const int64_t,
const int64_t,
const int64_t);
// Dynamically quantized 4-bit matmul kernel. The four trailing int64_t
// parameters are unnamed here (likely M/N/K and group size) — confirm
// against the kernel definition before relying on their order.
using dyn_quant_matmul_4bit_fn = void (*)(
const Tensor&,
const Tensor&,
const Tensor&,
const int64_t,
const int64_t,
const int64_t,
const int64_t);

// Dispatch stubs: one per kernel signature above. Concrete kernels are
// registered per CPU capability in the corresponding translation units.
DECLARE_DISPATCH(weight_to_int4pack_fn, weight_to_int4pack_stub)
DECLARE_DISPATCH(int4pack_mm_fn, int4pack_mm_stub)
DECLARE_DISPATCH(int8pack_mm_fn, int8pack_mm_stub)
DECLARE_DISPATCH(
dyn_quant_pack_4bit_weight_fn,
dyn_quant_pack_4bit_weight_stub)
DECLARE_DISPATCH(dyn_quant_matmul_4bit_fn, dyn_quant_matmul_4bit_stub)
} // namespace at::native