[muxi]Fdy/add mx rt diopi (DeepLink-org#1267)
* add mx impl
fandaoyi authored Jun 24, 2024
1 parent 2ee61ea commit c4a6054
Showing 12 changed files with 326 additions and 66 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -141,3 +141,4 @@ kernel_meta_*
# generate files
impl/*/test/export_functions.cpp
proto/include/diopi/diopi_adaptors.hpp
diopilib
170 changes: 106 additions & 64 deletions adaptor/codegen/gen.py
@@ -68,6 +68,9 @@
"AdamW": ["param", "exp_avg", "exp_avg_sq", "max_exp_avg_sq"],
}

def remap_impl_device(device):
return "cuda" if device == "torch" else device


def findAllFile(base: str) -> Iterator[str]:
for root, ds, fs in os.walk(base):
@@ -178,16 +181,21 @@ def prepare() -> Tuple[dict, str]:
help="if functinos are implemented with plugin mode once more, then compile both of them.",
default=False,
)
parser.add_argument(
'--base_device',
help="if set base device, generator scan base device dir first and then override them if functions defined in device dir",
default=None,
)

options = parser.parse_args()
source = os.path.join(options.diopi_dir, "proto/include/diopi")
config_path = os.path.join(
options.diopi_dir, "impl/", options.config_device
)
device = (
"cuda" if options.config_device == "torch" else options.config_device
)
plugin = options.impl_plugin
device = remap_impl_device(options.config_device)

impl_plugin = options.impl_plugin
base_device = options.base_device

def create_if_not_exist(name):
if not os.path.exists(name):
@@ -197,7 +205,7 @@ def create_if_not_exist(name):
dirs = dict(
source=source, output_dir=options.output_dir, config_path=config_path
)
return dirs, device, plugin
return dirs, device, impl_plugin, base_device


def get_func_info(content: list) -> Tuple[list, list, list, dict]:
@@ -529,7 +537,7 @@ def memory_format_to_str(memory_format):


def autogen_op_adaptor(
op_configs: dict, device: str, func_infos: dict, impl_funcs: dict, impl_plugin: bool, plugin_config: dict
op_configs: dict, device: str, func_infos: dict, impl_funcs: dict, func_device_map: dict
) -> list:
adaptors_code = []
cast = (
@@ -546,7 +554,7 @@ def autogen_op_adaptor(
device_mapping = "composite"
else:
continue
device_mapping = plugin_config.get(func, device)
device_mapping = func_device_map.get(func, device)
if (
(
func not in op_configs.keys()
@@ -700,8 +708,73 @@ def get_composite_funcs_declaration(
return composite_funcs_decl


def get_all_impl_functions(impl_base_dir) -> dict:
# get the implemented functions
impl_func_dir = os.path.join(impl_base_dir, "functions")
impl_func_ext_dir = os.path.join(impl_base_dir, "functions_ext")
impl_functions = obtain_impl_func(impl_func_dir)
impl_functions_ext = obtain_impl_func(impl_func_ext_dir)
impl_functions.update(impl_functions_ext)
return impl_functions


def gen_ascend_impl_plugin_funcs(dirs: dict, impl_base_dir: str, impl_functions: dict):
ascend_config_path = os.path.join(dirs.get("config_path"), "../ascend_npu/ascend_config.yaml")
try:
with open(ascend_config_path, "r") as f:
ascend_configs = yaml.safe_load(f)
except Exception as e:
print(e)
return
func_device_map = ascend_func_impl_config(ascend_configs)

impl_plugin_dir = os.path.join(impl_base_dir, "../ascend_npu/diopi_impl")
impl_npu_functions = obtain_impl_func(impl_plugin_dir)

# check that all config items are implemented
not_impled = []
for op in ascend_configs['ascend']:
if op not in impl_functions.keys():
not_impled.append(op)
if not_impled != []:
print(f"[GenAscendConfig] {not_impled} not implemented in ascend namespace")
return
not_impled.clear()
for op in ascend_configs['ascend_npu']:
if op not in impl_npu_functions.keys():
not_impled.append(op)
if not_impled != []:
print(f"[GenAscendConfig] {not_impled} not implemented in ascend_npu namespace.")
return

funcs_info, funcs_decl_raw = get_functions_support(dirs.get("source"))
funcs_npu_decl = get_impl_funcs_declaration(
funcs_decl_raw, funcs_info, impl_npu_functions.keys(), True,
)
return funcs_npu_decl, impl_npu_functions, func_device_map

def gen_base_device_impl_funcs(device: str, base_device: str, dirs: dict, impl_functions: dict):
base_device_impl_dir = os.path.join(os.path.dirname(dirs.get("config_path")), base_device)
impl_basedev_functions = get_all_impl_functions(base_device_impl_dir)
# remove ops that already exist in the device impl.
impl_basedev_functions = {op: args for op, args in impl_basedev_functions.items() if op not in impl_functions}

funcs_info, funcs_decl_raw = get_functions_support(dirs.get("source"))
func_base_decl = get_impl_funcs_declaration(
funcs_decl_raw, funcs_info, impl_basedev_functions.keys(), True,
)

func_device_map = {}
for op in impl_basedev_functions.keys():
func_device_map[op] = remap_impl_device(base_device)
for op in impl_functions.keys():
func_device_map[op] = device

return func_base_decl, impl_basedev_functions, func_device_map

def gen_autogen_operators(
dirs: dict, device: str, adaptor_fm: FileManager, impl_plugin: bool
dirs: dict, device: str, adaptor_fm: FileManager, impl_plugin: bool,
base_device: str,
) -> None:
config_file_path = os.path.join(
dirs.get("config_path"), "convert_config.yaml"
@@ -713,46 +786,8 @@ def gen_autogen_operators(
print(e)
return

if impl_plugin:
ascend_config_path = os.path.join(dirs.get("config_path"), "../ascend_npu/ascend_config.yaml")
try:
with open(ascend_config_path, "r") as f:
ascend_configs = yaml.safe_load(f)
except Exception as e:
print(e)
return
ascend_impl_configs = ascend_func_impl_config(ascend_configs)
else:
ascend_impl_configs = {}

# get the implemented functions
impl_base_dir = os.path.dirname(config_file_path)
impl_func_dir = os.path.join(impl_base_dir, "functions")
impl_func_ext_dir = os.path.join(impl_base_dir, "functions_ext")
impl_functions = obtain_impl_func(impl_func_dir)
impl_functions_ext = obtain_impl_func(impl_func_ext_dir)
impl_functions.update(impl_functions_ext)

if impl_plugin:
impl_plugin_dir = os.path.join(impl_base_dir, "../ascend_npu/diopi_impl")
impl_npu_functions = obtain_impl_func(impl_plugin_dir)

#check config items all implemented
not_impled = []
for op in ascend_configs['ascend']:
if op not in impl_functions.keys():
not_impled.append(op)
if not_impled != []:
print(f"[GenAscendConfig] {not_impled} not implemented in ascend namespace")
return
not_impled.clear()
for op in ascend_configs['ascend_npu']:
if op not in impl_npu_functions.keys():
not_impled.append(op)
if not_impled != []:
print(f"[GenAscendConfig] {not_impled} not implemented in ascend_npu namespace.")
return

impl_functions = get_all_impl_functions(impl_base_dir)
impl_funcs = impl_functions.keys()

# generate func information and declarations by scanning functions.h
@@ -765,28 +800,38 @@
funcs_decl = get_impl_funcs_declaration(
funcs_decl_raw, funcs_info, impl_funcs, impl_plugin
)
composite_funcs_decl = get_composite_funcs_declaration(
funcs_decl_raw, funcs_info, impl_funcs, op_configs
)

impl_functions_content = [OT.impl_declaration_content_template.substitute(dict(
device=device,
impl_declaration=list(funcs_decl.values()),
))]

composite_funcs_decl = get_composite_funcs_declaration(
funcs_decl_raw, funcs_info, impl_funcs, op_configs
)
impl_functions_content.append(OT.impl_declaration_content_template.substitute(dict(
device='composite',
impl_declaration=list(composite_funcs_decl.values()),
)))

func_device_map = {}
if base_device:
funcs_basedev_decl, impl_basedev_functions, func_device_map = gen_base_device_impl_funcs(device,
base_device, dirs, impl_functions)
impl_functions_content.append(OT.impl_declaration_content_template.substitute(dict(
device=remap_impl_device(base_device),
impl_declaration=list(funcs_basedev_decl.values()),
)))
impl_funcs = {*impl_funcs, *impl_basedev_functions.keys()}

if impl_plugin:
funcs_npu_decl = get_impl_funcs_declaration(
funcs_decl_raw, funcs_info, impl_npu_functions.keys(), impl_plugin
)
funcs_npu_decl, impl_npu_functions, func_device_map = gen_ascend_impl_plugin_funcs(
dirs, impl_base_dir, impl_functions)
impl_functions_content.append(OT.impl_declaration_content_template.substitute(dict(
device=device + '_npu',
impl_declaration=list(funcs_npu_decl.values()),
)))
impl_funcs = {*impl_funcs, *impl_npu_functions.keys()}

impl_functions_content.append(OT.impl_declaration_content_template.substitute(dict(
device='composite',
impl_declaration=list(composite_funcs_decl.values()),
)))

adaptor_fm.write(
"impl_functions.hpp",
@@ -796,12 +841,9 @@ def gen_autogen_operators(
),
)

if impl_plugin:
impl_funcs = {*impl_funcs, *impl_npu_functions.keys()}

# generate adaptor implementation codes
adaptors_code = autogen_op_adaptor(
op_configs, device, funcs_info, impl_funcs, impl_plugin, ascend_impl_configs
op_configs, device, funcs_info, impl_funcs, func_device_map
)

adaptor_fm.write(
@@ -817,10 +859,10 @@ def declare_outputs(adaptor_fm: FileManager) -> None:


def gen_all_codes() -> None:
dirs, device, impl_plugin = prepare()
dirs, device, impl_plugin, base_device = prepare()
adaptor_fm = FileManager(dirs.get("output_dir", "."))
declare_outputs(adaptor_fm)
gen_autogen_operators(dirs, device, adaptor_fm, impl_plugin)
gen_autogen_operators(dirs, device, adaptor_fm, impl_plugin, base_device)
adaptor_fm.check_all_files_written()


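To make the new `--base_device` flow above concrete, the sketch below replays the precedence the generator now applies: an op implemented in the device's own dir keeps that device, while an op found only in the base device dir falls back to it, with `torch` remapped to `cuda` by `remap_impl_device`. The op names and implemented-function sets here are hypothetical examples, not part of the commit.

```python
# Minimal sketch of the func_device_map precedence introduced above.
# The op names and implemented-function sets are hypothetical examples.
def remap_impl_device(device: str) -> str:
    # 'torch' implementations run on the cuda device.
    return "cuda" if device == "torch" else device


def build_func_device_map(device, base_device, impl_funcs, base_impl_funcs):
    # Mirrors gen_base_device_impl_funcs: drop base ops shadowed by the
    # device impl, then map the remainder to the (remapped) base device.
    base_only = {op for op in base_impl_funcs if op not in impl_funcs}
    func_device_map = {op: remap_impl_device(base_device) for op in base_only}
    # Ops the device dir implements itself always win.
    func_device_map.update({op: device for op in impl_funcs})
    return func_device_map


# Example: muxi implements diopiAdd itself; diopiMul only exists in torch.
mapping = build_func_device_map(
    device="muxi",
    base_device="torch",
    impl_funcs={"diopiAdd"},
    base_impl_funcs={"diopiAdd", "diopiMul"},
)
assert mapping == {"diopiAdd": "muxi", "diopiMul": "cuda"}
```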
19 changes: 19 additions & 0 deletions impl/CMakeLists.txt
@@ -47,6 +47,7 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)

list(APPEND IMPL_CUDA "CUDA" "cuda")
list(APPEND IMPL_TORCH "TORCH" "LIBTORCH" "ATEN" "PYTORCH" "torch" "libtorch" "aten" "pytorch" "PyTorch")
list(APPEND IMPL_MUXI "MUXI" "muxi")
list(APPEND IMPL_TOPS "TOPS" "tops" "TOPSRIDER" "topsrider")
list(APPEND IMPL_CAMB_TORCH "CAMB_PYTORCH" "camb_pytorch")
list(APPEND IMPL_CAMB "CAMB" "camb")
@@ -61,6 +62,8 @@ elseif(${IMPL_OPT} IN_LIST IMPL_TOPS)
add_subdirectory(topsrider)
elseif (${IMPL_OPT} IN_LIST IMPL_TORCH)
add_subdirectory(torch)
elseif (${IMPL_OPT} IN_LIST IMPL_MUXI)
add_subdirectory(muxi)
elseif (${IMPL_OPT} IN_LIST IMPL_CAMB_TORCH)
add_subdirectory(camb_pytorch)
elseif (${IMPL_OPT} IN_LIST IMPL_CAMB)
@@ -77,6 +80,22 @@ else()
message(WARNING "No implementation module is compiled, cmake requires option -DIMPL_OPT=CUDA or TORCH")
endif()

# 1. The lib ${DEVICEIMPL}, whose exported symbols are all 'weak', can be treated as a 'not-needed'
# lib. Some compilers force-link such libs by default, but others whose default link config is
# '--as-needed' may drop them, so we manually add '-no-as-needed' here to guarantee that
# ${DEVICEIMPL} gets linked. E.g., if your compiler does not link 'not-needed' libs by default, run
# 'g++ -dumpspecs' and check whether the '*link:' section contains a policy such as
# '%{!fsanitize=*:--as-needed}' or anything else that sets '--as-needed'. You can change the
# compiler's default spec via 'gcc -specs=./new.specs', but that is hard to use.
# Supplementary: https://gcc.gnu.org/onlinedocs/gcc/Spec-Files.html

# 2. When the code below adds the '-no-as-needed' option to link.txt, the option is not placed
# exactly before the point where ${DEVICEIMPL} is linked, but as a link option preceding all link
# items. If another link item changes the linking policy, e.g.
# "-Wl,--no-as-needed,\"\$<TARGET_FILE:other_lib>\" -Wl,--as-needed", and ${DEVICEIMPL} is linked
# right after that item, it will still be linked with '--as-needed', be thrown away by ld, and
# cause an error! If this happens, add the link option "-Wl,-no-as-needed" to the lib that links
# ${DEVICEIMPL}.

target_link_options(${DEVICEIMPL} INTERFACE "LINKER:-no-as-needed")

# install
install(DIRECTORY ${DIOPI_IMPL_DIR}/../proto/include/ TYPE INCLUDE)
install(FILES lib/lib${DEVICEIMPL}.so TYPE LIB)
75 changes: 75 additions & 0 deletions impl/muxi/CMakeLists.txt
@@ -0,0 +1,75 @@
project(muxi_impl)

# muxi torch config
add_compile_definitions(USE_MACA=1)
set(USE_MACA ON)

set(BASE_TORCH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../torch")
include(${BASE_TORCH_DIR}/cmake/TorchBaseFunc.cmake)
InitFindTorch()

find_package(Torch REQUIRED)
if (Torch_FOUND)
message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR})
add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR})
add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH})
add_definitions(-DTORCH_VERSION=${Torch_VERSION})
message(STATUS "Found Torch Version: ${Torch_VERSION}")
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")

file(GLOB REAL_IMPL_SRC
${BASE_TORCH_DIR}/functions/error.cpp
${BASE_TORCH_DIR}/functions/functions.cpp

${BASE_TORCH_DIR}/functions/functions_lightllm.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv.cpp
${BASE_TORCH_DIR}/helper.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv/*.cu

${BASE_TORCH_DIR}/functions/functions_ext.cpp
${BASE_TORCH_DIR}/functions/functions_ext/*.cu

# mx cpp
functions/functions.cpp
)

# adaptor
set(USE_ADAPTOR ON)
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/convert_config.yaml")
message(FATAL_ERROR "convert_config.yaml doesn't exist.")
endif()

if(USE_ADAPTOR)
# dependency
file(GLOB ADAPTOR_TEMPLATE_CODE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ADAPTOR_DIR}/codegen/*.py)
add_custom_target(adaptor_gen_dependency DEPENDS ${ADAPTOR_TEMPLATE_CODE})

set(ADAPTOR_CSRC_PATH "${ADAPTOR_DIR}/csrc")
set(GEN_FILES ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/impl_functions.hpp)
add_custom_target(adaptor_code_gen
COMMAND python3 ${ADAPTOR_DIR}/codegen/gen.py --diopi_dir=${DIOPI_IMPL_DIR}/../ --output_dir=${ADAPTOR_CSRC_PATH}
--config_device=muxi --base_device=torch
BYPRODUCTS ${GEN_FILES}
DEPENDS adaptor_gen_dependency)
list(APPEND REAL_IMPL_SRC ${ADAPTOR_CSRC_PATH}/convert.cpp ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/composite_ops.cpp)
endif()

cuda_add_library(${DEVICEIMPL} SHARED ${REAL_IMPL_SRC})
target_link_libraries(${DEVICEIMPL} ${TORCH_LIBRARIES})
add_subdirectory(functions/functions_ext/flash-attention)
target_link_libraries(${DEVICEIMPL} diopi_torch_ext_flash_attn)
target_include_directories(${DEVICEIMPL} PRIVATE ${BASE_TORCH_DIR})

if(USE_ADAPTOR)
add_dependencies(${DEVICEIMPL} adaptor_code_gen)
endif()

if (TEST)
add_subdirectory(test)
endif()
10 changes: 10 additions & 0 deletions impl/muxi/convert_config.yaml
@@ -0,0 +1,10 @@
- diopiLinear:
supportComposite: true

- diopiRMSNorm:
tensor_dtype:
inv_rms: (float16)->float32

- diopiRMSNormBackward:
tensor_dtype:
inv_rms: (float16)->float32
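For reference, the `convert_config.yaml` entries above mark `diopiLinear` as eligible for the composite fallback and tell the adaptor to keep the `inv_rms` tensor in float32 whenever RMSNorm runs on float16 inputs. The sketch below illustrates the numerical rationale under standard RMSNorm semantics; it is not the generated adaptor code, and the shapes are arbitrary examples.

```python
# Why inv_rms is pinned to float32 for float16 RMSNorm: the squared-mean
# reduction loses precision (or overflows) when accumulated in half.
# Standard RMSNorm semantics assumed; this is not the generated adaptor code.
import torch


def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6):
    # Accumulate the reduction in float32, matching the (float16)->float32 rule.
    inv_rms = torch.rsqrt(x.float().pow(2).mean(-1, keepdim=True) + eps)
    # Normalize in float32, then cast back to the input dtype for the output.
    out = (x.float() * inv_rms).to(x.dtype) * weight
    return out, inv_rms  # inv_rms stays float32, as the config requires


x = torch.randn(4, 8, dtype=torch.float16)
w = torch.ones(8, dtype=torch.float16)
out, inv_rms = rms_norm(x, w)
assert out.dtype == torch.float16 and inv_rms.dtype == torch.float32
```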