Skip to content

Commit

Permalink
[ascend]Fdy change dyn load (DeepLink-org#1301)
Browse files Browse the repository at this point in the history
Fdy change dyn load.

---------

Co-authored-by: yangbofun <[email protected]>
Co-authored-by: wiryls <[email protected]>
  • Loading branch information
fandaoyi and yangbofun authored Jul 29, 2024
1 parent abb56e2 commit aefa5b7
Show file tree
Hide file tree
Showing 16 changed files with 465 additions and 183 deletions.
33 changes: 32 additions & 1 deletion .github/workflows/_runs-on-nv-step1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,37 @@ jobs:
"""
fi
# open job after dynamic torch ready (with out unique + gen diopi suffix lib)
# Job: build the `torch_dyload` variant of the impl library.
# It copies the checked-out `source` tree into ${BUILD_TEST2}, then runs
# `scripts/build_impl.sh torch_dyload` from `impl/` through srun; if the build
# command fails, the ${BUILD_TEST2} directory is removed and the step exits 1.
# Runners whose name contains "sco" run the commands locally; all other
# runners execute them over `ssh SH1424`.
Build-torch-dynamic:
# Disabled for now: with `if: false` this job is never scheduled.
if: false
name: Build-torch-dynamic
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
SLURM_PAR_V100: "pat_dev"
BUILD_TEST2: "build_test_dyn"
steps:
- name: build
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +300 -type d |xargs rm -rf
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${BUILD_TEST2} && cp -R source ${BUILD_TEST2} && cd ${BUILD_TEST2}
srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST2} \
&& source ${ENV_PATH}/pt2.0_diopi \
&& cd impl && bash scripts/build_impl.sh torch_dyload" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST2} && exit 1 )
else
ssh SH1424 """
set -e
cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +300 -type d |xargs rm -rf
source ${ENV_PATH}/pt2.0_diopi
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${BUILD_TEST2} && cp -R source ${BUILD_TEST2} && cd ${BUILD_TEST2}
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_V100} --time=20 bash -c 'cd impl && bash scripts/build_impl.sh torch_dyload' || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST2} && exit 1 )
"""
fi
Gen-Data:
name: Gen-Data
needs: [Build-Nvidia]
Expand Down Expand Up @@ -82,7 +113,7 @@ jobs:
set -e
source ${ENV_PATH}/pt2.0_diopi
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && cd ${BUILD_TEST1} && cd diopi_test/python &&
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_V100} --time=20 --gres=gpu:1 bash -c 'python main.py --mode gen_data' \
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_V100} --time=30 --gres=gpu:1 bash -c 'python main.py --mode gen_data' \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1} && git clean -xdf ${GEN_DATA} && exit 1 )
"""
else
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ jobs:
set -e
source /mnt/cache/share/platform/env/${ENV_NAME}
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && cd ${BUILD_TEST1}_A100 && cd diopi_test/python && ls &&
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_SH1424} --time=20 --gres=gpu:${GPU_REQUESTS} bash -c 'python main.py --mode gen_data' \
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_SH1424} --time=30 --gres=gpu:${GPU_REQUESTS} bash -c 'python main.py --mode gen_data' \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}_A100 && git clean -xdf ${GEN_DATA} && exit 1 )
"""
- name: test-op
Expand All @@ -215,7 +215,7 @@ jobs:
source /mnt/cache/share/platform/env/${ENV_NAME} && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && cd ${BUILD_TEST1}_A100
export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}_A100/impl/lib
echo \$LD_LIBRARY_PATH
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_SH1424} --time=20 --gres=gpu:${GPU_REQUESTS} bash -c 'cd diopi_test/python && python main.py --mode gen_case && python main.py --mode run_test' \
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_SH1424} --time=30 --gres=gpu:${GPU_REQUESTS} bash -c 'cd diopi_test/python && python main.py --mode gen_case && python main.py --mode run_test' \
&& git clean -xdf ${GEN_DATA} || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST1}_A100 && git clean -xdf ${GEN_DATA} && exit 1 )
"""
Expand Down
6 changes: 4 additions & 2 deletions adaptor/codegen/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ def prepare() -> Tuple[dict, str]:

impl_plugin = options.impl_plugin
base_device = options.base_device

# Only an unset/empty base device or "torch" is accepted.
# NOTE: the check must be written without call-style parentheses;
# `assert(cond, msg)` asserts a two-element tuple, which is always truthy,
# so the validation would silently never fire.
assert base_device is None or base_device == "" or base_device == "torch", \
    f"invalid base_device:{base_device}"
# Normalize an empty string to None so later code only has to test for None.
if base_device == "":
    base_device = None
def create_if_not_exist(name):
if not os.path.exists(name):
os.makedirs(name)
Expand Down Expand Up @@ -758,7 +760,7 @@ def gen_base_device_impl_funcs(device: str, base_device: str, dirs: dict, impl_f
impl_basedev_functions = get_all_impl_functions(base_device_impl_dir)
# remove ops already exist in device impl.
impl_basedev_functions = {op: args for op, args in impl_basedev_functions.items() if op not in impl_functions}

funcs_info, funcs_decl_raw = get_functions_support(dirs.get("source"))
func_base_decl = get_impl_funcs_declaration(
funcs_decl_raw, funcs_info, impl_basedev_functions.keys(), True,
Expand Down
48 changes: 48 additions & 0 deletions impl/cmake/ImplHelper.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Registers the adaptor code-generation targets and appends the adaptor
# sources to a caller-owned source list.
#
# Arguments:
#   adaptor_dir    - root of the adaptor tree; `codegen/*.py` and `csrc/` are
#                    resolved relative to it.
#   diopi_impl_dir - directory whose parent (`<dir>/../`) is passed to gen.py
#                    as --diopi_dir.
#   config_device  - forwarded to gen.py as --config_device.
#   base_device    - forwarded to gen.py as --base_device (may be empty).
#   out_src_files  - NAME of the caller's list variable; convert.cpp,
#                    diopi_adaptor.cpp and composite_ops.cpp are appended to it.
#
# Targets created: adaptor_gen_dependency, adaptor_code_gen.
function(diopi_use_adapter adaptor_dir diopi_impl_dir config_device base_device
         out_src_files)
  # NB: all arguments are passed as function parameters instead of global
  # variables, so every path below is derived from `adaptor_dir` rather than
  # from a global ADAPTOR_DIR that may or may not be defined by the caller.
  file(GLOB ADAPTOR_TEMPLATE_CODE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${adaptor_dir}/codegen/*.py)
  add_custom_target(adaptor_gen_dependency DEPENDS ${ADAPTOR_TEMPLATE_CODE})
  set(ADAPTOR_CSRC_PATH "${adaptor_dir}/csrc")

  # Files produced by gen.py; declared as BYPRODUCTS so the build tool knows
  # which target generates them.
  set(ADAPTER_GEN_FILES ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/impl_functions.hpp)
  add_custom_target(adaptor_code_gen
    COMMAND python3 ${adaptor_dir}/codegen/gen.py --diopi_dir=${diopi_impl_dir}/../ --output_dir=${ADAPTOR_CSRC_PATH} --config_device=${config_device} --base_device=${base_device}
    BYPRODUCTS ${ADAPTER_GEN_FILES}
    DEPENDS adaptor_gen_dependency
    VERBATIM
  )
  # `out_src_files` holds a variable NAME: extend that list locally, then
  # publish the result to the caller's scope.
  list(APPEND ${out_src_files} ${ADAPTOR_CSRC_PATH}/convert.cpp ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/composite_ops.cpp)
  set(${out_src_files} ${${out_src_files}} PARENT_SCOPE)
endfunction()


# Defines the shared library `${device_impl}` used for dynamic loading.
# Its sources are a wrapper file generated by dyn_wrap_gen.py (target
# `dyn_wrap_gen`) plus dyn_helper.cpp from the dyn_load_helper directory.
#
# Arguments:
#   diopi_impl_dir - directory containing scripts/dyn_load_helper.
#   device_impl    - name of the shared-library target to create.
function(prep_dyn_load diopi_impl_dir device_impl)
  set(helper_dir ${diopi_impl_dir}/scripts/dyn_load_helper)
  set(wrapper_cpp ${CMAKE_BINARY_DIR}/src/impl/wrap_function.cpp)
  file(GLOB generator_deps ${helper_dir}/dyn_wrap_gen.py)

  # Generate the wrapper source; the generator runs from its own directory.
  add_custom_target(dyn_wrap_gen ALL
    COMMAND python ${helper_dir}/dyn_wrap_gen.py -o ${wrapper_cpp}
    DEPENDS ${generator_deps}
    BYPRODUCTS ${wrapper_cpp}
    WORKING_DIRECTORY ${helper_dir})

  add_library(${device_impl} SHARED ${wrapper_cpp} ${helper_dir}/dyn_helper.cpp)
  # The wrapper source must exist before the library is compiled.
  add_dependencies(${device_impl} dyn_wrap_gen)
  target_include_directories(${device_impl} PRIVATE ${helper_dir})
  # dyn_helper.cpp calls dlopen/dladdr, hence libdl.
  target_link_libraries(${device_impl} -ldl)
endfunction()

# Adds the `dyn_torch` target, which runs `dyn_torch_handler.sh patch_diopi`
# on the library output directory and `<torch_dir>/lib`, and makes
# `${device_impl}` depend on it.
#
# Arguments:
#   diopi_impl_dir - directory containing scripts/dyn_load_helper.
#   real_impl      - dependency of the `dyn_torch` target.
#   torch_dir      - torch install prefix; its `lib` subdirectory is passed to
#                    the handler script.
#   device_impl    - target that must wait for `dyn_torch`.
#
# NOTE: LIBRARY_OUTPUT_PATH is read from the enclosing scope, not passed in.
function(handle_dyn_torch diopi_impl_dir real_impl torch_dir device_impl)
  message(STATUS "handle_dyn_torch with torch: ${torch_dir}")

  set(handler_script ${diopi_impl_dir}/scripts/dyn_load_helper/dyn_torch_handler.sh)
  add_custom_target(dyn_torch
    COMMAND ${handler_script} patch_diopi ${LIBRARY_OUTPUT_PATH} ${torch_dir}/lib
    DEPENDS ${real_impl}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

  add_dependencies(${device_impl} dyn_torch)
endfunction()

28 changes: 28 additions & 0 deletions impl/cmake/TorchBaseFunc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# Locates the Torch CMake package through the `python` found on PATH and
# applies its compile settings.
#
# This is a macro, not a function: every variable set here, and everything
# find_package(Torch) defines, lands in the caller's scope.
#
# Effects:
#   - appends torch.utils.cmake_prefix_path to CMAKE_PREFIX_PATH (if python
#     reported one),
#   - appends TORCH_CXX_FLAGS to CMAKE_CXX_FLAGS,
#   - defines TORCH_VERSION_MAJOR / _MINOR / _PATCH and TORCH_VERSION.
macro(diopi_find_torch)
  # Ask python where torch keeps its CMake package files.
  execute_process(
    COMMAND sh -c "python -c 'import torch;print(torch.utils.cmake_prefix_path)'"
    OUTPUT_VARIABLE DIOPI_TORCH_CMAKE_PREFIX
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  if(DIOPI_TORCH_CMAKE_PREFIX)
    # Appended to the existing CMAKE_PREFIX_PATH rather than assigned, so a
    # user-provided CMAKE_PREFIX_PATH is not overwritten.
    list(APPEND CMAKE_PREFIX_PATH ${DIOPI_TORCH_CMAKE_PREFIX})
  endif()
  message(STATUS "diopi CMAKE_PREFIX_PATH:${CMAKE_PREFIX_PATH}")

  find_package(Torch REQUIRED)
  if(Torch_FOUND)
    message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
    message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")

    string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}")
    # Expose the detected Torch version to the C++ sources.
    add_definitions(
      -DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR}
      -DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR}
      -DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH}
      -DTORCH_VERSION=${Torch_VERSION})
    message(STATUS "Found Torch Version: ${Torch_VERSION}")
  endif()

endmacro()
80 changes: 34 additions & 46 deletions impl/muxi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,30 @@ project(muxi_impl)
add_compile_definitions(USE_MACA=1)
set(USE_MACA ON)

set(BASE_TORCH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../torch")
include(${BASE_TORCH_DIR}/cmake/TorchBaseFunc.cmake)
InitFindTorch()
include(../cmake/ImplHelper.cmake)
include(../cmake/TorchBaseFunc.cmake)
diopi_find_torch()

find_package(Torch REQUIRED)
if (Torch_FOUND)
message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR})
add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR})
add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH})
add_definitions(-DTORCH_VERSION=${Torch_VERSION})
message(STATUS "Found Torch Version: ${Torch_VERSION}")
endif()
# TODO: Report bugs to muxi
# There is a conflict when the muxi runtime is used together with a pip-installed torch_cpu,
# so to use dipu with torch_cpu on muxi, manually compile torch cpu with `export BLAS=OpenBLAS`.

set(BASE_TORCH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../torch")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")

file(GLOB REAL_IMPL_SRC
${BASE_TORCH_DIR}/functions/error.cpp
${BASE_TORCH_DIR}/functions/functions.cpp

${BASE_TORCH_DIR}/functions/functions_lightllm.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv.cpp
${BASE_TORCH_DIR}/helper.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv/*.cu

${BASE_TORCH_DIR}/functions/functions_ext.cpp
${BASE_TORCH_DIR}/functions/functions_ext/*.cu

${BASE_TORCH_DIR}/build_aten.cpp

# mx cpp
functions/functions.cpp
${BASE_TORCH_DIR}/functions/error.cpp
${BASE_TORCH_DIR}/functions/functions.cpp
${BASE_TORCH_DIR}/functions/functions_lightllm.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv.cpp
${BASE_TORCH_DIR}/helper.cpp
${BASE_TORCH_DIR}/functions/functions_mmcv/*.cu
${BASE_TORCH_DIR}/functions/functions_ext.cpp
${BASE_TORCH_DIR}/functions/functions_ext/*.cu
${BASE_TORCH_DIR}/build_aten.cpp
# mx cpp
functions/functions.cpp
)

# adaptor
Expand All @@ -48,28 +37,27 @@ if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/convert_config.yaml")
endif()

if(USE_ADAPTOR)
# dependency
file(GLOB ADAPTOR_TEMPLATE_CODE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ADAPTOR_DIR}/codegen/*.py)
add_custom_target(adaptor_gen_dependency DEPENDS ${ADAPTOR_TEMPLATE_CODE})
diopi_use_adapter(${ADAPTOR_DIR} ${DIOPI_IMPL_DIR} "torch" "" REAL_IMPL_SRC)
endif()

set(ADAPTOR_CSRC_PATH "${ADAPTOR_DIR}/csrc")
set(GEN_FILES ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/impl_functions.hpp)
add_custom_target(adaptor_code_gen
COMMAND python3 ${ADAPTOR_DIR}/codegen/gen.py --diopi_dir=${DIOPI_IMPL_DIR}/../ --output_dir=${ADAPTOR_CSRC_PATH}
--config_device=muxi --base_device=torch
BYPRODUCTS ${GEN_FILES}
DEPENDS adaptor_gen_dependency)
list(APPEND REAL_IMPL_SRC ${ADAPTOR_CSRC_PATH}/convert.cpp ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/composite_ops.cpp)
if(${DYLOAD})
prep_dyn_load(${DIOPI_IMPL_DIR} ${DEVICEIMPL})
set(REAL_IMPL diopi_real_impl)
else()
set(REAL_IMPL ${DEVICEIMPL})
endif()

cuda_add_library(${DEVICEIMPL} SHARED ${REAL_IMPL_SRC})
target_link_libraries(${DEVICEIMPL} ${TORCH_LIBRARIES})
cuda_add_library(${REAL_IMPL} SHARED ${REAL_IMPL_SRC})
target_link_libraries(${REAL_IMPL} ${TORCH_LIBRARIES})
add_subdirectory(functions/functions_ext/flash-attention)
target_link_libraries(${DEVICEIMPL} diopi_torch_ext_flash_attn)
target_include_directories(${DEVICEIMPL} PRIVATE ${BASE_TORCH_DIR})
target_link_libraries(${REAL_IMPL} diopi_torch_ext_flash_attn)
target_include_directories(${REAL_IMPL} PRIVATE ${BASE_TORCH_DIR})

if(USE_ADAPTOR)
add_dependencies(${DEVICEIMPL} adaptor_code_gen)
add_dependencies(${REAL_IMPL} adaptor_code_gen)
endif()
if(${DYLOAD})
handle_dyn_torch(${DIOPI_IMPL_DIR} ${REAL_IMPL} ${TORCH_INSTALL_PREFIX} ${DEVICEIMPL})
endif()

if (TEST)
Expand Down
2 changes: 1 addition & 1 deletion impl/muxi/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(RUNTIME_SRC
# use torch cuda runtime
${BASE_TORCH_DIR}/test/conform_test.cpp
)
add_library(diopirt SHARED ${RUNTIME_SRC})
cuda_add_library(diopirt SHARED ${RUNTIME_SRC})
message(STATUS "test diopirt CUDA_LIBRARIES is:" ${CUDA_LIBRARIES})
target_link_libraries(diopirt ${CUDA_LIBRARIES})

Expand Down
6 changes: 6 additions & 0 deletions impl/scripts/build_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ case $1 in
-DENABLE_COVERAGE=${USE_COVERAGE}
make_maca -j8
;;
muxi_dyload)
# Dynamic-load build for muxi: configure with -DDYLOAD=ON (and -DTEST=ON) via
# cmake_maca, then build with make_maca.
mkdir -p build && cd build
cmake_maca .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=muxi -DCMAKE_BUILD_TYPE=Release -DDYLOAD=ON -DTEST=ON \
&& make_maca -j8
# Symlink the real implementation library into ${DIOPI_TEST_PATH}/lib.
# NOTE(review): this line is not chained to the build with `&&`, so it also
# runs after a failed build unless the script enables `set -e` -- confirm.
mkdir -p ${DIOPI_TEST_PATH}/lib && ln -sf ${CURRENT_DIR}/../lib/libdiopi_real_impl.so ${DIOPI_TEST_PATH}/lib
;;
camb_pytorch)
mkdir -p build && cd build
cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=camb_pytorch -DCMAKE_BUILD_TYPE=Release -DTEST=ON \
Expand Down
32 changes: 32 additions & 0 deletions impl/scripts/dyn_load_helper/dyn_helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include <dlfcn.h>

#include <cstdio>
#include <filesystem>
#include <stdexcept>

/// Loads the real DIOPI implementation library and returns its dlopen handle.
///
/// The library is first opened by the given name, leaving the lookup to the
/// dynamic linker's normal search rules. If that fails, a second attempt is
/// made with the file of the same name located next to the shared object
/// that contains this function (resolved through dladdr).
///
/// @param diopiRealName  File name (or path) of the library to load.
/// @return A non-null handle obtained from dlopen.
/// @throws std::runtime_error ("diopi_init err") if both attempts fail.
void* dynLoadFile(const char* diopiRealName) {
    namespace fs = std::filesystem;
    // Same flags for both attempts. RTLD_DEEPBIND places the library's own
    // symbols ahead of the global scope during symbol lookup.
    constexpr int kDlopenFlags = RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND;

    void* handle = dlopen(diopiRealName, kDlopenFlags);
    if (handle != nullptr) {
        return handle;
    }

    // Report the first failure immediately: dlerror() hands out the pending
    // message once, and the retry below would otherwise replace it.
    const char* firstError = dlerror();
    fprintf(stderr, "diopi dyload: dlopen(%s) failed: %s \n", diopiRealName, firstError != nullptr ? firstError : "unknown error");

    Dl_info info;
    if (dladdr(reinterpret_cast<void*>(dynLoadFile), &info) != 0 && info.dli_fname != nullptr) {
        const fs::path loaderPath(info.dli_fname);
        const auto diopiInLoader = (loaderPath.parent_path() / diopiRealName).string();
        // Diagnostics go to stderr so that the host program's stdout stays clean.
        fprintf(stderr,
                "diopi dyload fail, seems LD_LIBRARY_PATH not contains %s, try to load "
                "from loader current dir's %s \n",
                diopiRealName,
                diopiInLoader.c_str());

        handle = dlopen(diopiInLoader.c_str(), kDlopenFlags);
        if (handle != nullptr) {
            return handle;
        }
    }

    // Adjacent string literals (instead of a backslash line continuation)
    // keep source indentation out of the message.
    fprintf(stderr,
            "! please note that dynamic loaded diopi_impl.so need explicitly link to its "
            "diopi_rt (now is torch_dipu), so it cannot be used for diopi-test now \n");
    // dlerror() returns nullptr when no error is pending (e.g. the retry was
    // skipped); passing nullptr to %s would be undefined behaviour.
    const char* lastError = dlerror();
    if (lastError != nullptr) {
        fprintf(stderr, "%s \n", lastError);
    }
    throw std::runtime_error("diopi_init err");
}
3 changes: 3 additions & 0 deletions impl/scripts/dyn_load_helper/dyn_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#pragma once

/// Opens the shared library `diopiRealName` with dlopen and returns its handle.
/// If the plain name cannot be opened, the implementation retries with the
/// file of the same name in the directory of the library containing this
/// function. Throws std::runtime_error when the library cannot be loaded.
void* dynLoadFile(const char* diopiRealName);
Loading

0 comments on commit aefa5b7

Please sign in to comment.