diff --git a/.github/workflows/android-build-test-linux.yml b/.github/workflows/android-build-test-linux.yml index 8fb866c20..a22c40284 100644 --- a/.github/workflows/android-build-test-linux.yml +++ b/.github/workflows/android-build-test-linux.yml @@ -108,21 +108,23 @@ jobs: project_id: mobile-app-build-290400 - name: Download Samsung libraries env: - SAMSUNG_LIB: samsung_libs_v5.0_20250906 + SAMSUNG_LIB: samsung_libs_v5.0.1_20251217 run: | - gsutil cp gs://mobile-app-build-290400_github-actions/lib/v5.0/${SAMSUNG_LIB}.zip /tmp/ && \ + gsutil cp gs://mobile-app-build-290400_github-actions/lib/v5.0.1/${SAMSUNG_LIB}.zip /tmp/ && \ unzip /tmp/${SAMSUNG_LIB}.zip -d /tmp/${SAMSUNG_LIB} && \ rm /tmp/${SAMSUNG_LIB}.zip && \ mkdir -p mobile_back_samsung/samsung/lib/internal && \ mv /tmp/${SAMSUNG_LIB}/* mobile_back_samsung/samsung/lib/internal/ + ls mobile_back_samsung/samsung/lib/internal/* - name: Download QTI SDK env: - QTI_SDK: qairt-2.29.0.241129-linux + QTI_SDK: qairt-v2.40.0.251030-linux run: | gsutil cp gs://mobile-app-build-290400_github-actions/lib/v5.0/${QTI_SDK}.zip /tmp/ && \ unzip /tmp/${QTI_SDK}.zip -d /tmp/${QTI_SDK} && \ rm /tmp/${QTI_SDK}.zip && \ mv /tmp/${QTI_SDK}/* mobile_back_qti/ + ls mobile_back_qti/* - name: Download QTI libraries env: QTI_LIB: StableDiffusionShared.0.1.1.250124 @@ -131,6 +133,7 @@ jobs: unzip /tmp/${QTI_LIB}.zip -d /tmp/${QTI_LIB} && \ rm /tmp/${QTI_LIB}.zip && \ mv /tmp/${QTI_LIB}/* mobile_back_qti/cpp/backend_qti/StableDiffusionShared/ + ls mobile_back_qti/cpp/backend_qti/StableDiffusionShared/* - name: Cache bazel uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 with: @@ -271,7 +274,9 @@ jobs: test-android-apk-unified: name: ${{ matrix.backend }}-${{ matrix.device }} (unified) - needs: [ build-android-apk, computed ] + needs: + - computed + - build-android-apk if: github.event_name != 'workflow_dispatch' runs-on: ubuntu-22.04 timeout-minutes: 60 @@ -351,9 +356,9 @@ jobs: test-android-apk-unified-extended: name: ${{ matrix.backend }}-${{ matrix.device }} (unified-extended) needs: + - computed - build-android-apk - test-android-apk-unified - - computed if: github.ref == 'refs/heads/master' || vars.EXTENDED_TESTS_ON_PR == 'true' runs-on: ubuntu-22.04 timeout-minutes: 60 diff --git a/.github/workflows/code-analysis.yml b/.github/workflows/code-analysis.yml index 26b62ea08..8a5459735 100644 --- a/.github/workflows/code-analysis.yml +++ b/.github/workflows/code-analysis.yml @@ -10,6 +10,7 @@ on: jobs: sonar: + if: ${{ github.event.repository.name == 'mobile_app_open' }} name: SonarScanner runs-on: ubuntu-22.04 timeout-minutes: 120 @@ -87,6 +88,7 @@ jobs: run: make docker/scanner/scan codeql: + if: ${{ github.event.repository.name == 'mobile_app_open' }} name: CodeQL runs-on: ubuntu-latest permissions: diff --git a/.gitignore b/.gitignore index a7be10a01..affb273b7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ /output_logs /datasets/output /datasets/downloads +/mobile_back_qti/snpe-* +/mobile_back_qti/qaisw-* *.so * .apk * .tflite diff --git a/datasets/docker/Dockerfile b/datasets/docker/Dockerfile index 6dc26bdc8..046d691cd 100644 --- a/datasets/docker/Dockerfile +++ b/datasets/docker/Dockerfile @@ -47,7 +47,7 @@ RUN unzip /tmp/protoc-3.6.1-linux-x86_64.zip -d protoc3 && \ RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list RUN curl --proto '=https' https://bazel.build/bazel-release.pub.gpg | apt-key add - RUN apt-get update && \ - apt-get install -y 
-no-install-recommends --allow-unauthenticated bazel-3.7.2 + apt-get install -y --no-install-recommends --allow-unauthenticated bazel-3.7.2 RUN apt-get clean # Set timezone to UTC by default diff --git a/flutter/android/android-docker.mk b/flutter/android/android-docker.mk index 6c5c55f4a..595b41e5e 100644 --- a/flutter/android/android-docker.mk +++ b/flutter/android/android-docker.mk @@ -1,4 +1,4 @@ -# Copyright 2020-2024 The MLPerf Authors. All Rights Reserved. +# Copyright 2020-2025 The MLPerf Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/flutter/android/android.mk b/flutter/android/android.mk old mode 100644 new mode 100755 diff --git a/flutter/assets/text/about.md b/flutter/assets/text/about.md index cd6c1d534..2b44389a6 100644 --- a/flutter/assets/text/about.md +++ b/flutter/assets/text/about.md @@ -37,12 +37,17 @@ The Qualcomm Neural Processing SDK (SNPE) is a software accelerated runtime and The MLPerf Application uses the SNPE Hexagon Processor runtimes on the following families: +* Snapdragon 8 Elite Gen 5 * Snapdragon 8 Elite +* Snapdragon 8 Gen 5 * Snapdragon 8 Gen 3 +* Snapdragon 8s Gen 4 * Snapdragon 8s Gen 3 * Snapdragon 8 Gen 2 +* Snapdragon 7 Gen 4 * Snapdragon 7 Gen 3 * Snapdragon 7s Gen 3 +* Snapdragon 6 Gen 4 * Snapdragon 4 Gen 2 * Default fallback for all other Snapdragon mobile platforms @@ -61,6 +66,7 @@ Official website URL: [https://developer.samsung.com/neural/overview.html](https The MLPerf Mobile Benchmarking App uses the Exynos Neural Network SDK on the following families of Exynos processors [https://semiconductor.samsung.com/processor/](https://semiconductor.samsung.com/processor/): +* Exynos 2600 * Exynos 2500 * Exynos 2400 * Exynos 2300 diff --git a/flutter/cpp/binary/cmdline.mk b/flutter/cpp/binary/cmdline.mk index f920044ae..560aa1d55 100644 --- a/flutter/cpp/binary/cmdline.mk +++ b/flutter/cpp/binary/cmdline.mk @@ -1,4 +1,4 @@ -# Copyright 2023 The MLPerf Authors. All Rights Reserved. +# Copyright 2023-2025 The MLPerf Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -92,4 +92,4 @@ cmdline/windows/prepare-dlls: .PHONY: cmdline/windows/copy-dlls cmdline/windows/copy-dlls: currentDir=$$(pwd) && cd "${msvc_arm_dlls_path}" && \ - cp --target-directory $$currentDir/${windows_cmdline_folder} ${msvc_arm_dlls_list} \ No newline at end of file + cp --target-directory $$currentDir/${windows_cmdline_folder} ${msvc_arm_dlls_list} diff --git a/flutter/lib/backend/list.in b/flutter/lib/backend/list.in index 136e3576e..736bf38ee 100644 --- a/flutter/lib/backend/list.in +++ b/flutter/lib/backend/list.in @@ -2,10 +2,10 @@ part of 'list.dart'; // Always add new tags before TFLITE_TAG, otherwise your backend will never be used const _backendsList = [ + 'SAMSUNG_TAG', 'MEDIATEK_TAG', 'PIXEL_TAG', 'QTI_TAG', - 'SAMSUNG_TAG', 'APPLE_TAG', 'TFLITE_TAG', ]; diff --git a/flutter/pubspec.yaml b/flutter/pubspec.yaml index d61635f63..d4f71de6e 100644 --- a/flutter/pubspec.yaml +++ b/flutter/pubspec.yaml @@ -7,7 +7,7 @@ publish_to: 'none' # Remove this line if you wish to publish to pub.dev # version format: +. 
# Note: build_number will be set by CI using the CLI option --build-number -version: 5.0.3+1 +version: 5.0.4+1 environment: sdk: ^3.3.4 # Dart SDK version diff --git a/mobile_back_qti/BUILD b/mobile_back_qti/BUILD index f117de405..c79030099 100644 --- a/mobile_back_qti/BUILD +++ b/mobile_back_qti/BUILD @@ -68,20 +68,23 @@ cc_library( name = "snpe_deps", srcs = [ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libhta.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV81Stub.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV79Stub.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV75Stub.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV73Stub.so", - "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV69Stub.so", - "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV68Stub.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpPrepare.so", - "qairt/" + SNPE_VERSION + "/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so", - "qairt/" + SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so", "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so", - "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so", - "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV73Stub.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v79/unsigned/libSnpeHtpV79Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v81/unsigned/libSnpeHtpV81Skel.so", + "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV81Stub.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtp.so", "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpPrepare.so", "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v79/unsigned/libQnnHtpV79Skel.so", + "qairt/" + SNPE_VERSION + "/lib/hexagon-v81/unsigned/libQnnHtpV81Skel.so", ], ) diff --git a/mobile_back_qti/DLC/Makefile b/mobile_back_qti/DLC/Makefile index 650b9f5a5..9515fc826 100644 --- a/mobile_back_qti/DLC/Makefile +++ b/mobile_back_qti/DLC/Makefile @@ -56,16 +56,17 @@ endif htp-dlc: mobilenet_v4 \ mobilenet_v4_O2 \ - mobiledet \ - mobiledet_O2 \ - mosaic \ - mosaic_O2 \ - mobilebert \ - mobilebert_O2 \ - snusr \ - snusr_O2 \ - mobilenet_v4_batched \ - mobilenet_v4_batched_O2 + mobiledet \ + mobiledet_O2 \ + mosaic \ + mosaic_O2 \ + mobilebert \ + mobilebert_O2 \ + snusr \ + snusr_O2 \ + mobilenet_v4_batched \ + mobilenet_v4_batched_O2 + ifeq ($(MAKECMDGOALS),$(filter $(MAKECMDGOALS),generate-apirec \ mobilenet_v4_apirec mobilenet_v4_batched_apirec mosaic_apirec mobilebert_apirec mobiledet_apirec snusr_apirec)) @@ -80,9 +81,11 @@ generate-apirec: mobilenet_v4_apirec \ snusr_apirec mobilenet_v4_batched: \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_8_sm8850.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp mobilenet_v4_batched_O2: \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_8_O2_sm8850.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp mobilenet_v4_batched_apirec: \ @@ -92,7 +95,7 @@ mosaic: \ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp mosaic_O2: \ - ${DLCBUILDDIR}/mobile_mosaic_htp_O2.stamp + ${DLCBUILDDIR}/mobile_mosaic_htp_O2.stamp mosaic_apirec: \ ${DLCBUILDDIR}/generate_mosaic_apirec @@ -101,7 +104,7 @@ mobilenet_v4: \ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp 
mobilenet_v4_O2: \ - ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp + ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp mobilenet_v4_apirec: \ ${DLCBUILDDIR}/generate_mobilenet_v4_apirec @@ -134,19 +137,41 @@ snusr_apirec: \ ${DLCBUILDDIR}/generate_snusr_apirec stable_diffusion_qnn: \ + sd_precompute_data \ + text_encoder \ + vae_decoder \ + unet + +sd_precompute_data: \ ${DLCBUILDDIR}/sd_precompute_data.tar \ - ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp \ - ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp \ - ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp text_encoder: \ - ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v75.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v79.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v73_sd8sG4.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v73_sd7G4.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v81_sd8eliteG5.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v81_sd8G5.stamp \ vae_decoder: \ - ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v75.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v79.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v73_sd8sG4.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v73_sd7G4.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v81_sd8eliteG5.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v81_sd8G5.stamp \ + unet: \ - ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp + ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v75.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v79.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v73_sd8sG4.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v73_sd7G4.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v81_sd8eliteG5.stamp \ + ${DLCBUILDDIR}/unet_qnn_context_binary_generator_v81_sd8G5.stamp \ mlperf_models: \ ${DLCBUILDDIR}/mlperf_models.stamp @@ -170,6 +195,7 @@ ${DLCBUILDDIR}/mlperf_models.stamp: (mkdir -p ${MLPERF_MODELS_PATH}) touch $@ + ${DLCBUILDDIR}/mobilenet_v4_float.dlc: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ @@ -211,6 +237,7 @@ ${DLCBUILDDIR}/mobilenet_v4_quant.stamp: \ --input_list=imagenet_image_384_list.txt \ --output_dlc=/output/mobilenet_v4_quant.dlc \ # Mobilenetedge TPU model conversion completed + touch $@ ${DLCBUILDDIR}/mobilenet_v4_htp_sm8750.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -231,8 +258,8 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_sm8750.stamp: \ --input_dlc=/output/mobilenet_v4_quant.dlc \ --output_dlc=/output/mobilenet_v4_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # MobilenetV4 TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -255,8 +282,33 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_sm7550.stamp: \ --input_dlc=/output/mobilenet_v4_htp.dlc \ --output_dlc=/output/mobilenet_v4_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm7550,sm7635 \ - 
--memorymapped_buffer_hint=true + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + # MobilenetV4 TPU model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_htp_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_quant.stamp \ + # Offline prepare of MobilenetV4 DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/imagenet:/imagenet \ + -w /imagenet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/mobilenet_v4_htp.dlc \ + --output_dlc=/output/mobilenet_v4_htp.dlc \ + --optimization_level 3 \ + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true # MobilenetV4 TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -265,6 +317,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_sm8750.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_sm7550.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_sm8850.stamp \ # Offline prepare of MobilenetV4 DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -281,8 +334,8 @@ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp: \ --input_dlc=/output/mobilenet_v4_htp.dlc \ --output_dlc=/output/mobilenet_v4_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # MobilenetV4 TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -306,8 +359,8 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm8750.stamp: \ --input_dlc=/output/mobilenet_v4_quant.dlc \ --output_dlc=/output/mobilenet_v4_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # Mobilenetedge TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -330,16 +383,41 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm7550.stamp: \ --input_dlc=/output/mobilenet_v4_htp_O2.dlc \ --output_dlc=/output/mobilenet_v4_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ - --memorymapped_buffer_hint=true + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true # Mobilenetedge TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ +${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of MobilenetV4 DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/imagenet:/imagenet \ + -w /imagenet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/mobilenet_v4_htp_O2.dlc \ + --output_dlc=/output/mobilenet_v4_htp_O2.dlc \ + --optimization_level 2 \ + --htp_socs=sm8850 \ + 
--memorymapped_buffer_hint=true + # MobilenetV4 TPU model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm8750.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_O2_sm8850.stamp \ # Offline prepare of MobilenetEdgeTPU DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -356,12 +434,101 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \ --input_dlc=/output/mobilenet_v4_htp_O2.dlc \ --output_dlc=/output/mobilenet_v4_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # Mobilenetedge TPU model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ +${DLCBUILDDIR}/mobilenet_v4_float_batched_8.dlc: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/mlperf_models.stamp + # MobilenetV4 TPU model conversion .... + # Batched Float model + mkdir -p ${DLCBUILDDIR} + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${MOBILENETV4_MODEL_PATH}:/models \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \ + -i /models/ \ + -d inputs "8,384,384,3" --out_node "probs" \ + -o /output/mobilenet_v4_float_batched_8.dlc + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_quant_batched_8.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/imagenet/imagenet_image_384_list.txt \ + ${DLCBUILDDIR}/mobilenet_v4_float_batched_8.dlc + # Quantization of MobilenetV4 Batched DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/imagenet:/imagenet-out \ + -w /imagenet-out \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \ + --input_dlc=/output/mobilenet_v4_float_batched_8.dlc \ + --input_list=imagenet_image_384_list.txt \ + --output_dlc=/output/mobilenet_v4_quant_batched_8.dlc + # MobilenetV4 model conversion completed + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_htp_batched_8_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_quant_batched_8.stamp + # Offline prepare of MobilenetV4 Batched DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --input_dlc=/output/mobilenet_v4_quant_batched_8.dlc \ + --output_dlc=/output/mobilenet_v4_htp_batched_8.dlc \ + --optimization_level 3 \ + --htp_socs=sm8850 \ + --htp_dlbc=true \ + --memorymapped_buffer_hint=true + # MobilenetV4 model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_8.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_htp_batched_8_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + 
${DLCBUILDDIR}/mobilenet_v4_quant_batched_8.stamp + # Offline prepare of MobilenetV4 Batched DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --input_dlc=/output/mobilenet_v4_quant_batched_8.dlc \ + --output_dlc=/output/mobilenet_v4_htp_batched_8_O2.dlc \ + --optimization_level 2 \ + --htp_socs=sm8850 \ + --htp_dlbc=true \ + --memorymapped_buffer_hint=true + # MobilenetV4 model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_8_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + ${DLCBUILDDIR}/mobilenet_v4_float_batched_4.dlc: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ @@ -380,6 +547,7 @@ ${DLCBUILDDIR}/mobilenet_v4_float_batched_4.dlc: \ -i /models/ \ -d inputs "4,384,384,3" --out_node "probs" \ -o /output/mobilenet_v4_float_batched_4.dlc + touch $@ ${DLCBUILDDIR}/mobilenet_v4_quant_batched_4.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -399,6 +567,7 @@ ${DLCBUILDDIR}/mobilenet_v4_quant_batched_4.stamp: \ --input_dlc=/output/mobilenet_v4_float_batched_4.dlc \ --input_list=imagenet_image_384_list.txt \ --output_dlc=/output/mobilenet_v4_quant_batched_4.dlc + touch $@ # MobilenetV4 model conversion completed ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm8750.stamp: \ @@ -411,14 +580,15 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ --input_dlc=/output/mobilenet_v4_quant_batched_4.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ --optimization_level 3 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # MobilenetV4 model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -432,14 +602,37 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ --input_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ --optimization_level 3 \ - --htp_socs=sm7550,sm7635 \ - --memorymapped_buffer_hint=true + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + # MobilenetV4 model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of MobilenetV4 Batched DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --input_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ + --output_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ + --optimization_level 3 \ + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true # MobilenetV4 model 
conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -448,6 +641,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm8750.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm7550.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_sm8850.stamp \ # Offline prepare of MobilenetV4 Batched DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -455,14 +649,15 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ --input_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4.dlc \ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # MobilenetV4 model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -477,14 +672,15 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ --input_dlc=/output/mobilenet_v4_quant_batched_4.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # MobilenetV4 model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -498,14 +694,37 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ --input_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ - --memorymapped_buffer_hint=true + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + # MobilenetV4 model conversion completed + cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of MobilenetV4 Batched DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --input_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ + --output_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ + --optimization_level 2 \ + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true # MobilenetV4 model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -514,6 +733,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ 
${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm8750.stamp \ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm7550.stamp \ + ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2_sm8850.stamp \ # Offline prepare of MobilenetV4 Batched DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -521,14 +741,15 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ --input_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ --output_dlc=/output/mobilenet_v4_htp_batched_4_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # MobilenetV4 model conversion completed cp ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -589,16 +810,17 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/ssd_mobiledet_qat_quant.dlc \ --output_dlc=/output/ssd_mobiledet_qat_htp.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 3 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -612,16 +834,41 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ - --htp_dlbc=true \ + --htp_dlbc=true \ + --input_dlc=/output/ssd_mobiledet_qat_htp.dlc \ + --output_dlc=/output/ssd_mobiledet_qat_htp.dlc \ + --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ + --optimization_level 3 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + # SSD MobileDET model offline prepare for HTP completed + cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of MobileDET SSD DLC for HTP + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ --input_dlc=/output/ssd_mobiledet_qat_htp.dlc \ --output_dlc=/output/ssd_mobiledet_qat_htp.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 3 
\ - --htp_socs=sm7550,sm7635 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -630,6 +877,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm8750.stamp \ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm7550.stamp \ + ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_sm8850.stamp \ # Offline prepare of MobileDET SSD DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -637,6 +885,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ @@ -645,8 +894,8 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp: \ --output_dlc=/output/ssd_mobiledet_qat_htp.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -661,6 +910,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ @@ -668,8 +918,8 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm8750.stamp: \ --output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 2 \ - --htp_socs=sm8750 \ - --memorymapped_buffer_hint=true + --htp_socs=sm8750,sm8845 \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -683,6 +933,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ @@ -690,8 +941,33 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm7550.stamp: \ --output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ - --memorymapped_buffer_hint=true + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + # SSD MobileDET model offline prepare for HTP completed + cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/ssd_mobiledet_qat_quant.stamp + # Offline prepare of MobileDET SSD DLC for HTP + docker run \ + -e 
PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ + --output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ + --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ + --optimization_level 2 \ + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -700,6 +976,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm8750.stamp \ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2_sm8850.stamp \ # Offline prepare of MobileDET SSD DLC for HTP docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -707,6 +984,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ @@ -714,8 +992,8 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.stamp: \ --output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"\ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ - --memorymapped_buffer_hint=true + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --memorymapped_buffer_hint=true # SSD MobileDET model offline prepare for HTP completed cp ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -772,6 +1050,7 @@ ${DLCBUILDDIR}/mobilebert_float.dlc: \ --input_type segment_ids opaque \ --out_node transpose \ -o /output/mobilebert_float.dlc + touch $@ # MobileBERT Float DLC conversion completed ${DLCBUILDDIR}/mobilebert_quant.stamp: \ @@ -796,6 +1075,7 @@ ${DLCBUILDDIR}/mobilebert_quant.stamp: \ --act_bitwidth 8 \ --override_params \ --output_dlc=/output/mobilebert_quant.dlc + touch $@ ${DLCBUILDDIR}/mobilebert_htp_sm8750.stamp: \ ${DLCBUILDDIR}/mobilebert_quant.stamp \ @@ -808,21 +1088,22 @@ ${DLCBUILDDIR}/mobilebert_htp_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quant.dlc \ --output_dlc=/output/mobilebert_quantized_htp.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 3 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp.dlc ${MLPERF_MODELS_PATH} touch $@ ${DLCBUILDDIR}/mobilebert_htp_sm7550.stamp: \ - ${DLCBUILDDIR}/mobilebert_quant.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ # MobileBERT quant model offline prepare ... 
mkdir -p ${DLCBUILDDIR} docker run \ @@ -831,15 +1112,40 @@ ${DLCBUILDDIR}/mobilebert_htp_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ - --htp_dlbc=true \ + --htp_dlbc=true \ + --input_dlc=/output/mobilebert_quantized_htp.dlc \ + --output_dlc=/output/mobilebert_quantized_htp.dlc \ + --set_output_tensors="transpose:0" \ + --optimization_level 3 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + cp ${DLCBUILDDIR}/mobilebert_quantized_htp.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilebert_htp_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # MobileBERT quant model offline prepare ... + mkdir -p ${DLCBUILDDIR} + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -w /output \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quantized_htp.dlc \ --output_dlc=/output/mobilebert_quantized_htp.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 3 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm8850 \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -848,6 +1154,7 @@ ${DLCBUILDDIR}/mobilebert_htp.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobilebert_htp_sm8750.stamp \ ${DLCBUILDDIR}/mobilebert_htp_sm7550.stamp \ + ${DLCBUILDDIR}/mobilebert_htp_sm8850.stamp \ # MobileBERT quant model offline prepare ... 
mkdir -p ${DLCBUILDDIR} docker run \ @@ -856,15 +1163,16 @@ ${DLCBUILDDIR}/mobilebert_htp.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quantized_htp.dlc \ --output_dlc=/output/mobilebert_quantized_htp.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -880,15 +1188,16 @@ ${DLCBUILDDIR}/mobilebert_htp_O2_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quant.dlc \ --output_dlc=/output/mobilebert_quantized_htp_O2.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 2 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -903,6 +1212,7 @@ ${DLCBUILDDIR}/mobilebert_htp_O2_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ @@ -911,15 +1221,13 @@ ${DLCBUILDDIR}/mobilebert_htp_O2_sm7550.stamp: \ --output_dlc=/output/mobilebert_quantized_htp_O2.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm7550,sm6650,sm7635 \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ -${DLCBUILDDIR}/mobilebert_htp_O2.stamp: \ +${DLCBUILDDIR}/mobilebert_htp_O2_sm8850.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/mobilebert_htp_O2_sm8750.stamp \ - ${DLCBUILDDIR}/mobilebert_htp_O2_sm7550.stamp \ # MobileBERT quant model offline prepare ... mkdir -p ${DLCBUILDDIR} docker run \ @@ -927,16 +1235,44 @@ ${DLCBUILDDIR}/mobilebert_htp_O2.stamp: \ -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ + -w /output \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ - --num_hvx_threads=4 \ - --htp_dlbc=true \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ --input_dlc=/output/mobilebert_quantized_htp_O2.dlc \ --output_dlc=/output/mobilebert_quantized_htp_O2.dlc \ --set_output_tensors="transpose:0" \ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8850 \ + --memorymapped_buffer_hint=true + cp ${DLCBUILDDIR}/mobilebert_quantized_htp_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobilebert_htp_O2.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobilebert_htp_O2_sm8750.stamp \ + ${DLCBUILDDIR}/mobilebert_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/mobilebert_htp_O2_sm8850.stamp \ + # MobileBERT quant model offline prepare ... 
+ mkdir -p ${DLCBUILDDIR} + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=4 \ + --htp_dlbc=true \ + --input_dlc=/output/mobilebert_quantized_htp_O2.dlc \ + --output_dlc=/output/mobilebert_quantized_htp_O2.dlc \ + --set_output_tensors="transpose:0" \ + --optimization_level 2 \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true cp ${DLCBUILDDIR}/mobilebert_quantized_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ @@ -995,6 +1331,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ @@ -1002,7 +1339,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_sm8750.stamp: \ --input_dlc=/output/mobile_mosaic_quant.dlc \ --output_dlc=/output/mobile_mosaic_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp.dlc ${MLPERF_MODELS_PATH} @@ -1017,6 +1354,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ @@ -1024,7 +1362,30 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_sm7550.stamp: \ --input_dlc=/output/mobile_mosaic_htp.dlc \ --output_dlc=/output/mobile_mosaic_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + #Mobile Mosaic HTP model offline prepare completed + cp ${DLCBUILDDIR}/mobile_mosaic_htp.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobile_mosaic_htp_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of Mobile Mosaic DLC + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/mobile_mosaic_htp.dlc \ + --output_dlc=/output/mobile_mosaic_htp.dlc \ + --optimization_level 3 \ + --htp_socs=sm8850 \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp.dlc ${MLPERF_MODELS_PATH} @@ -1034,6 +1395,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile_mosaic_htp_sm8750.stamp \ ${DLCBUILDDIR}/mobile_mosaic_htp_sm7550.stamp \ + ${DLCBUILDDIR}/mobile_mosaic_htp_sm8850.stamp \ # Offline prepare of Mobile Mosaic DLC docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -1041,6 +1403,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ @@ -1048,7 +1411,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp: \ 
--input_dlc=/output/mobile_mosaic_htp.dlc \ --output_dlc=/output/mobile_mosaic_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp.dlc ${MLPERF_MODELS_PATH} @@ -1064,13 +1427,14 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ --input_dlc=/output/mobile_mosaic_quant.dlc \ --output_dlc=/output/mobile_mosaic_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp_O2.dlc ${MLPERF_MODELS_PATH} @@ -1085,13 +1449,37 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ --input_dlc=/output/mobile_mosaic_htp_O2.dlc \ --output_dlc=/output/mobile_mosaic_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + #Mobile Mosaic HTP model offline prepare completed + cp ${DLCBUILDDIR}/mobile_mosaic_htp_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + # Offline prepare of Mobile Mosaic DLC + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/mobile_mosaic_htp_O2.dlc \ + --output_dlc=/output/mobile_mosaic_htp_O2.dlc \ + --optimization_level 2 \ + --htp_socs=sm8850 \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp_O2.dlc ${MLPERF_MODELS_PATH} @@ -1101,6 +1489,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_O2.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm8750.stamp \ ${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/mobile_mosaic_htp_O2_sm8850.stamp \ # Offline prepare of Mobile Mosaic DLC docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -1108,13 +1497,14 @@ ${DLCBUILDDIR}/mobile_mosaic_htp_O2.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ --input_dlc=/output/mobile_mosaic_htp_O2.dlc \ --output_dlc=/output/mobile_mosaic_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true #Mobile Mosaic HTP model offline prepare completed cp ${DLCBUILDDIR}/mobile_mosaic_htp_O2.dlc ${MLPERF_MODELS_PATH} @@ -1143,7 +1533,7 @@ 
${DLCBUILDDIR}/snusr_float.dlc: \ touch $@ ${DLCBUILDDIR}/snusr_quant.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/snusr_float.dlc \ ${DLCBUILDDIR}/snusr_calibration_list.txt # Offline prepare of SNUSR DLC @@ -1165,7 +1555,7 @@ ${DLCBUILDDIR}/snusr_quant.stamp: \ touch $@ ${DLCBUILDDIR}/snusr_htp_sm8750.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/snusr_quant.stamp \ ${DLCBUILDDIR}/snusr_calibration_list.txt #HTP Graph prepare of SNUSR DLC @@ -1175,21 +1565,22 @@ ${DLCBUILDDIR}/snusr_htp_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/snusr_quant.dlc \ --output_dlc=/output/snusr_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp.dlc ${MLPERF_MODELS_PATH} touch $@ ${DLCBUILDDIR}/snusr_htp_sm7550.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ #HTP Graph prepare of SNUSR DLC docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -1197,23 +1588,48 @@ ${DLCBUILDDIR}/snusr_htp_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/snusr_htp.dlc \ --output_dlc=/output/snusr_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + #HTP Graph prepare of SNUSR model completed + cp ${DLCBUILDDIR}/snusr_htp.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/snusr_htp_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + #HTP Graph prepare of SNUSR DLC + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/snusr_htp.dlc \ + --output_dlc=/output/snusr_htp.dlc \ + --optimization_level 3 \ + --htp_socs=sm8850 \ --memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp.dlc ${MLPERF_MODELS_PATH} touch $@ ${DLCBUILDDIR}/snusr_htp.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/snusr_htp_sm8750.stamp \ - ${DLCBUILDDIR}/snusr_htp_sm7550.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/snusr_htp_sm8750.stamp \ + ${DLCBUILDDIR}/snusr_htp_sm7550.stamp \ + ${DLCBUILDDIR}/snusr_htp_sm8850.stamp \ #HTP Graph prepare of SNUSR DLC docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -1221,21 +1637,22 @@ ${DLCBUILDDIR}/snusr_htp.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ - --htp_dlbc=true \ + --htp_dlbc=true \ --input_dlc=/output/snusr_htp.dlc \ 
--output_dlc=/output/snusr_htp.dlc \ --optimization_level 3 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ --memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp.dlc ${MLPERF_MODELS_PATH} touch $@ ${DLCBUILDDIR}/snusr_htp_O2_sm8750.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/snusr_quant.stamp \ ${DLCBUILDDIR}/snusr_calibration_list.txt #HTP Graph prepare of SNUSR DLC @@ -1245,13 +1662,14 @@ ${DLCBUILDDIR}/snusr_htp_O2_sm8750.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=6 \ --input_dlc=/output/snusr_quant.dlc \ --output_dlc=/output/snusr_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8750 \ + --htp_socs=sm8750,sm8845 \ --memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH} @@ -1266,22 +1684,47 @@ ${DLCBUILDDIR}/snusr_htp_O2_sm7550.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=2 \ - --input_dlc=/output/snusr_quant.dlc \ + --input_dlc=/output/snusr_htp_O2.dlc \ + --output_dlc=/output/snusr_htp_O2.dlc \ + --optimization_level 2 \ + --htp_socs=sm7550,sm6650,sm7635 \ + --memorymapped_buffer_hint=true + #HTP Graph prepare of SNUSR model completed + cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH} + touch $@ + +${DLCBUILDDIR}/snusr_htp_O2_sm8850.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + #HTP Graph prepare of SNUSR DLC + docker run \ + -e PYTHONPATH=/snpe_sdk/lib/python \ + -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \ + -v ${SNPE_SDK}:/snpe_sdk \ + -v ${DLCBUILDDIR}:/output \ + -u ${USERID}:${GROUPID} \ + -w /output \ + mlperf_dlc_prepare \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ + --num_hvx_threads=8 \ + --htp_dlbc=true \ + --input_dlc=/output/snusr_htp_O2.dlc \ --output_dlc=/output/snusr_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm7550,sm7635 \ + --htp_socs=sm8850 \ --memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH} touch $@ ${DLCBUILDDIR}/snusr_htp_O2.stamp: \ - ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ - ${DLCBUILDDIR}/snusr_htp_O2_sm8750.stamp \ - ${DLCBUILDDIR}/snusr_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/snusr_htp_O2_sm8750.stamp \ + ${DLCBUILDDIR}/snusr_htp_O2_sm7550.stamp \ + ${DLCBUILDDIR}/snusr_htp_O2_sm8850.stamp \ #HTP Graph prepare of SNUSR DLC docker run \ -e PYTHONPATH=/snpe_sdk/lib/python \ @@ -1289,13 +1732,14 @@ ${DLCBUILDDIR}/snusr_htp_O2.stamp: \ -v ${SNPE_SDK}:/snpe_sdk \ -v ${DLCBUILDDIR}:/output \ -u ${USERID}:${GROUPID} \ + -w /output \ mlperf_dlc_prepare \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \ --num_hvx_threads=4 \ --input_dlc=/output/snusr_htp_O2.dlc \ --output_dlc=/output/snusr_htp_O2.dlc \ --optimization_level 2 \ - --htp_socs=sm8650,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ + --htp_socs=sm8650,sm7750,sm8735,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sc8380xp,sc8280x \ 
--memorymapped_buffer_hint=true #HTP Graph prepare of SNUSR model completed cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH} @@ -1307,25 +1751,62 @@ ${DLCBUILDDIR}/sd_precompute_data.tar: \ # Preparing sd_precompute_data.tar docker run \ -v ${SNPE_SDK}:/qnn_sdk \ - -v ${TOPDIR}/mobile_back_qti/DLC/util/:/util \ + -v ${TOPDIR}/mobile_back_qti/DLC/util/StableDiffusion:/util \ -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ -w /stable_diffusion \ mlperf_dlc_prepare \ - python3 /util/StableDiffusion/flatten.py \ + python3 /util/flatten.py \ --random_latent_init /stable_diffusion/random_latent_init/random_init_1.pkl \ --time_step_embedding /stable_diffusion/time_step_embeddings/unet_time_step_embeddings_20.pkl \ --time_step_list /stable_diffusion/scheduler/scheduler_time_steps_20.pkl \ --unconditional_text_emb /stable_diffusion/unconditional_text_emb.pkl \ --dumpdir /stable_diffusion/. mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion/. cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. 
+ cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. + cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. + cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + touch $@ ${DLCBUILDDIR}/text_encoder_qnn.cpp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -1350,6 +1831,7 @@ ${DLCBUILDDIR}/text_encoder_qnn.cpp: \ --bias_bitwidth 32 \ --quantization_overrides text_encoder_onnx/text_encoder.encodings \ --output_path /output/text_encoder.cpp + touch $@ ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -1357,17 +1839,20 @@ ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp: \ ${DLCBUILDDIR}/text_encoder_qnn.cpp # TEXT-ENCODER lib generation started docker run \ - -v ${SNPE_SDK}:/qnn_sdk \ - -e PYTHONPATH=/qnn_sdk/lib/python \ - -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ - -v ${DLCBUILDDIR}:/output \ - -u ${USERID}:${GROUPID} \ - mlperf_dlc_prepare \ + -v ${SNPE_SDK}:/qnn_sdk \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ -c /output/text_encoder.cpp \ -b /output/text_encoder.bin \ -o /output/model_libs \ -t x86_64-linux-clang + touch $@ # Text-encoder lib generation completed ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp: \ @@ -1388,30 +1873,189 @@ ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp: \ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ - --binary_file /output/text_encoder.serialized \ - --config_file mcp_config.json + --binary_file /output/text_encoder.serialized mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. 
chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion - chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + touch $@ # TEXT-ENCODER context binary generation completed +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v75.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v75.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + touch $@ + # TEXT-ENCODER context binary generation for v75 completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v79.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v79.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + touch $@ + # TEXT-ENCODER context binary generation for v79 completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v73_sd8sG4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v73_sd8sG4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + # TEXT-ENCODER context binary generation for v73 sd8sG4 completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v73_sd7G4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v73_sd7G4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + # TEXT-ENCODER context binary generation for v73 sd7G4 completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v81_sd8eliteG5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v81_sd8eliteG5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + touch $@ + # TEXT-ENCODER context binary generation for v81 sd8eliteG5 completed + +${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator_v81_sd8G5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp + # TEXT-ENCODER context-binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${TEXTENCODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/text_encoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \ + --binary_file /output/text_encoder.serialized \ + --config_file /models/mcp_config_v81_sd8G5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + touch $@ + # TEXT-ENCODER context binary generation for v81 sd8G5 completed + ${DLCBUILDDIR}/vae_decoder_qnn.cpp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ ${DLCBUILDDIR}/mlperf_models.stamp # VAE-DECODER conversion and quantization - # cpp & bin files + # cpp & bin files docker run \ -v ${SNPE_SDK}:/qnn_sdk \ -v ${VAEDECODER_MODEL_PATH}:/models \ - -e PYTHONPATH=/qnn_sdk/lib/python \ + -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ - -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ - -w /stable_diffusion/vae_decoder \ - -v ${TOPDIR}:/dir \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -v ${TOPDIR}:/dir \ mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \ --input_network vae_decoder_onnx/vae_decoder.onnx \ @@ -1420,6 +2064,7 @@ ${DLCBUILDDIR}/vae_decoder_qnn.cpp: \ --bias_bitwidth 32 \ --quantization_overrides vae_decoder_onnx/vae_decoder.encodings \ --output_path /output/vae_decoder.cpp + touch $@ ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -1431,6 +2076,8 @@ ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp: \ -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ @@ -1438,6 +2085,7 @@ ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp: \ -b /output/vae_decoder.bin \ -o /output/model_libs \ -t x86_64-linux-clang + touch $@ # vae-decoder lib generation completed ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp: \ @@ -1451,20 +2099,181 @@ ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp: \ -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ - --binary_file /output/vae_decoder.serialized \ - --config_file /models/mcp_config.json + --binary_file /output/vae_decoder.serialized mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. 
chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion - chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + touch $@ # VAE context binary generation completed +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v75.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v75.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + touch $@ + # VAE context binary generation completed for v75 + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v79.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v79.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + touch $@ + # VAE context binary generation completed for v79 + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v73_sd8sG4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v73_sd8sG4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + # VAE context binary generation completed for v73 sd8sG4 + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v73_sd7G4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v73_sd7G4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + # VAE context binary generation completed for v73 sd7G4 + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v81_sd8eliteG5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v81_sd8eliteG5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + touch $@ + # VAE context binary generation completed for v81 sd8eliteG5 + +${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator_v81_sd8G5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp + # VAE-DECODER lib generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${VAEDECODER_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/vae_decoder \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \ + --binary_file /output/vae_decoder.serialized \ + --config_file /models/mcp_config_v81_sd8G5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + touch $@ + # VAE context binary generation completed for v81 sd8G5 + ${DLCBUILDDIR}/unet_qnn.cpp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ ${DLCBUILDDIR}/mobile/.stamp \ @@ -1489,6 +2298,7 @@ ${DLCBUILDDIR}/unet_qnn.cpp: \ --bias_bitwidth 32 \ --quantization_overrides unet_onnx_batch_1/unet.encodings \ --output_path /output/unet.cpp + touch $@ ${DLCBUILDDIR}/unet_qnn_model_generator.stamp: \ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ @@ -1500,6 +2310,8 @@ ${DLCBUILDDIR}/unet_qnn_model_generator.stamp: \ -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \ @@ -1508,6 +2320,7 @@ ${DLCBUILDDIR}/unet_qnn_model_generator.stamp: \ -o /output/model_libs \ -t x86_64-linux-clang \ -t aarch64-android + touch $@ # UNET lib generation completed ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp: \ @@ -1521,20 +2334,182 @@ ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp: \ -e PYTHONPATH=/qnn_sdk/lib/python \ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ -u ${USERID}:${GROUPID} \ mlperf_dlc_prepare \ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ --model /output/model_libs/x86_64-linux-clang/libunet.so \ - --binary_file /output/unet.serialized \ - --config_file /models/mcp_config.json + --binary_file /output/unet.serialized mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/. chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion - chmod -R 777 ${DLCBUILDDIR}/stable_diffusion + touch $@ # UNET context binary generation completed +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v75.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v75.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v75/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v75 + touch $@ + # UNET context binary generation completed for v75 + +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v79.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v79.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v79/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v79 + touch $@ + # UNET context binary generation completed for v79 + +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v73_sd8sG4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v73_sd8sG4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd8sG4 + # UNET context binary generation completed for v73 sd8sG4 + +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v73_sd7G4.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v73_sd7G4.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v73_sd7G4 + # UNET context binary generation completed for v73 sd7G4 + +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v81_sd8eliteG5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v81_sd8eliteG5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5/. 
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8eliteG5 + touch $@ + # UNET context binary generation completed for v81 sd8eliteG5 + +${DLCBUILDDIR}/unet_qnn_context_binary_generator_v81_sd8G5.stamp: \ + ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \ + ${DLCBUILDDIR}/mobile/.stamp \ + ${DLCBUILDDIR}/unet_qnn_model_generator.stamp + # UNET context binary generation started + docker run \ + -v ${SNPE_SDK}:/qnn_sdk \ + -v ${UNET_MODEL_PATH}:/models \ + -e PYTHONPATH=/qnn_sdk/lib/python \ + -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \ + -v ${DLCBUILDDIR}:/output \ + -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \ + -w /stable_diffusion/unet \ + -u ${USERID}:${GROUPID} \ + mlperf_dlc_prepare \ + /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \ + --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \ + --model /output/model_libs/x86_64-linux-clang/libunet.so \ + --binary_file /output/unet.serialized \ + --config_file /models/mcp_config_v81_sd8G5.json + mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5/. + chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion_v81_sd8G5 + touch $@ + # UNET context binary generation completed for v81 sd8G5 + + #################################################################################### # CALIBRATION / QUANTIZATION #################################################################################### @@ -1684,6 +2659,7 @@ gen-htp-dlc-info: \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/ssd_mobiledet_qat_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp_batched_4.dlc && \ + /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp_batched_8_sm8850.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobile_mosaic_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/snusr_htp.dlc && \ /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilebert_quantized_htp.dlc' diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile index 15d358432..5853203f7 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile @@ -32,6 +32,7 @@ ${AIMETBUILDDIR}/nvidia_docker_runtime.stamp: \ aimet_calibration: \ clean \ + ${AIMETBUILDDIR}/aimet_calibration.stamp \ ${AIMETBUILDDIR}/input_vectors_generation.stamp \ copy_files_to_ouptut \ copy_configs_to_ouptut @@ -82,6 +83,8 @@ copy_files_to_ouptut: mkdir -p ${TOPDIR}/output/DLC/stable_diffusion cp -rv ${AIMETBUILDDIR}/example1/_exports_/* ${TOPDIR}/output/DLC/stable_diffusion/. cp -rv ${AIMETBUILDDIR}/stable_diffusion_models ${TOPDIR}/output/DLC/stable_diffusion/. + rm -f ${TOPDIR}/output/DLC/stable_diffusion/time_step_embeddings/unet_time_step_embeddings_20.pkl + (cd ${TOPDIR}/output/DLC/stable_diffusion/time_step_embeddings/. && wget https://github.com/mlcommons/mobile_open/releases/download/v4.1-sd-tflite/unet_time_step_embeddings_20.pkl && chmod -R 777 *) copy_configs_to_ouptut: #Copies configs to output @@ -91,12 +94,47 @@ copy_configs_to_ouptut: mv ${TOPDIR}/output/DLC/stable_diffusion/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/. 
mv ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/. mv ${TOPDIR}/output/DLC/stable_diffusion/unet_onnx_batch_1 ${TOPDIR}/output/DLC/stable_diffusion/unet/. - cp -rv ${AIMETBUILDDIR}/graph_config_text_encoder.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config.json - cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/. - cp -rv ${AIMETBUILDDIR}/graph_config_vae.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config.json - cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/. - cp -rv ${AIMETBUILDDIR}/graph_config_unet.json ${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config.json - cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/unet/. + + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v75/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v75.json + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v79/g' | sed -e 's/57/69/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v79.json + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v73_sd8sG4/g' | sed -e 's/57/85/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v73_sd8sG4.json + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v73_sd7G4/g' | sed -e 's/57/86/g' | sed -e 's/"vtcm_size"/4/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v73_sd7G4.json + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v81_sd8eliteG5/g' | sed -e 's/57/87/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v81_sd8eliteG5.json + cat ${AIMETBUILDDIR}/graph_config_text_encoder.json | sed -e 's/$$dsp_arch/v81_sd8G5/g' | sed -e 's/57/97/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config_v81_sd8G5.json + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v75/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v75.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v79/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v79.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd8sG4/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v73_sd8sG4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd7G4/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v73_sd7G4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8eliteG5/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v81_sd8eliteG5.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8G5/g' >${TOPDIR}/output/DLC/stable_diffusion/text_encoder/mcp_config_v81_sd8G5.json; + + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v75/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v75.json + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v79/g' | sed -e 's/57/69/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v79.json + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v73_sd8sG4/g' | sed -e 's/57/85/g' | sed -e 
's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v73_sd8sG4.json + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v73_sd7G4/g' | sed -e 's/57/86/g' | sed -e 's/"vtcm_size"/4/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v73_sd7G4.json + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v81_sd8eliteG5/g' | sed -e 's/57/87/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v81_sd8eliteG5.json + cat ${AIMETBUILDDIR}/graph_config_vae.json | sed -e 's/$$dsp_arch/v81_sd8G5/g' | sed -e 's/57/97/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config_v81_sd8G5.json + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v75/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v75.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v79/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v79.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd8sG4/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v73_sd8sG4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd7G4/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v73_sd7G4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8eliteG5/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v81_sd8eliteG5.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8G5/g' >${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/mcp_config_v81_sd8G5.json; + + + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v75/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v75.json + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v79/g' | sed -e 's/57/69/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v79.json + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v73_sd8sG4/g' | sed -e 's/57/85/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v73_sd8sG4.json + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v73_sd7G4/g' | sed -e 's/57/86/g' | sed -e 's/"vtcm_size"/4/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v73_sd7G4.json + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v81_sd8eliteG5/g' | sed -e 's/57/87/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v81_sd8eliteG5.json + cat ${AIMETBUILDDIR}/graph_config_unet.json | sed -e 's/$$dsp_arch/v81_sd8G5/g' | sed -e 's/57/97/g' | sed -e 's/"vtcm_size"/8/g'>${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config_v81_sd8G5.json + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v75/g' >${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v75.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v79/g' >${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v79.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd8sG4/g' >${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v73_sd8sG4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v73_sd7G4/g' >${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v73_sd7G4.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8eliteG5/g' 
>${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v81_sd8eliteG5.json; + cat ${AIMETBUILDDIR}/mcp_config.json | sed -e 's/_$$dsp_arch/_v81_sd8G5/g' >${TOPDIR}/output/DLC/stable_diffusion/unet/mcp_config_v81_sd8G5.json; + mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models/. mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/stable_diffusion_models diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json index 3597ab448..e3e2cee7b 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json @@ -1,17 +1,18 @@ { "graphs": [{ "graph_names":["text_encoder"], - "vtcm_mb":8, + "vtcm_mb":"vtcm_size", "O" : 3, "fp16_relaxed_precision": 0 }], "devices": [ { "pd_session": "unsigned", - "dsp_arch": "v79", + "dsp_arch": "$dsp_arch", + "soc_model": 57, "cores":[{ "rpc_control_latency": 100, "perf_profile": "burst" }] } ] - } +} \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json index 26d334d43..6951986fb 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json @@ -1,17 +1,18 @@ { "graphs": [{ "graph_names":["unet"], - "vtcm_mb":8, + "vtcm_mb":"vtcm_size", "O" : 3, "fp16_relaxed_precision": 0 }], "devices": [ { "pd_session": "unsigned", - "dsp_arch": "v79", + "dsp_arch": "$dsp_arch", + "soc_model": 57, "cores":[{ "rpc_control_latency": 100, "perf_profile": "burst" }] } ] - } +} \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json index 922a93a29..eedc1e490 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json @@ -1,17 +1,18 @@ { "graphs": [{ "graph_names":["vae_decoder"], - "vtcm_mb":8, + "vtcm_mb":"vtcm_size", "O" : 3, "fp16_relaxed_precision": 0 }], "devices": [ { "pd_session": "unsigned", - "dsp_arch": "v79", + "dsp_arch": "$dsp_arch", + "soc_model": 57, "cores":[{ "rpc_control_latency": 100, "perf_profile": "burst" }] } ] - } +} \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json index 7a3a1927a..4faa288be 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json +++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json @@ -1,5 +1,5 @@ { -"backend_extensions" : + "backend_extensions" : {"shared_library_path" : "/qnn_sdk/lib/x86_64-linux-clang/libQnnHtpNetRunExtensions.so", - "config_file_path" : "/models/graph_config.json"} + "config_file_path" : "/models/graph_config_$dsp_arch.json"} } \ No newline at end of file diff --git a/mobile_back_qti/DLC/util/StableDiffusion/README.md b/mobile_back_qti/DLC/util/StableDiffusion/README.md index 5b2ed6ed3..cb2eb08af 100644 --- a/mobile_back_qti/DLC/util/StableDiffusion/README.md +++ b/mobile_back_qti/DLC/util/StableDiffusion/README.md @@ -22,7 +22,7 @@ * 
Clone the mobile_app_open repository -* Extract the QAIRT SDK from +* Extract the QAIRT SDK from * Copy the qairt folder from the extract into mobile_app_open/mobile_back_qti @@ -34,6 +34,8 @@ Once done, sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb ``` +* Notebook version to be used: 0.1.1.250124 + * Clone the AIMET SD notebook repository inside `/mobile_back_qti/DLC/util/StableDiffusion/AIMET` @@ -59,5 +61,7 @@ Once done, sudo make stable_diffusion_qnn SNPE_SDK=/mobile_back_qti/qairt/ ``` -* After successful execution, all the artifacts needed to run stable diffusion inference on device will be located in - `/output/DLC/mlperf_models/stable_diffusion` +* After successful execution, all the artifacts needed to run stable diffusion inference on a device will be located in + `/output/DLC/mlperf_models/` + +* There will be multiple `stable_diffusion*` folders which correspond to different target architectures diff --git a/mobile_back_qti/README.md b/mobile_back_qti/README.md index 2f1c3e71c..4d647b9b8 100644 --- a/mobile_back_qti/README.md +++ b/mobile_back_qti/README.md @@ -12,8 +12,8 @@ uploaded with the other submission files to here: ` -* [Qualcomm Package Manager 3](https://qpm.qualcomm.com/#/main/tools/details/QPM3) -* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.29.0.241129) +* [Qualcomm QAIRT SDK](https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.40.0.251030/v2.40.0.251030.zip) +* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.40.0.251030) * Linux machine capable of running Ubuntu docker images ### Optional @@ -48,20 +48,6 @@ git clone https://github.com/mlcommons/mobile_app_open cd mobile_app_open ``` -* Install Qualcomm Package manager on the linux machine - -```shell -sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb -``` - -* Extract the QAIRT SDK (from Requirements above) to mobile_app_open/mobile_back_qti - -```shell -qpm-cli --extract ./qualcomm_neural_processing_sdk.2.25.0.240728.Linux-AnyCPU.qik -mkdir mobile_app_open/mobile_back_qti/qairt/ -cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qairt/ -``` - * If you have an HTTP proxy, you may need the following ```shell @@ -69,6 +55,10 @@ sudo apt install ca-certificates-java export USE_PROXY_WORKAROUND=1 ``` +Download and extract the SNPE SDK (from Requirements above) to mobile_app_open/mobile_back_qti + +The SDK should then be located at mobile_app_open/mobile_back_qti/qairt + Build with the following build command. ```shell @@ -112,8 +102,8 @@ uploaded with the other submission files to here: ` --dataset --usecase --mode accuracy ``` -* --usecase parameter can take one of these arguments => image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2 +* --usecase parameter can take one of these arguments => image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, stable_diffusion * see the results in accuracy_results.txt and performance_results.txt ## FAQ ### What devices does this backend support? -This backend only supports SD8_Elite, SD7SG3, SDX_Elite, SD8SG3, SD8G3 devices. +This backend only supports SD8EliteG5, SD8G5, SD8sG4, SD7G4, SD8Elite devices.
Other already launched Snapdragon based devices can also run the MLPerf app as default fallback. ### Is SNPE used to run all the models? @@ -184,4 +174,4 @@ Yes. All the models use Qualcomm AI Runtime(QAIRT) for execution for current ver ### What devices supports stable diffusion? -Currently, SD8_Elite and SD8G3 device supports stable_diffusion. +Currently, SD8EliteG5, SD8G5, SD8sG4, SD7G4 and SD8Elite devices support stable_diffusion. diff --git a/mobile_back_qti/cpp/backend_qti/BUILD b/mobile_back_qti/cpp/backend_qti/BUILD index 8422e45b9..24d190ae3 100644 --- a/mobile_back_qti/cpp/backend_qti/BUILD +++ b/mobile_back_qti/cpp/backend_qti/BUILD @@ -119,17 +119,19 @@ pbtxt2header( "settings/qti_settings_default_dsp.pbtxt", "settings/qti_settings_default_gpu.pbtxt", "settings/qti_settings_gpufp16.pbtxt", - "settings/qti_settings_sd4g2.pbtxt", - "settings/qti_settings_sd7cxg3.pbtxt", - "settings/qti_settings_sd7g3.pbtxt", - "settings/qti_settings_sd7sg3.pbtxt", + "settings/qti_settings_sd7G4.pbtxt", + "settings/qti_settings_sd8G5.pbtxt", "settings/qti_settings_sd8cxg3.pbtxt", "settings/qti_settings_sd8elite.pbtxt", - "settings/qti_settings_sd8g3.pbtxt", - "settings/qti_settings_sd8sg3.pbtxt", + "settings/qti_settings_sd8eliteG5.pbtxt", + "settings/qti_settings_sd8sG4.pbtxt", "settings/qti_settings_stablediffusion.pbtxt", + "settings/qti_settings_stablediffusion_v73_sd7G4.pbtxt", + "settings/qti_settings_stablediffusion_v73_sd8sG4.pbtxt", "settings/qti_settings_stablediffusion_v75.pbtxt", "settings/qti_settings_stablediffusion_v79.pbtxt", + "settings/qti_settings_stablediffusion_v81_sd8G5.pbtxt", + "settings/qti_settings_stablediffusion_v81_sd8eliteG5.pbtxt", ], ) diff --git a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h index 66c83fa21..e6926baab 100644 --- a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h +++ b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h @@ -122,6 +122,50 @@ static void process_config(const mlperf_backend_configuration_t *configs, backend_data->perfProfile_ = SNPE_PERFORMANCE_PROFILE_BURST; perfProfile = "burst"; } + } else if (strcmp(configs->keys[i], "bus_voltage_start") == 0) { + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_MIN_START"] = + configs->values[i]; + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_TARGET_START"] = + configs->values[i]; + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_MAX_START"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "core_voltage_start") == 0) { + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_MIN_START"] = + configs->values[i]; + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_TARGET_START"] = + configs->values[i]; + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_MAX_START"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "bus_voltage_done") == 0) { + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_MIN_DONE"] = + configs->values[i]; + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_TARGET_DONE"] = + configs->values[i]; + backend_data->customPerfProfileMap_["BUS_VOLTAGE_CORNER_MAX_DONE"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "core_voltage_done") == 0) { + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_MIN_DONE"] = + configs->values[i]; + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_TARGET_DONE"] = + configs->values[i]; + backend_data->customPerfProfileMap_["CORE_VOLTAGE_CORNER_MAX_DONE"] = + configs->values[i]; + } else if 
(strcmp(configs->keys[i], "hmx_voltage") == 0) { + backend_data->customPerfProfileMap_["DSP_HMX_VOLTAGE_CORNER_TARGET"] = + configs->values[i]; + backend_data->customPerfProfileMap_["DSP_HMX_VOLTAGE_CORNER_MAX"] = + configs->values[i]; + backend_data->customPerfProfileMap_["DSP_HMX_VOLTAGE_CORNER_MIN"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "hmx_clock_perf") == 0) { + backend_data->customPerfProfileMap_["DSP_HMX_CLOCK_PERF_MODE"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "dsp_start_sleep_latency") == 0) { + backend_data->customPerfProfileMap_["DSP_SLEEP_LATENCY_START_US"] = + configs->values[i]; + } else if (strcmp(configs->keys[i], "dsp_done_sleep_latency") == 0) { + backend_data->customPerfProfileMap_["DSP_SLEEP_LATENCY_DONE_US"] = + configs->values[i]; } else if (strcmp(configs->keys[i], "profiling_level") == 0) { profileLevel = configs->values[i]; if (std::strcmp(configs->values[i], "off") == 0) { diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc index 1ef836da9..e57b019f7 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc @@ -186,7 +186,9 @@ void QTIBackendHelper::use_psnpe(const char *model_path) { Snpe_SNPEBuilder_Create(containerHandle); dummyInputRuntimeListHandle = Snpe_RuntimeList_Create(); Snpe_RuntimeList_Add(dummyInputRuntimeListHandle, SNPE_RUNTIME_CPU); - Snpe_SNPEBuilder_SetPerformanceProfile(snpeBuilderHandle, perfProfile_); + setupPerfHandle(); + Snpe_SNPEBuilder_SetCustomPerfProfile(snpeBuilderHandle, + customPerfProfile_); Snpe_SNPEBuilder_SetExecutionPriorityHint(snpeBuilderHandle, SNPE_EXECUTION_PRIORITY_HIGH); Snpe_SNPEBuilder_SetRuntimeProcessorOrder(snpeBuilderHandle, @@ -328,7 +330,9 @@ void QTIBackendHelper::use_snpe(const char *model_path) { ResolveCommaSeparatedList(snpeOutputLayers_); Snpe_StringList_Handle_t outputTensors = ResolveCommaSeparatedList(snpeOutputTensors_); - Snpe_SNPEBuilder_SetPerformanceProfile(snpeBuilderHandle, perfProfile_); + setupPerfHandle(); + Snpe_SNPEBuilder_SetCustomPerfProfile(snpeBuilderHandle, + customPerfProfile_); Snpe_SNPEBuilder_SetProfilingLevel(snpeBuilderHandle, profilingLevel_); Snpe_SNPEBuilder_SetExecutionPriorityHint(snpeBuilderHandle, SNPE_EXECUTION_PRIORITY_HIGH); @@ -726,7 +730,9 @@ void QTIBackendHelper::set_runtime_config() { auto runtimeConfigHandle = Snpe_RuntimeConfig_Create(); Snpe_RuntimeConfig_SetRuntime(runtimeConfigHandle, runtime); - Snpe_RuntimeConfig_SetPerformanceProfile(runtimeConfigHandle, perfProfile_); + setupPerfHandle(); + Snpe_RuntimeConfig_SetCustomPerfProfile(runtimeConfigHandle, + customPerfProfile_); Snpe_RuntimeConfigList_PushBack(runtimeConfigsListHandle, runtimeConfigHandle); Snpe_RuntimeList_Add(inputRuntimeListHandle, runtime); @@ -740,7 +746,9 @@ void QTIBackendHelper::set_runtime_config() { } auto runtimeConfigHandle = Snpe_RuntimeConfig_Create(); Snpe_RuntimeConfig_SetRuntime(runtimeConfigHandle, runtime); - Snpe_RuntimeConfig_SetPerformanceProfile(runtimeConfigHandle, perfProfile_); + setupPerfHandle(); + Snpe_RuntimeConfig_SetCustomPerfProfile(runtimeConfigHandle, + customPerfProfile_); Snpe_RuntimeConfigList_PushBack(runtimeConfigsListHandle, runtimeConfigHandle); Snpe_RuntimeList_Add(inputRuntimeListHandle, runtime); @@ -753,7 +761,9 @@ void QTIBackendHelper::set_runtime_config() { } auto runtimeConfigHandle = Snpe_RuntimeConfig_Create(); Snpe_RuntimeConfig_SetRuntime(runtimeConfigHandle, 
runtime); - Snpe_RuntimeConfig_SetPerformanceProfile(runtimeConfigHandle, perfProfile_); + setupPerfHandle(); + Snpe_RuntimeConfig_SetCustomPerfProfile(runtimeConfigHandle, + customPerfProfile_); Snpe_RuntimeConfigList_PushBack(runtimeConfigsListHandle, runtimeConfigHandle); Snpe_RuntimeList_Add(inputRuntimeListHandle, runtime); @@ -766,7 +776,9 @@ void QTIBackendHelper::set_runtime_config() { } auto runtimeConfigHandle = Snpe_RuntimeConfig_Create(); Snpe_RuntimeConfig_SetRuntime(runtimeConfigHandle, runtime); - Snpe_RuntimeConfig_SetPerformanceProfile(runtimeConfigHandle, perfProfile_); + setupPerfHandle(); + Snpe_RuntimeConfig_SetCustomPerfProfile(runtimeConfigHandle, + customPerfProfile_); Snpe_RuntimeConfigList_PushBack(runtimeConfigsListHandle, runtimeConfigHandle); Snpe_RuntimeList_Add(inputRuntimeListHandle, runtime); @@ -884,3 +896,148 @@ void QTIBackendHelper::deinitSd() { sd_pipeline = nullptr; #endif } + +bool QTIBackendHelper::setupPerfHandle() { + customPerfProfile_ = Snpe_SNPEPerfProfile_CreatePreset(perfProfile_); + for (std::unordered_map::iterator mapIter = + customPerfProfileMap_.begin(); + mapIter != customPerfProfileMap_.end(); ++mapIter) { + Snpe_ErrorCode_t err; + std::string setting = mapIter->first; + std::string value = mapIter->second; + // Set various settings using the APIs + if (setting == "DSP_ENABLE_DCVS_START") { + err = Snpe_SNPEPerfProfile_SetEnableDspDcvsStart(customPerfProfile_, + value == "true"); + } else if (setting == "DSP_ENABLE_DCVS_DONE") { + err = Snpe_SNPEPerfProfile_SetEnableDspDcvsDone(customPerfProfile_, + value == "true"); + } else if (setting == "ASYNC_VOTING_ENABLE") { + err = Snpe_SNPEPerfProfile_SetEnableAsyncVoting(customPerfProfile_, + value == "true"); + } else if (setting == "DSP_SLEEP_LATENCY_START_US") { + err = Snpe_SNPEPerfProfile_SetSleepLatencyStart(customPerfProfile_, + std::stoi(value)); + } else if (setting == "DSP_SLEEP_LATENCY_DONE_US") { + err = Snpe_SNPEPerfProfile_SetSleepLatencyDone(customPerfProfile_, + std::stoi(value)); + } else if (setting == "HIGH_PERFORMANCE_MODE") { + err = Snpe_SNPEPerfProfile_SetHighPerformanceModeEnabled( + customPerfProfile_, value == "true"); + } else if (setting == "DSP_HYSTERESIS_TIME_US") { + err = Snpe_SNPEPerfProfile_SetDspHysteresisTime(customPerfProfile_, + std::stoi(value)); + } else if (setting == "POWERMODE_START") { + err = Snpe_SNPEPerfProfile_SetPowerModeStart( + customPerfProfile_, + static_cast(powerModeMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_MIN_START") { + err = Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerMinStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_MIN_DONE") { + err = Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerMinDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_MAX_START") { + err = Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerMaxStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_MAX_DONE") { + err = Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerMaxDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_TARGET_START") { + err = Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerTargetStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DCVS_VOLTAGE_CORNER_TARGET_DONE") { + err = 
Snpe_SNPEPerfProfile_SetDcvsVoltageCornerDcvsVCornerTargetDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "DSP_SLEEP_DISABLE_MS") { + err = Snpe_SNPEPerfProfile_SetSleepDisable(customPerfProfile_, + std::stoi(value)); + } else if (setting == "DSP_RPC_POLLING_TIME_US") { + err = Snpe_SNPEPerfProfile_SetDspRpcPollingTime(customPerfProfile_, + std::stoi(value)); + } else if (setting == "BUS_VOLTAGE_CORNER_MIN_START") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerMinStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "BUS_VOLTAGE_CORNER_MIN_DONE") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerMinDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "BUS_VOLTAGE_CORNER_MAX_START") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerMaxStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "BUS_VOLTAGE_CORNER_MAX_DONE") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerMaxDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "BUS_VOLTAGE_CORNER_TARGET_START") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerTargetStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "BUS_VOLTAGE_CORNER_TARGET_DONE") { + err = Snpe_SNPEPerfProfile_SetBusVoltageCornerTargetDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_MIN_START") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerminMvStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_MIN_DONE") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerMinMvDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_MAX_START") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerMaxMvStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_MAX_DONE") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerMaxMvDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_TARGET_START") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerTargetMvStart( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "CORE_VOLTAGE_CORNER_TARGET_DONE") { + err = Snpe_SNPEPerfProfile_SetCoreVoltageCornerTargetMvDone( + customPerfProfile_, + static_cast(voltageCornerMap[value])); + } else if (setting == "POWERMODE_DONE") { + err = Snpe_SNPEPerfProfile_SetPowerModeDone( + customPerfProfile_, + static_cast(powerModeMap[value])); + } else if (setting == "FAST_INIT_ENABLE") { + err = Snpe_SNPEPerfProfile_SetFastInitEnabled(customPerfProfile_, + value == "true"); + } else if (setting == "DSP_HMX_CLOCK_PERF_MODE") { + err = Snpe_SNPEPerfProfile_SetHmxClkPerfMode( + customPerfProfile_, + static_cast(hmxClkPerfModeMap[value])); + } else if (setting == "DSP_HMX_VOLTAGE_CORNER_MIN") { + err = Snpe_SNPEPerfProfile_SetHmxVoltageCornerMin( + customPerfProfile_, static_cast( + hmxVoltageCornerMap[value])); + } else if (setting == "DSP_HMX_VOLTAGE_CORNER_MAX") { + err = Snpe_SNPEPerfProfile_SetHmxVoltageCornerMax( + customPerfProfile_, static_cast( + hmxVoltageCornerMap[value])); + } else if (setting == "DSP_HMX_VOLTAGE_CORNER_TARGET") { + err = Snpe_SNPEPerfProfile_SetHmxVoltageCornerTarget( + customPerfProfile_, static_cast( + 
hmxVoltageCornerMap[value])); + } + if (err != SNPE_SUCCESS) { + LOG(ERROR) << "could not parse setting " << setting << std::endl; + return false; + } + LOG(INFO) << "Setting " << setting << " to " << value << std::endl; + } + return true; +} \ No newline at end of file diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h index e62dbc6b7..f60c0f6f1 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h +++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "DlSystem/DlEnums.hpp" #include "SNPE/PSNPE.h" #include "SNPE/SNPE.h" #include "allocator.h" @@ -29,6 +30,100 @@ limitations under the License. #include "StableDiffusionShared/include/QnnApiHelpers.hpp" #endif +static std::unordered_map + powerModeMap( + {{"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_UP_DOWN}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_ONLY_UP", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_ADJUST_ONLY_UP}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_AGGRESSIVE_MODE", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_AGGRESSIVE_MODE}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_DUTY_CYCLE_MODE", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_DUTY_CYCLE_MODE}, + {"SNPE_DSP_PERF_INFRASTRUCTURE_POWERMODE_UNKNOWN", + DlSystem::DspPerfPowerMode_t:: + DSP_PERF_INFRASTRUCTURE_POWERMODE_UNKNOWN}}); + +static std::unordered_map + voltageCornerMap( + {{"SNPE_DCVS_VOLTAGE_CORNER_DISABLE", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_CORNER_DISABLE}, + {"SNPE_DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER", + DlSystem::DspPerfVoltageCorner_t:: + DCVS_VOLTAGE_VCORNER_MIN_VOLTAGE_CORNER}, + {"SNPE_DCVS_VOLTAGE_VCORNER_SVS2", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_SVS2}, + {"SNPE_DCVS_VOLTAGE_VCORNER_SVS", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_SVS}, + {"SNPE_DCVS_VOLTAGE_VCORNER_SVS_PLUS", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_SVS_PLUS}, + {"SNPE_DCVS_VOLTAGE_VCORNER_NOM", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_NOM}, + {"SNPE_DCVS_VOLTAGE_VCORNER_NOM_PLUS", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_NOM_PLUS}, + {"SNPE_DCVS_VOLTAGE_VCORNER_TURBO", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_TURBO}, + {"SNPE_DCVS_VOLTAGE_VCORNER_TURBO_PLUS", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_TURBO_PLUS}, + {"SNPE_DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER", + DlSystem::DspPerfVoltageCorner_t:: + DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER}, + {"SNPE_DCVS_VOLTAGE_VCORNER_TURBO_L1", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_TURBO_L1}, + {"SNPE_DCVS_VOLTAGE_VCORNER_TURBO_L2", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_TURBO_L2}, + {"SNPE_DCVS_VOLTAGE_VCORNER_TURBO_L3", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_TURBO_L3}, + {"SNPE_DCVS_VOLTAGE_VCORNER_UNKNOWN", + DlSystem::DspPerfVoltageCorner_t::DCVS_VOLTAGE_VCORNER_UNKNOWN}}); + +static std::unordered_map + 
hmxClkPerfModeMap({{"SNPE_HMX_CLK_PERF_HIGH", + DlSystem::DspHmx_ClkPerfMode_t::HMX_CLK_PERF_HIGH}, + {"SNPE_HMX_CLK_PERF_LOW", + DlSystem::DspHmx_ClkPerfMode_t::HMX_CLK_PERF_LOW}}); + +static std::unordered_map + hmxVoltageCornerMap( + {{"SNPE_DCVS_EXP_VCORNER_DISABLE", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_DISABLE}, + {"SNPE_DCVS_EXP_VCORNER_MIN", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_MIN}, + {"SNPE_DCVS_EXP_VCORNER_LOW_SVS_D2", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_LOW_SVS_D2}, + {"SNPE_DCVS_EXP_VCORNER_LOW_SVS_D1", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_LOW_SVS_D1}, + {"SNPE_DCVS_EXP_VCORNER_LOW_SVS", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_LOW_SVS}, + {"SNPE_DCVS_EXP_VCORNER_SVS", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_SVS}, + {"SNPE_DCVS_EXP_VCORNER_SVS_L1", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_SVS_L1}, + {"SNPE_DCVS_EXP_VCORNER_NOM", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_NOM}, + {"SNPE_DCVS_EXP_VCORNER_NOM_L1", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_NOM_L1}, + {"SNPE_DCVS_EXP_VCORNER_TUR", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_TUR}, + {"SNPE_DCVS_EXP_VCORNER_TUR_L1", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_TUR_L1}, + {"SNPE_DCVS_EXP_VCORNER_TUR_L2", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_TUR_L2}, + {"SNPE_DCVS_EXP_VCORNER_TUR_L3", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_TUR_L3}, + {"SNPE_DCVS_EXP_VCORNER_MAX", + DlSystem::DspHmx_ExpVoltageCorner_t::DCVS_EXP_VCORNER_MAX}}); + class snpe_handler { public: Snpe_SNPE_Handle_t snpeHandle; @@ -62,6 +157,8 @@ class QTIBackendHelper { void get_accelerator_instances(int &numDSP, int &numGPU, int &numCPU, int &numGPU_FP16); + bool setupPerfHandle(); + public: enum QTIBufferType { FLOAT_32 = 0, UINT_8 = 1, INT_32 = 2 }; const char *name_ = "snpe"; @@ -87,6 +184,8 @@ class QTIBackendHelper { Snpe_StringList_Handle_t networkInputTensorNamesHandle_; Snpe_StringList_Handle_t networkOutputTensorNamesHandle_; Snpe_PerformanceProfile_t perfProfile_; + Snpe_SNPEPerfProfile_Handle_t customPerfProfile_ = nullptr; + std::unordered_map customPerfProfileMap_; Snpe_ProfilingLevel_t profilingLevel_; int32_t fd = -1; bool isTflite_; diff --git a/mobile_back_qti/cpp/backend_qti/qti_c.cc b/mobile_back_qti/cpp/backend_qti/qti_c.cc index 08de9874c..2b19d23f1 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_c.cc +++ b/mobile_back_qti/cpp/backend_qti/qti_c.cc @@ -111,6 +111,8 @@ mlperf_backend_ptr_t mlperf_backend_create( backend_data_->useIonBuffers_ = useIonBuffer_g; } + CpuCtrl::init(); + if (backend_data->bgLoad_) { CpuCtrl::startLoad(backend_data->loadOffTime_, backend_data->loadOnTime_); } @@ -121,8 +123,14 @@ mlperf_backend_ptr_t mlperf_backend_create( return backend_data; } - // use lowLatency cores for all snpe models - CpuCtrl::lowLatency(); + // use high latency cores for SD8EliteG5 and low latency cores for other + // devices + if (Socs::get_soc_name() == "SD8EliteG5") { + CpuCtrl::highLatency(); + } else { + CpuCtrl::lowLatency(); + } + set_system_paths(native_lib_path); std::string snpe_version = xverstr(SNPE_VERSION_STRING); @@ -210,7 +218,6 @@ mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { if (backend_data->isTflite_) { return tflite_backend_issue_query(backend_data->tfliteBackend_); } - if (backend_data->isStableDiffusion) { if (backend_data->executeSd()) { ret = MLPERF_SUCCESS; @@ -224,8 +231,7 
@@ mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) { #ifdef DEBUG_FLAG auto end = high_resolution_clock::now(); auto duration = duration_cast(end - start); - LOG(INFO) << "Query cnt: " << backend_data->queryCount_ - << "Inference Time(ms): " << duration.count(); + LOG(INFO) << "Inference Time(ms): " << duration.count(); #endif backend_data->queryCount_++; return ret; diff --git a/mobile_back_qti/cpp/backend_qti/qti_settings.h b/mobile_back_qti/cpp/backend_qti/qti_settings.h index 00cc57f73..b485be68b 100644 --- a/mobile_back_qti/cpp/backend_qti/qti_settings.h +++ b/mobile_back_qti/cpp/backend_qti/qti_settings.h @@ -31,13 +31,11 @@ const std::string empty_settings = ""; #define STRING_SETTINGS(device) SETTINGS_LHS(device) = SETTINGS_RHS(device); #include INCLUDE_SETTINGS(sd8cxg3) -#include INCLUDE_SETTINGS(sd7cxg3) -#include INCLUDE_SETTINGS(sd4g2) -#include INCLUDE_SETTINGS(sd8g3) +#include INCLUDE_SETTINGS(sd8sG4) +#include INCLUDE_SETTINGS(sd7G4) #include INCLUDE_SETTINGS(sd8elite) -#include INCLUDE_SETTINGS(sd8sg3) -#include INCLUDE_SETTINGS(sd7g3) -#include INCLUDE_SETTINGS(sd7sg3) +#include INCLUDE_SETTINGS(sd8eliteG5) +#include INCLUDE_SETTINGS(sd8G5) #include INCLUDE_SETTINGS(gpufp16) #include INCLUDE_SETTINGS(default_dsp) #include INCLUDE_SETTINGS(default_cpu) @@ -45,15 +43,17 @@ const std::string empty_settings = ""; #include INCLUDE_SETTINGS(stablediffusion) #include INCLUDE_SETTINGS(stablediffusion_v75) #include INCLUDE_SETTINGS(stablediffusion_v79) +#include INCLUDE_SETTINGS(stablediffusion_v73_sd8sG4) +#include INCLUDE_SETTINGS(stablediffusion_v73_sd7G4) +#include INCLUDE_SETTINGS(stablediffusion_v81_sd8eliteG5) +#include INCLUDE_SETTINGS(stablediffusion_v81_sd8G5) STRING_SETTINGS(sd8cxg3) -STRING_SETTINGS(sd7cxg3) -STRING_SETTINGS(sd4g2) -STRING_SETTINGS(sd8g3) +STRING_SETTINGS(sd8sG4) +STRING_SETTINGS(sd7G4) STRING_SETTINGS(sd8elite) -STRING_SETTINGS(sd8sg3) -STRING_SETTINGS(sd7g3) -STRING_SETTINGS(sd7sg3) +STRING_SETTINGS(sd8eliteG5) +STRING_SETTINGS(sd8G5) STRING_SETTINGS(gpufp16) STRING_SETTINGS(default_dsp) STRING_SETTINGS(default_cpu) @@ -61,5 +61,9 @@ STRING_SETTINGS(default_gpu) STRING_SETTINGS(stablediffusion) STRING_SETTINGS(stablediffusion_v75) STRING_SETTINGS(stablediffusion_v79) +STRING_SETTINGS(stablediffusion_v73_sd8sG4) +STRING_SETTINGS(stablediffusion_v73_sd7G4) +STRING_SETTINGS(stablediffusion_v81_sd8eliteG5) +STRING_SETTINGS(stablediffusion_v81_sd8G5) #endif diff --git a/mobile_back_qti/cpp/backend_qti/settings/checksums.txt b/mobile_back_qti/cpp/backend_qti/settings/checksums.txt new file mode 100644 index 000000000..820ed04fe --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/checksums.txt @@ -0,0 +1,56 @@ +MD5 (mobile_mosaic_htp_O2.dlc) = 6774a1d6930589d17279122348bbd5e4 +MD5 (mobile_mosaic_htp.dlc) = 0a4cf9d1349a66ef86dc1f1f2b1070cb +MD5 (mobilebert_quantized_htp_O2.dlc) = fa0c8f33c81c4d39c74e0e8cacb60fdb +MD5 (mobilebert_quantized_htp.dlc) = b346d91294c6faee2d1b88e4846fae6e +MD5 (mobilenet_v4_htp_batched_4_O2.dlc) = c7d4e8aa04c8a871ab4074747a1e7328 +MD5 (mobilenet_v4_htp_batched_4.dlc) = e34f79344c71ea2e625b17ca166cf144 +MD5 (mobilenet_v4_htp_batched_8_O2.dlc) = d8cbc539ee1884e364f784605ad3bc94 +MD5 (mobilenet_v4_htp_batched_8.dlc) = 1ba28c0c681ab398c50b8ed69285d4d8 +MD5 (mobilenet_v4_htp_O2.dlc) = b68d1634c0d111b9df217df2f11c601b +MD5 (mobilenet_v4_htp.dlc) = 769cc5495681c010eedaade2f4fee5bf +MD5 (snusr_htp_O2.dlc) = f78518906c08ab8b95b6fdc73c69f44f +MD5 (snusr_htp.dlc) = 81b860b70814bbfab3079805b6826eab +MD5 
(ssd_mobiledet_qat_htp_O2.dlc) = cc0fe91b1ab2550032aa34c36dec74ee +MD5 (ssd_mobiledet_qat_htp.dlc) = ecd8e895a6ffd9d0f558af143aa6a483 +MD5 (stable_diffusion_v73_sd7G4/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v73_sd7G4/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v73_sd7G4/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v73_sd7G4/text_encoder.serialized.bin) = f012f6d5e2f1715199a9c72b4bae771d +MD5 (stable_diffusion_v73_sd7G4/unet.serialized.bin) = 079213263ec3ab43451c44b9403df1ee +MD5 (stable_diffusion_v73_sd7G4/vae_decoder.serialized.bin) = 9a0703295816fb6462b13335bb7729b8 +MD5 (stable_diffusion_v73_sd8sG4/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v73_sd8sG4/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v73_sd8sG4/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v73_sd8sG4/text_encoder.serialized.bin) = bf77554a27e38adeaabd4e74a869da76 +MD5 (stable_diffusion_v73_sd8sG4/unet.serialized.bin) = d5c7d4b4fff26f92a622ca29b91f4299 +MD5 (stable_diffusion_v73_sd8sG4/vae_decoder.serialized.bin) = 1c0ed244d3f4cd5e96ab8ebcd1e7b0d7 +MD5 (stable_diffusion_v75/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v75/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v75/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v75/text_encoder.serialized.bin) = 83c83d6d935bdfeff97b02dfe69870e7 +MD5 (stable_diffusion_v75/unet.serialized.bin) = 06f3354d417cf81615b9f9151a68ff21 +MD5 (stable_diffusion_v75/vae_decoder.serialized.bin) = 17ded9d30f0cdf2286e60f3f32ea8e92 +MD5 (stable_diffusion_v79/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v79/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v79/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v79/text_encoder.serialized.bin) = 9ed8975cf198d78e1c141921ccd9c615 +MD5 (stable_diffusion_v79/unet.serialized.bin) = a96460727aca1568a12adf983483fb4c +MD5 (stable_diffusion_v79/vae_decoder.serialized.bin) = 0a9df82d0320001233d848e7077c014d +MD5 (stable_diffusion_v81_sd8eliteG5/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v81_sd8eliteG5/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v81_sd8eliteG5/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v81_sd8eliteG5/text_encoder.serialized.bin) = e7a0717485555275915c195c89243436 +MD5 (stable_diffusion_v81_sd8eliteG5/unet.serialized.bin) = 19f20bfc18cc49b21a7055e8129bc470 +MD5 (stable_diffusion_v81_sd8eliteG5/vae_decoder.serialized.bin) = 82436681811968ff84f30e1ff7bbce71 +MD5 (stable_diffusion_v81_sd8G5/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion_v81_sd8G5/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion_v81_sd8G5/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 (stable_diffusion_v81_sd8G5/text_encoder.serialized.bin) = e676c7968c53181f4a0b27cf579cf5bd +MD5 (stable_diffusion_v81_sd8G5/unet.serialized.bin) = 3c295c8ed164b2b6832e140f58c5a766 +MD5 (stable_diffusion_v81_sd8G5/vae_decoder.serialized.bin) = faa510e03379e699fd11936f6eb4d092 +MD5 (stable_diffusion/betas.bin) = 09d2e4306d319caf1b34e6afb5c63c22 +MD5 (stable_diffusion/lambdas.bin) = c7179725ec31a6e2c7daf008a5e1ff23 +MD5 (stable_diffusion/sd_precompute_data.tar) = 4c4bc6652b3f6c0b61de5a2212e1a538 +MD5 
(stable_diffusion/text_encoder.serialized.bin) = 18df51f416405ef7c6019e5f7a6719d2 +MD5 (stable_diffusion/unet.serialized.bin) = 8e32e9debdc303524c003d917abe4815 +MD5 (stable_diffusion/vae_decoder.serialized.bin) = c1c0204befd92ab6a1d4c3703776e634 diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt index 1d54dc8f8..41b345e17 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt @@ -36,8 +36,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "769cc5495681c010eedaade2f4fee5bf" } } delegate_selected: "SNPE_DSP" @@ -56,7 +56,7 @@ benchmark_setting { } custom_setting { id: "use_ion_buffer" - value: "true" + value: "false" } delegate_choice: { priority: 1 @@ -65,8 +65,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12288 model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "07d1d4f86d09e950434bc0c790d8b00a" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "e34f79344c71ea2e625b17ca166cf144" } } delegate_selected: "SNPE_DSP" @@ -93,8 +93,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "ecd8e895a6ffd9d0f558af143aa6a483" } } delegate_selected: "SNPE_DSP" @@ -129,8 +129,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "b346d91294c6faee2d1b88e4846fae6e" } } delegate_selected: "SNPE_DSP" @@ -161,8 +161,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "0a4cf9d1349a66ef86dc1f1f2b1070cb" } } delegate_selected: "SNPE_DSP" @@ -197,9 +197,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/snusr_htp.dlc" + model_checksum: "81b860b70814bbfab3079805b6826eab" } } delegate_selected: 
"SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd4g2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd4g2.pbtxt deleted file mode 100644 index d659728ac..000000000 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd4g2.pbtxt +++ /dev/null @@ -1,241 +0,0 @@ -# proto-file: flutter/cpp/proto/backend_setting.proto -# proto-message: BackendSetting - -common_setting { - id: "num_threads" - name: "Number of threads" - value { - value: "4" - name: "4 threads" - } -} - -common_setting { - id: "profiling_level" - name: "profile level" - value { - value: "off" - name: "profile level" - } -} - -benchmark_setting { - benchmark_id: "image_classification_v2" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - custom_setting { - id: "cpu_int8" - value: "true" - } - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "image_classification_offline_v2" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "psnpe_cpu" - accelerator_desc: "CPU" - custom_setting { - id: "cpu_int8" - value: "true" - } - batch_size: 12360 - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "07d1d4f86d09e950434bc0c790d8b00a" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "object_detection" - framework: "SNPE" - custom_setting { - id: "snpe_output_tensors" - value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - custom_setting { - id: "cpu_int8" - value: "true" - } - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "natural_language_processing" - framework: "SNPE" - custom_setting { - id: "snpe_output_tensors" - value: "transpose:0" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "input_buffer_type" - value: "float_32" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - custom_setting { - id: "cpu_int8" - value: "true" - } - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - 
accelerator_desc: "CPU" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "image_segmentation_v2" - framework: "SNPE" - custom_setting { - id: "input_buffer_type" - value: "uint_8" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "int_32" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - custom_setting { - id: "cpu_int8" - value: "true" - } - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" - } - } - delegate_selected: "SNPE_CPU" -} - -benchmark_setting { - benchmark_id: "super_resolution" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "input_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - custom_setting { - id: "cpu_int8" - value: "true" - } - delegate_name: "SNPE_CPU" - accelerator_name: "snpe_cpu" - accelerator_desc: "CPU" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" - } - } - delegate_selected: "SNPE_CPU" -} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7G4.pbtxt similarity index 84% rename from mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sg3.pbtxt rename to mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7G4.pbtxt index c2ee93d9e..c6f9dac1c 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7G4.pbtxt @@ -40,11 +40,11 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "769cc5495681c010eedaade2f4fee5bf" } } - single_stream_expected_latency_ns: 50000 + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -65,7 +65,7 @@ benchmark_setting { } custom_setting { id: "use_ion_buffer" - value: "true" + value: "false" } custom_setting { id: "output_buffer_type" @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "07d1d4f86d09e950434bc0c790d8b00a" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "e34f79344c71ea2e625b17ca166cf144" } } delegate_selected: "SNPE_DSP" @@ -101,8 +101,8 @@ benchmark_setting { value: "false" } custom_setting { - id: "perf_profile" 
- value: "burst" + id: "perf_profile" + value: "burst" } delegate_choice: { priority: 1 @@ -110,8 +110,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "ecd8e895a6ffd9d0f558af143aa6a483" } } delegate_selected: "SNPE_DSP" @@ -146,8 +146,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilebert_quantized_htp_O2.dlc" + model_checksum: "fa0c8f33c81c4d39c74e0e8cacb60fdb" } } delegate_selected: "SNPE_DSP" @@ -160,10 +160,6 @@ benchmark_setting { id: "input_buffer_type" value: "uint_8" } - custom_setting { - id: "perf_profile" - value: "burst" - } custom_setting { id: "bg_load" value: "false" @@ -172,14 +168,18 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "0a4cf9d1349a66ef86dc1f1f2b1070cb" } } delegate_selected: "SNPE_DSP" @@ -210,8 +210,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/snusr_htp_O2.dlc" + model_checksum: "f78518906c08ab8b95b6fdc73c69f44f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt deleted file mode 100644 index 5f41361c6..000000000 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt +++ /dev/null @@ -1,207 +0,0 @@ -# proto-file: flutter/cpp/proto/backend_setting.proto -# proto-message: BackendSetting - -common_setting { - id: "num_threads" - name: "Number of threads" - value { - value: "4" - name: "4 threads" - } -} - -common_setting { - id: "profiling_level" - name: "profile level" - value { - value: "off" - name: "profile level" - } -} - -benchmark_setting { - benchmark_id: "image_classification_v2" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" - } - } - 
single_stream_expected_latency_ns: 250000 - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_classification_offline_v2" - framework: "SNPE" - custom_setting { - id: "scenario" - value: "Offline" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "perf_profile" - value: "burst" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "psnpe_dsp" - accelerator_desc: "HTP" - batch_size: 12288 - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "07d1d4f86d09e950434bc0c790d8b00a" - } - } - delegate_selected: "SNPE_DSP" -} - - -benchmark_setting { - benchmark_id: "object_detection" - framework: "SNPE" - custom_setting { - id: "snpe_output_tensors" - value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "HTP" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "natural_language_processing" - framework: "SNPE" - custom_setting { - id: "snpe_output_tensors" - value: "transpose:0" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "input_buffer_type" - value: "float_32" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "DSP" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "image_segmentation_v2" - framework: "SNPE" - custom_setting { - id: "input_buffer_type" - value: "uint_8" - } - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "int_32" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "DSP" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" - } - } - delegate_selected: "SNPE_DSP" -} - -benchmark_setting { - benchmark_id: "super_resolution" - framework: "SNPE" - custom_setting { - id: "bg_load" - value: "false" - } - custom_setting { - id: "output_buffer_type" - value: "uint_8" - } - custom_setting { - id: "use_ion_buffer" - value: "false" - } - delegate_choice: { - priority: 1 - delegate_name: "SNPE_DSP" - accelerator_name: "snpe_dsp" - accelerator_desc: "DSP" - model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" - } - } - 
delegate_selected: "SNPE_DSP" -} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8G5.pbtxt similarity index 77% rename from mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g3.pbtxt rename to mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8G5.pbtxt index cc03b4b8e..11168ed3f 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8G5.pbtxt @@ -5,8 +5,8 @@ common_setting { id: "num_threads" name: "Number of threads" value { - value: "4" - name: "4 threads" + value: "8" + name: "8 threads" } } @@ -40,11 +40,11 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "769cc5495681c010eedaade2f4fee5bf" } } - single_stream_expected_latency_ns: 50000 + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -65,7 +65,7 @@ benchmark_setting { } custom_setting { id: "use_ion_buffer" - value: "true" + value: "false" } custom_setting { id: "output_buffer_type" @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4_O2.dlc" - model_checksum: "db876e43d09aedd5e1eab788dbd97a04" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "e34f79344c71ea2e625b17ca166cf144" } } delegate_selected: "SNPE_DSP" @@ -96,25 +96,26 @@ benchmark_setting { id: "bg_load" value: "false" } - custom_setting { - id: "use_ion_buffer" - value: "false" - } custom_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "ecd8e895a6ffd9d0f558af143aa6a483" } } delegate_selected: "SNPE_DSP" + single_stream_expected_latency_ns: 500000 } benchmark_setting { @@ -146,8 +147,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "b346d91294c6faee2d1b88e4846fae6e" } } delegate_selected: "SNPE_DSP" @@ -160,10 +161,6 @@ benchmark_setting { id: "input_buffer_type" value: "uint_8" } - custom_setting { - id: "perf_profile" - value: "burst" - } custom_setting { id: "bg_load" value: "false" @@ -172,17 +169,22 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: 
"SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "DSP" + accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "0a4cf9d1349a66ef86dc1f1f2b1070cb" } } delegate_selected: "SNPE_DSP" + single_stream_expected_latency_ns: 500000 } benchmark_setting { @@ -204,19 +206,15 @@ benchmark_setting { id: "perf_profile" value: "burst" } - custom_setting { - id: "use_ion_buffer" - value: "false" - } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/snusr_htp.dlc" + model_checksum: "81b860b70814bbfab3079805b6826eab" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7sg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8eliteG5.pbtxt similarity index 70% rename from mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7sg3.pbtxt rename to mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8eliteG5.pbtxt index 8a1b938a6..7b3cfb217 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7sg3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8eliteG5.pbtxt @@ -5,8 +5,8 @@ common_setting { id: "num_threads" name: "Number of threads" value { - value: "4" - name: "4 threads" + value: "8" + name: "8 threads" } } @@ -34,17 +34,21 @@ benchmark_setting { id: "output_buffer_type" value: "uint_8" } + custom_setting { + id: "dsp_start_sleep_latency" + value: "50" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "769cc5495681c010eedaade2f4fee5bf" } } - single_stream_expected_latency_ns: 50000 + single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" } @@ -57,20 +61,24 @@ benchmark_setting { } custom_setting { id: "bg_load" - value: "true" + value: "false" } custom_setting { id: "perf_profile" value: "burst" } - custom_setting { - id: "use_ion_buffer" - value: "false" - } custom_setting { id: "output_buffer_type" value: "uint_8" } + custom_setting { + id: "dsp_start_sleep_latency" + value: "45" + } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" @@ -78,8 +86,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4_O2.dlc" - model_checksum: "db876e43d09aedd5e1eab788dbd97a04" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp_batched_8.dlc" + model_checksum: "1ba28c0c681ab398c50b8ed69285d4d8" } } delegate_selected: "SNPE_DSP" @@ -96,25 +104,26 @@ 
benchmark_setting { id: "bg_load" value: "false" } - custom_setting { - id: "use_ion_buffer" - value: "false" - } custom_setting { id: "perf_profile" value: "burst" } + custom_setting { + id: "use_ion_buffer" + value: "false" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "ecd8e895a6ffd9d0f558af143aa6a483" } } delegate_selected: "SNPE_DSP" + single_stream_expected_latency_ns: 300000 } benchmark_setting { @@ -146,8 +155,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp.dlc" - model_checksum: "4cfaced53aed0ea563628d626bf2570b" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilebert_quantized_htp.dlc" + model_checksum: "b346d91294c6faee2d1b88e4846fae6e" } } delegate_selected: "SNPE_DSP" @@ -160,10 +169,6 @@ benchmark_setting { id: "input_buffer_type" value: "uint_8" } - custom_setting { - id: "perf_profile" - value: "burst" - } custom_setting { id: "bg_load" value: "false" @@ -172,17 +177,22 @@ benchmark_setting { id: "output_buffer_type" value: "int_32" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" - accelerator_desc: "DSP" + accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "0a4cf9d1349a66ef86dc1f1f2b1070cb" } } delegate_selected: "SNPE_DSP" + single_stream_expected_latency_ns: 500000 } benchmark_setting { @@ -190,7 +200,7 @@ benchmark_setting { framework: "SNPE" custom_setting { id: "bg_load" - value: "true" + value: "false" } custom_setting { id: "input_buffer_type" @@ -205,8 +215,20 @@ benchmark_setting { value: "burst" } custom_setting { - id: "use_ion_buffer" - value: "true" + id: "core_voltage_start" + value: "SNPE_DCVS_VOLTAGE_VCORNER_TURBO" + } + custom_setting { + id: "core_voltage_done" + value: "SNPE_DCVS_VOLTAGE_VCORNER_TURBO" + } + custom_setting { + id: "hmx_voltage" + value: "SNPE_DCVS_EXP_VCORNER_SVS_L1" + } + custom_setting { + id: "adaptive_polling" + value: "10" } delegate_choice: { priority: 1 @@ -214,9 +236,9 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp.dlc" - model_checksum: "8ff44a39cf911b3fd04809f4967d1501" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/snusr_htp.dlc" + model_checksum: "81b860b70814bbfab3079805b6826eab" } } delegate_selected: "SNPE_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sG4.pbtxt similarity index 85% rename from mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt rename to 
mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sG4.pbtxt index 92dc81403..cac12ebc5 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8sG4.pbtxt @@ -40,8 +40,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp.dlc" - model_checksum: "4dc06ff76d00a442be24b796651a27df" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp.dlc" + model_checksum: "769cc5495681c010eedaade2f4fee5bf" } } single_stream_expected_latency_ns: 500000 @@ -65,7 +65,7 @@ benchmark_setting { } custom_setting { id: "use_ion_buffer" - value: "true" + value: "false" } custom_setting { id: "output_buffer_type" @@ -78,8 +78,8 @@ benchmark_setting { accelerator_desc: "HTP" batch_size: 12360 model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilenet_v4_htp_batched_4.dlc" - model_checksum: "07d1d4f86d09e950434bc0c790d8b00a" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilenet_v4_htp_batched_4.dlc" + model_checksum: "e34f79344c71ea2e625b17ca166cf144" } } delegate_selected: "SNPE_DSP" @@ -100,18 +100,22 @@ benchmark_setting { id: "use_ion_buffer" value: "false" } + custom_setting { + id: "perf_profile" + value: "burst" + } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "HTP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/ssd_mobiledet_qat_htp.dlc" - model_checksum: "eadf4fbaa84fd524079a74512d40cbc6" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/ssd_mobiledet_qat_htp.dlc" + model_checksum: "ecd8e895a6ffd9d0f558af143aa6a483" } } - single_stream_expected_latency_ns: 500000 delegate_selected: "SNPE_DSP" + single_stream_expected_latency_ns: 500000 } benchmark_setting { @@ -143,8 +147,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobilebert_quantized_htp_O2.dlc" - model_checksum: "ba96b7c63a2e0437214611d20f0a5eb8" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobilebert_quantized_htp_O2.dlc" + model_checksum: "fa0c8f33c81c4d39c74e0e8cacb60fdb" } } delegate_selected: "SNPE_DSP" @@ -175,8 +179,8 @@ benchmark_setting { accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/mobile_mosaic_htp.dlc" - model_checksum: "b2b2a51ae3a52d97b1eb44c00432b713" + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/mobile_mosaic_htp.dlc" + model_checksum: "0a4cf9d1349a66ef86dc1f1f2b1070cb" } } delegate_selected: "SNPE_DSP" @@ -201,18 +205,14 @@ benchmark_setting { id: "perf_profile" value: "burst" } - custom_setting { - id: "use_ion_buffer" - value: "true" - } delegate_choice: { priority: 1 delegate_name: "SNPE_DSP" accelerator_name: "snpe_dsp" accelerator_desc: "DSP" model_file: { - model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0/qualcomm/snusr_htp_O2.dlc" - model_checksum: "195336860aff6a4e949093ca474f4d72" + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/snusr_htp_O2.dlc" + model_checksum: "f78518906c08ab8b95b6fdc73c69f44f" } } delegate_selected: "SNPE_DSP" diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd7G4.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd7G4.pbtxt new file mode 100644 index 000000000..6ffd6d7fb --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd7G4.pbtxt @@ -0,0 +1,46 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "QNN" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "bg_load" + value: "false" + } + delegate_choice: { + priority: 1 + delegate_name: "QNN_DSP" + accelerator_name: "snpe_dsp" + accelerator_desc: "DSP" + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/betas.bin" + model_checksum: "09d2e4306d319caf1b34e6afb5c63c22" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/lambdas.bin" + model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/sd_precompute_data.tar" + model_checksum: "4c4bc6652b3f6c0b61de5a2212e1a538" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/text_encoder.serialized.bin" + model_checksum: "f012f6d5e2f1715199a9c72b4bae771d" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/unet.serialized.bin" + model_checksum: "079213263ec3ab43451c44b9403df1ee" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd7G4/vae_decoder.serialized.bin" + model_checksum: "9a0703295816fb6462b13335bb7729b8" + } + } + delegate_selected: "QNN_DSP" +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd8sG4.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd8sG4.pbtxt new file mode 100644 index 000000000..08cabf108 --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v73_sd8sG4.pbtxt @@ -0,0 +1,46 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "QNN" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "bg_load" + value: "false" + } + delegate_choice: { + priority: 1 + delegate_name: "QNN_DSP" + accelerator_name: "snpe_dsp" + accelerator_desc: "DSP" + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/betas.bin" + model_checksum: "09d2e4306d319caf1b34e6afb5c63c22" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/lambdas.bin" + model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" + } + model_file: { + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/sd_precompute_data.tar" + model_checksum: "4c4bc6652b3f6c0b61de5a2212e1a538" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/text_encoder.serialized.bin" + model_checksum: "bf77554a27e38adeaabd4e74a869da76" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/unet.serialized.bin" + model_checksum: "d5c7d4b4fff26f92a622ca29b91f4299" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v73_sd8sG4/vae_decoder.serialized.bin" + model_checksum: "1c0ed244d3f4cd5e96ab8ebcd1e7b0d7" + } + } + delegate_selected: "QNN_DSP" +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v75.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v75.pbtxt index 9430e5cd1..c45506fcb 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v75.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v75.pbtxt @@ -43,4 +43,4 @@ benchmark_setting { } } delegate_selected: "QNN_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v79.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v79.pbtxt index 43aa1a3d5..a619b2814 100644 --- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v79.pbtxt +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v79.pbtxt @@ -43,4 +43,4 @@ benchmark_setting { } } delegate_selected: "QNN_DSP" -} \ No newline at end of file +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8G5.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8G5.pbtxt new file mode 100644 index 000000000..495ec7893 --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8G5.pbtxt @@ -0,0 +1,46 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "QNN" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "bg_load" + value: "false" + } + delegate_choice: { + priority: 1 + delegate_name: "QNN_DSP" + accelerator_name: "snpe_dsp" + accelerator_desc: "DSP" + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/betas.bin" + model_checksum: "09d2e4306d319caf1b34e6afb5c63c22" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/lambdas.bin" + model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/sd_precompute_data.tar" + model_checksum: "4c4bc6652b3f6c0b61de5a2212e1a538" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/text_encoder.serialized.bin" + model_checksum: "e676c7968c53181f4a0b27cf579cf5bd" + } + model_file: { + model_path: 
"https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/unet.serialized.bin" + model_checksum: "3c295c8ed164b2b6832e140f58c5a766" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8G5/vae_decoder.serialized.bin" + model_checksum: "faa510e03379e699fd11936f6eb4d092" + } + } + delegate_selected: "QNN_DSP" +} diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8eliteG5.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8eliteG5.pbtxt new file mode 100644 index 000000000..d75649502 --- /dev/null +++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion_v81_sd8eliteG5.pbtxt @@ -0,0 +1,46 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "QNN" + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "bg_load" + value: "false" + } + delegate_choice: { + priority: 1 + delegate_name: "QNN_DSP" + accelerator_name: "snpe_dsp" + accelerator_desc: "DSP" + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/betas.bin" + model_checksum: "09d2e4306d319caf1b34e6afb5c63c22" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/lambdas.bin" + model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/sd_precompute_data.tar" + model_checksum: "4c4bc6652b3f6c0b61de5a2212e1a538" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/text_encoder.serialized.bin" + model_checksum: "e7a0717485555275915c195c89243436" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/unet.serialized.bin" + model_checksum: "19f20bfc18cc49b21a7055e8129bc470" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/qualcomm/stable_diffusion_v81_sd8eliteG5/vae_decoder.serialized.bin" + model_checksum: "82436681811968ff84f30e1ff7bbce71" + } + } + delegate_selected: "QNN_DSP" +} diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.cc b/mobile_back_qti/cpp/backend_qti/soc_utility.cc index 1bbd7354f..df192bdc7 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.cc +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.cc @@ -65,36 +65,35 @@ std::map socDetails = // soc_name, num_inits, // hlc, // llc, - // max_cores, needs_rpcmem - {435, SocInfo(2, 0, 0, 0, true, qti_settings_sd8cxg3, "SD8cxG3", 1, - std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, - {568, SocInfo(0, 0, 1, 0, false, qti_settings_sd4g2, "SD4G2", 1, - std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, + // max_cores, needs_rpcmem, + // m_needs_stablediffusion {538, SocInfo(2, 0, 0, 0, false, qti_settings_sd8cxg3, "SDX_Elite", 1, std::vector({0, 1, 2, 3}), std::vector({4, 5, 6, 7}), 8, true)}, - {475, SocInfo(2, 0, 0, 0, false, qti_settings_sd7cxg3, "SD7cxG3", 1, - std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, false)}, - {557, SocInfo(2, 0, 0, 0, 
true, qti_settings_sd8g3, "SD8G3", 1, + {655, SocInfo(2, 0, 0, 0, true, qti_settings_sd8sG4, "SD8sG4", 1, std::vector({0, 1, 2, 3}), std::vector({4, 5, 6, 7}), 8, true, /* stable_diffusion */ true)}, - {614, SocInfo(2, 0, 0, 0, true, qti_settings_sd8sg3, "SD8SG3", 1, + {659, SocInfo(2, 0, 0, 0, true, qti_settings_sd7G4, "SD7G4", 1, std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, - {608, SocInfo(2, 0, 0, 0, true, qti_settings_sd7g3, "SD7G3", 1, - std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, - {636, SocInfo(2, 0, 0, 0, true, qti_settings_sd7sg3, "SD7SG3", 1, - std::vector({0, 1, 2, 3}), - std::vector({4, 5, 6, 7}), 8, true)}, + std::vector({4, 5, 6, 7}), 8, true, + /* stable_diffusion */ true)}, {618, SocInfo(2, 0, 0, 0, true, qti_settings_sd8elite, "SD8Elite", 1, std::vector({0, 1, 2, 3, 4, 5}), std::vector({6, 7}), 8, true, /* stable_diffusion */ true)}, + {660, SocInfo(2, 0, 0, 0, true, qti_settings_sd8eliteG5, + "SD8EliteG5", 1, std::vector({0, 1, 2, 3, 4, 5}), + std::vector({6, 7}), 8, true, + /* stable_diffusion */ true)}, + {685, SocInfo(2, 0, 0, 0, true, qti_settings_sd8G5, "SD8G5", 1, + std::vector({0, 1, 2, 3, 4, 5}), + std::vector({6, 7}), 8, true, + /* stable_diffusion */ true)}, + {640, SocInfo(2, 0, 0, 0, true, qti_settings_default_dsp, "SD6G4", + 1, std::vector({0, 1, 2, 3}), + std::vector({4, 5, 6, 7}), 8, true, + /* stable_diffusion */ false)}, {UNSUPPORTED_SOC_ID, SocInfo(2, 0, 0, 0, true, qti_settings_default_dsp, "Snapdragon", 1, std::vector({0, 1, 2, 3}), @@ -339,6 +338,7 @@ int Socs::soc_num_inits() { } bool Socs::isSnapDragon(const char *manufacturer) { + soc_info_init(); #ifdef __ANDROID__ bool is_qcom = false; if (strncmp("QUALCOMM", manufacturer, 7) == 0) { diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.h b/mobile_back_qti/cpp/backend_qti/soc_utility.h index acd29174e..d6b0ad97d 100644 --- a/mobile_back_qti/cpp/backend_qti/soc_utility.h +++ b/mobile_back_qti/cpp/backend_qti/soc_utility.h @@ -68,10 +68,16 @@ class SocInfo { } if (m_needs_stablediffusion) { #ifdef STABLEDIFFUSION_FLAG - if (m_soc_name == "SD8G3") - m_settings += qti_settings_stablediffusion_v75; - else if (m_soc_name == "SD8Elite") + if (m_soc_name == "SD8elite") m_settings += qti_settings_stablediffusion_v79; + else if (m_soc_name == "SD8sG4") + m_settings += qti_settings_stablediffusion_v73_sd8sG4; + else if (m_soc_name == "SD7G4") + m_settings += qti_settings_stablediffusion_v73_sd7G4; + else if (m_soc_name == "SD8EliteG5") + m_settings += qti_settings_stablediffusion_v81_sd8eliteG5; + else if (m_soc_name == "SD8G5") + m_settings += qti_settings_stablediffusion_v81_sd8G5; else // set m_needs_stablediffusion flag to "true" in SocInfo table to enable // default stable diffusion settings diff --git a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile index cc81fba45..49a9f77f3 100644 --- a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile +++ b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile @@ -31,25 +31,26 @@ RUN apt-get update && apt-get upgrade -y && apt-get autoremove -y && \ zip bzip2 gnupg wget python3-six python3-pip libncurses5 openjdk-11-jdk-headless clang-format golang-1.13-go build-essential RUN pip3 install pip==24.1.1 setuptools==45.0.0 -RUN pip3 install numpy==1.26.4 opencv-python tensorflow-cpu==2.10.1 + RUN pip3 install protobuf==3.6.0 -RUN pip3 install Pillow tensorflow_hub tf-slim \ - absl-py pyyaml +RUN pip3 install Pillow tf-slim absl-py pyyaml +RUN pip3 install 
opencv-python tensorflow-cpu==2.10.1 RUN pip3 install pandas matplotlib RUN pip3 install onnx==1.12.0 onnxruntime packaging -#RUN pip3 install protobuf==3.19.4 +RUN pip3 install protobuf==3.19.4 +RUN pip3 install numpy==1.23.5 --force-reinstall --no-cache-dir RUN curl -SL http://releases.llvm.org/9.0.0/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJC /usr/local/. && \ mv /usr/local/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04 /usr/local/clang-9.0.0 -RUN apt-get update && apt-get install -y --no-install-recommends bash coreutils +RUN apt-get update && apt-get install -y bash coreutils RUN which readlink ARG ndk_version=android-ndk-r25c ARG android_ndk_home=/opt/android/${ndk_version} # Install the NDK # Use wget instead of curl to avoid "Error in the HTTP2 framing layer" -RUN cd /tmp && wget --max-redirect=0 -nv https://dl.google.com/android/repository/${ndk_version}-linux.zip && \ +RUN cd /tmp && wget -nv https://dl.google.com/android/repository/${ndk_version}-linux.zip && \ unzip -q /tmp/${ndk_version}-linux.zip -d /opt/android && \ rm /tmp/${ndk_version}-linux.zip diff --git a/mobile_back_qti/make/qti_backend.mk b/mobile_back_qti/make/qti_backend.mk index 94be93365..728323df2 100644 --- a/mobile_back_qti/make/qti_backend.mk +++ b/mobile_back_qti/make/qti_backend.mk @@ -28,19 +28,23 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV69Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV68Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV79Stub.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV81Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpPrepare.so \ ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v79/unsigned/libSnpeHtpV79Skel.so \ + ${local_snpe_sdk_root}/lib/hexagon-v81/unsigned/libSnpeHtpV81Skel.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV73Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV75Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV79Stub.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV81Stub.so \ ${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v79/unsigned/libQnnHtpV79Skel.so \ + ${local_snpe_sdk_root}/lib/hexagon-v81/unsigned/libQnnHtpV81Skel.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \ ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so backend_qti_cmdline_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.so \ @@ -50,12 +54,14 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV69Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV68Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV79Stub.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV81Stub.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpPrepare.so \ ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \ 
${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \ ${local_snpe_sdk_root}/lib/hexagon-v79/unsigned/libSnpeHtpV79Skel.so \ + ${local_snpe_sdk_root}/lib/hexagon-v81/unsigned/libSnpeHtpV81Skel.so \ mobile_back_qti/run_mlperf_tests.sh \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV73Stub.so \ @@ -64,6 +70,8 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV79Stub.so \ ${local_snpe_sdk_root}/lib/hexagon-v79/unsigned/libQnnHtpV79Skel.so \ + ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV81Stub.so \ + ${local_snpe_sdk_root}/lib/hexagon-v81/unsigned/libQnnHtpV81Skel.so \ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \ ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so @@ -78,7 +86,7 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2)) ifeq ($(WITH_STABLEDIFFUSION),1) backend_qti_libs_deps = rm -f ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv && \ - ln -s /opt/opencv-3.4.7_android/sdk/native mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv + ln -s /opt/opencv-3.4.7_android/sdk/native mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv backend_qti_flutter_docker_args = --env WITH_STABLEDIFFUSION=${WITH_STABLEDIFFUSION} backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:with_stablediffusion=${WITH_STABLEDIFFUSION} backend_qti_cmdline_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \ @@ -133,4 +141,4 @@ docker/flutter/android/qti/release: flutter/check-release-env flutter/android/do docker/cmdline/android/qti/release: flutter/android/docker/qti/image MSYS2_ARG_CONV_EXCL="*" docker run \ ${flutter_common_docker_flags} \ - make cmdline/android/bins/release + make cmdline/android/bins/release \ No newline at end of file diff --git a/mobile_back_qti/run_mlperf_tests.sh b/mobile_back_qti/run_mlperf_tests.sh index 0d36bb2aa..3ceb4a0c9 100644 --- a/mobile_back_qti/run_mlperf_tests.sh +++ b/mobile_back_qti/run_mlperf_tests.sh @@ -25,6 +25,7 @@ export models_path="" export usecase_name="" export mode="" export LD_LIBRARY_PATH=. +export stable_diffusion_path="" # Below are the arguments and values to be passed to script in any order # use --dataset argument to pass dataset path as value @@ -33,6 +34,7 @@ export LD_LIBRARY_PATH=. # valid values for --mode argument: performance, accuracy. # use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 8 usecases) # valid values for --usecase argument: image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2 +# valid values for --sdpath argument: stable_diffusion, stable_diffusion_v73_sd7G4, stable_diffusion_v73_sd8sG4, stable_diffusion_v79, stable_diffusion_v75, stable_diffusion_v81_sd8eliteG5(default), stable_diffusion_v81_sd8G5 while [[ $# -gt 0 ]] do @@ -56,6 +58,11 @@ do export mode=$2 shift 1 fi + if [[ "$1" == "--sdpath" ]] + then + export stable_diffusion_path=$2 + shift 1 + fi shift 1 done @@ -71,6 +78,12 @@ then exit 1 fi +if [[ "$stable_diffusion_path" == "" ]] +then + echo "--sdpath not set. 
Using stable_diffusion_v81_sd8eliteG5 path by default" + export stable_diffusion_path="stable_diffusion_v81_sd8eliteG5" +fi + if [[ "$mode" == "performance" || "$mode" == "" ]] then echo "Running in Performance (default) mode. Switch to accuracy mode using --mode accuracy" @@ -175,7 +188,7 @@ echo "####### Performance:: Stable diffusion in progress #######" export test_case=stable_diffusion mkdir -p $test_case$test_case_suffix export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=PerformanceOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_full.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=1024 --min_duration_ms=60000 --max_duration_ms=300000 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 +./main EXTERNAL $test_case --mode=PerformanceOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_full.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=1024 --min_duration_ms=60000 --max_duration_ms=300000 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/$stable_diffusion_path --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 echo "#######$test_case######" >> $results_file grep "90th percentile latency (ns)" $use_case_results_file >> $results_file grep "Result is" $use_case_results_file >> $results_file @@ -256,7 +269,7 @@ echo "####### Accuracy:: Stable diffusion in progress #######" export test_case=stable_diffusion mkdir -p $test_case$test_case_suffix export use_case_results_file=$results_prefix$test_case$results_suffix -./main EXTERNAL $test_case --mode=AccuracyOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_test.tfrecord --input_clip_model=$models_path/stable_diffusion/clip_model_512x512.tflite --output_dir=$test_case$test_case_suffix --min_query_count=100 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 +./main EXTERNAL $test_case --mode=AccuracyOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_test.tfrecord --input_clip_model=$models_path/stable_diffusion/clip_model_512x512.tflite --output_dir=$test_case$test_case_suffix --min_query_count=100 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/$stable_diffusion_path --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1 echo "#######$test_case######" >> $results_file grep "Accuracy" $use_case_results_file >> $results_file echo "####### Stable Diffusion is complete #######" @@ -357,4 +370,3 @@ case $usecase_name in ;; esac fi - diff --git a/mobile_back_qti/variables.bzl b/mobile_back_qti/variables.bzl index b5919bfc5..4d6ffc31e 100644 --- a/mobile_back_qti/variables.bzl +++ b/mobile_back_qti/variables.bzl @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) 2020-2025 Qualcomm Innovation Center, Inc. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -36,7 +36,12 @@ def _impl(repository_ctx): sdk_version = filepath[found.stdout.rfind("/") + 1:] print("Update SNPE version: " + sdk_version) # buildifier: disable=print - repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/ReleaseNotes.txt")) + + found = repository_ctx.execute(["find", repository_ctx.attr.workspace_dir + "/mobile_back_qti/qairt/" + sdk_version, "maxdepth", "1", "-name", "QAIRT_ReleaseNotes.txt"]) + if found.stdout != "": + repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/QAIRT_ReleaseNotes.txt")) + else: + repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/ReleaseNotes.txt")) repository_ctx.file("BUILD", "") repository_ctx.file( diff --git a/mobile_back_samsung/README.md b/mobile_back_samsung/README.md index c0e68b0e2..2271e6726 100644 --- a/mobile_back_samsung/README.md +++ b/mobile_back_samsung/README.md @@ -1,7 +1,7 @@ # Building MLPerf Open app with Samsung backend -1. Update the "lib/internal" folder at 'mobile_app_open/mobile_back_samsung/samsung/lib/internal' by copying the libs from [here](https://github.com/mlcommons/mobile_back_samsung/tree/submission_v5.0_samsung_backend/samsung_libs) +1. Update the "lib/internal" folder at 'mobile_app_open/mobile_back_samsung/samsung/lib/internal' by copying the libs from [here](https://github.com/mlcommons/mobile_back_samsung/tree/samsung_exynos2600_backend/samsung_libs) 2. Change directory to the main path (mobile_app_open) 3. Run the build command diff --git a/mobile_back_samsung/samsung/lib/BUILD b/mobile_back_samsung/samsung/lib/BUILD index df1f0c281..b4ee022f8 100644 --- a/mobile_back_samsung/samsung/lib/BUILD +++ b/mobile_back_samsung/samsung/lib/BUILD @@ -14,11 +14,13 @@ pbtxt2header( name = "mbe_config_pbtxt", srcs = [ "public/include/mbe_config_1200.pbtxt", + "public/include/mbe_config_1300.pbtxt", "public/include/mbe_config_2100.pbtxt", "public/include/mbe_config_2200.pbtxt", "public/include/mbe_config_2300.pbtxt", "public/include/mbe_config_2400.pbtxt", "public/include/mbe_config_2500.pbtxt", + "public/include/mbe_config_2600.pbtxt", ], ) @@ -33,11 +35,13 @@ cc_library( hdrs = [ "public/include/mbe_config.hpp", "public/include/mbe_config_1200.hpp", + "public/include/mbe_config_1300.hpp", "public/include/mbe_config_2100.hpp", "public/include/mbe_config_2200.hpp", "public/include/mbe_config_2300.hpp", "public/include/mbe_config_2400.hpp", "public/include/mbe_config_2500.hpp", + "public/include/mbe_config_2600.hpp", "public/include/mbe_core_holder.hpp", "public/include/mbe_helper.hpp", "public/include/mbe_loader.hpp", diff --git a/mobile_back_samsung/samsung/lib/checksums.txt b/mobile_back_samsung/samsung/lib/checksums.txt index a04ae6660..ea3a0d767 100644 --- a/mobile_back_samsung/samsung/lib/checksums.txt +++ b/mobile_back_samsung/samsung/lib/checksums.txt @@ -1,13 +1,14 @@ e2f78216c1a3e0d925e17e4d6e7befbc1a2a853867864c6f30e553bf971f2a42 internal/libc++.so dce841472a4883ac0660b36708eddc2cd29342c2a108bf4d9da2ffdddab02f91 internal/libc++_shared.so 34ce517c63720a6a858d4e0237247a4e6db857cc4a1f6dffe7210916b96fdc3b internal/libeden_nn_on_system.so -f459d2b74a2330b938ea163037c5c5f896a97c1b3a7c1454051246bdab766e5c internal/libenn_extension.so +6d2ae9b43f23e7a779ab70162e4d6e46ce8d355c76eb5483e55068ef1f162965 internal/libenn_extension.so 97202acb183aa26d83f393d185f0f3cd94f648678916c6b27d91a8445f354472 internal/libenn_public_api_cpp.so -22b9f6c8272539971471ffef2fb762fbfd86eeef8b0c6779326d45d864d23cfe internal/libmbe1200_core.so 
-5b7087884f42b5b097656b74116e67bb92d1837e2225d435346fa2bdc665ba8c internal/libmbe2100_core.so -a02ca37bebabb07793730f3f47f939485c79e3ba1d88c6402ee3dc15cc9caf00 internal/libmbe2200_core.so -781b67e8945eba3fbbbe53813cb76c794b09c432bde1aba5043e776286c126f8 internal/libmbe2300_core.so -8c04f045c39b4d18b0726c3d89b839a65c64fc440686d9c5ed19d335a4baaf1f internal/libmbe2400_core.so -2dac2d2521996513a7e7493d66584ea4f7f9d0b19dbb9c51c8ec3dbb4370a0df internal/libmbe2500_core.so +7be2331ea05c6606f6fd3b58f4c4ccbe45cef1140e3057f70c46da96e53b26c5 internal/libmbe1200_core.so +2dd31c49253f7bed34d53d272449ef8be6e96a07770fd5c88306df76fec82a38 internal/libmbe1300_core.so +bc55af4e62937df08e2c8f9b4580b263f06e7fb5b7eddd1e464883a3ff1787d8 internal/libmbe2100_core.so +52e9a3f95bb5d2e5e27df5dfb9eb22b85232deef0de19399f86a3026be7f58ae internal/libmbe2200_core.so +92bedd9463dd23263acd4315d9c78f2a886b6ac46412c3184c25d6ee66de4c38 internal/libmbe2300_core.so +e9bdcb39ad48293f6d578062b76d0ee85f539498c0796248504eb3df1d1b056a internal/libmbe2400_core.so +29b1fd5771db7bab268b402d91ceb75bd6feb10b6161d3c25fb13651557896d9 internal/libmbe2500_core.so +b413f78eb64141e311a17aaccb48921ceeb4a9bf9f6a5512e2d24429b91dbab9 internal/libmbe2600_core.so cc921615cd844b47871646b30f146048044b55cdb9bf17d3bd9c5f4569ec2efe internal/libofi_rt_framework_user_vendor.so -7489fe74d8fb399400426df3f830fa41864714a013116399a3e8d3e6d66a22e1 internal/libsamsungbackend.so diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_config.hpp b/mobile_back_samsung/samsung/lib/public/include/mbe_config.hpp index 46b4f0e4b..68921cf88 100755 --- a/mobile_back_samsung/samsung/lib/public/include/mbe_config.hpp +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_config.hpp @@ -25,10 +25,12 @@ limitations under the License. */ #include "mbe_config_1200.hpp" +#include "mbe_config_1300.hpp" #include "mbe_config_2100.hpp" #include "mbe_config_2200.hpp" #include "mbe_config_2300.hpp" #include "mbe_config_2400.hpp" #include "mbe_config_2500.hpp" +#include "mbe_config_2600.hpp" #endif diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.hpp b/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.hpp new file mode 100644 index 000000000..30910da29 --- /dev/null +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.hpp @@ -0,0 +1,18 @@ +/* Copyright 2020-2025 Samsung System LSI. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef MBE_CONFIG_1300_H +#define MBE_CONFIG_1300_H +#include +#endif diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.pbtxt b/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.pbtxt new file mode 100644 index 000000000..663c6f006 --- /dev/null +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_config_1300.pbtxt @@ -0,0 +1,231 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +common_setting { + id: "num_threads" + name: "Number of threads" + value { + value: "4" + name: "4 threads" + } +} + +benchmark_setting { + benchmark_id: "image_classification" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1002" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "false" + } + custom_setting { + id: "lazy_mode" + value: "false" + } + model_file: { + model_path: "local:///MLPerf_sideload/ic.nnc" + model_checksum: "11a2debf7c43e89dfd8a6b97fe8ad4e1" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 900000 +} + +benchmark_setting { + benchmark_id: "image_segmentation_v2" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1002" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Int32" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "local:///MLPerf_sideload/sm_uint8.nnc" + model_checksum: "ba5c65b897094559478ae82dcc2a3163" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} + +benchmark_setting { + benchmark_id: "object_detection" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1002" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "false" + } + custom_setting { + id: "lazy_mode" + value: "false" + } + model_file: { + model_path: "local:///MLPerf_sideload/od.nnc" + model_checksum: "57b84061e81f91da19580c7171e59662" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} + +benchmark_setting { + benchmark_id: "super_resolution" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1002" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "local:///MLPerf_sideload/sr.nnc" + model_checksum: "" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} +benchmark_setting { + benchmark_id: "natural_language_processing" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "npu" + accelerator_desc: "npu" + custom_setting { + id: "preset" + 
value: "1000" + } + custom_setting { + id: "i_type" + value: "Int32" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "false" + } + custom_setting { + id: "lazy_mode" + value: "false" + } + model_file: { + model_path: "local:///MLPerf_sideload/mobile_bert_gpu.nnc" + model_checksum: "8bcc0eedf45e05d16f20a1842b514a3e" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} + +benchmark_setting { + benchmark_id: "image_classification_offline" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "npu" + batch_size: 48 + custom_setting { + id: "scenario" + value: "offline" + } + custom_setting { + id: "preset" + value: "1002" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "false" + } + custom_setting { + id: "lazy_mode" + value: "false" + } + model_file: { + model_path: "local:///MLPerf_sideload/ic_offline.nnc" + model_checksum: "11a2debf7c43e89dfd8a6b97fe8ad4e1" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.hpp b/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.hpp new file mode 100644 index 000000000..7221a7356 --- /dev/null +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.hpp @@ -0,0 +1,18 @@ +/* Copyright 2020-2025 Samsung System LSI. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef MBE_CONFIG_2600_H +#define MBE_CONFIG_2600_H +#include +#endif diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.pbtxt b/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.pbtxt new file mode 100644 index 000000000..c818cf8cf --- /dev/null +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_config_2600.pbtxt @@ -0,0 +1,308 @@ +# proto-file: flutter/cpp/proto/backend_setting.proto +# proto-message: BackendSetting + +common_setting { + id: "num_threads" + name: "Number of threads" + value { + value: "4" + name: "4 threads" + } +} + +benchmark_setting { + benchmark_id: "image_segmentation_v2" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1007" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Uint8" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/sm_uint8.nnc" + model_checksum: "43814a29b2e63719af67a30e8b5efc0c" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 500000 +} + +benchmark_setting { + benchmark_id: "object_detection" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1007" + } + custom_setting { + id: "i_type" + value: "Int8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/od.nnc" + model_checksum: "43a7e0faa0ab1e8a86e774947792e36d" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 200000 +} + +benchmark_setting { + benchmark_id: "super_resolution" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1010" + } + custom_setting { + id: "i_type" + value: "Int8" + } + custom_setting { + id: "o_type" + value: "Int8" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/sr.nnc" + model_checksum: "5ff526a6a30f781fdc4be310df43ac5e" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 990000 +} + +benchmark_setting { + benchmark_id: "natural_language_processing" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "npu" + accelerator_desc: "npu" + custom_setting { + id: "preset" + value: "1011" + } + custom_setting { + id: "i_type" + value: "Int32" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/mobile_bert.nnc" + model_checksum: "5b7c8b635697c909693264034fcc5898" + } + } + delegate_selected: "ENN_NPU" + 
single_stream_expected_latency_ns: 990000 +} + +benchmark_setting { + benchmark_id: "image_classification_v2" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "NPU" + custom_setting { + id: "preset" + value: "1007" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "true" + } + custom_setting { + id: "lazy_mode" + value: "true" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/mnv4_large.nnc" + model_checksum: "6d86899c52a88ae24c025a327bd7e3a0" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} + +benchmark_setting { + benchmark_id: "image_classification_offline_v2" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "npu" + batch_size: 6144 + custom_setting { + id: "scenario" + value: "offline" + } + custom_setting { + id: "preset" + value: "1004" + } + custom_setting { + id: "i_type" + value: "Uint8" + } + custom_setting { + id: "o_type" + value: "Float32" + } + custom_setting { + id: "extension" + value: "false" + } + custom_setting { + id: "lazy_mode" + value: "false" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/mnv4_large_offline.nnc" + model_checksum: "dbef7a1c6d56e2437d89085a3a38d7bf" + } + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 1000000 +} + +benchmark_setting { + benchmark_id: "stable_diffusion" + framework: "ENN" + delegate_choice: { + delegate_name: "ENN_NPU" + accelerator_name: "samsung_npu" + accelerator_desc: "npu" + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/sd_dec.nnc" + model_checksum: "0d961ff0471472b2903594f497e3064c" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/sd_unet.nnc" + model_checksum: "f600698aab63398291f64f3c49d99b2e" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/sd_enc.nnc" + model_checksum: "9470f2195a2b2eee0c0e90d5fd3853fe" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/te.bin" + model_checksum: "798b772155a69de5df44b304327bb3cc" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/gt.bin" + model_checksum: "f41c1130809647fbccd76707b2f14305" + } + model_file: { + model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/samsung/exynos2600/np.bin" + model_checksum: "c50807d72ce221cf08a2248a6ac3c48e" + } + } + custom_setting { + id: "preset" + value: "1004" + } + custom_setting { + id: "o_type" + value: "Uint8" + } + delegate_selected: "ENN_NPU" + single_stream_expected_latency_ns: 500000 + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "pipeline" + value: "StableDiffusionPipeline" + } + custom_setting { + id: "text_encoder_filename" + value: "sd_enc.nnc" + } + custom_setting { + id: "diffusion_model_filename" + value: "sd_unet.nnc" + } + custom_setting { + id: "decoder_filename" + value: "sd_dec.nnc" + } + custom_setting { + id: "timestep_embeddings_filename" + value: "te.bin" + } + 
custom_setting { + id: "gather_filename" + value: "gt.bin" + } + custom_setting { + id: "negative_prompt_filename" + value: "np.bin" + } +} diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_loader.hpp b/mobile_back_samsung/samsung/lib/public/include/mbe_loader.hpp old mode 100755 new mode 100644 index c65074c01..6de5c68a8 --- a/mobile_back_samsung/samsung/lib/public/include/mbe_loader.hpp +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_loader.hpp @@ -29,8 +29,9 @@ limitations under the License. namespace mbe { std::string mbe_core_libs[CORE_MAX] = { - "libmbe1200_core.so", "libmbe2100_core.so", "libmbe2200_core.so", - "libmbe2300_core.so", "libmbe2400_core.so", "libmbe2500_core.so", + "libmbe1200_core.so", "libmbe1300_core.so", "libmbe2100_core.so", + "libmbe2200_core.so", "libmbe2300_core.so", "libmbe2400_core.so", + "libmbe2500_core.so", "libmbe2600_core.so", }; void* load_symbol(void* dl_handle, const char* name) { diff --git a/mobile_back_samsung/samsung/lib/public/include/mbe_utils.hpp b/mobile_back_samsung/samsung/lib/public/include/mbe_utils.hpp index 1792f5f3a..919e57c2d 100755 --- a/mobile_back_samsung/samsung/lib/public/include/mbe_utils.hpp +++ b/mobile_back_samsung/samsung/lib/public/include/mbe_utils.hpp @@ -28,11 +28,13 @@ namespace mbe { enum DEVICE_ID { CORE_INVALID = -1, SOC_1200 = 0, + SOC_1300, SOC_2100, SOC_2200, SOC_2300, SOC_2400, SOC_2500, + SOC_2600, CORE_MAX }; diff --git a/mobile_back_samsung/samsung/lib/public/mbe_core/mbe_helper.cc b/mobile_back_samsung/samsung/lib/public/mbe_core/mbe_helper.cc index f8cec81e1..375721e6c 100644 --- a/mobile_back_samsung/samsung/lib/public/mbe_core/mbe_helper.cc +++ b/mobile_back_samsung/samsung/lib/public/mbe_core/mbe_helper.cc @@ -70,6 +70,9 @@ static int get_core_id_from_model(const char *model) { if (strstr((char *)model, "ERD8825") || strstr((char *)model, "A536") || strstr((char *)model, "S5E8825")) core_id = SOC_1200; + else if (strstr((char *)model, "ERD8835") || + strstr((char *)model, "S5E8835") || strstr((char *)model, "A546")) + core_id = SOC_1300; else if (strstr((char *)model, "G998") || strstr((char *)model, "G996") || strstr((char *)model, "G991") || strstr((char *)model, "G998") || strstr((char *)model, "UNIVERSAL2100")) @@ -93,6 +96,8 @@ static int get_core_id_from_model(const char *model) { strstr((char *)model, "S931") || strstr((char *)model, "S936") || strstr((char *)model, "S938") || strstr((char *)model, "F766")) core_id = SOC_2500; + else if (strstr((char *)model, "ERD9965") || strstr((char *)model, "S5E9965")) + core_id = SOC_2600; else return CORE_INVALID; return retrieve_model_surfix(model, core_id) ? 
core_id : CORE_INVALID; @@ -104,6 +109,8 @@ static int get_core_id_from_hardware(const char *hardware) { if (strstr((char *)hardware, "8825")) core_id = SOC_1200; + else if (strstr((char *)hardware, "8835")) + core_id = SOC_1300; else if (strstr((char *)hardware, "2100")) core_id = SOC_2100; else if (strstr((char *)hardware, "9925")) @@ -114,6 +121,8 @@ static int get_core_id_from_hardware(const char *hardware) { core_id = SOC_2400; else if (strstr((char *)hardware, "9955")) core_id = SOC_2500; + else if (strstr((char *)hardware, "9965")) + core_id = SOC_2600; else return CORE_INVALID; return core_id; @@ -127,12 +136,14 @@ int core_ctrl::support_mbe(const char *manufacturer, const char *model) { return CORE_INVALID; } + /* int core_id = get_core_id_from_model(model); if (core_id > CORE_INVALID) { return core_id; } + */ - core_id = get_core_id(); + int core_id = get_core_id(); if (core_id > CORE_INVALID) { return core_id; } @@ -144,6 +155,8 @@ const char *core_ctrl::get_benchmark_config(int core_id) { if (core_id == SOC_1200) { settings = mbe_config_1200_pbtxt.c_str(); + } else if (core_id == SOC_1300) { + settings = mbe_config_1300_pbtxt.c_str(); } else if (core_id == SOC_2100) { settings = mbe_config_2100_pbtxt.c_str(); } else if (core_id == SOC_2200) { @@ -154,6 +167,8 @@ const char *core_ctrl::get_benchmark_config(int core_id) { settings = mbe_config_2400_pbtxt.c_str(); } else if (core_id == SOC_2500) { settings = mbe_config_2500_pbtxt.c_str(); + } else if (core_id == SOC_2600) { + settings = mbe_config_2600_pbtxt.c_str(); } return settings; } diff --git a/mobile_back_samsung/samsung_backend.mk b/mobile_back_samsung/samsung_backend.mk index 7ad12ab36..717c0d8a4 100644 --- a/mobile_back_samsung/samsung_backend.mk +++ b/mobile_back_samsung/samsung_backend.mk @@ -1,4 +1,4 @@ -# Copyright 2020-2023 Samsung Electronics Co. LTD All Rights Reserved. +# Copyright 2020-2025 Samsung Electronics Co. LTD All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,11 +24,13 @@ ifeq (${WITH_SAMSUNG},1) ${BAZEL_LINKS_PREFIX}bin/mobile_back_samsung/samsung/lib/libsamsungbackend.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libenn_public_api_cpp.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libenn_extension.so \ + ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2600_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2500_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2400_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2300_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2200_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe2100_core.so \ + ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe1300_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libmbe1200_core.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libc++.so \ ${MOBILE_BACK_SAMSUNG_LIB_ROOT}/internal/libeden_nn_on_system.so \ diff --git a/tools/utils/update_model_path_n_checksum.sh b/tools/utils/update_model_path_n_checksum.sh new file mode 100755 index 000000000..ae1d16de1 --- /dev/null +++ b/tools/utils/update_model_path_n_checksum.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Update model_path and model_checksum in *.pbtxt files under the given directory. +# - Replaces model_path values that start with SOURCE_URL with TARGET_URL. +# - Sets model_checksum from checksums.txt by matching the relative path (preferred) or the basename as fallback. 
+# - Checksums file formats supported: +# MD5 () = +# (legacy: md5 or sha256) +# +# Usage: +# update_model_path_n_checksum.sh [-d DIR] [-c CHECKSUM_FILE] [-t TARGET_URL] [-s SOURCE_URL] +# -d DIR Directory to scan (default: script directory) +# -c CHECKSUM_FILE Path to checksums.txt (default: DIR/checksums.txt) +# -t|--target-url Base target URL (default: https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/) +# -s|--source-url Source URL prefix to replace (default: local:///mlperf_models/) +# +# + +SCAN_DIR="$(cd "$(dirname "$0")" && pwd)" +TARGET_URL="https://mobile.mlcommons-storage.org/app-resources/models/v5_0_1/" +SOURCE_URL="local:///mlperf_models/" +CHECKSUM_FILE="" + +while [[ $# -gt 0 ]]; do + case "$1" in + -d) SCAN_DIR="$2"; shift 2 ;; + -c) CHECKSUM_FILE="$2"; shift 2 ;; + -t|--target-url) TARGET_URL="$2"; shift 2 ;; + -s|--source-url) SOURCE_URL="$2"; shift 2 ;; + -h|--help) + grep '^#' "$0" | sed -E 's/^# ?//' | sed -n '1,40p' + exit 0 + ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +# Normalize URLs to have a trailing slash for concatenation +if [[ -n "${TARGET_URL}" && "${TARGET_URL}" != */ ]]; then TARGET_URL="${TARGET_URL}/"; fi +if [[ -n "${SOURCE_URL}" && "${SOURCE_URL}" != */ ]]; then SOURCE_URL="${SOURCE_URL}/"; fi + +if [[ -z "${CHECKSUM_FILE}" ]]; then + CHECKSUM_FILE="$SCAN_DIR/checksums.txt" +fi + +if [[ ! -f "$CHECKSUM_FILE" ]]; then + echo "ERROR: checksum file not found: $CHECKSUM_FILE" >&2 + exit 1 +fi + +if [[ ! -d "$SCAN_DIR" ]]; then + echo "ERROR: directory not found: $SCAN_DIR" >&2 + exit 1 +fi + + +shopt -s nullglob +# Gather files using glob to avoid subshell side-effects +files=("$SCAN_DIR"/*.pbtxt) +if [[ ! -e "${files[0]}" ]]; then + echo "No .pbtxt files found in $SCAN_DIR" + exit 0 +fi + +for f in "${files[@]}"; do + tmp="${f}.tmp.$$" + changes_in_file=0 + + # Process with awk reading checksums first, then the pbtxt file + awk -v TARGET_URL="$TARGET_URL" -v SOURCE_URL="$SOURCE_URL" ' + BEGIN { + local_prefix = SOURCE_URL + } + # Phase 1: Read checksum file (first input) + FNR==NR { + # Accepted formats per line: + # MD5 () = + # (legacy; md5 or sha256) + # Skip empty/comment lines + if ($0 ~ /^[ \t]*$/) next; + if ($0 ~ /^[#]/) next; + + line=$0 + hash=""; path=""; + + # Try to parse: MD5 (path) = hash + lp = index(line, "("); + rp = index(line, ")"); + eq = index(line, "="); + if (match(line, /^[[:space:]]*MD5[[:space:]]*\(/) && lp>0 && rp>lp && eq>rp) { + path = substr(line, lp+1, rp - lp - 1); + hash = substr(line, eq+1); + gsub(/^[[:space:]]+/, "", hash); + gsub(/[[:space:]]+$/, "", hash); + } else { + # Legacy: first two whitespace-separated fields + split(line, a, /[ \t]+/); + hash = a[1]; path = a[2]; + } + + # Normalize hash to lowercase + hash = tolower(hash); + + # Validate hex length: 32 (md5) or 64 (sha256) + if ((length(hash)==32 || length(hash)==64) && length(path)>0) { + cksum_by_rel[path]=hash + # Also map by basename as a fallback + n=split(path, parts, "/"); base=parts[n] + if (!(base in cksum_by_base)) cksum_by_base[base]=hash + } + next + } + + # Phase 2: Process pbtxt file + { + line=$0 + if (index(line, "model_path:") && index(line, local_prefix)) { + # Extract the relative path after the prefix + # Example: model_path: "local:///mlperf_models/foo/bar.bin" + start=index(line, local_prefix) + length(local_prefix) + rel=substr(line, start) + # rel currently includes trailing quote and maybe more; strip after next quote + quote_pos=index(rel, "\"") + if (quote_pos > 0) rel=substr(rel, 1, 
quote_pos-1) + + # Determine remote path (generic): always TARGET_URL + rel + remote=TARGET_URL rel + + # Rebuild the model_path line with the remote URL + prefix=substr(line, 1, index(line, "\"")-1) + print prefix "\"" remote "\"" + pending_rel=rel + changed=1 + next + } + + # If the previous line changed a model_path, override the model_checksum that follows + if (pending_rel!="" && index(line, "model_checksum:") ) { + rel=pending_rel + # Choose checksum: exact relative path first, then basename + hash="" + if (rel in cksum_by_rel) { + hash=cksum_by_rel[rel] + } else { + n=split(rel, parts, "/"); base=parts[n] + if (base in cksum_by_base) hash=cksum_by_base[base] + } + indent=substr(line, 1, index(line, "m")-1) + print indent "model_checksum: \"" hash "\"" + pending_rel="" + changed=1 + next + } + + # Default: print line as-is + print line + } + + END { + } + ' "$CHECKSUM_FILE" "$f" > "$tmp" + + # Detect if file changed by comparing + if ! cmp -s "$f" "$tmp"; then + changes_in_file=1 + mv "$tmp" "$f" + echo "Updated: $f" + else + rm -f "$tmp" + fi + + +done +
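Usage note for the --sdpath argument added to mobile_back_qti/run_mlperf_tests.sh: the value names the stable-diffusion model folder appended to models_path for the stable_diffusion test case, and when the flag is omitted the script falls back to stable_diffusion_v81_sd8eliteG5. Below is a minimal invocation sketch, assuming the script, models and dataset are already on the device; the on-device paths are illustrative assumptions, and any arguments the script already required before this change (such as the models path) are omitted here.

# hedged sketch; --sdpath must be one of the stable_diffusion_* folders listed
# in the script header, e.g. stable_diffusion_v81_sd8G5 for SD8G5 devices
# (the dataset path below is a hypothetical on-device location)
./run_mlperf_tests.sh \
  --dataset /data/local/tmp/mlperf/datasets \
  --mode performance \
  --sdpath stable_diffusion_v81_sd8G5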
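Usage note for the new tools/utils/update_model_path_n_checksum.sh helper: it scans the *.pbtxt files directly under the given directory, rewrites model_path values that start with the source prefix (local:///mlperf_models/ by default) to the target URL (the v5_0_1 storage base by default), and refreshes the model_checksum line that follows each rewritten path from the checksum list. The sketch below is hedged: the checksum file location and the relative paths in its entries are hypothetical, while the flags (-d, -c, -t/--target-url, -s/--source-url) and the two accepted entry formats come from the script itself; the hash/file pairings are copied from the exynos2600 config above purely as a format illustration.

# hedged sketch: write an illustrative checksum list; either accepted format works,
# and entries are matched by the path relative to the source prefix (or by basename
# as a fallback)
cat > /tmp/model_checksums.txt <<'EOF'
MD5 (samsung/exynos2600/od.nnc) = 43a7e0faa0ab1e8a86e774947792e36d
43814a29b2e63719af67a30e8b5efc0c samsung/exynos2600/sm_uint8.nnc
EOF

# rewrite model_path/model_checksum pairs in the Samsung pbtxt configs, using the
# default v5_0_1 target URL and the default local:///mlperf_models/ source prefix
./tools/utils/update_model_path_n_checksum.sh \
  -d mobile_back_samsung/samsung/lib/public/include \
  -c /tmp/model_checksums.txt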