Arm Release v1.3.0 (KhronosGroup#24)

Updated licenses and sample CMakeLists. Updated sample tutorials to be more clear about Mali-specific content. Ordering samples now removes duplicates from the order list. Render granularity only displays the first time, and it is now calculated correctly. Added missing headers to CMake, variant definitions are now OO, removed superfluous dynamic code. Shader program removed and reverted back inside pipeline layout. Removed FAQ to be added as a separate MR. Added contents to memory limits document. Updated depth format function and integrated it into all existing depth buffer images.
huawei-ahcox · Feb 20, 2020 · 2d31f78 · 2d31f78
1 parent 4772121
commit 2d31f78
Show file tree

Hide file tree

Showing 99 changed files with 3,052 additions and 931 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -4,14 +4,6 @@ stages:
   - Test
   - Package
 
-cache:
-  key: ${CI_PIPELINE_ID}
-  paths:
-    - build/windows
-    - build/linux
-    - build/android
-    - output
-
 Doxygen:
   stage: Check
   image: khronosgroup/vulkan-samples
@@ -77,6 +69,10 @@ Linux:
   tags:
     - linux
     - docker
+  artifacts:
+    paths:
+      - build/linux
+    expire_in: 2h
   script:
     - cmake -G "Unix Makefiles" -H. -Bbuild/linux -DCMAKE_BUILD_TYPE=Release -DVKB_BUILD_TESTS=ON -DVKB_BUILD_SAMPLES=ON
     - cmake --build build/linux --target vulkan_samples --config Release -- -j$(($(nproc)/2+1))
@@ -85,12 +81,12 @@ Windows:
   stage: Build
   variables:
     GIT_SUBMODULE_STRATEGY: recursive
+  tags:
+    - gpu
   artifacts:
     paths:
       - build/windows
-    expire_in: 1d
-  tags:
-    - gpu
+    expire_in: 2h
   script:
     - cmake -G"Visual Studio 15 2017 Win64" -H. -Bbuild/windows -DVKB_BUILD_TESTS:BOOL=ON -DVKB_BUILD_SAMPLES:BOOL=ON
     - cmake --build build/windows --target vulkan_samples --config Release
@@ -103,6 +99,10 @@ Android:
   tags:
     - linux
     - docker
+  artifacts:
+    paths:
+      - build/android
+    expire_in: 2h
   script:
     - cmake -G "Unix Makefiles" -H. -Bbuild/android -DCMAKE_TOOLCHAIN_FILE=bldsys/toolchain/android_gradle.cmake -DVKB_BUILD_TESTS=ON -DVKB_BUILD_SAMPLES=ON
     - cmake --build build/android --config Release --target vulkan_samples_package -- -j$(($(nproc)/2+1))
@@ -119,7 +119,7 @@ GenerateSample:
    - docker
   script:
    - cd tests/generate_sample
-   - python3 generate_sample_test.py
+   - python generate_sample_test.py
 
 SystemTest:
   stage: Test

diff --git a/README.md b/README.md
@@ -54,10 +54,9 @@ Additionally you may find the following links useful:
 - Create a framework that can be used as reference material and also as a sandbox for advanced experimentation with Vulkan
 
 ## Tutorials
-- **General**
+- **Project Basics**
   - [Controls](./docs/misc.md#controls)
   - [Debug window](./docs/misc.md#debug-window)
-  - [Driver version](./docs/misc.md#driver-version)
   - [Create a Sample](./docs/create_sample.md)
 - **Vulkan Essentials**  
   - [How does Vulkan compare to OpenGL ES? What should you expect when targeting Vulkan?](./samples/vulkan_basics.md)
@@ -66,17 +65,26 @@ Additionally you may find the following links useful:
   - [Appropriate use of surface rotation](./samples/performance/surface_rotation/surface_rotation_tutorial.md)
 - **Pipelines**
   - [Use of pipeline caches to avoid startup latency](./samples/performance/pipeline_cache/pipeline_cache_tutorial.md)
+  - [Utilizing Specialization Constants](./samples/performance/specialization_constants/specialization_constants_tutorial.md)
 - **Descriptors**
   - [Descriptor and buffer management](./samples/performance/descriptor_management/descriptor_management_tutorial.md)
 - **Render Passes**
   - [Appropriate use of load/store operations, and use of transient attachments](./samples/performance/render_passes/render_passes_tutorial.md)
+  - [Choosing the correct layout when transitioning images](./samples/performance/layout_transitions/layout_transitions_tutorial.md)
 - **Render Subpasses**
   - [Benefits of subpasses over multiple render passes, use of transient attachments, and G-buffer recommended size](./samples/performance/render_subpasses/render_subpasses_tutorial.md)
+- **Workload Synchronization**
+  - [Using pipeline barriers efficiently](./samples/performance/pipeline_barriers/pipeline_barriers_tutorial.md)
+  - [How to synchronize back to the CPU and avoid stalling](./samples/performance/wait_idle/wait_idle_tutorial.md)
 - **Command Buffers**
-  - [Allocation and management of command buffers](./samples/performance/command_buffer_usage/command_buffer_usage_tutorial.md)
+  - [Allocation and management of command buffers](./samples/performance/command_buffer_usage/command_buffer_usage_tutorial.md#Recycling-strategies)
   - [Multi-threaded recording with secondary command buffers](./samples/performance/command_buffer_usage/command_buffer_usage_tutorial.md#Multi-threaded-recording)
 - **AFBC**
   - [Appropriate use of AFBC](./samples/performance/afbc/afbc_tutorial.md)
+- **Misc**
+  - [Driver version](./docs/misc.md#driver-version)
+  - [Memory limits](./docs/memory_limits.md)
+  - [Vulkan FAQ](./docs/faq.md)
 
 ## Setup
 

diff --git a/bldsys/cmake/create_gradle_project.cmake b/bldsys/cmake/create_gradle_project.cmake
@@ -1,5 +1,5 @@
 #[[
- Copyright (c) 2019, Arm Limited and Contributors
+ Copyright (c) 2019-2020, Arm Limited and Contributors
 
  SPDX-License-Identifier: Apache-2.0
 
@@ -184,7 +184,7 @@ endif()
 if(EXISTS ${NATIVE_SCRIPT})
     file(RELATIVE_PATH NATIVE_SCRIPT_TMP ${OUTPUT_DIR} ${NATIVE_SCRIPT})
 
-    set(CMAKE_PATH "cmake {\n\t\t\tpath '${NATIVE_SCRIPT_TMP}'\n\t\t\tbuildStagingDirectory \'build-native\'\n\t\t\tversion \'3.10.2\'\n\t\t} ")
+    set(CMAKE_PATH "cmake {\n\t\t\tpath '${NATIVE_SCRIPT_TMP}'\n\t\t\tbuildStagingDirectory \'build-native\'\n\t\t\tversion \'3.10.2+\'\n\t\t} ")
 endif()
 
 # cmake.arguments

diff --git a/bldsys/cmake/sample_helper.cmake b/bldsys/cmake/sample_helper.cmake
@@ -1,5 +1,5 @@
 #[[
- Copyright (c) 2019, Arm Limited and Contributors
+ Copyright (c) 2019-2020, Arm Limited and Contributors
 
  SPDX-License-Identifier: Apache-2.0
 
@@ -335,6 +335,8 @@ function(order_sample_list)
 
     cmake_parse_arguments(TARGET "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
+    list(REMOVE_DUPLICATES TARGET_ORDER)
+
     # Add samples based on the given order
     foreach(SAMPLE_ID ${TARGET_ORDER})
         list(FIND TARGET_INPUT ${SAMPLE_ID} FOUND_SAMPLE)

diff --git a/bldsys/cmake/template/gradle/build.gradle.in b/bldsys/cmake/template/gradle/build.gradle.in
@@ -53,9 +53,7 @@ android {
             @JNI_LIBS_SRC_DIRS@
             @MANIFEST_FILE@
 
-            debug {
-                jniLibs.srcDirs += ["$ndkDirectory/sources/third_party/vulkan/src/build-android/jniLibs"]
-            }
+            jniLibs.srcDirs += ["$ndkDirectory/sources/third_party/vulkan/src/build-android/jniLibs"]
         }
     }
 

diff --git a/docs/build.md b/docs/build.md
@@ -1,5 +1,5 @@
 <!--
-- Copyright (c) 2019, Arm Limited and Contributors
+- Copyright (c) 2019-2020, Arm Limited and Contributors
 -
 - SPDX-License-Identifier: Apache-2.0
 -
@@ -254,7 +254,7 @@ For all dependencies set the following environment variables.
 ##### Windows <!-- omit in toc -->
 
 ```
-bldsys/scripts/generate_android_gradle.bat
+bldsys\scripts\generate_android_gradle.bat
 ```
 
 ##### Linux <!-- omit in toc -->
@@ -280,29 +280,6 @@ adb install build/outputs/apk/debug/vulkan_samples-debug.apk
 
 > Alternatively, you may open the `build/android_gradle` folder in Android Studio and run the project from here
 
-## Build with CMake
-
-`Step 1.` Select a generator which supports custom compiler like `Unix Makefiles` or `Ninja`.
-
-`Step 2.` Run the command below in the root directory of the project.
-
-```
-cmake -G "Unix Makefiles" -H. -Bbuild/android -DCMAKE_TOOLCHAIN_FILE=bldsys/toolchain/android_gradle.cmake
-```
-
-`Step 3.` Build the project using the command below
-
-```
-cmake --build build/android --config Release --target vulkan_samples_package
-```
-
-`Step 4.` You can now run the apk on a connected device
-
-```
-cd build/android/vulkan_samples/vulkan_samples_package
-adb install build/outputs/apk/debug/vulkan_samples-debug.apk
-```
-
 # Building Individual Samples
 
 `Step 1.` When generating cmake set the `VKB_ENTRYPOINTS` flag to `ON`

diff --git a/docs/memory_limits.md b/docs/memory_limits.md
@@ -0,0 +1,39 @@
+<!--
+- Copyright (c) 2019-2020, Arm Limited and Contributors
+-
+- SPDX-License-Identifier: Apache-2.0
+-
+- Licensed under the Apache License, Version 2.0 the "License";
+- you may not use this file except in compliance with the License.
+- You may obtain a copy of the License at
+-
+-     http://www.apache.org/licenses/LICENSE-2.0
+-
+- Unless required by applicable law or agreed to in writing, software
+- distributed under the License is distributed on an "AS IS" BASIS,
+- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- See the License for the specific language governing permissions and
+- limitations under the License.
+-
+-->
+
+# Memory limits with Vulkan <!-- omit in toc -->
+
+## Contents <!-- omit in toc -->
+
+- [Mali GPUs](#mali-gpus)
+
+## Mali GPUs
+
+This article covers situations in which a Vulkan application might trigger an out of memory (OOM) condition on Mali GPUs, resulting in a `DEVICE_LOST` error, even if the API usage is correct. The OOM condition that developers hit most often is due to a very high vertex load, which might be relatively common when porting Vulkan applications from desktop to mobile.
+
+Mali GPUs have a memory region which is available to store the intermediate geometry output from a render pass. This memory is used to store all of the varying data generated by vertex, tessellation, and geometry shading prior to fragment shading. Exceeding the size of this region may result in a `VK_ERROR_DEVICE_LOST`. The limit is fixed to 180 MB on current Mali GPUs, but it may be increased or lifted altogether in future GPUs.
+
+The reasoning behind this limit is that tile-based renderers need to write out and then read back intermediate geometry output, thus vertex load is directly correlated to memory bandwidth. For a typical program using 64 bytes of varying data per vertex, the 180 MB of intermediate storage can contain over 2 million vertices, which we expect to be enough for normal mobile application usage.
+We will now cover the reasons why such a vertex load is unlikely to be sustainable and possible mitigations if your application is hitting it.
+
+Let us consider a vertex-heavy application with a single render pass that reaches the 180 MB limit. Since the GPU has to write the data out and read it back from memory, this results in 2 x 180 = 360 MB/render pass, which at 30 FPS brings memory bandwidth up to 30 x 360 = 10.8 GB/s. Memory bandwidth has a direct correlation with power consumption, which can be estimated as 100 mW/(GB/s). This means that an application using 180 MB of varying data will consume at least 1.08 W, and this does not consider further contributions to memory bandwidth and general GPU power consumption. A mobile GPU cannot sustain such a power usage without overheating, which would further cause a reduction of GPU frequency and a performance drop.
+
+The only real solution to the issue is to keep the application’s vertex count below approximately 2 million, as derived above for an average of 64 bytes of varying data per vertex. In scenarios where the memory storage is exceeded and reducing the vertex load is not feasible, we recommend that the application splits the render pass into multiple render passes, each using a safe amount of intermediate storage. Later render passes can use a loadOp=LOAD to restore the content of the framebuffer and continue rendering on top of earlier rendering. This form of incremental rendering might impact performance, due to the write-out and further read-back of the color image.
+
+If your vertex load is unpredictable and you are hitting `DEVICE_LOST` issues in the field, you can set up a scheme for estimating memory consumption for each draw call in a render pass, then performing incremental rendering if the limit is reached. You should keep in mind that memory is allocated for all vertex indices between the min and max index referenced by a draw call, and for all generated vertices for tessellation and geometry shading, even if they are subsequently culled by the clipping and culling pass. Such an estimate will be conservative, as the actual amount of memory allocated might be lower, so we don’t recommend adding a further safety margin to the 180 MB limit.
diff --git a/framework/CMakeLists.txt b/framework/CMakeLists.txt
@@ -63,6 +63,8 @@ set(COMMON_FILES
     # Header Files
     common/vk_common.h
     common/vk_initializers.h
+    common/glm_common.h 
+    common/resource_caching.h
     common/logging.h
     common/helpers.h
     common/error.h

diff --git a/framework/api_vulkan_sample.cpp b/framework/api_vulkan_sample.cpp
@@ -32,8 +32,7 @@ bool ApiVulkanSample::prepare(vkb::Platform &platform)
 		return false;
 	}
 
-	VkBool32 valid_depth_format = vkb::get_supported_depth_format(device->get_physical_device(), &depth_format);
-	assert(valid_depth_format);
+	depth_format = vkb::get_suitable_depth_format(device->get_physical_device());
 
 	// Create synchronization objects
 	VkSemaphoreCreateInfo semaphore_create_info = vkb::initializers::semaphore_create_info();

diff --git a/framework/common/resource_caching.h b/framework/common/resource_caching.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2018-2019, Arm Limited and Contributors
+/* Copyright (c) 2018-2020, Arm Limited and Contributors
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -405,7 +405,7 @@ struct hash<vkb::PipelineState>
 
 		vkb::hash_combine(result, pipeline_state.get_subpass_index());
 
-		for (auto stage : pipeline_state.get_pipeline_layout().get_stages())
+		for (auto stage : pipeline_state.get_pipeline_layout().get_shader_modules())
 		{
 			vkb::hash_combine(result, stage->get_id());
 		}