From c144acc5344e98e07bc6e07f013701a5e05e710d Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Mon, 22 Aug 2022 10:48:12 -0700 Subject: [PATCH] Replace 'master' branch ref to 'main' in the code (#12547) --- .../ProviderOptions.shared.cs | 4 +-- .../SessionOptions.shared.cs | 8 ++--- dockerfiles/README.md | 2 +- docs/C_API_Guidelines.md | 8 ++--- docs/Coding_Conventions_and_Standards.md | 4 +-- docs/ContribOperators.md | 2 +- docs/NotesOnThreading.md | 4 +-- docs/ORTMobilePackageOperatorTypeSupport.md | 2 +- docs/OperatorKernels.md | 2 +- docs/Roadmap.md | 36 +++++++++---------- docs/TVM_EP.md | 4 +-- docs/WinML_principles.md | 15 ++++---- docs/onnxruntime_extensions.md | 6 ++-- docs/python/ReadMeOV.rst | 16 ++++----- .../onnxruntime-nuphar-tutorial.ipynb | 6 ++-- .../onnxruntime/core/framework/run_options.h | 2 +- java/src/test/android/README.md | 6 ++-- js/common/README.md | 2 +- js/common/lib/inference-session.ts | 8 ++--- js/node/README.md | 2 +- js/react_native/README.md | 2 +- js/web/README.md | 2 +- objectivec/include/ort_session.h | 4 +-- onnxruntime/core/framework/session_options.h | 2 +- .../providers/dnnl/subgraph/dnnl_matmul.cc | 2 +- .../dnnl/subgraph/dnnl_subgraph_primitive.cc | 2 +- .../python/tools/tensorrt/perf/README.md | 24 ++++++------- .../python/tools/transformers/Dev_Guide.md | 18 +++++----- .../python/tools/transformers/README.md | 12 +++---- ...nce_Bert_with_OnnxRuntime_on_AzureML.ipynb | 2 +- ...e_GPT2-OneStepSearch_OnnxRuntime_CPU.ipynb | 4 +-- ...ference_GPT2_with_OnnxRuntime_on_CPU.ipynb | 2 +- .../PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb | 6 ++-- .../PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb | 6 ++-- ...w_Tf2onnx_Bert-Squad_OnnxRuntime_CPU.ipynb | 8 ++--- .../test/providers/nnapi/nnapi_basic_test.cc | 2 +- .../onnx_backend_test_series_filters.jsonc | 2 +- .../core/framework/checkpointing.cc | 2 +- .../orttraining/eager/opgen/opgen/atenops.py | 2 +- .../python/training/onnxblock/README.md | 8 ++--- ...ow_to_add_distributed_ci_pipeline_tests.md | 6 ++-- tools/python/gen_contrib_doc.py | 2 +- tools/python/gen_opkernel_doc.py | 2 +- tools/python/gen_ort_mobile_pkg_doc.py | 2 +- 44 files changed, 131 insertions(+), 132 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs index 5e6f1b8ec39ff..a65839b1834d5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs @@ -247,7 +247,7 @@ public static void StringToDict(string s, Dictionary dict) /// /// CoreML flags for use with SessionOptions /// - /// + /// [Flags] public enum CoreMLFlags : uint { @@ -261,7 +261,7 @@ public enum CoreMLFlags : uint /// /// NNAPI flags for use with SessionOptions /// - /// + /// [Flags] public enum NnapiFlags { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs index 944ded201aa41..f2ad2a55471c8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.OnnxRuntime { /// /// Graph optimization level to use with SessionOptions - /// [https://github.com/microsoft/onnxruntime/blob/master/docs/ONNX_Runtime_Graph_Optimizations.md] + /// [https://github.com/microsoft/onnxruntime/blob/main/docs/ONNX_Runtime_Graph_Optimizations.md] /// public enum GraphOptimizationLevel 
{ @@ -408,13 +408,13 @@ public void AppendExecutionProvider(string providerName, Dictionary(); } - + var keysArray = NativeOnnxValueHelper.ConvertNamesToUtf8( providerOptions.Keys.ToArray(), n => n, cleanupList); var valuesArray = NativeOnnxValueHelper.ConvertNamesToUtf8( providerOptions.Values.ToArray(), n => n, cleanupList); - + NativeApiStatus.VerifySuccess(NativeMethods.SessionOptionsAppendExecutionProvider( handle, epArray[0], keysArray, valuesArray, (UIntPtr)providerOptions.Count)); } @@ -426,7 +426,7 @@ public void AppendExecutionProvider(string providerName, Dictionary /// path to the custom op library diff --git a/dockerfiles/README.md b/dockerfiles/README.md index 7d8d661ad4322..5ef8500457beb 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -127,7 +127,7 @@ If the `device_type` runtime config option is not explicitly specified, CPU will Example: HETERO:MYRIAD,CPU HETERO:HDDL,GPU,CPU MULTI:MYRIAD,GPU,CPU AUTO:GPU,CPU -*This is the hardware accelerator target that is enabled by **default** in the container image. After building the container image for one default target, the application may explicitly choose a different target at run time with the same container by using the [Dynamic device selction API](https://github.com/microsoft/onnxruntime/blob/master/docs/execution_providers/OpenVINO-ExecutionProvider.md#dynamic-device-selection).* +*This is the hardware accelerator target that is enabled by **default** in the container image. After building the container image for one default target, the application may explicitly choose a different target at run time with the same container by using the [Dynamic device selction API](https://github.com/microsoft/onnxruntime/blob/main/docs/execution_providers/OpenVINO-ExecutionProvider.md#dynamic-device-selection).* ### OpenVINO on CPU diff --git a/docs/C_API_Guidelines.md b/docs/C_API_Guidelines.md index e00ac7b96c2ee..3416d0a50455d 100644 --- a/docs/C_API_Guidelines.md +++ b/docs/C_API_Guidelines.md @@ -36,13 +36,13 @@ If an API such as CreateSession creates an Ort object such as Session, Session c No C++ exceptions must propagate through the C++/C boundaries. All C++ exceptions must be converted to OrtStatus instances at API boundaries. Such functions should return nullptr on success. -Macros API_IMPL_BEGIN and API_IMPL_END are helpful in this regard. +Macros API_IMPL_BEGIN and API_IMPL_END are helpful in this regard. Cleanup API that destroys objects or simply deallocates memory must return void. Most of the time such API can never error out. Adding return status creates more uncertainty for the client and does not help in exception scenarios such as try/finally in C#. Returning void helps clients to write cleaner code and preserve original exception if any with its meaningful error message rather than memory deallocation failure. This requirement will also help us to create C++ API wrappers that are exception safe. -Consider logging errors if you must rather than return them to the client. +Consider logging errors if you must rather than return them to the client. Example: on Windows delete operator is implemented on top of HeapFree() which may return an error. However, delete never returns anything and can be relied upon as a no throw primitive for cleanup purposes. @@ -52,7 +52,7 @@ When API errors out it must leave all its out parameters and buffers untouched, The obvious exception in this rule is the actual OrtStatus that is dynamically allocated and must be released by the client using the corresponding API. 
-Some of the client code, notably in C#, attempts to detect which out arguments need a cleanup when an API errors out. The way it is done, out arguments are pre-set to a specific value, such as zero. If the API errors out, the client code attempts to cleanup if the out argument has changed. +Some of the client code, notably in C#, attempts to detect which out arguments need a cleanup when an API errors out. The way it is done, out arguments are pre-set to a specific value, such as zero. If the API errors out, the client code attempts to cleanup if the out argument has changed. Such a technique is error prone and dangerous, as the client has no way of finding out if the out argument has already been cleaned up by the API as should be the case. It may result in double free. One reason for this is our insufficient documentation. This also results in a convoluted hard to read code with nested try/finally/catch clauses. @@ -80,4 +80,4 @@ Use types that fall into established patterns. For example, we use int64_t for d ### 9. Adding a new API -Follow these guidelines and instructions in the source code. "Rules on how to add a new Ort API version" in [onnxruntime_c_api.cc](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/session/onnxruntime_c_api.cc). +Follow these guidelines and instructions in the source code. "Rules on how to add a new Ort API version" in [onnxruntime_c_api.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/session/onnxruntime_c_api.cc). diff --git a/docs/Coding_Conventions_and_Standards.md b/docs/Coding_Conventions_and_Standards.md index e1e0ec08bd1a7..d08875ddbbd9c 100644 --- a/docs/Coding_Conventions_and_Standards.md +++ b/docs/Coding_Conventions_and_Standards.md @@ -97,7 +97,7 @@ void foo(gsl::span names) { * Qualify usages of `auto` with `const`, `*`, `&` and `&&` where applicable to more clearly express the intent * When adding a new class, disable copy/assignment/move until you have a proven need for these capabilities. If a need arises, enable copy/assignment/move selectively, and when doing so validate that the implementation of the class supports what is being enabled. * Use `ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE` initially - * See the other `ORT_DISALLOW_*` macros in + * See the other `ORT_DISALLOW_*` macros in * Sometimes, `std::unique_ptr` might be considered for delayed or optional construction of objects or members of classes. Instead, use `std::optional` as appropriate to reduce the number of allocations. * Don't use `else` after `return`. see: [https://llvm.org/docs/CodingStandards.html#don-t-use-else-after-a-return](https://llvm.org/docs/CodingStandards.html#don-t-use-else-after-a-return) * Don't overuse `std::shared_ptr`. Use `std::shared_ptr` only if it's not clear when and where the object will be de-allocated. See also: [https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rf-shared_ptr](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rf-shared_ptr) @@ -141,7 +141,7 @@ Follow the [Black formatter](https://black.readthedocs.io)'s coding style when p Please adhere to the [PEP8 Style Guide](https://www.python.org/dev/peps/pep-0008/). We use [Google's python style guide](https://google.github.io/styleguide/pyguide.html) as the style guide which is an extension to PEP8. -Code can be validated with [flake8](https://pypi.org/project/flake8/) using the configuration file in the root directory called [.flake8](https://github.com/microsoft/onnxruntime/tree/master/.flake8). 
+Code can be validated with [flake8](https://pypi.org/project/flake8/) using the configuration file in the root directory called [.flake8](https://github.com/microsoft/onnxruntime/blob/main/.flake8). Use `pyright`, which is provided as a component of the `pylance` extension in VS Code for static type checking. diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index 703949b6ca6b4..e35bc530338d6 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -1,5 +1,5 @@ ## Contrib Operator Schemas -*This file is automatically generated from the registered contrib operator schemas by [this script](https://github.com/microsoft/onnxruntime/blob/master/tools/python/gen_contrib_doc.py). +*This file is automatically generated from the registered contrib operator schemas by [this script](https://github.com/microsoft/onnxruntime/blob/main/tools/python/gen_contrib_doc.py). Do not modify directly.* * com.microsoft diff --git a/docs/NotesOnThreading.md b/docs/NotesOnThreading.md index 25f504c02c8c5..83bb0d400a81b 100644 --- a/docs/NotesOnThreading.md +++ b/docs/NotesOnThreading.md @@ -3,12 +3,12 @@ This document is intended for ORT developers. ORT allows the usage of either OpenMP or non-OpenMP (ORT) threads for execution. Threadpool management -is abstracted behind: (1) ThreadPool class in [threadpool.h](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/platform/threadpool.h) and (2) functions in [thread_utils.h](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/util/thread_utils.h). +is abstracted behind: (1) ThreadPool class in [threadpool.h](https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/platform/threadpool.h) and (2) functions in [thread_utils.h](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/util/thread_utils.h). When developing an op, please use these abstractions to parallelize your code. These abstractions centralize 2 things. When OpenMP is enabled, they resort to using OpenMP. When OpenMP is disabled they resort to sequential execution if the threadpool ptr is NULL or schedule the tasks on the threadpool otherwise. -Examples of these abstractions are: ([threadpool.h](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/platform/threadpool.h) has more documentation for these) +Examples of these abstractions are: ([threadpool.h](https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/platform/threadpool.h) has more documentation for these) * TryParallelFor * TrySimpleParallelFor * TryBatchParallelFor diff --git a/docs/ORTMobilePackageOperatorTypeSupport.md b/docs/ORTMobilePackageOperatorTypeSupport.md index 09de5d9d4cc39..6a69a2c598823 100644 --- a/docs/ORTMobilePackageOperatorTypeSupport.md +++ b/docs/ORTMobilePackageOperatorTypeSupport.md @@ -2,7 +2,7 @@ ## Supported operators and types -The supported operators and types are based on what is required to support float32 and quantized versions of popular models. The full list of input models used to determine this list is available [here](https://github.com/microsoft/onnxruntime/blob/master/tools/ci_build/github/android/mobile_package.required_operators.readme.txt) +The supported operators and types are based on what is required to support float32 and quantized versions of popular models. 
The full list of input models used to determine this list is available [here](https://github.com/microsoft/onnxruntime/blob/main/tools/ci_build/github/android/mobile_package.required_operators.readme.txt) ## Supported data input types diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index f50b920234b10..0b5ae058a3474 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -1,5 +1,5 @@ ## Supported Operators and Data Types -*This file is automatically generated from the registered kernels by [this script](https://github.com/microsoft/onnxruntime/blob/master/tools/python/gen_opkernel_doc.py). +*This file is automatically generated from the registered kernels by [this script](https://github.com/microsoft/onnxruntime/blob/main/tools/python/gen_opkernel_doc.py). Do not modify directly.* ## Execution Providers diff --git a/docs/Roadmap.md b/docs/Roadmap.md index eda0ffb572f86..728239ba46714 100644 --- a/docs/Roadmap.md +++ b/docs/Roadmap.md @@ -1,11 +1,11 @@ # ONNX Runtime Roadmap -ONNX Runtime is an active, fast-paced project backed by a strong team of Microsoft engineers and data scientists along with a worldwide community of partners and contributors. This roadmap summarizes the pending investments identified by the team to continually grow +ONNX Runtime is an active, fast-paced project backed by a strong team of Microsoft engineers and data scientists along with a worldwide community of partners and contributors. This roadmap summarizes the pending investments identified by the team to continually grow ONNX Runtime as a robust, versatile, and high performance inference engine for DNN and traditional ML models. ## High Level Goals ONNX Runtime is a runtime accelerator that supports interoperable ML and DNN models based on the [ONNX](https://onnx.ai/) spec. For key technical design objectives and considerations, see [ONNX Runtime Inference High Level Design](./InferenceHighLevelDesign.md). -We recognize the challenges involved in operationalizing ML models performantly in an agile way, and we understand that high volume production services can be highly performance-sensitive and often need to support a variety of compute targets (we experience these first-hand at Microsoft across our vast array of products and services). +We recognize the challenges involved in operationalizing ML models performantly in an agile way, and we understand that high volume production services can be highly performance-sensitive and often need to support a variety of compute targets (we experience these first-hand at Microsoft across our vast array of products and services). As such, our investments are directly in support of solving those challenges, focusing on areas such as: * Platform coverage @@ -14,24 +14,24 @@ As such, our investments are directly in support of solving those challenges, fo * Model coverage * Quality and ease of use - including backwards compatibility of models (older opsets) and APIs -In addition to our OSS participation, we also internally use this technology in core products at Microsoft, with over 80 models in production providing an average of 2x+ performance improvement. +In addition to our OSS participation, we also internally use this technology in core products at Microsoft, with over 80 models in production providing an average of 2x+ performance improvement. 
## Investments -In support of the high level goals outlined above, the investment areas listed below represent our active and backlog projects, -which are largely driven by community demand and anticipated usage opportunities. We will work through our prioritized backlog as -quickly as possible, and if there are any specific features or enhancements you need, we gladly welcome community contributions for -these efforts or any of the [enhancements suggested on Github](https://github.com/microsoft/onnxruntime/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement). If you have a specific suggestion or unsupported use case, please let us +In support of the high level goals outlined above, the investment areas listed below represent our active and backlog projects, +which are largely driven by community demand and anticipated usage opportunities. We will work through our prioritized backlog as +quickly as possible, and if there are any specific features or enhancements you need, we gladly welcome community contributions for +these efforts or any of the [enhancements suggested on Github](https://github.com/microsoft/onnxruntime/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement). If you have a specific suggestion or unsupported use case, please let us know by filing a [Github issue](https://github.com/microsoft/onnxruntime/issues). * **Platform coverage** - [Expanded platform compatibility](#expanded-platform-compatibility) * **Extensibility and customization** - [Accelerators and Execution Providers](#accelerators-and-execution-providers) -* **Performance** - [Continued performance optimizations](#continued-performance-optimizations) -* **Model coverage** - [Expanded model compatibility](#expanded-model-compatibility) +* **Performance** - [Continued performance optimizations](#continued-performance-optimizations) +* **Model coverage** - [Expanded model compatibility](#expanded-model-compatibility) * **Quality and ease of use** - [Increased integration with popular ML products](#increased-integration-with-popular-products) --- ### Expanded platform compatibility -ONNX Runtime already supports a wide range of architectures, platforms, and languages, and this will continue to be an active investment area to broaden the availability of the engine for varied usage. +ONNX Runtime already supports a wide range of architectures, platforms, and languages, and this will continue to be an active investment area to broaden the availability of the engine for varied usage. Additionally, we understand that lightweight devices and local applications may have constraints for package size, so there is active awareness to opportunistically minimize binary size. #### Architectures @@ -67,7 +67,7 @@ To maximize performance potential, we will be continually adding additional CUDA In addition to new execution providers, we aim to make it easy for community partners to contribute in a non-disruptive way. To support this, we are investing in improvements to the execution provider interface for easily registering new execution providers and separating out EPs from the core runtime engine. ### Continued Performance Optimizations -Performance is a key focus for ONNX Runtime. From latency to memory utilization to CPU usage, we are constantly seeking strategies to deliver the best performance. Although DNNs are rapidly driving research areas for innovation, we acknowledge that in practice, many companies and developers are still using traditional ML frameworks for reasons ranging from expertise to privacy to legality. 
As such, ONNX Runtime is focused on improvements and support for both DNNs and traditional ML. +Performance is a key focus for ONNX Runtime. From latency to memory utilization to CPU usage, we are constantly seeking strategies to deliver the best performance. Although DNNs are rapidly driving research areas for innovation, we acknowledge that in practice, many companies and developers are still using traditional ML frameworks for reasons ranging from expertise to privacy to legality. As such, ONNX Runtime is focused on improvements and support for both DNNs and traditional ML. #### Examples of projects the team is working on: * More quantization support @@ -79,17 +79,17 @@ Performance is a key focus for ONNX Runtime. From latency to memory utilization IoT provides growing opportunity to execute ML workloads on the edge of the network, where the data is collected. However, the devices used for ML execution have different hardware specifications. To support compatibility with this group of devices, we will invest in strategies to optimize ONNX model execution across the breadth of IoT endpoints using different hardware configurations with CPUs, GPUs and custom NN ASICs. ### Expanded model compatibility -The ONNX spec focuses on ML model interoperability rather than coverage of all operators from all frameworks. +The ONNX spec focuses on ML model interoperability rather than coverage of all operators from all frameworks. We aim to continuously improve coverage to support popular as well as new state-of-the-art models. -#### Spec coverage +#### Spec coverage As more operators are added to the ONNX spec, ONNX Runtime will provide implementations (default CPU and GPU-CUDA) of each to stay in compliance with the latest ONNX spec. This includes: * Sparse Tensor support #### Investments in popular converters -We work with the OSS and ONNX community to ensure popular frameworks can export or be converted to ONNX format. +We work with the OSS and ONNX community to ensure popular frameworks can export or be converted to ONNX format. * [PyTorch export](https://pytorch.org/docs/stable/onnx.html) * [Tensorflow-ONNX](https://github.com/onnx/tensorflow-onnx) * [Keras-ONNX](https://github.com/onnx/keras-onnx) @@ -104,8 +104,8 @@ To decrease the risk of model inferencing failures, we will improve the error ha Focusing on practicality, we take a scenario driven approach to adding additional capabilities to ONNX Runtime. ### Increased integration with popular products -We understand that data scientists and ML engineers work with many different products and toolsets to bring complex machine learning -algorithms to life through innovative user-facing applications. We want to ensure ONNX Runtime works as seamlessly as possible with +We understand that data scientists and ML engineers work with many different products and toolsets to bring complex machine learning +algorithms to life through innovative user-facing applications. We want to ensure ONNX Runtime works as seamlessly as possible with these. If you've identified any integration ideas or opportunities and have questions or need assistance, we encourage use of Github Issues as a discussion forum. 
Some of these products include: @@ -114,6 +114,6 @@ Some of these products include: * [ML.NET](https://docs.microsoft.com/en-us/dotnet/machine-learning/tutorials/object-detection-onnx): inference ONNX models in .NET * [PyTorch](https://pytorch.org/docs/stable/onnx.html): improve coverage for exporting trained models to ONNX * [Windows](https://docs.microsoft.com/en-us/windows/ai/windows-ml/index): run ONNX models on Windows devices using the built-in Windows ML APIs. Windows ML APIs will be included in the ONNX Runtime builds and binaries to enable Windows developers to get OS-independent updates -* [SQL Database Edge](https://docs.microsoft.com/en-us/azure/sql-database-edge/deploy-onnx): predict with ONNX models in SQL Database Edge, an optimized relational database engine geared for IoT and IoT Edge deployments +* [SQL Database Edge](https://docs.microsoft.com/en-us/azure/sql-database-edge/deploy-onnx): predict with ONNX models in SQL Database Edge, an optimized relational database engine geared for IoT and IoT Edge deployments -Have an idea or feature request? [Contribute](https://github.com/microsoft/onnxruntime/blob/master/CONTRIBUTING.md) or [let us know](https://github.com/microsoft/onnxruntime/blob/master/.github/ISSUE_TEMPLATE/feature_request.md)! +Have an idea or feature request? [Contribute](https://github.com/microsoft/onnxruntime/blob/main/CONTRIBUTING.md) or [let us know](https://github.com/microsoft/onnxruntime/blob/main/.github/ISSUE_TEMPLATE/feature_request.md)! diff --git a/docs/TVM_EP.md b/docs/TVM_EP.md index c1c0b1552b87c..8a34d75635e04 100644 --- a/docs/TVM_EP.md +++ b/docs/TVM_EP.md @@ -245,7 +245,7 @@ It is also possible to use a precompiled model. The compiled model can be obtained using the [OctoML platform](https://onnx.octoml.ai) or compiled directly (see **Support precompiled model** section in -[Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/master/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb) +[Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/main/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb) for more information on model compilation). In order to use the precompiled model, only need to pass two options: @@ -261,7 +261,7 @@ You can read more about these options in section [Configuration options](#config ## Samples -- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/master/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb) +- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/main/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb) ## Known issues - At this moment, the TVM EP has only been verified on UNIX/Linux and Windows systems. diff --git a/docs/WinML_principles.md b/docs/WinML_principles.md index cf4d84a11c063..192ad614fba79 100644 --- a/docs/WinML_principles.md +++ b/docs/WinML_principles.md @@ -1,32 +1,31 @@ # Contributing to Windows ML -Window Machine Learning is a high-performance, reliable API for deploying hardware-accelerated ML inferences on Windows devices. Please visit the [Windows ML documentation](https://docs.microsoft.com/en-us/windows/ai/windows-ml/) to learn more about Windows ML. +Window Machine Learning is a high-performance, reliable API for deploying hardware-accelerated ML inferences on Windows devices. 
Please visit the [Windows ML documentation](https://docs.microsoft.com/en-us/windows/ai/windows-ml/) to learn more about Windows ML. ## Windows ML Base Principles **We design and optimize for all Windows devices.** - + Our goal is to provide developers with a platform that enables new experiences that run well on all Windows devices. Our design drives innovation in the DirectX ecosystem through DirectML and gives developers the confidence that their applications will work for all Windows customers. **We maintain and curate the Windows ML APIs.** - + The API is designed to ensure consistency of developer’s experience across the Windows platform. We provide long-term servicing and support, and we are committed to ensuring application’s compatibility as we evolve the API. -**Windows ML is a core component of Windows.** +**Windows ML is a core component of Windows.** The Windows ML code is packaged and distributed with each new release of Windows. To provide consumers with high-quality products, Microsoft is responsible for distributing Windows ML and related binaries as part of Windows or standalone distributable packages. ## Open for Community Contributions -We encourage community contributions to Windows ML to enhance users’ experience on Windows. We use the principles above to guide how we look at and evaluate all contributions. +We encourage community contributions to Windows ML to enhance users’ experience on Windows. We use the principles above to guide how we look at and evaluate all contributions. Ensure your feature request follows all these principles to help the review process and include information about the customer problem(s) the feature request addresses. -Note: minor issues or bugs can be addressed more quickly using the [bug/performance issue request](https://github.com/microsoft/onnxruntime/issues/new/choose) rather than feature request. +Note: minor issues or bugs can be addressed more quickly using the [bug/performance issue request](https://github.com/microsoft/onnxruntime/issues/new/choose) rather than feature request. ## Start your Feature Request -If you'd like to contribute to Windows ML and engage with the community to get feedback, please review to the contributing [process details](https://github.com/microsoft/onnxruntime/blob/master/CONTRIBUTING.md) and submit a new feature request [here](https://github.com/microsoft/onnxruntime/issues/new/choose). - +If you'd like to contribute to Windows ML and engage with the community to get feedback, please review to the contributing [process details](https://github.com/microsoft/onnxruntime/blob/main/CONTRIBUTING.md) and submit a new feature request [here](https://github.com/microsoft/onnxruntime/issues/new/choose). diff --git a/docs/onnxruntime_extensions.md b/docs/onnxruntime_extensions.md index 189108daa4450..37ed45a0ae32f 100644 --- a/docs/onnxruntime_extensions.md +++ b/docs/onnxruntime_extensions.md @@ -6,7 +6,7 @@ ONNXRuntime Extensions is a comprehensive package to extend the capability of th onnxruntime-extensions supports many useful custom operators to enhance the text processing capability of ONNXRuntime, which include some widely used **string operators** and popular **tokenizers**. For custom operators supported and how to use them, please check the documentation [custom operators](https://github.com/microsoft/onnxruntime-extensions/blob/main/docs/custom_text_ops.md). ## Build ONNXRuntime with Extensions -We have supported build onnxruntime-extensions as a static library and link it into ONNXRuntime. 
To enable custom operators from onnxruntime-extensions, you should add argument `--use_extensions`, which will use onnxruntime-extensions from git submodule in path cmake/external/onnxruntime-extensions **by default**. +We have supported build onnxruntime-extensions as a static library and link it into ONNXRuntime. To enable custom operators from onnxruntime-extensions, you should add argument `--use_extensions`, which will use onnxruntime-extensions from git submodule in path cmake/external/onnxruntime-extensions **by default**. If you want to build ONNXRuntime with a pre-pulled onnxruntime-extensions, pass extra argument `--extensions_overridden_path `. @@ -96,7 +96,7 @@ sess.run(...) ``` ### Run E2E Model in JavaScript -To run E2E ONNX model in JavaScript, you need to first [prepare ONNX Runtime WebAssembly artifacts](https://github.com/microsoft/onnxruntime/tree/master/js), include the generated `ort.min.js`, and then load and run the model in JS. +To run E2E ONNX model in JavaScript, you need to first [prepare ONNX Runtime WebAssembly artifacts](https://github.com/microsoft/onnxruntime/blob/main/js), include the generated `ort.min.js`, and then load and run the model in JS. ```js // use an async context to call onnxruntime functions async function main() { @@ -122,4 +122,4 @@ async function main() { document.write(`failed to inference ONNX model: ${e}.`); } } -``` \ No newline at end of file +``` diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst index b9b794fadb165..3a96ced1c8a9b 100644 --- a/docs/python/ReadMeOV.rst +++ b/docs/python/ReadMeOV.rst @@ -1,7 +1,7 @@ OpenVINO™ Execution Provider for ONNX Runtime =============================================== -`OpenVINO™ Execution Provider for ONNX Runtime `_ is a product designed for ONNX Runtime developers who want to get started with OpenVINO™ in their inferencing applications. This product delivers `OpenVINO™ `_ inline optimizations which enhance inferencing performance with minimal code modifications. +`OpenVINO™ Execution Provider for ONNX Runtime `_ is a product designed for ONNX Runtime developers who want to get started with OpenVINO™ in their inferencing applications. This product delivers `OpenVINO™ `_ inline optimizations which enhance inferencing performance with minimal code modifications. OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across many `AI models `_ on a variety of Intel® hardware such as: - Intel® CPUs @@ -23,16 +23,16 @@ This package supports: - Intel® integrated GPUs - Intel® Movidius™ Vision Processing Units (VPUs). -Please Note for VAD-M use Docker installation / Build from Source for Linux. +Please Note for VAD-M use Docker installation / Build from Source for Linux. ``pip3 install onnxruntime-openvino==1.12.0`` -Please install OpenVINO™ PyPi Package separately for Windows. +Please install OpenVINO™ PyPi Package separately for Windows. For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows `_. **OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2022.1.0 eliminating the need to install OpenVINO™ separately. The OpenVINO™ libraries are prebuilt with CXX11_ABI flag set to 0. -The package also includes module that is used by torch-ort-inference to accelerate inference for PyTorch models with OpenVINO Execution Provider. 
+The package also includes module that is used by torch-ort-inference to accelerate inference for PyTorch models with OpenVINO Execution Provider. See `torch-ort-inference `_ for more details. For more details on build and installation please refer to `Build `_. @@ -53,21 +53,21 @@ To see what you can do with **OpenVINO™ Execution Provider for ONNX Runtime**, Docker Support ^^^^^^^^^^^^^^ -The latest OpenVINO™ EP docker image can be downloaded from DockerHub. +The latest OpenVINO™ EP docker image can be downloaded from DockerHub. For more details see `Docker ReadMe `_. Prebuilt Images ^^^^^^^^^^^^^^^^ -- Please find prebuilt docker images for Intel® CPU and Intel® iGPU on OpenVINO™ Execution Provider `Release Page `_. +- Please find prebuilt docker images for Intel® CPU and Intel® iGPU on OpenVINO™ Execution Provider `Release Page `_. License ^^^^^^^^ -**OpenVINO™ Execution Provider for ONNX Runtime** is licensed under `MIT `_. +**OpenVINO™ Execution Provider for ONNX Runtime** is licensed under `MIT `_. By contributing to the project, you agree to the license and copyright terms therein -and release your contribution under these terms. +and release your contribution under these terms. Support ^^^^^^^^ diff --git a/docs/python/inference/notebooks/onnxruntime-nuphar-tutorial.ipynb b/docs/python/inference/notebooks/onnxruntime-nuphar-tutorial.ipynb index 231a801c17b95..3eef63dea0987 100644 --- a/docs/python/inference/notebooks/onnxruntime-nuphar-tutorial.ipynb +++ b/docs/python/inference/notebooks/onnxruntime-nuphar-tutorial.ipynb @@ -17,7 +17,7 @@ "\n", "This example shows how to accelerate model inference using Nuphar, an execution provider that leverages just-in-time compilation to generate optimized executables.\n", "\n", - "For more background about Nuphar, please check [Nuphar-ExecutionProvider.md](https://github.com/microsoft/onnxruntime/blob/master/docs/execution_providers/Nuphar-ExecutionProvider.md) and its [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#nuphar).\n", + "For more background about Nuphar, please check [Nuphar-ExecutionProvider.md](https://github.com/microsoft/onnxruntime/blob/main/docs/execution_providers/Nuphar-ExecutionProvider.md) and its [build instructions](https://www.onnxruntime.ai/docs/how-to/build.html#nuphar).\n", "\n", "#### Tutorial Roadmap:\n", "1. Prerequistes\n", @@ -36,7 +36,7 @@ "## 1. Prerequistes\n", "Please make sure you have installed following Python packages. Besides, C++ compiler/linker is required for ahead-of-time compilation. Please make sure you have g++ if running on Linux, or Visual Studio 2017 on Windows.\n", "\n", - "For simplicity, you may use [Nuphar docker image](https://github.com/microsoft/onnxruntime/blob/master/dockerfiles/README.md) from Microsoft Container Registry.\n" + "For simplicity, you may use [Nuphar docker image](https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/README.md) from Microsoft Container Registry.\n" ] }, { @@ -1199,4 +1199,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index b3edd3889ebb8..76cb89a8e7b83 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -12,7 +12,7 @@ * Configuration information for a Run call. */ struct OrtRunOptions { - /// Log severity. 
See https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h + /// Log severity. See https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h /// Default = -1 (use the log severity from the InferenceSession that the Run is for). int run_log_severity_level = -1; int run_log_verbosity_level = 0; ///< VLOG level if debug build and run_log_severity_level is 0 (VERBOSE). diff --git a/java/src/test/android/README.md b/java/src/test/android/README.md index f1bacfeb296c5..c5658be38660e 100644 --- a/java/src/test/android/README.md +++ b/java/src/test/android/README.md @@ -1,4 +1,4 @@ -# Android Test Application for ORT-Mobile +# Android Test Application for ORT-Mobile This directory contains a simple android application for testing [ONNX Runtime AAR package](https://www.onnxruntime.ai/docs/how-to/build.html#build-android-archive-aar). @@ -11,7 +11,7 @@ For general usage and build purpose of ORT-Mobile Android, please see the [docum This android application is mainly aimed for testing: - Model used: A simple [sigmoid ONNX model](https://github.com/onnx/onnx/blob/f9b0cc99344869c246b8f4011b8586a39841284c/onnx/backend/test/data/node/test_sigmoid/model.onnx) (converted to ORT format under `app\src\androidTest\assets` folder). - - Here's a [documentation](https://github.com/microsoft/onnxruntime/blob/master/docs/ONNX_Runtime_for_Mobile_Platforms.md#1-create-ort-format-model-and-configuration-file-with-required-operators) about how you can convert an ONNX model into ORT format. + - Here's a [documentation](https://github.com/microsoft/onnxruntime/blob/main/docs/ONNX_Runtime_for_Mobile_Platforms.md#1-create-ort-format-model-and-configuration-file-with-required-operators) about how you can convert an ONNX model into ORT format. - Main test file: An android instrumentation test under `app\src\androidtest\java\ai.onnxruntime.example.javavalidator\SimpleTest.kt` - The main dependency of this application is `onnxruntime` aar package under `app\libs`. - The MainActivity of this application is set to be empty. @@ -31,7 +31,7 @@ Please note that you may need to set the `--android_abi=x86_64` (the default opt The build will generate two apks which is required to run the test application in `$YOUR_BUILD_DIR/java/androidtest/android/app/build/outputs/apk`: -* `androidtest/debug/app-debug-androidtest.apk` +* `androidtest/debug/app-debug-androidtest.apk` * `debug/app-debug.apk` After running the build script, the two apks will be installed on `ort_android` emulator and it will automatically run the test application in an adb shell. diff --git a/js/common/README.md b/js/common/README.md index 2f79561c32b55..b3768f3e3845d 100644 --- a/js/common/README.md +++ b/js/common/README.md @@ -10,4 +10,4 @@ This package (onnxruntime-common) is not designed for using directly. Please con ## License -License information can be found [here](https://github.com/microsoft/onnxruntime/blob/master/README.md#license). +License information can be found [here](https://github.com/microsoft/onnxruntime/blob/main/README.md#license). diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts index 6e0faafbe80fb..06864f6a2a26e 100644 --- a/js/common/lib/inference-session.ts +++ b/js/common/lib/inference-session.ts @@ -117,7 +117,7 @@ export declare namespace InferenceSession { /** * Log severity level. 
See - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h * * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend */ @@ -132,7 +132,7 @@ export declare namespace InferenceSession { /** * Store configurations for a session. See - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/ + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/ * onnxruntime_session_options_config_keys.h * * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later @@ -201,7 +201,7 @@ export declare namespace InferenceSession { export interface RunOptions { /** * Log severity level. See - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h * * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend */ @@ -230,7 +230,7 @@ export declare namespace InferenceSession { /** * Set a single run configuration entry. See - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/ + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/ * onnxruntime_run_options_config_keys.h * * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later diff --git a/js/node/README.md b/js/node/README.md index d2f616696c3d6..24995e0012c45 100644 --- a/js/node/README.md +++ b/js/node/README.md @@ -26,4 +26,4 @@ To use on platforms without pre-built binaries, you can build Node.js binding fr ## License -License information can be found [here](https://github.com/microsoft/onnxruntime/blob/master/README.md#license). +License information can be found [here](https://github.com/microsoft/onnxruntime/blob/main/README.md#license). diff --git a/js/react_native/README.md b/js/react_native/README.md index 9fdc4a5fe9481..d64d08484ae63 100644 --- a/js/react_native/README.md +++ b/js/react_native/README.md @@ -38,4 +38,4 @@ ONNX Runtime React Native currently supports most operators used by popular mode ### License -License information can be found [here](https://github.com/microsoft/onnxruntime/blob/master/README.md#license). +License information can be found [here](https://github.com/microsoft/onnxruntime/blob/main/README.md#license). diff --git a/js/web/README.md b/js/web/README.md index e81cbc234c94e..f36c54dccefdb 100644 --- a/js/web/README.md +++ b/js/web/README.md @@ -54,4 +54,4 @@ ONNX Runtime Web currently supports a subset of operators in [ai.onnx](https://g ## License -License information can be found [here](https://github.com/microsoft/onnxruntime/blob/master/README.md#license). +License information can be found [here](https://github.com/microsoft/onnxruntime/blob/main/README.md#license). diff --git a/objectivec/include/ort_session.h b/objectivec/include/ort_session.h index 93c1a945ca351..bac82c32d4fea 100644 --- a/objectivec/include/ort_session.h +++ b/objectivec/include/ort_session.h @@ -160,7 +160,7 @@ NS_ASSUME_NONNULL_BEGIN * Sets a session configuration key-value pair. * Any value for a previously set key will be overwritten. 
* The session configuration keys and values are documented here: - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h * * @param key The key. * @param value The value. @@ -212,7 +212,7 @@ NS_ASSUME_NONNULL_BEGIN * Sets a run configuration key-value pair. * Any value for a previously set key will be overwritten. * The run configuration keys and values are documented here: - * https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h + * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h * * @param key The key. * @param value The value. diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index c3012299a3acc..827cb3c257c34 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -83,7 +83,7 @@ struct SessionOptions { std::string session_logid; ///< logger id to use for session output /// Log severity for the inference session. Applies to session load, initialization, etc. - /// See https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h + /// See https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h /// Default = -1 (use default logger severity) int session_log_severity_level = -1; int session_log_verbosity_level = 0; ///< VLOG level if debug build and session_log_severity_level is 0 (VERBOSE). diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc index b3fe47154069b..49b7094559b14 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_matmul.cc @@ -18,7 +18,7 @@ DnnlMatMul::DnnlMatMul() {} // "MatMulPostOps" is a OneDNN only fusion of MatMul and upto 32 elementwise or binary ops. // See dnnl_subgraph_transformer.cc MatMulBinaryEltwise(...). 
// "FusedMatMul" is a ContribOperator defined here: -// https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.FusedMatMul +// https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.FusedMatMul // Depending on its attributes "FusedMatMul" can transpose eather input to the MatMul and scale the resulting output void DnnlMatMul::CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node) { std::unordered_set binary_ops = {"Add", "Div", "Mul", "Sub"}; diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc index 4c85fa979016c..ca00a9c3cbd4e 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc @@ -197,7 +197,7 @@ void DnnlSubgraphPrimitive::AddKernels() { DnnlLrn().CreatePrimitive(*this, node); // MatMulPostOps is a OneDNN only fusion of MatMul and upto 32 elementwise or binary ops // FusedMatMul is a ContribOperator defined here: - // https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md#com.microsoft.FusedMatMul + // https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.FusedMatMul } else if (node.OpType() == "MatMul" || node.OpType() == "MatMulPostOps" || node.OpType() == "FusedMatMul") { DnnlMatMul().CreatePrimitive(*this, node); } else if (node.OpType() == "MatMulInteger" || node.OpType() == "MatMulIntegerPostOps") { diff --git a/onnxruntime/python/tools/tensorrt/perf/README.md b/onnxruntime/python/tools/tensorrt/perf/README.md index 9cf439e0a6fcb..bc01f33ada087 100644 --- a/onnxruntime/python/tools/tensorrt/perf/README.md +++ b/onnxruntime/python/tools/tensorrt/perf/README.md @@ -6,16 +6,16 @@ This script mainly focus on benchmarking ORT TensorRT EP performance compared wi ### Linux GPU TensorRT Perf CI Pipeline - [x] **Build ORT** Build ORT from source. Specify _branch_ variable if not master. - [x] **Post to Dashboard** Post to ONNX Runtime EP Dashboard (No Docker). -- [ ] **Run in Docker (CUDA 11.0)** Check to run in CUDA 11.0 vs 10.2 (default). +- [ ] **Run in Docker (CUDA 11.0)** Check to run in CUDA 11.0 vs 10.2 (default). - [ ] **Configure EPs** Choose which EPs to run against. Specify _epList_ variable. - **ModelGroups**: Select which model groups to run. (i.e. selected-models, specify _selected-models_ variable) -#### Variables (under Advanced Options) +#### Variables (under Advanced Options) - **branch**: (*default: master*) Specified branch to run against. -- **epList**: List of EPs to run separated by spaces [from available options](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/tensorrt/perf/benchmark.py#L26) _i.e. "CPUExecutionProvider TensorrtExecutionProvider"_ -- **selected-models**: Specified path to model json file or model folder if selected-models in ModelGroups. +- **epList**: List of EPs to run separated by spaces [from available options](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/tensorrt/perf/benchmark.py#L26) _i.e. "CPUExecutionProvider TensorrtExecutionProvider"_ +- **selected-models**: Specified path to model json file or model folder if selected-models in ModelGroups. 
-## Usage Locally +## Usage Locally You can use following command to test whether models can be run using TensorRT and run benchmark: ``` ./perf.sh @@ -61,8 +61,8 @@ However, benchmark.py creates only one process to run all the model inferences o - **--cuda_ep_options**: Optional argument. Comma-separated key/value pairs denoting CUDA EP options. Ex: `--cuda_ep_options device_id=0,arena_extend_strategy=kNextPowerOfTwo`. Refer to [CUDA Execution Provider Options](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options) for a complete list of options. -### Validation Configuration -- **--percent_mismatch**: The allowed percentage of values to be incorrect when comparing given outputs to ORT outputs. +### Validation Configuration +- **--percent_mismatch**: The allowed percentage of values to be incorrect when comparing given outputs to ORT outputs. - **--rtol**: The relative tolerance for validating ORT outputs. - **--atol**: The absolute tolerance for validating ORT outputs. @@ -148,7 +148,7 @@ The output of running benchmark: ``` python comparison_script.py -p "prev" -c "current" -o "output.csv" ``` -- **compare_latency.py**: creates a csv file with any regressions in average latencies +- **compare_latency.py**: creates a csv file with any regressions in average latencies - **new_failures.py**: creates a csv file with any new failures ## Others @@ -169,18 +169,18 @@ ort_build_latest.py: This script should be run before running run_perf_machine.s - **-c, --cuda_home**: CUDA home directory. - **-b, --branch**: (*default: master*) ORT branch name you are perf testing on. -### Running Perf Script -run_perf_docker.sh: Runs the perf script in docker environment. +### Running Perf Script +run_perf_docker.sh: Runs the perf script in docker environment. - **-d, --docker_image**: Name of perf docker image. - **-o, --option**: Name of which models you want to run {i.e. selected-models} - **-p, --perf_dir**: Path to perf directory. - **-m, --model_path**: Model path relative to workspace (/). If option is selected-models, include path to models either json or folder. -run_perf_machine.sh: Runs the perf script directly. +run_perf_machine.sh: Runs the perf script directly. - **-o, --option**: Name of which models you want to run {i.e. selected-models} - **-m, --model_path**: Model path relative to workspace (~/). If option is selected-models, include path to models either json or folder. ## Dependencies - When inferencing model using CUDA float16, this script following script to convert nodes in model graph from float32 to float16. It also modifies the converting script a little bit to better cover more model graph conversion. https://github.com/microsoft/onnxconverter-common/blob/master/onnxconverter_common/float16.py -- For dynamic input shape models, the script runs symbolic shape inference on the model. https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/symbolic_shape_infer.py +- For dynamic input shape models, the script runs symbolic shape inference on the model. 
https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/symbolic_shape_infer.py diff --git a/onnxruntime/python/tools/transformers/Dev_Guide.md b/onnxruntime/python/tools/transformers/Dev_Guide.md index 390d642befec3..11f12049fd0bd 100644 --- a/onnxruntime/python/tools/transformers/Dev_Guide.md +++ b/onnxruntime/python/tools/transformers/Dev_Guide.md @@ -1,6 +1,6 @@ # Transformer Model Optimization Tool Dev Guide -Transformer model optimization tool applies to BERT, GPT-2 and some variations (like Roberta, DistilBert etc). However, it cannot cover all the cases especially for the new ones that are coming out of academics. This guide will give you an overall introduction of how the graph transformation works and how to optimize your custom transformer-based model with limited code changes on graph fusion logic and kernels implementations. +Transformer model optimization tool applies to BERT, GPT-2 and some variations (like Roberta, DistilBert etc). However, it cannot cover all the cases especially for the new ones that are coming out of academics. This guide will give you an overall introduction of how the graph transformation works and how to optimize your custom transformer-based model with limited code changes on graph fusion logic and kernels implementations. The objective of the Dev Guide is to enable more transformer-based models to take advantage of ONNXRuntime optimized kernels. @@ -8,7 +8,7 @@ Meanwhile, welcome to contribute! ## Prerequisite * Expect the developer has basic knowledge of C++, CUDA and python programming. -* [Transformer Model Optimization Tool Overview](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/README.md) +* [Transformer Model Optimization Tool Overview](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/README.md) * This guide assumes that a valid onnx model exported from the original framework is ready. If there are any issues with model exporting, fp16 conversion, profiling and benchmark. Please refer to the above link. * [Netron](https://github.com/lutzroeder/netron) is an excellent graph visualization tool. [Web version](https://netron.app/) * Optional: In case kernel changes are needed, here is the instruction on [building the ONNXRuntime](https://onnxruntime.ai/docs/build/) with packages on [different APIs and Language bindings](https://onnxruntime.ai/docs/build/inferencing.html#apis-and-language-bindings) @@ -18,20 +18,20 @@ Meanwhile, welcome to contribute! The graph fusion transforms a certain graph structure to a single fused node. The kernel wrapped by the fused node is the strict computation equivalent of that certain graph structure and executed by the runtime engine. This means that the candidate graph should have the exact same logic as fused node kernel implementation. It's suggested to get familiar with the targeted optimized kernel implementation and then work on the fusion logic. ### Kernel Implementation -ONNXRuntime supports optimized kernels as contrib operators in both CPU and CUDA Execution Provider. -* The definition of the optimized kernels can be found in [onnxruntime/core/graph/contrib_ops/contrib_defs.cc](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/core/graph/contrib_ops/contrib_defs.cc). -* The CPU implementation of the optimized kernels can be found under [onnxruntime/contrib_ops/cpu/bert](https://github.com/microsoft/onnxruntime/tree/rel-1.9.0/onnxruntime/contrib_ops/cpu/bert). 
+ONNXRuntime supports optimized kernels as contrib operators in both CPU and CUDA Execution Provider. +* The definition of the optimized kernels can be found in [onnxruntime/core/graph/contrib_ops/contrib_defs.cc](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/core/graph/contrib_ops/contrib_defs.cc). +* The CPU implementation of the optimized kernels can be found under [onnxruntime/contrib_ops/cpu/bert](https://github.com/microsoft/onnxruntime/tree/rel-1.9.0/onnxruntime/contrib_ops/cpu/bert). * The CUDA implementation of the optimized kernels can be found under [onnxruntime/contrib_ops/cuda/bert](https://github.com/microsoft/onnxruntime/tree/rel-1.9.0/onnxruntime/contrib_ops/cuda/bert). * Contrib ops tests can be found [here](https://github.com/microsoft/onnxruntime/tree/rel-1.9.0/onnxruntime/test/contrib_ops) For instance, the entry point of Attention CPU kernel is the [Compute()](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/contrib_ops/cpu/bert/attention.cc#L408) function. Similarly, for the EmbedLayerNorm CUDA kernel, the entry point is the [ComputeInternal()](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm.cc#L36) function. ### Graph Fusion -The main part of the transformer [optimizer](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/optimizer.py) is graph fusion. In the current implementation for bert optimization, it supports a couple of [fusions](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model_bert.py#L302) executed in order. Each particular graph fusion is an inheritance class of [Fusion](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/fusion_base.py#L13) with fuse() method to implement. For instance, the [fuse()](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/fusion_attention.py#L280) method in attention fusion. +The main part of the transformer [optimizer](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/optimizer.py) is graph fusion. In the current implementation for bert optimization, it supports a couple of [fusions](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model_bert.py#L302) executed in order. Each particular graph fusion is an inheritance class of [Fusion](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/fusion_base.py#L13) with fuse() method to implement. For instance, the [fuse()](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/fusion_attention.py#L280) method in attention fusion. The [onnx_model](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L19) class provides many useful functions to modify onnx graph including not limited to: * Retrieve all graph nodes with [self.nodes()](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L58) -* A [mapping](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L41-L56) of edge names to nodes. +* A [mapping](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L41-L56) of edge names to nodes. 
* [Basic operations](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L120-L181) of input/output, node, initializer. * [Match graph patterns](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_model.py#L310-L385) up-streaming and down-streaming. @@ -43,10 +43,10 @@ The [onnx_model](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxrun After fusing the graph, [check the parity](https://github.com/microsoft/onnxruntime/blob/rel-1.9.0/onnxruntime/python/tools/transformers/onnx_exporter.py#L104) between optimized onnx model and original one by feeding the same inputs to both models and comparing outputs. ## A Concrete Case -* The Attention Op and EmbedLayerNorm Op are not fused([EmbedLayerNorm graph](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/images/embed_unfused.png) and [Attention graph](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/images/attention_unfused.png) with Netron) after running optimization script on a custom transformer-based onnx model. +* The Attention Op and EmbedLayerNorm Op are not fused([EmbedLayerNorm graph](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/notebooks/images/embed_unfused.png) and [Attention graph](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/notebooks/images/attention_unfused.png) with Netron) after running optimization script on a custom transformer-based onnx model. * Checked and confirmed that these two candidate graphs have identical logic to the current CPU/CUDA kernel implementation. * Applied some code changes to the [Attention fusion](https://github.com/microsoft/onnxruntime/compare/wangye/opt#diff-bd125663ee59865deb608c7ec666ac4760b55ce73fc38cc3d463abd0aaa90817) and [EmbedLayerNorm fusion](https://github.com/microsoft/onnxruntime/compare/wangye/opt#diff-bb2157f08cf00e8434e77fcfeeaa960e5e9c6db2df2b637a5f49e48d77a56185) -* Re-run the script and these two Ops are fused([EmbedLayerNorm Op](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/images/embed_fused.png) and [Attention Op](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/images/attention_fused.png) with Netron). +* Re-run the script and these two Ops are fused([EmbedLayerNorm Op](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/notebooks/images/embed_fused.png) and [Attention Op](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/notebooks/images/attention_fused.png) with Netron). * The parity is OK ## Contribution diff --git a/onnxruntime/python/tools/transformers/README.md b/onnxruntime/python/tools/transformers/README.md index 3a9cb9cb8abd4..c882ef7b17d08 100644 --- a/onnxruntime/python/tools/transformers/README.md +++ b/onnxruntime/python/tools/transformers/README.md @@ -25,7 +25,7 @@ For tf2onnx, please refer to its [BERT tutorial](https://github.com/onnx/tensorf ### GPT-2 Model conversion -Converting GPT-2 model from PyTorch to ONNX is not straightforward when past state is used. We add a tool [convert_to_onnx](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/convert_to_onnx.py) to help you. 
+Converting GPT-2 model from PyTorch to ONNX is not straightforward when past state is used. We add a tool [convert_to_onnx](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/convert_to_onnx.py) to help you. You can use commands like the following to convert a pre-trained PyTorch GPT-2 model to ONNX for given precision (float32, float16 or int8): ``` @@ -45,7 +45,7 @@ conda activate longformer pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html pip install onnx transformers==4.18.0 onnxruntime numpy ``` -Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/torch_extensions) like the following: +Next, build the source of [torch extensions for Longformer ONNX exporting](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/torch_extensions) like the following: ``` cd onnxruntime/python/tools/transformers/models/longformer/torch_extensions python setup.py install @@ -76,7 +76,7 @@ You can also use command line. Example of optimizing a BERT-large model to use m python -m onnxruntime.transformers.optimizer --input bert_large.onnx --output bert_large_fp16.onnx --num_heads 16 --hidden_size 1024 --float16 ``` -You can also download the latest script files from [here](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers/). Then run it like the following: +You can also download the latest script files from [here](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/). Then run it like the following: ```console python optimizer.py --input bert.onnx --output bert_opt.onnx --model_type bert ``` @@ -122,7 +122,7 @@ If your model is not in the list, it might only be partial optimized or not opti ## Benchmark -There is a bash script [run_benchmark.sh](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/run_benchmark.sh) for running benchmark. You can modify the bash script to choose your options (like models to test, batch sizes, sequence lengths, target device etc) before running. +There is a bash script [run_benchmark.sh](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/run_benchmark.sh) for running benchmark. You can modify the bash script to choose your options (like models to test, batch sizes, sequence lengths, target device etc) before running. The bash script will call benchmark.py script to measure inference performance of OnnxRuntime, PyTorch or PyTorch+TorchScript on pretrained models of Huggingface Transformers. @@ -144,7 +144,7 @@ The model has 12 layers and 768 hidden, with input_ids as input. | onnxruntime | 1.4.0 | fp32 | 4 | 1.51 | 1.93 | 2.98 | 5.01 | 9.13 | 17.95 | 38.15 | | onnxruntime | 1.4.0 | fp16 | 4 | 1.27 | 1.35 | 1.43 | 1.83 | 2.66 | 4.40 | 9.76 | -[run_benchmark.sh](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/run_benchmark.sh) is used to get the results. +[run_benchmark.sh](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/run_benchmark.sh) is used to get the results. #### gpt2 (GPT2LMHeadModel) @@ -164,7 +164,7 @@ The model has 12 layers and 768 hidden, with input_ids, position_ids, attention_ Since past state is used, sequence length in input_ids is 1. 
For example, s=4 means the past sequence length is 4 and the total sequence length is 5. -[benchmark_gpt2.py](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py) is used to get the results like the following commands: +[benchmark_gpt2.py](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py) is used to get the results like the following commands: ```console python -m onnxruntime.transformers.models.gpt2.benchmark_gpt2 --use_gpu -m gpt2 -o -v -b 1 8 32 128 -s 4 8 32 128 -p fp32 diff --git a/onnxruntime/python/tools/transformers/notebooks/Inference_Bert_with_OnnxRuntime_on_AzureML.ipynb b/onnxruntime/python/tools/transformers/notebooks/Inference_Bert_with_OnnxRuntime_on_AzureML.ipynb index bdbfbbbb6a6ad..346ce1e870bbe 100644 --- a/onnxruntime/python/tools/transformers/notebooks/Inference_Bert_with_OnnxRuntime_on_AzureML.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/Inference_Bert_with_OnnxRuntime_on_AzureML.ipynb @@ -339,7 +339,7 @@ " sess_options = onnxruntime.SessionOptions()\n", " \n", " # You need set environment variables like OMP_NUM_THREADS for OpenMP to get best performance.\n", - " # See https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/bert/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb\n", + " # See https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/bert/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb\n", " sess_options.intra_op_num_threads = 1\n", " \n", " session = onnxruntime.InferenceSession(model_path, sess_options)\n", diff --git a/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2-OneStepSearch_OnnxRuntime_CPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2-OneStepSearch_OnnxRuntime_CPU.ipynb index a108024b2b523..4f2198bbb2184 100644 --- a/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2-OneStepSearch_OnnxRuntime_CPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2-OneStepSearch_OnnxRuntime_CPU.ipynb @@ -99,7 +99,7 @@ "source": [ "## Convert GPT2 model from PyTorch to ONNX with one step search ##\n", "\n", - "We have a script [convert_to_onnx.py](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/convert_to_onnx.py) that could help you to convert GPT2 with past state to ONNX. \n", + "We have a script [convert_to_onnx.py](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/convert_to_onnx.py) that could help you to convert GPT2 with past state to ONNX. \n", "\n", "The script accepts a pretrained model name or path of a checkpoint directory as input, and converts the model to ONNX. It also verifies that the ONNX model could generate same input as the pytorch model. 
The usage is like \n", "```\n", @@ -488,4 +488,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2_with_OnnxRuntime_on_CPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2_with_OnnxRuntime_on_CPU.ipynb index aeb47f50fb46a..e92e8bb0affd1 100644 --- a/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2_with_OnnxRuntime_on_CPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2_with_OnnxRuntime_on_CPU.ipynb @@ -83,7 +83,7 @@ "source": [ "## Convert GPT2 model from PyTorch to ONNX ##\n", "\n", - "We have a script [convert_to_onnx.py](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py) that could help you to convert GPT2 with past state to ONNX. \n", + "We have a script [convert_to_onnx.py](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py) that could help you to convert GPT2 with past state to ONNX. \n", "\n", "The script accepts a pretrained model name or path of a checkpoint directory as input, and converts the model to ONNX. It also verifies that the ONNX model could generate same input as the pytorch model. The usage is like \n", "```\n", diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb index b3979ef324204..d6bd3884ba185 100644 --- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb @@ -384,7 +384,7 @@ "source": [ "## 5. Offline Optimization Script and Test Tools\n", "\n", - "It is recommended to try [OnnxRuntime Transformer Model Optimization Tool](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) on the exported ONNX models. It could help verify whether the model can be fully optimized, and get performance test results." + "It is recommended to try [OnnxRuntime Transformer Model Optimization Tool](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers) on the exported ONNX models. It could help verify whether the model can be fully optimized, and get performance test results." ] }, { @@ -786,9 +786,9 @@ "\n", "Note that running Jupyter Notebook has slight impact on performance result since Jupyter Notebook is using system resources like CPU and memory etc. It is recommended to close Jupyter Notebook and other applications, then run the performance test tool in a console to get more accurate performance numbers.\n", "\n", - "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it compare inference speed of OnnxRuntime with PyTorch.\n", + "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it compare inference speed of OnnxRuntime with PyTorch.\n", "\n", - "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/master/docs/C_API.md) could get slightly better performance than python API. 
If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", + "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/main/docs/C_API.md) could get slightly better performance than python API. If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", "\n", "Here is the machine configuration that generated the above results. The machine has GPU but not used in CPU inference.\n", "You might get slower or faster result based on your hardware." diff --git a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb index ca041f614966e..a769fc12ae117 100644 --- a/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb @@ -601,7 +601,7 @@ "source": [ "## 5. Offline Optimization and Test Tools\n", "\n", - "It is recommended to try [OnnxRuntime Transformer Model Optimization Tool](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) on the exported ONNX models. It could help verify whether the model can be fully optimized, and get performance test results." + "It is recommended to try [OnnxRuntime Transformer Model Optimization Tool](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers) on the exported ONNX models. It could help verify whether the model can be fully optimized, and get performance test results." ] }, { @@ -1318,9 +1318,9 @@ "\n", "Note that running Jupyter Notebook has significant impact on performance result. You can close Jupyter Notebook and other applications, then run the performance test in a console to get more accurate performance numbers.\n", "\n", - "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it measure inference speed of OnnxRuntime.\n", + "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it measure inference speed of OnnxRuntime.\n", "\n", - "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/master/docs/C_API.md) could get slightly better performance than python API. If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", + "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/main/docs/C_API.md) could get slightly better performance than python API. If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", "\n", "Here is the machine configuration that generated the above results. You might get slower or faster result according to your hardware." 
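The notebook cells above set `sess_options.intra_op_num_threads` and then compare OnnxRuntime against PyTorch. For a quick sanity check from Python before building `onnxruntime_perf_test` from source, a timing loop of the same shape can be written directly against the Python API. This is a minimal sketch only: the model path, input names, and shapes are placeholders, not values taken from these notebooks.

```python
import time

import numpy as np
import onnxruntime

# Placeholders: substitute the exported/optimized model and its real input names and shapes.
MODEL_PATH = "bert_opt.onnx"
BATCH_SIZE, SEQ_LEN = 1, 128

sess_options = onnxruntime.SessionOptions()
sess_options.intra_op_num_threads = 1  # as in the CPU notebooks; tune for your machine

# Use ["CUDAExecutionProvider"] instead to mirror the GPU notebook's setup.
session = onnxruntime.InferenceSession(MODEL_PATH, sess_options, providers=["CPUExecutionProvider"])

inputs = {
    "input_ids": np.ones((BATCH_SIZE, SEQ_LEN), dtype=np.int64),
    "attention_mask": np.ones((BATCH_SIZE, SEQ_LEN), dtype=np.int64),
    "token_type_ids": np.zeros((BATCH_SIZE, SEQ_LEN), dtype=np.int64),
}

# Warm up once so session initialization and first-run optimizations are not counted,
# then average the latency over repeated runs.
session.run(None, inputs)
runs = 100
start = time.perf_counter()
for _ in range(runs):
    session.run(None, inputs)
print(f"average latency: {(time.perf_counter() - start) / runs * 1000.0:.2f} ms")
```

Averaging over many runs after a warm-up gives numbers closer to what `run_benchmark.sh` reports than a single timed call would.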
] diff --git a/onnxruntime/python/tools/transformers/notebooks/Tensorflow_Tf2onnx_Bert-Squad_OnnxRuntime_CPU.ipynb b/onnxruntime/python/tools/transformers/notebooks/Tensorflow_Tf2onnx_Bert-Squad_OnnxRuntime_CPU.ipynb index e617b94e7b1de..aebde9a08a24a 100644 --- a/onnxruntime/python/tools/transformers/notebooks/Tensorflow_Tf2onnx_Bert-Squad_OnnxRuntime_CPU.ipynb +++ b/onnxruntime/python/tools/transformers/notebooks/Tensorflow_Tf2onnx_Bert-Squad_OnnxRuntime_CPU.ipynb @@ -360,7 +360,7 @@ "source": [ "## 5. Model Optimization\n", "\n", - "[ONNX Runtime BERT Model Optimization Tools](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers) is a set of tools for optimizing and testing BERT models. Let's try some of them on the exported models." + "[ONNX Runtime BERT Model Optimization Tools](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers) is a set of tools for optimizing and testing BERT models. Let's try some of them on the exported models." ], "metadata": {} }, @@ -611,9 +611,9 @@ "\n", "Note that running Jupyter Notebook has impact on performance result since Jupyter Notebook is using system resources like CPU and memory etc. It is recommended to close Jupyter Notebook and other applications, then run the performance test tool in a console to get more accurate performance numbers.\n", "\n", - "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it to measure inference speed of OnnxRuntime.\n", + "We have a [benchmark script](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/run_benchmark.sh). It is recommended to use it to measure inference speed of OnnxRuntime.\n", "\n", - "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/master/docs/C_API.md) could get slightly better performance than python API. If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", + "[OnnxRuntime C API](https://github.com/microsoft/onnxruntime/blob/main/docs/C_API.md) could get slightly better performance than python API. If you use C API in inference, you can use OnnxRuntime_Perf_Test.exe built from source to measure performance instead.\n", "\n", "Here is the machine configuration that generated the above results. The machine has GPU but not used in CPU inference.\n", "You might get slower or faster result based on your hardware." @@ -729,4 +729,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc index cb2d258d0c062..3d557ad6da57c 100644 --- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc +++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc @@ -314,7 +314,7 @@ TEST(NnapiExecutionProviderTest, TestQDQResize) { // Setting verify_entire_graph_use_ep for this test as false. This is because layout transformation adds // Transpose (NCHW -> NHWC) nodes. 
Post tranformation graph looks like this Transpose -> DQ -> Resize -> Q -> Transpose // NNAPI does not pick the first Transpose as its input is graph/partition input - // See https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc#L305 + // See https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc#L305 // onnxruntime::nnapi::IsInternalQuantizationSupported RunQDQModelTest(BuildQDQResizeTestCase({1, 3, 64, 64} /* input_shape */, {1, 3, 32, 32} /* sizes_data */, diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index e6ed4e8346830..a244898c2a752 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -24,7 +24,7 @@ // - /js/node/test/test-utils.ts // - /onnxruntime/test/python/onnx_backend_test_series.py // - // See also: https://github.com/microsoft/onnxruntime/blob/master/docs/How_To_Update_ONNX_Dev_Notes.md + // See also: https://github.com/microsoft/onnxruntime/blob/main/docs/How_To_Update_ONNX_Dev_Notes.md // // Tests that are failing temporarily and should be fixed diff --git a/orttraining/orttraining/core/framework/checkpointing.cc b/orttraining/orttraining/core/framework/checkpointing.cc index 714039a7dd23b..b367586584eaf 100644 --- a/orttraining/orttraining/core/framework/checkpointing.cc +++ b/orttraining/orttraining/core/framework/checkpointing.cc @@ -81,7 +81,7 @@ Status SaveRuntimeTensor( // TODO need to ensure the data is written in little-endian format... // e.g., with endian_utils.h:WriteLittleEndian() - // https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/framework/endian_utils.h + // https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/framework/endian_utils.h if constexpr (endian::native != endian::little) { ORT_NOT_IMPLEMENTED("checkpointing currently requires little-endian host byte order"); } diff --git a/orttraining/orttraining/eager/opgen/opgen/atenops.py b/orttraining/orttraining/eager/opgen/opgen/atenops.py index 5529ad0e0e453..de78ce2923427 100644 --- a/orttraining/orttraining/eager/opgen/opgen/atenops.py +++ b/orttraining/orttraining/eager/opgen/opgen/atenops.py @@ -160,7 +160,7 @@ def __init__(self, dY, X): "aten::nonzero.out": SignatureOnly(), "aten::_log_softmax.out": SignatureOnly(), # NegativeLogLikelihoodLoss is not supported by the CPU Execution Provider so testing is not possible - # Leaving nll_loss_forward.output set to fallback. https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md. + # Leaving nll_loss_forward.output set to fallback. https://github.com/microsoft/onnxruntime/blob/main/docs/OperatorKernels.md. "aten::nll_loss_forward.output": MakeTorchFallback(), "aten::nll_loss_backward.grad_input": MakeTorchFallback(), "aten::_log_softmax_backward_data.out": MakeTorchFallback(), diff --git a/orttraining/orttraining/python/training/onnxblock/README.md b/orttraining/orttraining/python/training/onnxblock/README.md index caa44e2b2bd5c..0d297f48a5dd0 100644 --- a/orttraining/orttraining/python/training/onnxblock/README.md +++ b/orttraining/orttraining/python/training/onnxblock/README.md @@ -99,7 +99,7 @@ onnxblock.save_checkpoint(my_model.parameters(), output_checkpoint_path) Once the models and checkpoint have been generated, they can be loaded in the online training step and executed. 
For an example on how the online training loop should be written given these generated files, refer to this -[sample trainer](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/test/training_api/trainer/trainer.cc). -For all the `onnxblocks` that are supported, please look at the [building_blocks](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/python/training/onnxblock/building_blocks.py). -For all the loss blocks that are supported, please look at the [loss blocks](https://github.com/microsoft/onnxruntime/tree/master/orttraining/orttraining/python/training/onnxblock/loss/loss.py). -For all the optimizer blocks that are supported, please look at the [optim blocks](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/python/training/onnxblock/optim/optim.py). +[sample trainer](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/test/training_api/trainer/trainer.cc). +For all the `onnxblocks` that are supported, please look at the [building_blocks](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/python/training/onnxblock/building_blocks.py). +For all the loss blocks that are supported, please look at the [loss blocks](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/python/training/onnxblock/loss/loss.py). +For all the optimizer blocks that are supported, please look at the [optim blocks](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/python/training/onnxblock/optim/optim.py). diff --git a/orttraining/orttraining/test/python/how_to_add_distributed_ci_pipeline_tests.md b/orttraining/orttraining/test/python/how_to_add_distributed_ci_pipeline_tests.md index cfafe7f696c1f..2ecb9e14dd8a3 100644 --- a/orttraining/orttraining/test/python/how_to_add_distributed_ci_pipeline_tests.md +++ b/orttraining/orttraining/test/python/how_to_add_distributed_ci_pipeline_tests.md @@ -5,13 +5,13 @@ This is a simple guide on how the distributed CI pipeline works and how it can b ### The Pipeline The distributed CI pipeline is intended for running tests that require a distributed environment (for example, tests that need to be run with ```mpirun```). -The pipeline ```yml``` file is defined in [```tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-distributed-test-ci-pipeline.yml```](https://github.com/microsoft/onnxruntime/blob/master/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-distributed-test-ci-pipeline.yml). +The pipeline ```yml``` file is defined in [```tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-distributed-test-ci-pipeline.yml```](https://github.com/microsoft/onnxruntime/blob/main/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-distributed-test-ci-pipeline.yml). The pipeline runs on every pull request commit under the [```orttraining-distributed```](https://dev.azure.com/onnxruntime/onnxruntime/_build?definitionId=140&_a=summary) check. The flow of events in the pipeline are: 1. Clone the git repository and checkout the branch that needs to run for the CI (the pull request). 2. Build the docker container installing all dependencies that are needed for the distributed tests (for example, ```open-mpi```) -3. 
Run all tests defined in the file [```orttraining/orttraining/test/python/orttraining_distributed_tests.py```](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/test/python/orttraining_distributed_tests.py) through the script [```orttraining/orttraining/test/python/launch_test.py```](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/test/python/launch_test.py) +3. Run all tests defined in the file [```orttraining/orttraining/test/python/orttraining_distributed_tests.py```](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/test/python/orttraining_distributed_tests.py) through the script [```orttraining/orttraining/test/python/launch_test.py```](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/test/python/launch_test.py) 4. Report the status of the tests. ## Running Locally @@ -28,7 +28,7 @@ python orttraining_distributed_tests.py Follow the below steps to add new distributed tests that will run in this pipeline. 1. Create a new python file that can be called as a script. Let's call this ```dummy_distributed_test.py``` as an example. -2. Make sure this ```dummy_distributed_test.py``` can be called and executed using either ```python dummy_distributed_test.py``` or using ```mpirun -n -x NCCL_DEBUG=INFO python dummy_distributed_test.py```. A real example of such a test file is [```orttraining/orttraining/test/python/orttraining_test_checkpoint.py```](https://github.com/microsoft/onnxruntime/blob/master/orttraining/orttraining/test/python/orttraining_test_checkpoint.py). +2. Make sure this ```dummy_distributed_test.py``` can be called and executed using either ```python dummy_distributed_test.py``` or using ```mpirun -n -x NCCL_DEBUG=INFO python dummy_distributed_test.py```. A real example of such a test file is [```orttraining/orttraining/test/python/orttraining_test_checkpoint.py```](https://github.com/microsoft/onnxruntime/blob/main/orttraining/orttraining/test/python/orttraining_test_checkpoint.py). 3. 
Create a new function in ```orttraining/orttraining/test/python/orttraining_distributed_tests.py``` ```python def run_dummy_distributed_tests(cwd, log): diff --git a/tools/python/gen_contrib_doc.py b/tools/python/gen_contrib_doc.py index bad97451cf6c0..15e7f65d093d9 100644 --- a/tools/python/gen_contrib_doc.py +++ b/tools/python/gen_contrib_doc.py @@ -318,7 +318,7 @@ def main(output_path: str, domain_filter: [str]): fout.write("## Contrib Operator Schemas\n") fout.write( "*This file is automatically generated from the registered contrib operator schemas by " - "[this script](https://github.com/microsoft/onnxruntime/blob/master/tools/python/gen_contrib_doc.py).\n" + "[this script](https://github.com/microsoft/onnxruntime/blob/main/tools/python/gen_contrib_doc.py).\n" "Do not modify directly.*\n" ) diff --git a/tools/python/gen_opkernel_doc.py b/tools/python/gen_opkernel_doc.py index 640bc22ba7255..e399b00c97fcd 100644 --- a/tools/python/gen_opkernel_doc.py +++ b/tools/python/gen_opkernel_doc.py @@ -68,7 +68,7 @@ def main(output_path: pathlib.Path, provider_filter: [str]): fout.write("## Supported Operators and Data Types\n") fout.write( "*This file is automatically generated from the registered kernels by " - "[this script](https://github.com/microsoft/onnxruntime/blob/master/tools/python/gen_opkernel_doc.py).\n" + "[this script](https://github.com/microsoft/onnxruntime/blob/main/tools/python/gen_opkernel_doc.py).\n" "Do not modify directly.*\n\n" ) opdef = rtpy.get_all_operator_schema() diff --git a/tools/python/gen_ort_mobile_pkg_doc.py b/tools/python/gen_ort_mobile_pkg_doc.py index 5818c362d23b2..db9ca027352f1 100644 --- a/tools/python/gen_ort_mobile_pkg_doc.py +++ b/tools/python/gen_ort_mobile_pkg_doc.py @@ -15,7 +15,7 @@ def generate_docs(output_file, required_ops, op_type_impl_filter): out.write( "The supported operators and types are based on what is required to support float32 and quantized " "versions of popular models. The full list of input models used to determine this list is available " - "[here](https://github.com/microsoft/onnxruntime/blob/master/tools/ci_build/github/android/mobile_package" + "[here](https://github.com/microsoft/onnxruntime/blob/main/tools/ci_build/github/android/mobile_package" ".required_operators.readme.txt)" ) out.write("\n\n")
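Every hunk in this patch is the same mechanical substitution: rewrite GitHub URLs that pin the old `master` default branch so they point at `main`. A change like this is usually scripted rather than edited by hand; the sketch below is illustrative only (it is not the tooling actually used for this PR) and assumes the `blob/` and `tree/` URL forms under `microsoft/onnxruntime` seen in the hunks above.

```python
import re
from pathlib import Path

# Rewrite only default-branch references in onnxruntime URLs; release-tag links
# (e.g. rel-1.9.0) and unrelated occurrences of 'master' are left untouched.
BRANCH_REF = re.compile(r"(github\.com/microsoft/onnxruntime/(?:blob|tree)/)master\b")

# File types touched by this patch; extend as needed.
SUFFIXES = {".md", ".rst", ".ipynb", ".py", ".cc", ".h", ".cs", ".ts", ".jsonc"}


def rewrite_branch_refs(root: Path) -> None:
    """Replace 'master' with 'main' in onnxruntime GitHub links under root."""
    for path in root.rglob("*"):
        if not path.is_file() or path.suffix not in SUFFIXES:
            continue
        text = path.read_text(encoding="utf-8")
        new_text = BRANCH_REF.sub(r"\g<1>main", text)
        if new_text != text:
            path.write_text(new_text, encoding="utf-8")
            print(f"updated {path}")


if __name__ == "__main__":
    rewrite_branch_refs(Path("."))
```

A few hunks above also change `tree/` to `blob/` in the same links; that goes beyond the branch rename and would need a separate rule or manual review.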