-
Notifications
You must be signed in to change notification settings - Fork 38
/
onnxruntime_c_api.h
4917 lines (4465 loc) · 218 KB
/
onnxruntime_c_api.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// See docs\c_cxx\README.md on generating the Doxygen documentation from this file
/** \mainpage ONNX Runtime
*
* ONNX Runtime is a high-performance inference and training graph execution engine for deep learning models.
*
* ONNX Runtime's C, C++ APIs offer an easy to use interface to onboard and execute onnx models.
* - \subpage c_cpp_api "Core C, C++ APIs"
* - \subpage training_c_cpp_api "Training C, C++ APIs for on-device training"
*
* \page c_cpp_api Core C, C++ APIs
* <h1>C</h1>
*
* ::OrtApi - Click here to go to the structure with all C API functions.
*
* <h1>C++</h1>
*
* ::Ort - Click here to go to the namespace holding all of the C++ wrapper classes
*
* It is a set of header only wrapper classes around the C API. The goal is to turn the C style return value error codes into C++ exceptions, and to
* automate memory management through standard C++ RAII principles.
*
* \addtogroup Global
* ONNX Runtime C API
* @{
*/
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/** \brief The API version defined in this header
*
* This value is used by some API functions to behave as this version of the header expects.
*/
#define ORT_API_VERSION 20
#ifdef __cplusplus
extern "C" {
#endif
//! @}
// SAL2 Definitions
// Outside Windows the SAL annotation macros used by this header are defined as empty,
// so annotated declarations still compile. On Windows they come from <specstrings.h>.
#ifndef _WIN32
#define _In_
#define _In_z_
#define _In_opt_
#define _In_opt_z_
#define _Out_
#define _Outptr_
#define _Out_opt_
#define _Inout_
#define _Inout_opt_
#define _Frees_ptr_opt_
#define _Ret_maybenull_
#define _Ret_notnull_
#define _Check_return_
#define _Outptr_result_maybenull_
#define _In_reads_(X)
#define _Inout_updates_(X)
#define _Out_writes_(X)
#define _Inout_updates_all_(X)
#define _Out_writes_bytes_all_(X)
#define _Out_writes_all_(X)
#define _Success_(X)
#define _Outptr_result_buffer_maybenull_(X)
// Declares all pointer arguments of the annotated function as non-null (GCC/Clang attribute).
#define ORT_ALL_ARGS_NONNULL __attribute__((nonnull))
#else
#include <specstrings.h>
// Expands to nothing on Windows.
#define ORT_ALL_ARGS_NONNULL
#endif
// Platform-dependent decoration macros: symbol export (ORT_EXPORT), calling convention (ORT_API_CALL),
// unused-result warning (ORT_MUST_USE_RESULT) and the path character type (ORTCHAR_T).
#ifdef _WIN32
// Define ORT_DLL_IMPORT if your program is dynamically linked to Ort.
// dllexport is not used, we use a .def file.
#ifdef ORT_DLL_IMPORT
#define ORT_EXPORT __declspec(dllimport)
#else
#define ORT_EXPORT
#endif
// Calling convention used by the ORT_API* declaration macros on Windows.
#define ORT_API_CALL _stdcall
// Expands to nothing on Windows.
#define ORT_MUST_USE_RESULT
// Path strings are wide-character on Windows.
#define ORTCHAR_T wchar_t
#else
// To make symbols visible on macOS/iOS
#ifdef __APPLE__
#define ORT_EXPORT __attribute__((visibility("default")))
#else
#define ORT_EXPORT
#endif
// No explicit calling convention outside Windows.
#define ORT_API_CALL
// Makes the compiler warn when the result of an annotated function is ignored.
#define ORT_MUST_USE_RESULT __attribute__((warn_unused_result))
// Path strings are plain char outside Windows.
#define ORTCHAR_T char
#endif
/// ORTCHAR_T, ORT_TSTR are reserved specifically for path handling.
/// All other strings are UTF-8 encoded, use char and std::string
/// ORT_TSTR(X) turns a string literal into a path literal: wide (L"...") on Windows, unchanged elsewhere.
/// The definitions are skipped if ORT_TSTR is already defined.
#ifndef ORT_TSTR
#ifdef _WIN32
#define ORT_TSTR(X) L##X
// When X is itself a macro, L##X pastes onto the macro name instead of its expansion and does not work.
// In that case use ORT_TSTR_ON_MACRO, which relies on adjacent string literal concatenation instead.
#define ORT_TSTR_ON_MACRO(X) L"" X
#else
#define ORT_TSTR(X) X
#define ORT_TSTR_ON_MACRO(X) X
#endif
#endif
// On Windows, ORT_FILE is a wchar_t version of the __FILE__ macro.
// Otherwise, ORT_FILE is equivalent to __FILE__.
// ORT_FILE_INTERNAL adds one level of macro expansion so that __FILE__ is replaced by its
// string literal before ORT_TSTR is applied to it.
// The definitions are skipped if ORT_FILE is already defined.
#ifndef ORT_FILE
#define ORT_FILE_INTERNAL(x) ORT_TSTR(x)
#define ORT_FILE ORT_FILE_INTERNAL(__FILE__)
#endif
// Windows users should use unicode paths when possible to bypass the MAX_PATH limitation.
// Every pointer marked with _In_ or _Out_ cannot be NULL; the caller must ensure that.
// ReleaseXXX(...) functions are the exception: they accept a NULL pointer.
// NO_EXCEPTION is appended to every API declaration:
//   - C++ with noexcept support: `noexcept`
//   - other C++ compilers:       `throw()`
//   - C:                         nothing
#ifdef __cplusplus
// For any compiler with C++11 support, MSVC 2015 and greater, or Clang version supporting noexcept.
// Such complex condition is needed because compilers set __cplusplus value differently.
// Compilers without __has_feature get a fallback that reports every feature as unavailable.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#if ((__cplusplus >= 201103L) || (_MSC_VER >= 1900) || (defined(__has_feature) && __has_feature(cxx_noexcept)))
#define NO_EXCEPTION noexcept
#else
#define NO_EXCEPTION throw()
#endif
#else
#define NO_EXCEPTION
#endif
// Declaration helper macros for API functions.
// __VA_ARGS__ on Windows and Linux are different
// ORT_API: declares function NAME returning RETURN_TYPE with the ORT calling convention and NO_EXCEPTION.
#define ORT_API(RETURN_TYPE, NAME, ...) RETURN_TYPE ORT_API_CALL NAME(__VA_ARGS__) NO_EXCEPTION
// ORT_API_STATUS: declares function NAME returning OrtStatusPtr. The SAL annotation states that a
// return value equal to 0 (a null status) means success, and the result must not be ignored.
#define ORT_API_STATUS(NAME, ...) \
_Success_(return == 0) _Check_return_ _Ret_maybenull_ OrtStatusPtr ORT_API_CALL NAME(__VA_ARGS__) \
NO_EXCEPTION ORT_MUST_USE_RESULT
// ORT_API2_STATUS: declares NAME as a pointer to a function returning OrtStatusPtr.
// XXX: Unfortunately, SAL annotations are known to not work with function pointers
#define ORT_API2_STATUS(NAME, ...) \
_Check_return_ _Ret_maybenull_ OrtStatusPtr(ORT_API_CALL* NAME)(__VA_ARGS__) NO_EXCEPTION ORT_MUST_USE_RESULT
// Used in *.cc files. Almost the same as ORT_API_STATUS, except without ORT_MUST_USE_RESULT and ORT_EXPORT
#define ORT_API_STATUS_IMPL(NAME, ...) \
_Success_(return == 0) _Check_return_ _Ret_maybenull_ OrtStatusPtr ORT_API_CALL NAME(__VA_ARGS__) NO_EXCEPTION
// ORT_CLASS_RELEASE: declares the function pointer Release<X>, which takes an Ort<X>* named `input`.
#define ORT_CLASS_RELEASE(X) void(ORT_API_CALL * Release##X)(_Frees_ptr_opt_ Ort##X * input)
// When __DOXYGEN__ is defined, the declaration macros are redefined to plain function signatures
// without calling-convention, SAL or exception decorations (presumably to keep generated docs readable).
#ifdef __DOXYGEN__
#undef ORT_API_STATUS
#define ORT_API_STATUS(NAME, ...) OrtStatus* NAME(__VA_ARGS__)
#undef ORT_API2_STATUS
#define ORT_API2_STATUS(NAME, ...) OrtStatus* NAME(__VA_ARGS__)
#undef ORT_CLASS_RELEASE
#define ORT_CLASS_RELEASE(X) void Release##X(Ort##X* input)
#undef NO_EXCEPTION
#define NO_EXCEPTION
#endif
/** \addtogroup Global
* ONNX Runtime C API
* @{
*/
/** \brief Tensor element data types, copied from TensorProto::DataType
 *
 * Every enumerator value is spelled out so the numbering cannot shift when entries are added.
 * Currently, Ort doesn't support complex64, complex128
 */
typedef enum ONNXTensorElementDataType {
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED = 0,
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT = 1,    ///< maps to c type float
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8 = 2,    ///< maps to c type uint8_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8 = 3,     ///< maps to c type int8_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16 = 4,   ///< maps to c type uint16_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16 = 5,    ///< maps to c type int16_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32 = 6,    ///< maps to c type int32_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64 = 7,    ///< maps to c type int64_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING = 8,   ///< maps to c++ type std::string
  ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL = 9,
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 = 10,
  ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE = 11,  ///< maps to c type double
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32 = 12,  ///< maps to c type uint32_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64 = 13,  ///< maps to c type uint64_t
  ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64 = 14,   ///< complex with float32 real and imaginary components
  ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128 = 15,  ///< complex with float64 real and imaginary components
  ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16 = 16,    ///< Non-IEEE floating-point format based on IEEE754 single-precision

  /* float 8 types were introduced in onnx 1.14, see https://onnx.ai/onnx/technical/float8.html */
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN = 17,    ///< Non-IEEE 8-bit floating-point format (see link above)
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ = 18,  ///< Non-IEEE 8-bit floating-point format (see link above)
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2 = 19,      ///< Non-IEEE 8-bit floating-point format (see link above)
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ = 20,  ///< Non-IEEE 8-bit floating-point format (see link above)

  /* Int4 types were introduced in ONNX 1.16. See https://onnx.ai/onnx/technical/int4.html */
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4 = 21,  ///< maps to a pair of packed uint4 values (size == 1 byte)
  ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4 = 22    ///< maps to a pair of packed int4 values (size == 1 byte)
} ONNXTensorElementDataType;
/** \brief Kind of value; synced with the onnx TypeProto oneof
 *
 * Enumerator values are written out explicitly.
 */
typedef enum ONNXType {
  ONNX_TYPE_UNKNOWN = 0,
  ONNX_TYPE_TENSOR = 1,
  ONNX_TYPE_SEQUENCE = 2,
  ONNX_TYPE_MAP = 3,
  ONNX_TYPE_OPAQUE = 4,
  ONNX_TYPE_SPARSETENSOR = 5,
  ONNX_TYPE_OPTIONAL = 6
} ONNXType;
/** \brief Sparse tensor storage formats
 *
 * These values are synced with the internal SparseFormatFlags.
 * Each non-zero format occupies its own bit.
 */
typedef enum OrtSparseFormat {
  ORT_SPARSE_UNDEFINED = 0,
  ORT_SPARSE_COO = 1 << 0,
  ORT_SPARSE_CSRC = 1 << 1,
  ORT_SPARSE_BLOCK_SPARSE = 1 << 2
} OrtSparseFormat;
/** \brief Selects which indices of a sparse tensor are being queried
 *
 * The typedef lets C code use the bare name `OrtSparseIndicesFormat`, as with every other enum
 * in this header. The `enum OrtSparseIndicesFormat` spelling keeps working.
 */
typedef enum OrtSparseIndicesFormat {
  ORT_SPARSE_COO_INDICES,
  ORT_SPARSE_CSR_INNER_INDICES,
  ORT_SPARSE_CSR_OUTER_INDICES,
  ORT_SPARSE_BLOCK_SPARSE_INDICES
} OrtSparseIndicesFormat;
/** \brief Logging severity levels
 *
 * In typical API usage, specifying a logging severity level specifies the minimum severity of log messages to show.
 * Values increase with severity.
 */
typedef enum OrtLoggingLevel {
  ORT_LOGGING_LEVEL_VERBOSE = 0,  ///< Verbose informational messages (least severe).
  ORT_LOGGING_LEVEL_INFO = 1,     ///< Informational messages.
  ORT_LOGGING_LEVEL_WARNING = 2,  ///< Warning messages.
  ORT_LOGGING_LEVEL_ERROR = 3,    ///< Error messages.
  ORT_LOGGING_LEVEL_FATAL = 4     ///< Fatal error messages (most severe).
} OrtLoggingLevel;
/** \brief Error codes carried by an OrtStatus
 *
 * ORT_OK is 0; every other enumerator is a distinct non-zero code.
 */
typedef enum OrtErrorCode {
  ORT_OK = 0,
  ORT_FAIL = 1,
  ORT_INVALID_ARGUMENT = 2,
  ORT_NO_SUCHFILE = 3,
  ORT_NO_MODEL = 4,
  ORT_ENGINE_ERROR = 5,
  ORT_RUNTIME_EXCEPTION = 6,
  ORT_INVALID_PROTOBUF = 7,
  ORT_MODEL_LOADED = 8,
  ORT_NOT_IMPLEMENTED = 9,
  ORT_INVALID_GRAPH = 10,
  ORT_EP_FAIL = 11
} OrtErrorCode;
/** \brief Type tag of an operator attribute
 *
 * Scalar and list variants alternate: INT/INTS, FLOAT/FLOATS, STRING/STRINGS.
 */
typedef enum OrtOpAttrType {
  ORT_OP_ATTR_UNDEFINED = 0,
  ORT_OP_ATTR_INT = 1,
  ORT_OP_ATTR_INTS = 2,
  ORT_OP_ATTR_FLOAT = 3,
  ORT_OP_ATTR_FLOATS = 4,
  ORT_OP_ATTR_STRING = 5,
  ORT_OP_ATTR_STRINGS = 6
} OrtOpAttrType;
//! @}
// Forward-declares `struct Ort<X>` and introduces a typedef of the same name, e.g.
// ORT_RUNTIME_CLASS(Env) expands to `struct OrtEnv; typedef struct OrtEnv OrtEnv`.
// The expansion has no trailing semicolon; the use site supplies it.
#define ORT_RUNTIME_CLASS(X) \
struct Ort##X; \
typedef struct Ort##X Ort##X
/** \addtogroup Global
* ONNX Runtime C API
* @{
*/
// The actual types defined have an Ort prefix
// Each line forward-declares one struct type (e.g. OrtEnv, OrtStatus) and its typedef.
// Only the declarations appear here, so this header handles these types through pointers.
ORT_RUNTIME_CLASS(Env);
ORT_RUNTIME_CLASS(Status); // nullptr for Status* indicates success
ORT_RUNTIME_CLASS(MemoryInfo);
ORT_RUNTIME_CLASS(IoBinding);
ORT_RUNTIME_CLASS(Session); // Don't call ReleaseSession from Dllmain (because session owns a thread pool)
ORT_RUNTIME_CLASS(Value);
ORT_RUNTIME_CLASS(RunOptions);
ORT_RUNTIME_CLASS(TypeInfo);
ORT_RUNTIME_CLASS(TensorTypeAndShapeInfo);
ORT_RUNTIME_CLASS(MapTypeInfo);
ORT_RUNTIME_CLASS(SequenceTypeInfo);
ORT_RUNTIME_CLASS(OptionalTypeInfo);
ORT_RUNTIME_CLASS(SessionOptions);
ORT_RUNTIME_CLASS(CustomOpDomain);
ORT_RUNTIME_CLASS(ModelMetadata);
ORT_RUNTIME_CLASS(ThreadPoolParams);
ORT_RUNTIME_CLASS(ThreadingOptions);
ORT_RUNTIME_CLASS(ArenaCfg);
ORT_RUNTIME_CLASS(PrepackedWeightsContainer);
ORT_RUNTIME_CLASS(TensorRTProviderOptionsV2);
ORT_RUNTIME_CLASS(CUDAProviderOptionsV2);
ORT_RUNTIME_CLASS(CANNProviderOptions);
ORT_RUNTIME_CLASS(DnnlProviderOptions);
ORT_RUNTIME_CLASS(Op);
ORT_RUNTIME_CLASS(OpAttr);
ORT_RUNTIME_CLASS(Logger);
ORT_RUNTIME_CLASS(ShapeInferContext);
ORT_RUNTIME_CLASS(LoraAdapter);
// OrtStatusPtr is the pointer type returned by the status-returning API macros.
// On Windows the typedef carries the SAL annotation _Return_type_success_(return == 0),
// i.e. a null return value means success. Elsewhere it is a plain OrtStatus*.
#ifdef _WIN32
typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
#else
typedef OrtStatus* OrtStatusPtr;
#endif
/** \brief Memory allocation interface
*
* Structure of function pointers that defines a memory allocator. This can be created and filled in by the user for custom allocators.
* Every callback receives the allocator itself as its first argument (`this_`).
*
* When an allocator is passed to any function, be sure that the allocator object is not destroyed until the last allocated object using it is freed.
*/
typedef struct OrtAllocator {
uint32_t version; ///< Must be initialized to ORT_API_VERSION
void*(ORT_API_CALL* Alloc)(struct OrtAllocator* this_, size_t size); ///< Returns a pointer to an allocated block of `size` bytes
void(ORT_API_CALL* Free)(struct OrtAllocator* this_, void* p); ///< Free a block of memory previously allocated with OrtAllocator::Alloc
const struct OrtMemoryInfo*(ORT_API_CALL* Info)(const struct OrtAllocator* this_); ///< Return a pointer to an ::OrtMemoryInfo that describes this allocator
/**
* @brief Optional allocation function to use for memory allocations made during session initialization.
* Use this function if you want to separate allocations made by ORT during Run() calls from
* those made during session initialization. This allows for separate memory management strategies for these allocations.
* It has the same signature as OrtAllocator::Alloc.
*/
void*(ORT_API_CALL* Reserve)(struct OrtAllocator* this_, size_t size); ///< Returns a pointer to an allocated block of `size` bytes
} OrtAllocator;
/** \brief Signature of a user supplied logging callback
*
* \param[in] param User data pointer; OrtApi::CreateEnvWithCustomLogger passes its `logger_param` here
* \param[in] severity Severity of the message
* \param[in] category Message category string
* \param[in] logid Log identifier string
* \param[in] code_location String describing where in the code the message originated
* \param[in] message The log message text
*/
typedef void(ORT_API_CALL* OrtLoggingFunction)(
void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location,
const char* message);
/** \brief Graph optimization level
*
* Refer to https://www.onnxruntime.ai/docs/performance/graph-optimizations.html#graph-optimization-levels
* for an in-depth understanding of the Graph Optimization Levels.
*
* \note The values are not contiguous: ORT_ENABLE_ALL is 99, not 3.
*/
typedef enum GraphOptimizationLevel {
ORT_DISABLE_ALL = 0,
ORT_ENABLE_BASIC = 1,
ORT_ENABLE_EXTENDED = 2,
ORT_ENABLE_ALL = 99
} GraphOptimizationLevel;
/** \brief Execution mode selector
*
* NOTE(review): presumably chooses between sequential and parallel execution of graph operators;
* confirm against the session options documentation.
*/
typedef enum ExecutionMode {
ORT_SEQUENTIAL = 0,
ORT_PARALLEL = 1,
} ExecutionMode;
/** \brief Language projection identifiers
*
* One identifier per language binding listed below.
* \see OrtApi::SetLanguageProjection
*/
typedef enum OrtLanguageProjection {
ORT_PROJECTION_C = 0,
ORT_PROJECTION_CPLUSPLUS = 1,
ORT_PROJECTION_CSHARP = 2,
ORT_PROJECTION_PYTHON = 3,
ORT_PROJECTION_JAVA = 4,
ORT_PROJECTION_WINML = 5,
ORT_PROJECTION_NODEJS = 6,
} OrtLanguageProjection;
// Forward declarations (with typedefs) of the kernel and custom-op types.
// They are only declared at this point, so they can be referenced through pointers from here on.
struct OrtKernelInfo;
typedef struct OrtKernelInfo OrtKernelInfo;
struct OrtKernelContext;
typedef struct OrtKernelContext OrtKernelContext;
struct OrtCustomOp;
typedef struct OrtCustomOp OrtCustomOp;
/** \brief Allocator kinds
*
* OrtInvalidAllocator (-1) is the only negative value.
* NOTE(review): the meaning of "device" versus "arena" allocation is not described here; confirm against the allocator documentation.
*/
typedef enum OrtAllocatorType {
OrtInvalidAllocator = -1,
OrtDeviceAllocator = 0,
OrtArenaAllocator = 1
} OrtAllocatorType;
/** \brief Memory types for allocated memory, execution provider specific types should be extended in each provider.
*
* \note OrtMemTypeCPU and OrtMemTypeCPUOutput share the same value (-1).
*/
// Whenever this enum is updated, please also update the MakeKey function in onnxruntime / core / framework / execution_provider.cc
typedef enum OrtMemType {
OrtMemTypeCPUInput = -2, ///< Any CPU memory used by non-CPU execution provider
OrtMemTypeCPUOutput = -1, ///< CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED
OrtMemTypeCPU = OrtMemTypeCPUOutput, ///< Alias of OrtMemTypeCPUOutput: temporary CPU accessible memory allocated by non-CPU execution provider, i.e. CUDA_PINNED
OrtMemTypeDefault = 0, ///< The default allocator for execution provider
} OrtMemType;
/** \brief Device type of a memory info; mimics the OrtDevice type constants so they can be returned in the API
*/
typedef enum OrtMemoryInfoDeviceType {
OrtMemoryInfoDeviceType_CPU = 0,
OrtMemoryInfoDeviceType_GPU = 1,
OrtMemoryInfoDeviceType_FPGA = 2
} OrtMemoryInfoDeviceType;
/** \brief Algorithm search strategy to use for the cuDNN Convolution Op
 */
typedef enum OrtCudnnConvAlgoSearch {
  /** expensive exhaustive benchmarking using cudnnFindConvolutionForwardAlgorithmEx */
  OrtCudnnConvAlgoSearchExhaustive = 0,
  /** lightweight heuristic based search using cudnnGetConvolutionForwardAlgorithm_v7 */
  OrtCudnnConvAlgoSearchHeuristic = 1,
  /** default algorithm using CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM */
  OrtCudnnConvAlgoSearchDefault = 2
} OrtCudnnConvAlgoSearch;
/** \brief CUDA Provider Options
*
* When compiled as C++, the constructor below sets every field to its documented default.
* In C there is no constructor, so the caller must initialize every field explicitly.
*
* \see OrtApi::SessionOptionsAppendExecutionProvider_CUDA
*/
typedef struct OrtCUDAProviderOptions {
#ifdef __cplusplus
// C++ only: member initializers give the defaults documented on each field below.
OrtCUDAProviderOptions()
: device_id{},
cudnn_conv_algo_search{OrtCudnnConvAlgoSearchExhaustive},
gpu_mem_limit{SIZE_MAX},
arena_extend_strategy{},
do_copy_in_default_stream{1},
has_user_compute_stream{},
user_compute_stream{},
default_memory_arena_cfg{},
tunable_op_enable{false},
tunable_op_tuning_enable{false},
tunable_op_max_tuning_duration_ms{} {}
#endif
/** \brief CUDA device Id
* Defaults to 0.
*/
int device_id;
/** \brief CUDA Convolution algorithm search configuration.
* See enum OrtCudnnConvAlgoSearch for more details.
* Defaults to OrtCudnnConvAlgoSearchExhaustive.
*/
OrtCudnnConvAlgoSearch cudnn_conv_algo_search;
/** \brief CUDA memory limit (To use all possible memory pass in maximum size_t)
* Defaults to SIZE_MAX.
* \note If a ::OrtArenaCfg has been applied, it will override this field
*/
size_t gpu_mem_limit;
/** \brief Strategy used to grow the memory arena
* 0 = kNextPowerOfTwo<br>
* 1 = kSameAsRequested<br>
* Defaults to 0.
* \note If a ::OrtArenaCfg has been applied, it will override this field
*/
int arena_extend_strategy;
/** \brief Flag indicating if copying needs to take place on the same stream as the compute stream in the CUDA EP
* 0 = Use separate streams for copying and compute.
* 1 = Use the same stream for copying and compute.
* Defaults to 1.
* WARNING: Setting this to 0 may result in data races for some models.
* Please see issue #4829 for more details.
*/
int do_copy_in_default_stream;
/** \brief Flag indicating if there is a user provided compute stream
* Defaults to 0.
*/
int has_user_compute_stream;
/** \brief User provided compute stream.
* If provided, please set `has_user_compute_stream` to 1.
* Defaults to null.
*/
void* user_compute_stream;
/** \brief CUDA memory arena configuration parameters
* Defaults to null.
*/
OrtArenaCfg* default_memory_arena_cfg;
/** \brief Enable use of TunableOp.
* Set it to 1/0 to enable/disable TunableOp. Otherwise, it is disabled by default.
* This option can be overridden by environment variable ORT_CUDA_TUNABLE_OP_ENABLE.
*/
int tunable_op_enable;
/** \brief Enable tuning of TunableOp.
* Set it to 1/0 to enable/disable TunableOp tuning. Otherwise, it is disabled by default.
* This option can be overridden by environment variable ORT_CUDA_TUNABLE_OP_TUNING_ENABLE.
*/
int tunable_op_tuning_enable;
/** \brief Max tuning duration time limit for each instance of TunableOp.
* Defaults to 0 to disable the limit.
*/
int tunable_op_max_tuning_duration_ms;
} OrtCUDAProviderOptions;
/** \brief ROCM Provider Options
*
* When compiled as C++, the constructor below sets every field to its documented default.
* In C there is no constructor, so the caller must initialize every field explicitly.
*
* \see OrtApi::SessionOptionsAppendExecutionProvider_ROCM
*/
typedef struct OrtROCMProviderOptions {
#ifdef __cplusplus
// C++ only: member initializers give the defaults documented on each field below.
OrtROCMProviderOptions()
: device_id{},
miopen_conv_exhaustive_search{0},
gpu_mem_limit{SIZE_MAX},
arena_extend_strategy{},
do_copy_in_default_stream{1},
has_user_compute_stream{},
user_compute_stream{},
default_memory_arena_cfg{},
enable_hip_graph{false},
tunable_op_enable{false},
tunable_op_tuning_enable{false},
tunable_op_max_tuning_duration_ms{} {}
#endif
/** \brief ROCM device Id
* Defaults to 0.
*/
int device_id;
/** \brief ROCM MIOpen Convolution algorithm exhaustive search option.
* Defaults to 0 (false).
*/
int miopen_conv_exhaustive_search;
/** \brief ROCM memory limit (To use all possible memory pass in maximum size_t)
* Defaults to SIZE_MAX.
* \note If a ::OrtArenaCfg has been applied, it will override this field
*/
size_t gpu_mem_limit;
/** \brief Strategy used to grow the memory arena
* 0 = kNextPowerOfTwo<br>
* 1 = kSameAsRequested<br>
* Defaults to 0.
* \note If a ::OrtArenaCfg has been applied, it will override this field
*/
int arena_extend_strategy;
/** \brief Flag indicating if copying needs to take place on the same stream as the compute stream in the ROCM EP
* 0 = Use separate streams for copying and compute.
* 1 = Use the same stream for copying and compute.
* Defaults to 1.
* WARNING: Setting this to 0 may result in data races for some models.
* Please see issue #4829 for more details.
*/
int do_copy_in_default_stream;
/** \brief Flag indicating if there is a user provided compute stream
* Defaults to 0.
*/
int has_user_compute_stream;
/** \brief User provided compute stream.
* If provided, please set `has_user_compute_stream` to 1.
* Defaults to null.
*/
void* user_compute_stream;
/** \brief ROCM memory arena configuration parameters
* Defaults to null.
*/
OrtArenaCfg* default_memory_arena_cfg;
/** \brief HIP graph flag.
* Defaults to 0 (false).
* NOTE(review): presumably enables HIP graph capture in the ROCM EP; confirm against the ROCM EP documentation.
*/
int enable_hip_graph;
/** \brief Enable use of TunableOp.
* Set it to 1/0 to enable/disable TunableOp. Otherwise, it is disabled by default.
* This option can be overridden by environment variable ORT_ROCM_TUNABLE_OP_ENABLE.
*/
int tunable_op_enable;
/** \brief Enable tuning of TunableOp.
* Set it to 1/0 to enable/disable TunableOp tuning. Otherwise, it is disabled by default.
* This option can be overridden by environment variable ORT_ROCM_TUNABLE_OP_TUNING_ENABLE.
*/
int tunable_op_tuning_enable;
/** \brief Max tuning duration time limit for each instance of TunableOp.
* Defaults to 0 to disable the limit.
*/
int tunable_op_max_tuning_duration_ms;
} OrtROCMProviderOptions;
/** \brief TensorRT Provider Options (legacy struct)
*
* This struct has no constructor; the caller must initialize every field.
* Integer flags use 0 for false and any nonzero value for true.
*
* \see OrtApi::SessionOptionsAppendExecutionProvider_TensorRT
*/
typedef struct OrtTensorRTProviderOptions {
int device_id; ///< CUDA device id (0 = default device)
int has_user_compute_stream; // indicator of user specified CUDA compute stream.
void* user_compute_stream; // user specified CUDA compute stream.
int trt_max_partition_iterations; // maximum iterations for TensorRT parser to get capability
int trt_min_subgraph_size; // minimum size of TensorRT subgraphs
size_t trt_max_workspace_size; // maximum workspace size for TensorRT.
int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name.
int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
int trt_dla_enable; // enable DLA. Default 0 = false, nonzero = true
int trt_dla_core; // DLA core number. Default 0
int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true
int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true
const char* trt_engine_cache_path; // specify engine cache path
int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true
const char* trt_engine_decryption_lib_path; // specify engine decryption library path
int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
// This is the legacy struct; do not add new fields here.
// For a new field that can be represented by a string, add it in include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h
// For a non-string field, create a new separate API to handle it.
} OrtTensorRTProviderOptions;
/** \brief MIGraphX Provider Options
*
* This struct has no constructor; the caller must initialize every field.
*
* \see OrtApi::SessionOptionsAppendExecutionProvider_MIGraphX
*/
typedef struct OrtMIGraphXProviderOptions {
int device_id; // hip device id.
int migraphx_fp16_enable; // MIGraphX FP16 precision. Default 0 = false, nonzero = true
int migraphx_int8_enable; // MIGraphX INT8 precision. Default 0 = false, nonzero = true
int migraphx_use_native_calibration_table; // MIGraphX INT8 cal table. Default 0 = false, nonzero = true
const char* migraphx_int8_calibration_table_name; // MIGraphX INT8 calibration table name
int migraphx_save_compiled_model; // MIGraphX save compiled model. Default 0 = false, nonzero = true
const char* migraphx_save_model_path; // MIGraphX model path name (used with migraphx_save_compiled_model)
int migraphx_load_compiled_model; // MIGraphX load compiled model. Default 0 = false, nonzero = true
const char* migraphx_load_model_path; // MIGraphX model path name (used with migraphx_load_compiled_model)
bool migraphx_exhaustive_tune; // MIGraphX tuned compile. Default = false
} OrtMIGraphXProviderOptions;
/** \brief OpenVINO Provider Options
*
* When compiled as C++, the constructor below value-initializes every field (null pointers, zeros).
* In C there is no constructor, so the caller must initialize every field explicitly.
*
* \see OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
*/
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
// C++ only: every member is value-initialized.
OrtOpenVINOProviderOptions() : device_type{},
enable_npu_fast_compile{},
device_id{},
num_of_threads{},
cache_dir{},
context{},
enable_opencl_throttling{},
enable_dynamic_shapes{} {}
#endif
/** \brief Device type string
*
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_npu_fast_compile; // NOTE(review): presumably 0 = disabled, nonzero = enabled like the flags below; confirm
const char* device_id; // device id string; the C++ constructor defaults it to null
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
void* context; // untyped context pointer; its expected type is not specified here
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
} OrtOpenVINOProviderOptions;
// Forward declarations (with typedefs) of the API structs, so that OrtApiBase::GetApi
// can return a `const OrtApi*` before the struct itself is defined.
struct OrtApi;
typedef struct OrtApi OrtApi;
struct OrtTrainingApi;
typedef struct OrtTrainingApi OrtTrainingApi;
/** \brief The helper interface to get the right version of OrtApi
*
* Get a pointer to this structure through ::OrtGetApiBase
*/
struct OrtApiBase {
/** \brief Get a pointer to the requested version of the ::OrtApi
*
* \param[in] version Must be ::ORT_API_VERSION
* \return The ::OrtApi for the version requested, nullptr will be returned if this version is unsupported, for example when using a runtime
* older than the version created with this header file. Callers should check the result for nullptr before use.
*
* One can call GetVersionString() to get the version of the Onnxruntime library for logging
* and error reporting purposes.
*/
const OrtApi*(ORT_API_CALL* GetApi)(uint32_t version)NO_EXCEPTION;
/** \brief Returns a null terminated string of the version of the Onnxruntime library (eg: "1.8.1")
*
* \return UTF-8 encoded version string. Do not deallocate the returned buffer.
*/
const char*(ORT_API_CALL* GetVersionString)(void)NO_EXCEPTION;
};
// Lets both C and C++ code refer to the struct as plain `OrtApiBase`.
typedef struct OrtApiBase OrtApiBase;
/** \brief The Onnxruntime library's entry point to access the C API
*
* Call this to get a pointer to an ::OrtApiBase, then use OrtApiBase::GetApi to obtain the ::OrtApi.
*
* \return Pointer to the ::OrtApiBase structure
*/
ORT_EXPORT const OrtApiBase* ORT_API_CALL OrtGetApiBase(void) NO_EXCEPTION;
/** \brief Thread work loop function
*
* Onnxruntime will provide the working loop on custom thread creation
* Argument is an onnxruntime built-in type which will be provided when thread pool calls OrtCustomCreateThreadFn
*/
typedef void (*OrtThreadWorkerFn)(void* ort_worker_fn_param);
/** \brief Handle type for a custom-created thread
*
* A pointer to a const placeholder struct whose only member is a single char.
* Values of this type are returned by OrtCustomCreateThreadFn and passed to OrtCustomJoinThreadFn.
*/
typedef const struct OrtCustomHandleType {
char __place_holder;
}* OrtCustomThreadHandle;
/** \brief Ort custom thread creation function
*
* The function should return a thread handle to be used in onnxruntime thread pools
* Onnxruntime will throw exception on return value of nullptr or 0, indicating that the function failed to create a thread
*
* \param[in] ort_custom_thread_creation_options User supplied options pointer
* \param[in] ort_thread_worker_fn The work loop function provided by onnxruntime
* \param[in] ort_worker_fn_param The argument for `ort_thread_worker_fn`, provided by onnxruntime
* \return Handle of the created thread, or nullptr/0 on failure
*/
typedef OrtCustomThreadHandle (*OrtCustomCreateThreadFn)(void* ort_custom_thread_creation_options, OrtThreadWorkerFn ort_thread_worker_fn, void* ort_worker_fn_param);
/** \brief Custom thread join function
*
* Onnxruntime thread pool destructor will call the function to join a custom thread.
*
* \param[in] ort_custom_thread_handle The value returned by OrtCustomCreateThreadFn
*/
typedef void (*OrtCustomJoinThreadFn)(OrtCustomThreadHandle ort_custom_thread_handle);
/** \brief Signature of a custom-op registration function
*
* \param[in] options Session options passed to the registration function
* \param[in] api The ::OrtApiBase through which the function can obtain the ::OrtApi
* \return An OrtStatus pointer; a null status indicates success
*/
typedef OrtStatus*(ORT_API_CALL* RegisterCustomOpsFn)(OrtSessionOptions* options, const OrtApiBase* api);
/** \brief Callback function for RunAsync
*
* All arguments are supplied by the caller of the callback.
*
* \param[in] user_data User specific data that is passed back to the callback
* \param[out] outputs On success, the inference results; on error, the value will be nullptr
* \param[out] num_outputs Number of outputs; on error, the value will be zero
* \param[out] status On error, status will provide details
*/
typedef void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t num_outputs, OrtStatusPtr status);
/** \brief The C API
*
* All C API functions are defined inside this structure as pointers to functions.
* Call OrtApiBase::GetApi to get a pointer to it
*
* \nosubgrouping
*/
struct OrtApi {
/// \name OrtStatus
/// @{
/**
* \brief Create an OrtStatus from a null terminated string
*
* \param[in] code
* \param[in] msg A null-terminated string. Its contents will be copied.
* \return A new OrtStatus object, must be destroyed with OrtApi::ReleaseStatus
*/
OrtStatus*(ORT_API_CALL* CreateStatus)(OrtErrorCode code, _In_ const char* msg)NO_EXCEPTION ORT_ALL_ARGS_NONNULL;
/** \brief Get OrtErrorCode from OrtStatus
*
* \param[in] status
* \return OrtErrorCode that \p status was created with
*/
OrtErrorCode(ORT_API_CALL* GetErrorCode)(_In_ const OrtStatus* status) NO_EXCEPTION ORT_ALL_ARGS_NONNULL;
/** \brief Get error string from OrtStatus
*
* \param[in] status
* \return The error message inside the `status`. Do not free the returned value.
*/
const char*(ORT_API_CALL* GetErrorMessage)(_In_ const OrtStatus* status)NO_EXCEPTION ORT_ALL_ARGS_NONNULL;
/// @}
/// \name OrtEnv
/// @{
/** \brief Create an OrtEnv
*
* \note If an environment was already returned by a previous call to an env creation function, invoking this
* function returns that same instance and all arguments to this function are ignored.
* \param[in] log_severity_level The log severity level.
* \param[in] logid The log identifier.
* \param[out] out Returned newly created OrtEnv. Must be freed with OrtApi::ReleaseEnv
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(CreateEnv, OrtLoggingLevel log_severity_level, _In_ const char* logid, _Outptr_ OrtEnv** out);
/** \brief Create an OrtEnv that logs through a user supplied logging function
*
* \note If an environment was already returned by a previous call to an env creation function, invoking this
* function returns that same instance and all arguments to this function are ignored. If you want to provide your
* own logging function, consider setting it using the SetUserLoggingFunction API instead.
* \param[in] logging_function A pointer to a logging function.
* \param[in] logger_param A pointer to arbitrary data passed as the ::OrtLoggingFunction `param` parameter to
* `logging_function`. This parameter is optional.
* \param[in] log_severity_level The log severity level.
* \param[in] logid The log identifier.
* \param[out] out Returned newly created OrtEnv. Must be freed with OrtApi::ReleaseEnv
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(CreateEnvWithCustomLogger, _In_ OrtLoggingFunction logging_function, _In_opt_ void* logger_param,
_In_ OrtLoggingLevel log_severity_level, _In_ const char* logid, _Outptr_ OrtEnv** out);
/** \brief Enable Telemetry
*
* \note Telemetry events are on by default since they are lightweight
* \see OrtApi::DisableTelemetryEvents
* \param[in] env The ::OrtEnv instance
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(EnableTelemetryEvents, _In_ const OrtEnv* env);
/** \brief Disable Telemetry
*
* \see OrtApi::EnableTelemetryEvents
* \param[in] env The ::OrtEnv instance
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(DisableTelemetryEvents, _In_ const OrtEnv* env);
/// @}
/// \name OrtSession
/// @{
/** \brief Create an OrtSession from a model file
*
* \param[in] env The ::OrtEnv instance
* \param[in] model_path Path of the model file to create the session from
* \param[in] options The ::OrtSessionOptions to create the session with
* \param[out] out Returned newly created OrtSession. Must be freed with OrtApi::ReleaseSession
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
// TODO: document the path separator convention? '/' vs '\'
// TODO: should specify the access characteristics of model_path. Is this read only during the
// execution of CreateSession, or does the OrtSession retain a handle to the file/directory
// and continue to access throughout the OrtSession lifetime?
// What sort of access is needed to model_path : read or read/write?
ORT_API2_STATUS(CreateSession, _In_ const OrtEnv* env, _In_ const ORTCHAR_T* model_path,
_In_ const OrtSessionOptions* options, _Outptr_ OrtSession** out);
/** \brief Create an OrtSession from memory
*
* \param[in] env The ::OrtEnv instance
* \param[in] model_data Pointer to the in-memory model data
* \param[in] model_data_length Length of \p model_data (NOTE(review): presumably in bytes - confirm)
* \param[in] options The ::OrtSessionOptions to create the session with
* \param[out] out Returned newly created OrtSession. Must be freed with OrtApi::ReleaseSession
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(CreateSessionFromArray, _In_ const OrtEnv* env, _In_ const void* model_data, size_t model_data_length,
_In_ const OrtSessionOptions* options, _Outptr_ OrtSession** out);
/** \brief Run the model in an ::OrtSession
*
* Will not return until the model run has completed. Multiple threads might be used to run the model based on
* the options in the ::OrtSession and settings used when creating the ::OrtEnv
*
* \param[in] session The ::OrtSession to run
* \param[in] run_options If nullptr, will use a default ::OrtRunOptions
* \param[in] input_names Array of null terminated UTF8 encoded strings of the input names
* \param[in] inputs Array of ::OrtValue%s of the input values
* \param[in] input_len Number of elements in the input_names and inputs arrays
* \param[in] output_names Array of null terminated UTF8 encoded strings of the output names
* \param[in] output_names_len Number of elements in the output_names and outputs arrays
* \param[out] outputs Array of ::OrtValue%s that the outputs are stored in. This can also be
* an array of nullptr values, in which case ::OrtValue objects will be allocated and pointers
* to them will be set into the `outputs` array.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(Run, _Inout_ OrtSession* session, _In_opt_ const OrtRunOptions* run_options,
_In_reads_(input_len) const char* const* input_names,
_In_reads_(input_len) const OrtValue* const* inputs, size_t input_len,
_In_reads_(output_names_len) const char* const* output_names, size_t output_names_len,
_Inout_updates_all_(output_names_len) OrtValue** outputs);
/// @}
/// \name OrtSessionOptions
/// @{
/** \brief Create an ::OrtSessionOptions object
*
* To use additional providers, you must build ORT with the extra providers enabled. Then call one of these
* functions to enable them in the session:<br>
* OrtSessionOptionsAppendExecutionProvider_CPU<br>
* OrtSessionOptionsAppendExecutionProvider_CUDA<br>
* OrtSessionOptionsAppendExecutionProvider_(remaining providers...)<br>
* The order in which they are called indicates the preference order as well. In other words, call the function
* for your most preferred execution provider first, followed by the less preferred ones.
* If none are called, Ort will use its internal CPU execution provider.
*
* \param[out] options The newly created OrtSessionOptions. Must be freed with OrtApi::ReleaseSessionOptions
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(CreateSessionOptions, _Outptr_ OrtSessionOptions** options);
/** \brief Set filepath to save optimized model after graph level transformations
*
* \param[in] options The ::OrtSessionOptions to modify
* \param[in] optimized_model_filepath File path to save the optimized model to
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(SetOptimizedModelFilePath, _Inout_ OrtSessionOptions* options,
_In_ const ORTCHAR_T* optimized_model_filepath);
/** \brief Create a copy of an existing ::OrtSessionOptions
*
* \param[in] in_options The ::OrtSessionOptions to copy
* \param[out] out_options Returned newly created ::OrtSessionOptions. Must be freed with OrtApi::ReleaseSessionOptions
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(CloneSessionOptions, _In_ const OrtSessionOptions* in_options,
_Outptr_ OrtSessionOptions** out_options);
/** \brief Set execution mode
*
* Controls whether you want to execute operators in your graph sequentially or in parallel. Usually when the model
* has many branches, setting this option to ExecutionMode.ORT_PARALLEL will give you better performance.
* See [docs/ONNX_Runtime_Perf_Tuning.md] for more details.
*
* \param[in] options The ::OrtSessionOptions to modify
* \param[in] execution_mode The ::ExecutionMode to set
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(SetSessionExecutionMode, _Inout_ OrtSessionOptions* options, ExecutionMode execution_mode);
/** \brief Enable profiling for a session
*
* \see OrtApi::DisableProfiling
*
* \param[in] options The ::OrtSessionOptions to modify
* \param[in] profile_file_prefix Prefix for the profile file (NOTE(review): presumably the prefix of the
* generated profile file name - confirm)
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(EnableProfiling, _Inout_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix);
/** \brief Disable profiling for a session
*
* \see OrtApi::EnableProfiling
*
* \param[in] options The ::OrtSessionOptions to modify
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(DisableProfiling, _Inout_ OrtSessionOptions* options);
/** \brief Enable the memory pattern optimization
*
* The idea is that if the input shapes are the same, we can trace the internal memory allocations
* and generate a memory pattern for future requests. The next time, a single allocation
* of one big chunk can then serve all the internal memory allocations.
* \note Memory pattern optimization is only available when Sequential Execution mode is enabled (see OrtApi::SetSessionExecutionMode)
*
* \see OrtApi::DisableMemPattern
*
* \param[in] options The ::OrtSessionOptions to modify
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(EnableMemPattern, _Inout_ OrtSessionOptions* options);
/** \brief Disable the memory pattern optimization
*
* \see OrtApi::EnableMemPattern
*
* \param[in] options The ::OrtSessionOptions to modify
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(DisableMemPattern, _Inout_ OrtSessionOptions* options);
/** \brief Enable the memory arena on CPU
*
* Arena may pre-allocate memory for future usage.
*
* \see OrtApi::DisableCpuMemArena
*
* \param[in] options The ::OrtSessionOptions to modify
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(EnableCpuMemArena, _Inout_ OrtSessionOptions* options);
/** \brief Disable the memory arena on CPU
*
* \see OrtApi::EnableCpuMemArena
*
* \param[in] options The ::OrtSessionOptions to modify
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(DisableCpuMemArena, _Inout_ OrtSessionOptions* options);
/** \brief Set session log id
*
* \param[in] options The ::OrtSessionOptions to modify
* \param[in] logid The log identifier.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*/
ORT_API2_STATUS(SetSessionLogId, _Inout_ OrtSessionOptions* options, const char* logid);
/** \brief Set session log verbosity level
*
* Applies to session load, initialization, etc
*
* \param[in] options
* \param[in] session_log_verbosity_level \snippet{doc} snippets.dox Log Verbosity Level