diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md
index 9fba550e0a558..7a8f3bb9bb28e 100644
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -1839,7 +1839,7 @@ This version of the operator has been available since version 1 of the 'com.micr
### **com.microsoft.LongformerAttention**
Longformer Self Attention with a local context and a global context. Tokens attend locally: Each token
- attends to its W previous tokens and W succeding tokens with W being the window length. A selected few tokens
+ attends to its W previous tokens and W succeeding tokens with W being the window length. A selected few tokens
attend globally to all other tokens.
The attention mask is of shape (batch_size, sequence_length), where sequence_length is a multiple of 2W after padding.
@@ -2723,7 +2723,7 @@ This version of the operator has been available since version 1 of the 'com.micr
- T : tensor(uint8), tensor(int8)
-- Constrain input and output types to singed/unsigned int8 tensors.
+- Constrain input and output types to signed/unsigned int8 tensors.
@@ -2965,7 +2965,7 @@ This version of the operator has been available since version 1 of the 'com.micr
- T : tensor(uint8), tensor(int8)
-- Constrain input and output types to singed/unsigned int8 tensors.
+- Constrain input and output types to signed/unsigned int8 tensors.
@@ -4002,9 +4002,9 @@ This version of the operator has been available since version 1 of the 'com.micr
char_embedding_size : int
Integer representing the embedding vector size for each char.If not provide, use the char embedding size of embedding vector.
conv_window_size : int
-This operator applies convolution to word from left to right with window equal to conv_window_size and stride to 1.Take word 'example' for example, with conv_window_size equal to 2, conv is applied to [ex],[xa], [am], [mp]...If not provide, use the first dimension of conv kernal shape.
+This operator applies convolution to word from left to right with window equal to conv_window_size and stride to 1.Take word 'example' for example, with conv_window_size equal to 2, conv is applied to [ex],[xa], [am], [mp]...If not provided, use the first dimension of conv kernel shape.
embedding_size : int
-Integer representing the embedding vector size for each word.If not provide, use the fileter size of conv weight
+Integer representing the embedding vector size for each word.If not provided, use the filter size of conv weight
#### Inputs
diff --git a/docs/cmake_guideline.md b/docs/cmake_guideline.md
index d3f0f85a4d742..e03706476d73f 100644
--- a/docs/cmake_guideline.md
+++ b/docs/cmake_guideline.md
@@ -157,7 +157,7 @@ CMAKE_HOST_SYSTEM_PROCESSOR is the one you should use.
What are the valid values:
- macOS: it can be x86_64 or arm64. (maybe it could also be arm64e but cmake forgot to document that)
-- Linux: i686, x86_64, aarch64, armv7l, ... The possible values for `uname -m` command. They sightly differ from what you can get from GCC. This sometimes confuses people: `cmake` and `uname` sit in one boat, GCC is in another boat but GCC is closer to your C/C++ source code.
+- Linux: i686, x86_64, aarch64, armv7l, ... The possible values for the `uname -m` command. They slightly differ from what you can get from GCC. This sometimes confuses people: `cmake` and `uname` sit in one boat, GCC is in another boat, but GCC is closer to your C/C++ source code.
- Windows: AMD64, ...
- Android/iOS/...: we don't care. We don't use them as a development environment.
diff --git a/docs/python/inference/api_summary.rst b/docs/python/inference/api_summary.rst
index c6d1f4868e7b5..4f82e1242d6de 100644
--- a/docs/python/inference/api_summary.rst
+++ b/docs/python/inference/api_summary.rst
@@ -265,7 +265,7 @@ Internal classes
----------------
These classes cannot be instantiated by users but they are returned
-by methods or functions of this libary.
+by methods or functions of this library.
ModelMetadata
^^^^^^^^^^^^^
diff --git a/onnxruntime/core/graph/contrib_ops/bert_defs.cc b/onnxruntime/core/graph/contrib_ops/bert_defs.cc
index 1235527e57243..acffd55831071 100644
--- a/onnxruntime/core/graph/contrib_ops/bert_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/bert_defs.cc
@@ -100,7 +100,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Attention, 1,
constexpr const char* Longformer_Attention_doc = R"DOC(
Longformer Self Attention with a local context and a global context. Tokens attend locally: Each token
-attends to its W previous tokens and W succeding tokens with W being the window length. A selected few tokens
+attends to its W previous tokens and W succeeding tokens with W being the window length. A selected few tokens
attend globally to all other tokens.
The attention mask is of shape (batch_size, sequence_length), where sequence_length is a multiple of 2W after padding.
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 921d44716f12b..309e1f4ddf014 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -1756,14 +1756,14 @@ ONNX_MS_OPERATOR_SET_SCHEMA(WordConvEmbedding, 1,
.Attr(
"embedding_size",
"Integer representing the embedding vector size for each word."
- "If not provide, use the fileter size of conv weight",
+ "If not provided, use the filter size of conv weight",
AttributeProto::INT,
OPTIONAL_VALUE)
.Attr(
"conv_window_size",
"This operator applies convolution to word from left to right with window equal to conv_window_size and stride to 1."
"Take word 'example' for example, with conv_window_size equal to 2, conv is applied to [ex],[xa], [am], [mp]..."
- "If not provide, use the first dimension of conv kernal shape.",
+ "If not provided, use the first dimension of conv kernel shape.",
AttributeProto::INT,
OPTIONAL_VALUE)
.Attr(
diff --git a/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc b/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc
index 65e37394e1f83..c5d5ea5a2e413 100644
--- a/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/nhwc_schema_defs.cc
@@ -71,7 +71,7 @@ equal to the spatial dimension of input tensor. Input is of type uint8_t or int8
"dimensions are all 1.",
"T")
.TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"},
- "Constrain input and output types to singed/unsigned int8 tensors.")
+ "Constrain input and output types to signed/unsigned int8 tensors.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 0, 0);
diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
index 62c0d6da3c88e..66096eaf252c6 100644
--- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
@@ -617,7 +617,7 @@ The output tensor has the same shape.
"tensor. The output tensor has the same rank as the input. ",
"T")
.TypeConstraint("T", {"tensor(uint8)", "tensor(int8)"},
- "Constrain input and output types to singed/unsigned int8 tensors.")
+ "Constrain input and output types to signed/unsigned int8 tensors.")
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
// Type inference
propagateElemTypeFromInputToOutput(ctx, 0, 0);