diff --git a/.gitmodules b/.gitmodules
index 0e5eaa77..70ad46f3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "tensorrt_llm"]
 	path = tensorrt_llm
-	url = git@github.com:NVIDIA/TensorRT-LLM.git
+	url = https://github.com/NVIDIA/TensorRT-LLM.git
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9ee1c078..4fffede7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -40,3 +40,9 @@ repos:
   rev: v0.6.10
   hooks:
   - id: cmake-format
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.2.4
+  hooks:
+  - id: codespell
+    args:
+      - --skip=".git,tensorrt_llm"
diff --git a/README.md b/README.md
index a899fd77..53583bf7 100644
--- a/README.md
+++ b/README.md
@@ -363,7 +363,7 @@ You might have to contact your cluster's administrator to help you customize the
 ### Kill the Triton server
 
 ```bash
-pgrep tritonserver | xargs kill -9
+pkill tritonserver
 ```
 
 ## Testing the TensorRT-LLM Backend
diff --git a/all_models/gpt/tensorrt_llm/1/model.py b/all_models/gpt/tensorrt_llm/1/model.py
index 3d036efd..55cab37b 100644
--- a/all_models/gpt/tensorrt_llm/1/model.py
+++ b/all_models/gpt/tensorrt_llm/1/model.py
@@ -242,7 +242,7 @@ def execute(self, requests):
             # response:
             #
             # pb_utils.InferenceResponse(
-            #    output_tensors=..., TritonError("An error occured"))
+            #    output_tensors=..., TritonError("An error occurred"))
 
             inference_response = pb_utils.InferenceResponse(output_tensors)
         else:
diff --git a/dockerfile/Dockerfile.trt_llm_backend b/dockerfile/Dockerfile.trt_llm_backend
index 7826a746..8fc565fc 100644
--- a/dockerfile/Dockerfile.trt_llm_backend
+++ b/dockerfile/Dockerfile.trt_llm_backend
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends rapidjson-dev p
 COPY requirements.txt /tmp/
 RUN pip3 install -r /tmp/requirements.txt --extra-index-url https://pypi.ngc.nvidia.com
 
-# Remove prevous TRT installation
+# Remove previous TRT installation
 # We didn't remove libnvinfer* here because tritonserver depends on the pre-installed libraries.
 RUN apt-get remove --purge -y tensorrt*
 RUN pip uninstall -y tensorrt
diff --git a/inflight_batcher_llm/CMakeLists.txt b/inflight_batcher_llm/CMakeLists.txt
index d8b2c74e..f08e8015 100644
--- a/inflight_batcher_llm/CMakeLists.txt
+++ b/inflight_batcher_llm/CMakeLists.txt
@@ -30,7 +30,7 @@ set(TRITON_BUILD
 if(TRITON_BUILD)
   set_ifndef(TRTLLM_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm)
 
-  # Install build time dependencies. This section is excuted during cmake
+  # Install build time dependencies. This section is executed during cmake
   # configure time.
   execute_process(
     COMMAND bash -x ./tools/environment_setup.sh
diff --git a/inflight_batcher_llm/src/libtensorrtllm.cc b/inflight_batcher_llm/src/libtensorrtllm.cc
index 39dc133b..c68b940d 100644
--- a/inflight_batcher_llm/src/libtensorrtllm.cc
+++ b/inflight_batcher_llm/src/libtensorrtllm.cc
@@ -858,7 +858,6 @@ class ModelInstanceState
             packed.insert(
                 packed.end(), std::move_iterator(vpacked.begin()), std::move_iterator(vpacked.end()));
         }
-        int64_t nWords1 = static_cast<int64_t>(packed.size());
         bcast(packed, 0, COMM_WORLD);
     }
 }
@@ -1128,7 +1127,7 @@ class ModelInstanceState
         TLLM_LOG_WARNING("max_num_sequences is not specified, will be set to the TRT engine max_batch_size");
     }
 
-    std::optional<bool> enableTrtOverlap = std::nullopt;
+    bool enableTrtOverlap = true;
     try
     {
         enableTrtOverlap = model_state_->GetParameter<bool>("enable_trt_overlap");
@@ -1139,8 +1138,11 @@ class ModelInstanceState
         TLLM_LOG_WARNING("enable_trt_overlap is not specified, will be set to true");
     }
 
-    TrtGptModelOptionalParams optionalParams(
-        maxNumSequences, maxTokensInPagedKvCache, kvCacheFreeGpuMemFraction, enableTrtOverlap);
+    TrtGptModelOptionalParams optionalParams;
+    optionalParams.maxNumSequences = maxNumSequences;
+    optionalParams.kvCacheConfig.maxTokens = maxTokensInPagedKvCache;
+    optionalParams.kvCacheConfig.freeGpuMemoryFraction = kvCacheFreeGpuMemFraction;
+    optionalParams.enableTrtOverlap = enableTrtOverlap;
 
     mBatchManager = std::make_shared<GptManager>(
         mModelPath, mTrtGptModelType, maxBeamWidth, schedulerPolicy,
diff --git a/scripts/launch_triton_server.py b/scripts/launch_triton_server.py
index 2efec7e8..a2cc2ac7 100644
--- a/scripts/launch_triton_server.py
+++ b/scripts/launch_triton_server.py
@@ -1,5 +1,6 @@
 import argparse
 import subprocess
+import sys
 from pathlib import Path
 
 
@@ -9,9 +10,18 @@ def parse_arguments():
                         type=int,
                         default=1,
                         help='world size, only support tensor parallelism now')
-    parser.add_argument('--tritonserver',
-                        type=str,
-                        default='/opt/tritonserver/bin/tritonserver')
+    parser.add_argument(
+        '--tritonserver',
+        type=str,
+        help='path to the tritonserver exe',
+        default='/opt/tritonserver/bin/tritonserver',
+    )
+    parser.add_argument(
+        '--force',
+        '-f',
+        action='store_true',
+        help='launch tritonserver regardless of other instances running')
+
     path = str(Path(__file__).parent.absolute()) + '/../all_models/gpt'
     parser.add_argument('--model_repo', type=str, default=path)
     return parser.parse_args()
@@ -30,13 +40,15 @@
 
 if __name__ == '__main__':
     args = parse_arguments()
-    res = subprocess.run(['pgrep', 'tritonserver'],
+    res = subprocess.run(['pgrep', '-r', 'R', 'tritonserver'],
                          capture_output=True,
                          encoding='utf-8')
     if res.stdout:
         pids = res.stdout.replace('\n', ' ').rstrip()
-        raise RuntimeError(
-            f'tritonserver process(es) already found with PID(s): {pids}.\n\tUse `kill {pids}` to stop them.'
-        )
+        msg = f'tritonserver process(es) already found with PID(s): {pids}.\n\tUse `kill {pids}` to stop them.'
+        if args.force:
+            print(msg, file=sys.stderr)
+        else:
+            raise RuntimeError(msg + ' Or use --force.')
     cmd = get_cmd(int(args.world_size), args.tritonserver, args.model_repo)
     subprocess.Popen(cmd)
diff --git a/tensorrt_llm b/tensorrt_llm
index d8b408e6..4de32a86 160000
--- a/tensorrt_llm
+++ b/tensorrt_llm
@@ -1 +1 @@
-Subproject commit d8b408e6dcc1d45982a8b94399cd74b78f80befa
+Subproject commit 4de32a86ae92bc49a7ec17c00ec2f2d03663c198
diff --git a/tools/environment_setup.sh b/tools/environment_setup.sh
index 4367dbe1..d799f950 100644
--- a/tools/environment_setup.sh
+++ b/tools/environment_setup.sh
@@ -33,7 +33,7 @@
 git lfs install
 
 pip3 install -r requirements.txt --extra-index-url https://pypi.ngc.nvidia.com
 
-# Remove prevous TRT installation
+# Remove previous TRT installation
 apt-get remove --purge -y tensorrt* libnvinfer*
 pip uninstall -y tensorrt
diff --git a/tools/fill_template.py b/tools/fill_template.py
index cb298b31..0524f9ef 100644
--- a/tools/fill_template.py
+++ b/tools/fill_template.py
@@ -27,7 +27,7 @@ def main(file_path, substitutions, in_place):
     parser.add_argument(
         "substitutions",
         help=
-        "substitions to perform, in the format variable_name_1:value_1,variable_name_2:value_2..."
+        "substitutions to perform, in the format variable_name_1:value_1,variable_name_2:value_2..."
    )
    parser.add_argument("--in_place",
                        "-i",
diff --git a/tools/gen_trtllm_dockerfile.py b/tools/gen_trtllm_dockerfile.py
index 922fe11e..9cc9032d 100644
--- a/tools/gen_trtllm_dockerfile.py
+++ b/tools/gen_trtllm_dockerfile.py
@@ -33,7 +33,7 @@ def install_new_version_of_TRT(clone_repo=False, trtllm_be_repo_tag="main"):
 
     df = """
-# Remove prevous TRT installation
+# Remove previous TRT installation
 RUN apt-get remove --purge -y tensorrt* libnvinfer*
 RUN pip uninstall -y tensorrt
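
As a usage sketch (not part of the patch): the `--force` flag added to `scripts/launch_triton_server.py` above turns the pre-launch PID check from a hard error into a stderr warning. The invocations below use only flags and defaults visible in the diff.

```bash
# Default behavior: raise a RuntimeError and refuse to launch if pgrep
# finds a tritonserver process already in the running state.
python3 scripts/launch_triton_server.py --world_size 1

# With --force (or -f): print the PID warning to stderr and launch anyway.
python3 scripts/launch_triton_server.py --world_size 1 --force
```

The README change follows the same spirit on shutdown: `pkill tritonserver` sends SIGTERM by default, giving the server a chance to exit cleanly, whereas the old `pgrep tritonserver | xargs kill -9` delivered an uncatchable SIGKILL.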