diff --git a/README.md b/README.md
index 865ce0cc..799d60cb 100644
--- a/README.md
+++ b/README.md
@@ -220,7 +220,7 @@ The following table shows the fields that may to be modified before deployment:
 | `max_attention_window_size` | Optional (default=max_sequence_length). When using techniques like sliding window attention, the maximum number of tokens that are attended to generate one token. Defaults attends to all tokens in sequence. |
 | `kv_cache_free_gpu_mem_fraction` | Optional (default=0.9). Set to a number between 0 and 1 to indicate the maximum fraction of GPU memory (after loading the model) that may be used for KV cache.|
 | `max_num_sequences` | Optional (default=`max_batch_size` if `enable_trt_overlap` is `false` and to `2 * max_batch_size` if `enable_trt_overlap` is `true`, where `max_batch_size` is the TRT engine maximum batch size). Maximum number of sequences that the in-flight batching scheme can maintain state for.
-| `enable_trt_overlap` | Optional (default=`true`). Set to `true` to partition available requests into 2 'microbatches' that can be run concurrently to hide exposed CPU runtime |
+| `enable_trt_overlap` | Optional (default=`false`). Set to `true` to partition available requests into 2 'microbatches' that can be run concurrently to hide exposed CPU runtime |
 | `exclude_input_in_output` | Optional (default=`false`). Set to `true` to only return completion tokens in a response. Set to `false` to return the prompt tokens concatenated with the generated tokens |
 | `normalize_log_probs` | Optional (default=`true`). Set to `false` to skip normalization of `output_log_probs` |
 
diff --git a/dockerfile/Dockerfile.trt_llm_backend b/dockerfile/Dockerfile.trt_llm_backend
index 158e80aa..0fb2d027 100644
--- a/dockerfile/Dockerfile.trt_llm_backend
+++ b/dockerfile/Dockerfile.trt_llm_backend
@@ -1,5 +1,5 @@
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver
-ARG BASE_TAG=23.10-py3
+ARG BASE_TAG=23.12-py3
 
 FROM ${BASE_IMAGE}:${BASE_TAG} as base
 
diff --git a/inflight_batcher_llm/README.md b/inflight_batcher_llm/README.md
index 0664d814..fe040d7a 100644
--- a/inflight_batcher_llm/README.md
+++ b/inflight_batcher_llm/README.md
@@ -88,25 +88,25 @@ parameters: {
 }
 ```
 
-By default, in-flight batching will try to overlap the execution of batches of
+In-flight batching is able to overlap the execution of batches of
 requests. It may have a negative impact on performance when the number of
-requests is too small. To disable that feature, set the `enable_trt_overlap`
-parameter to `False` in the `config.pbtxt` file:
+requests is too small. To enable that feature, set the `enable_trt_overlap`
+parameter to `True` in the `config.pbtxt` file:
 
 ```
 parameters: {
   key: "enable_trt_overlap"
   value: {
-    string_value: "False"
+    string_value: "True"
   }
 }
 ```
 
-Or, equivalently, add `enable_trt_overlap:False` to the invocation of the
+Or, equivalently, add `enable_trt_overlap:True` to the invocation of the
 `fill_template.py` tool:
 
 ```bash
-python3 tools/fill_template.py -i all_models/inflight_batcher_llm/tensorrt_llm/config.pbtxt "enable_trt_overlap:False"
+python3 tools/fill_template.py -i all_models/inflight_batcher_llm/tensorrt_llm/config.pbtxt "enable_trt_overlap:True"
 ```
 
 To reuse previously computed KV cache values (e.g. for system prompt), set `enable_kv_cache_reuse`
diff --git a/inflight_batcher_llm/src/model_instance_state.cc b/inflight_batcher_llm/src/model_instance_state.cc
index 73d60eb1..0aa4b420 100644
--- a/inflight_batcher_llm/src/model_instance_state.cc
+++ b/inflight_batcher_llm/src/model_instance_state.cc
@@ -174,7 +174,7 @@ ModelInstanceState::ModelInstanceState(ModelState* model_state, TRITONBACKEND_Mo
         TLLM_LOG_WARNING("max_num_sequences is not specified, will be set to the TRT engine max_batch_size");
     }
 
-    bool enableTrtOverlap = true;
+    bool enableTrtOverlap = false;
     try
     {
         enableTrtOverlap = model_state_->GetParameter<bool>("enable_trt_overlap");
@@ -182,7 +182,7 @@ ModelInstanceState::ModelInstanceState(ModelState* model_state, TRITONBACKEND_Mo
     catch (const std::exception& e)
     {
         // If parameter is not specified, just ignore
-        TLLM_LOG_WARNING("enable_trt_overlap is not specified, will be set to true");
+        TLLM_LOG_WARNING("enable_trt_overlap is not specified, will be set to false");
     }
 
     bool normalizeLogProbs = true;
diff --git a/requirements.txt b/requirements.txt
index 09b93670..de1735f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 regex
 fire
 tritonclient[all]
-transformers==4.31.0
+transformers==4.36.1
 pandas
 tabulate
diff --git a/tensorrt_llm b/tensorrt_llm
index d879430b..c8965302 160000
--- a/tensorrt_llm
+++ b/tensorrt_llm
@@ -1 +1 @@
-Subproject commit d879430b040ea30c65220c9171a8a70607398748
+Subproject commit c89653021e66ca78c55f02b366f404455bc12e8d
diff --git a/tools/version.txt b/tools/version.txt
index 79f0a3e7..24b8df30 100644
--- a/tools/version.txt
+++ b/tools/version.txt
@@ -1 +1 @@
-77a564a261cdb68c9091ac04c87d5c704da48da5
+ad7d4adac6bebead80be01388b94d1f57a50245a
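
With the `enable_trt_overlap` default now `false`, overlap only needs to be set explicitly when it is wanted. A minimal sketch of doing so while preparing a model repository, assuming the `all_models/inflight_batcher_llm` layout used in the patched README above:

```bash
# Turn overlap on in the generated config (the default now leaves it off),
# then confirm the parameter block was filled in as expected.
python3 tools/fill_template.py -i all_models/inflight_batcher_llm/tensorrt_llm/config.pbtxt \
    "enable_trt_overlap:True"
grep -A 3 "enable_trt_overlap" all_models/inflight_batcher_llm/tensorrt_llm/config.pbtxt
```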