Skip to content

Commit 49def34

Browse files
authored
Update TensorRT-LLM backend (#362)
* Update TensorRT-LLM backend
1 parent 5b5eb96 commit 49def34

File tree

5 files changed: 7 additions and 5 deletions

inflight_batcher_llm/src/model_instance_state.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ std::list<std::shared_ptr<InferenceRequest>> ModelInstanceState::get_inference_r
405 405   mHasActiveRequests = (num_new_work_items > 0 || mBatchManager->getNumActiveRequests() > 0);
406 406   if (mHasActiveRequests)
407 407   {
408     - commSession.bcast(num_new_work_items, 0);
    408 + commSession.bcastValue(num_new_work_items, 0);
409 409   }
410 410
411 411   if (num_new_work_items > 0)
@@ -425,7 +425,7 @@ std::list<std::shared_ptr<InferenceRequest>> ModelInstanceState::get_inference_r
425 425   {
426 426   // subordinate ranks hang until master rank sends work
427 427   int64_t num_new_work_items;
428     - commSession.bcast(num_new_work_items, 0);
    428 + commSession.bcastValue(num_new_work_items, 0);
429 429   mHasActiveRequests = (num_new_work_items > 0 || mBatchManager->getNumActiveRequests() > 0);
430 430   if (num_new_work_items > 0)
431 431   {
@@ -483,7 +483,7 @@ std::unordered_set<uint64_t> ModelInstanceState::pollStopSignals()
483 483   if (commSession.getSize() > 1 && mHasActiveRequests)
484 484   {
485 485   // Broadcast number of stopped requests
486     - commSession.bcast(nStoppedReqIds, 0);
    486 + commSession.bcastValue(nStoppedReqIds, 0);
487 487
488 488   if (nStoppedReqIds > 0)
489 489   {

inflight_batcher_llm/src/model_instance_state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
 40  40   #include "tensorrt_llm/batch_manager/namedTensor.h"
 41  41   #include "tensorrt_llm/batch_manager/schedulerPolicy.h"
 42  42   #include "tensorrt_llm/batch_manager/trtGptModelOptionalParams.h"
     43 + #include "tensorrt_llm/runtime/decodingMode.h"
 43  44
 44  45   #include "model_state.h"
 45  46   #include "work_item.h"

inflight_batcher_llm/src/work_item.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
 25  25   // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26  26
 27  27   #include "work_item.h"
     28 + #include <map>
 28  29
 29  30   namespace triton::backend::inflight_batcher_llm
 30  31   {

tensorrt_llm

Submodule tensorrt_llm updated 164 files

tools/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
  1     - 721a579afde43dd2e2037153da244baac6eedd29
      1 + 6ffd999ccac3393b80e94753a8d2bc302fb70005

0 commit comments

Comments
 (0)