@@ -98,10 +98,6 @@ class ModelState : public BackendModel {
     return enable_jit_executor_pair_;
   }
   bool EnabledInferenceMode() { return enable_inference_mode_; }
-  const std::pair<bool, bool>& EnabledNvfuserPair() const
-  {
-    return enable_nvfuser_pair_;
-  }
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }
 
   bool EnabledWeightSharing() { return enable_weight_sharing_; }
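The accessor removed in this hunk follows the backend's (set, value) flag-pair convention: the first element records whether the parameter was explicitly set in the model config, the second carries the requested value. A minimal sketch of that convention, using illustrative names rather than the backend's actual members:

```cpp
#include <utility>

// (set, value): "set" records whether the config supplied the parameter,
// "value" is what to apply. These names are illustrative, not the backend's.
std::pair<bool, bool> enable_feature_pair{false, true};

void ApplyFeature(void (*setter)(bool))
{
  // Default behavior is to do nothing: only act when the parameter was
  // explicitly set, then forward the configured value.
  if (enable_feature_pair.first) {
    setter(enable_feature_pair.second);
  }
}
```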
@@ -132,16 +128,11 @@ class ModelState : public BackendModel {
 
   // Flag pairs to indicate if various JIT settings are set and
   // enabled respectively. Defaults to (false, true). Default behavior
-  // is to do nothing if not explicitly set. Tensor fuser flag is
-  // ignore if nvfuser is explicitly set.
+  // is to do nothing if not explicitly set.
   std::pair<bool, bool> enable_tensor_fuser_pair_;
   std::pair<bool, bool> enable_jit_profiling_pair_;
   std::pair<bool, bool> enable_jit_executor_pair_;
 
-  // Flag pair to indicate whether nvfuser is set and enabled respectively.
-  // Defaults to (false, false).
-  std::pair<bool, bool> enable_nvfuser_pair_;
-
   // Model mapping for shared TorchScript model across all instances on the
   // same device. The key is a pair of isGPU and device index.
   std::map<
@@ -233,8 +224,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_inference_mode_(true), enable_cache_cleaning_(false),
       enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
       enable_jit_profiling_pair_({false, true}),
-      enable_jit_executor_pair_({false, true}),
-      enable_nvfuser_pair_({false, false})
+      enable_jit_executor_pair_({false, true})
 {
 }
 
@@ -475,29 +465,6 @@ ModelState::ParseParameters()
475465 " for model instance '" + Name () + " '" )
476466 .c_str ());
477467 }
478-
479- // If 'ENABLE_NVFUSER' is not present in 'parameters' then no
480- // update is made to 'enable_nvfuser'.
481- bool enable_nvfuser = false ;
482- err = ParseParameter (params, " ENABLE_NVFUSER" , &enable_nvfuser);
483- if (err != nullptr ) {
484- if (TRITONSERVER_ErrorCode (err) != TRITONSERVER_ERROR_NOT_FOUND) {
485- return err;
486- } else {
487- LOG_MESSAGE (
488- TRITONSERVER_LOG_INFO, (std::string (" NvFuser is not specified" ) +
489- " for model instance '" + Name () + " '" )
490- .c_str ());
491- TRITONSERVER_ErrorDelete (err);
492- }
493- } else {
494- enable_nvfuser_pair_ = {true , enable_nvfuser};
495- LOG_MESSAGE (
496- TRITONSERVER_LOG_INFO, (std::string (" NvFuser is " ) +
497- (enable_nvfuser ? " enabled" : " disabled" ) +
498- " for model instance '" + Name () + " '" )
499- .c_str ());
500- }
501468 }
502469
503470 return nullptr ;
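The block removed above used the backend's parse-and-tolerate-missing idiom: a NOT_FOUND error from ParseParameter means the parameter was simply absent, so the default is kept and the error released. A hedged reconstruction of that idiom for the surviving `ENABLE_TENSOR_FUSER` flag, meant as a fragment of ParseParameters() rather than a standalone unit; the log message that the real code presumably emits is omitted here:

```cpp
// Sketch of the idiom for one optional boolean parameter. "params", "err",
// and ParseParameter come from the surrounding backend code; the
// TRITONSERVER_* calls are the Triton in-process error API.
bool enable_tensor_fuser = false;
err = ParseParameter(params, "ENABLE_TENSOR_FUSER", &enable_tensor_fuser);
if (err != nullptr) {
  if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
    return err;  // a real parse failure propagates to the caller
  }
  // Parameter absent: keep the default pair so Execute() makes no change.
  TRITONSERVER_ErrorDelete(err);
} else {
  enable_tensor_fuser_pair_ = {true, enable_tensor_fuser};
}
```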
@@ -1552,34 +1519,13 @@ ModelInstanceState::Execute(
         std::get<1>(model_state_->EnabledJitExecutor());
   }
 
-  // Fuser. Parameter is ignored if NVFuser parameter is explicitly
-  // set (either enabled or disabled). No change is made unless
-  // fuser is explicitly set in parameters.
-  if (!std::get<0>(model_state_->EnabledNvfuserPair()) &&
-      std::get<0>(model_state_->EnabledTensorExprFuser())) {
+  // Fuser. No change is made unless fuser is explicitly set in
+  // parameters.
+  if (std::get<0>(model_state_->EnabledTensorExprFuser())) {
     torch::jit::setTensorExprFuserEnabled(
         std::get<1>(model_state_->EnabledTensorExprFuser()));
   }
 
-  // NV-Fuser. No change is made unless parameter is explicitly set.
-  if (std::get<0>(model_state_->EnabledNvfuserPair())) {
-    bool is_device_gpu =
-        (device_.is_cuda() ||
-         ((Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) &&
-          (device_cnt_ > 0)));
-    if (std::get<1>(model_state_->EnabledNvfuserPair()) && is_device_gpu) {
-      torch::jit::overrideCanFuseOnCPU(false);
-      torch::jit::overrideCanFuseOnGPU(false);
-      torch::jit::setTensorExprFuserEnabled(false);
-      torch::jit::fuser::cuda::setEnabled(true);
-    } else {
-      torch::jit::overrideCanFuseOnCPU(true);
-      torch::jit::overrideCanFuseOnGPU(true);
-      torch::jit::setTensorExprFuserEnabled(true);
-      torch::jit::fuser::cuda::setEnabled(false);
-    }
-  }
-
   torch::NoGradGuard no_grad;
 
   // If input is a dictionary, prepare dictionary from 'input_tensors'.
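With the NvFuser branch gone, Execute() only forwards the tensor-expression fuser flag to libtorch when it was explicitly configured. A condensed, self-contained sketch of the surviving behavior; the wrapper function is an assumption for illustration, while setTensorExprFuserEnabled is libtorch's real toggle:

```cpp
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>

#include <utility>

// Hypothetical wrapper mirroring the surviving logic in Execute():
// act only when ENABLE_TENSOR_FUSER was set in the model config,
// following the (set, value) pair convention sketched earlier.
void ApplyFuserSetting(const std::pair<bool, bool>& tensor_fuser_pair)
{
  if (tensor_fuser_pair.first) {
    torch::jit::setTensorExprFuserEnabled(tensor_fuser_pair.second);
  }
}
```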