Commit 1b2d668

cherry pick 3663: fix the int8 quantization error, remove duplicated lines (#3665)
1 parent: b6cf8e5 · commit: 1b2d668

File tree: 1 file changed (+0, −3 lines)


py/torch_tensorrt/dynamo/conversion/impl/quantize.py

Lines changed: 0 additions & 3 deletions
@@ -45,7 +45,6 @@ def quantize(
     Adds quantize and dequantize ops (QDQ) which quantize to INT8 or FP8 based
     on the output_type set and dequantizes them back.
     """
-
     with unset_fake_temporarily():
         if isinstance(input_tensor, (torch.Tensor, TRTTensor)):
             if input_tensor.dtype not in (
@@ -118,8 +117,6 @@ def quantize(
         if not isinstance(input_tensor, TRTTensor):
             input_tensor = get_trt_tensor(ctx, input_tensor, name + "_quantize_input")
 
-        quantize_layer = ctx.net.add_quantize(input_tensor, scale, dtype)
-
         # Add Q node
         quantize_layer = ctx.net.add_quantize(input_tensor, scale, dtype)
         if axis is not None:
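
Why the duplicated line mattered: each call to ctx.net.add_quantize() registers a new quantize layer in the network, so the duplicated call left an orphaned Q node behind even though the Python variable quantize_layer was simply reassigned; that dangling node is presumably the source of the INT8 quantization error named in the commit title. The toy sketch below (a hypothetical mock, not the TensorRT or Torch-TensorRT API) illustrates the before/after difference:

class MockNetwork:
    """Stand-in for ctx.net (hypothetical mock, not the real API).
    It only records how many layers have been registered."""

    def __init__(self):
        self.layers = []

    def add_quantize(self, input_tensor, scale, dtype):
        # Like the real builder call, this registers a layer in the
        # graph and returns a handle to it.
        layer = ("quantize", input_tensor, scale, dtype)
        self.layers.append(layer)
        return layer

# Pre-commit pattern: the call appears twice, so two Q layers are
# created. Reassigning quantize_layer drops the Python reference, but
# the first layer stays in the network as an orphaned Q node.
net = MockNetwork()
quantize_layer = net.add_quantize("input", 0.1, "int8")
quantize_layer = net.add_quantize("input", 0.1, "int8")
assert len(net.layers) == 2

# Post-commit pattern: a single call, a single Q node.
net = MockNetwork()
quantize_layer = net.add_quantize("input", 0.1, "int8")
assert len(net.layers) == 1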
