@@ -83,6 +83,24 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
   enum AVPixelFormat frameFormat =
       static_cast<enum AVPixelFormat>(avFrame->format);
 
+  // This is an early-return optimization: if the format is already what we
+  // need, and the dimensions are also what we need, we don't need to call
+  // swscale or filtergraph. We can just convert the AVFrame to a tensor.
+  if (frameFormat == AV_PIX_FMT_RGB24 &&
+      avFrame->width == expectedOutputWidth &&
+      avFrame->height == expectedOutputHeight) {
+    outputTensor = toTensor(avFrame);
+    if (preAllocatedOutputTensor.has_value()) {
+      // We have already validated that preAllocatedOutputTensor and
+      // outputTensor have the same shape.
+      preAllocatedOutputTensor.value().copy_(outputTensor);
+      frameOutput.data = preAllocatedOutputTensor.value();
+    } else {
+      frameOutput.data = outputTensor;
+    }
+    return;
+  }
+
   // By default, we want to use swscale for color conversion because it is
   // faster. However, it has width requirements, so we may need to fall back
   // to filtergraph. We also need to respect what was requested from the
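As a side note on the branch above, here is a minimal standalone sketch of the pre-allocated-output pattern it relies on, using only libtorch. `fillOutput` and its signature are illustrative, not the actual TorchCodec API; the point is that `copy_()` writes into the caller's buffer, so the caller's storage survives across frames.

```cpp
#include <torch/torch.h>
#include <optional>

// Illustrative helper: either copy into a caller-provided buffer or
// hand back the freshly decoded tensor.
torch::Tensor fillOutput(
    const torch::Tensor& decoded,
    std::optional<torch::Tensor> preAllocated) {
  if (preAllocated.has_value()) {
    // copy_() writes in place, so the caller's storage (and data
    // pointer) is preserved across calls.
    preAllocated.value().copy_(decoded);
    return preAllocated.value();
  }
  return decoded;
}

int main() {
  auto decoded = torch::ones({4, 4, 3}, torch::kUInt8);
  auto buffer = torch::empty({4, 4, 3}, torch::kUInt8);
  auto out = fillOutput(decoded, buffer);
  // out aliases buffer: same storage, no extra allocation per frame.
  TORCH_CHECK(out.data_ptr() == buffer.data_ptr());
}
```

Reusing a caller-provided buffer avoids one allocation per decoded frame, which is presumably why the early-return path still goes through `copy_` instead of rebinding `frameOutput.data` directly.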
@@ -159,7 +177,7 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
         std::make_unique<FilterGraph>(filtersContext, videoStreamOptions);
     prevFiltersContext_ = std::move(filtersContext);
   }
-  outputTensor = convertAVFrameToTensorUsingFilterGraph(avFrame);
+  outputTensor = toTensor(filterGraphContext_->convert(avFrame));
 
   // Similarly to above, if this check fails it means the frame wasn't
   // reshaped to its expected dimensions by filtergraph.
@@ -208,23 +226,20 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwsScale(
   return resultHeight;
 }
 
-torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
-    const UniqueAVFrame& avFrame) {
-  UniqueAVFrame filteredAVFrame = filterGraphContext_->convert(avFrame);
-
-  TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24);
+torch::Tensor CpuDeviceInterface::toTensor(const UniqueAVFrame& avFrame) {
+  TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
 
-  auto frameDims = getHeightAndWidthFromResizedAVFrame(*filteredAVFrame.get());
+  auto frameDims = getHeightAndWidthFromResizedAVFrame(*avFrame.get());
   int height = frameDims.height;
   int width = frameDims.width;
   std::vector<int64_t> shape = {height, width, 3};
-  std::vector<int64_t> strides = {filteredAVFrame->linesize[0], 3, 1};
-  AVFrame* filteredAVFramePtr = filteredAVFrame.release();
-  auto deleter = [filteredAVFramePtr](void*) {
-    UniqueAVFrame avFrameToDelete(filteredAVFramePtr);
+  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
+  AVFrame* avFrameClone = av_frame_clone(avFrame.get());
+  auto deleter = [avFrameClone](void*) {
+    UniqueAVFrame avFrameToDelete(avFrameClone);
   };
   return torch::from_blob(
-      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
+      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
 }
 
 void CpuDeviceInterface::createSwsContext(
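The `av_frame_clone` + deleter pattern above is the core of the change: the tensor borrows the frame's pixels without copying them, and a reference count on the frame's buffers keeps the data alive for the tensor's lifetime. Below is a minimal sketch of the same technique, assuming FFmpeg (libavutil) and libtorch; `wrapRgb24Frame` is an illustrative name, and error handling is pared down.

```cpp
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/pixfmt.h>
}
#include <torch/torch.h>
#include <vector>

torch::Tensor wrapRgb24Frame(const AVFrame* frame) {
  // av_frame_clone() creates a new AVFrame that shares the underlying
  // AVBufferRefs (a refcount bump, not a pixel copy), so the data stays
  // alive as long as the clone does.
  AVFrame* clone = av_frame_clone(frame);
  TORCH_CHECK(clone != nullptr, "av_frame_clone failed");

  std::vector<int64_t> shape = {clone->height, clone->width, 3};
  // linesize[0] is the byte stride between rows; it can exceed width * 3
  // because of alignment padding. For uint8 data, bytes == elements, so
  // it can be passed to from_blob directly.
  std::vector<int64_t> strides = {clone->linesize[0], 3, 1};

  // The deleter runs when the tensor's storage is destroyed and drops
  // the clone's reference to the frame buffers.
  auto deleter = [clone](void*) {
    AVFrame* toFree = clone;
    av_frame_free(&toFree);
  };
  return torch::from_blob(
      clone->data[0], shape, strides, deleter, {torch::kUInt8});
}

int main() {
  AVFrame* f = av_frame_alloc();
  f->format = AV_PIX_FMT_RGB24;
  f->width = 16;
  f->height = 8;
  av_frame_get_buffer(f, 0);  // allocates refcounted buffers

  auto t = wrapRgb24Frame(f);
  av_frame_free(&f);  // safe: the tensor's clone still holds a reference
  TORCH_CHECK(t.sizes().equals({8, 16, 3}));
}
```

Compared to the old `release()`-based version, cloning lets `toTensor` accept a `const UniqueAVFrame&` without taking ownership, which is what allows both the early-return path and the filtergraph path to share it.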