@@ -263,26 +263,16 @@ void GpuTrace::EnqueueWork(Context* context, uint32_t sequenceId, uint64_t times
         return;
     }
 
+    // If the queue is too small, enlarge it by 16 entries at a time. Typically, this will only be
+    // needed for the first packet observed on this node, which will result in sizing the queue from
+    // 0 to 16. However, there are other cases where the queue entries can grow beyond that. e.g.,
+    // this seems to always happen when an application closes.
     uint32_t queueSize = (uint32_t) node->mQueue.size();
     if (node->mQueueCount == queueSize) {
-        // If the queue is too small, enlarge it by 16 entries at a time. I only expect this to
-        // happen for the first packet observed on this node, which will result in sizing the queue
-        // from 0 to 16.
-        //
-        // However, there are other cases where the queue entries seem to grow unexpectedly. e.g.,
-        // this seems to always happen when an application closes. So, we place a reasonable
-        // maximum limit on this to prevent unexpectedly growing the queues arbitrarily.
-        if (node->mQueueCount >= 16*10) {
-            return;
-        }
-
-        auto queueIndex = queueSize == 0
-            ? 0
-            : (node->mQueueIndex + node->mQueueCount) % queueSize;
-
         Node::EnqueuedPacket empty{};
-        node->mQueue.insert(node->mQueue.begin() + queueIndex, 16, empty);
+        node->mQueue.insert(node->mQueue.begin() + node->mQueueIndex, 16, empty);
         queueSize += 16;
+        node->mQueueIndex = (node->mQueueIndex + 16) % queueSize;
     }
 
     // Enqueue the packet.
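A minimal sketch of the grow-in-place scheme above, assuming mQueue/mQueueIndex/mQueueCount form a vector-backed ring buffer; RingQueue, Push, Pop, and the int payload are illustrative stand-ins, not the actual types:

#include <cassert>
#include <cstdint>
#include <vector>

struct RingQueue {
    std::vector<int> slots;   // stand-in for Node::mQueue
    uint32_t index = 0;       // stand-in for mQueueIndex: oldest live entry
    uint32_t count = 0;       // stand-in for mQueueCount: number of live entries

    void Push(int value) {
        uint32_t size = (uint32_t) slots.size();
        if (count == size) {
            // Full: insert 16 empty slots at the head. The live entries stay
            // contiguous in circular order, and the tail position
            // (index + count) % size lands on the first new empty slot.
            slots.insert(slots.begin() + index, 16, 0);
            size += 16;
            index = (index + 16) % size;
        }
        slots[(index + count) % size] = value;
        count += 1;
    }

    int Pop() {
        assert(count > 0);
        int value = slots[index];
        index = (index + 1) % (uint32_t) slots.size();
        count -= 1;
        return value;
    }
};

int main() {
    RingQueue q;
    for (int i = 0; i < 20; ++i) q.Push(i);
    for (int i = 0; i < 5; ++i) assert(q.Pop() == i);     // advance the head
    for (int i = 20; i < 50; ++i) q.Push(i);              // forces a grow with a non-zero head
    for (int i = 5; i < 50; ++i) assert(q.Pop() == i);    // FIFO order is preserved
    return 0;
}

Because the 16 new slots open up exactly at the old head position, the live entries shift right as one block and the tail keeps pointing at the first free slot, so ordering is preserved with a single insert.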
@@ -403,8 +393,9 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
     }
 
     // Pop the completed packet from the queue, and start the next one.
+    uint32_t queueSize = (uint32_t) node->mQueue.size();
    for (;;) {
        node->mQueueCount -= 1;
        if (node->mQueueCount == 0) {
            break;
@@ -421,6 +412,21 @@ bool GpuTrace::CompleteWork(Context* context, uint32_t sequenceId, uint64_t time
         }
     }
 
+    // Decrease queue storage in multiples of 16.
+    uint32_t N = (queueSize - node->mQueueCount) / 16;
+    if (N >= 2) {
+        N = (N - 1) * 16;
+        if (node->mQueueIndex >= N) {
+            node->mQueue.erase(node->mQueue.begin() + node->mQueueIndex - N, node->mQueue.begin() + node->mQueueIndex);
+            node->mQueueIndex -= N;
+        } else {
+            node->mQueue.erase(node->mQueue.begin() + queueSize - (N - node->mQueueIndex), node->mQueue.end());
+            node->mQueue.erase(node->mQueue.begin(), node->mQueue.begin() + node->mQueueIndex);
+            node->mQueueIndex = 0;
+        }
+        node->mQueue.shrink_to_fit();
+    }
+
     return true;
 }
 
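The shrink step can be exercised in isolation with the same ring-buffer assumption. ShrinkSpare is a hypothetical free function mirroring the erase-with-wraparound logic above; slots/index/count again stand in for mQueue/mQueueIndex/mQueueCount:

#include <cassert>
#include <cstdint>
#include <vector>

// Releases spare capacity in multiples of 16, always keeping one spare block
// of 16 so the next enqueue does not immediately re-grow the buffer.
void ShrinkSpare(std::vector<int>& slots, uint32_t& index, uint32_t count) {
    uint32_t size = (uint32_t) slots.size();
    uint32_t n = (size - count) / 16;
    if (n < 2) {
        return;
    }
    n = (n - 1) * 16;
    if (index >= n) {
        // The n free slots immediately before the head are contiguous.
        slots.erase(slots.begin() + index - n, slots.begin() + index);
        index -= n;
    } else {
        // The free region wraps: erase its tail part first, then the part
        // in front of the head, which moves the head to slot 0.
        slots.erase(slots.begin() + size - (n - index), slots.end());
        slots.erase(slots.begin(), slots.begin() + index);
        index = 0;
    }
    slots.shrink_to_fit();
}

int main() {
    // 48 slots, 4 live entries at [20, 24): head is past the shrink amount.
    std::vector<int> a(48, 0);
    for (int i = 0; i < 4; ++i) a[20 + i] = i;
    uint32_t ai = 20;
    ShrinkSpare(a, ai, 4);
    assert(a.size() == 32 && ai == 4 && a[ai] == 0 && a[ai + 3] == 3);

    // 48 slots, 4 live entries at [2, 6): head is inside the shrink amount.
    std::vector<int> b(48, 0);
    for (int i = 0; i < 4; ++i) b[2 + i] = i;
    uint32_t bi = 2;
    ShrinkSpare(b, bi, 4);
    assert(b.size() == 32 && bi == 0 && b[0] == 0 && b[3] == 3);
    return 0;
}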