From f4367439fae68ebb7e097ba2eeca919be5baf7f4 Mon Sep 17 00:00:00 2001 From: Laxmikant Kale Date: Sat, 3 Dec 2022 09:46:20 -0500 Subject: [PATCH 01/10] Modifies converse schedeuler's getNextMessage so nodeGroup messages can run with higher priority over local As it is, nodeGroup messages are not checked until all local and regular Charm queue (prio Q) messages are checked, which cause issues when the applicaiton is using nodeGroup messages in the hope that *some* PE will attend to it quickly. The change makes getNextMessage check nodeGroup queue every 2^nodeGrpFreq iterations with high priority in addition to its usual check after exhasuting local queues (except task Q). This commit has not been tested at all. But pusing it to allow others to help me test/fix it. --- src/conv-core/convcore.C | 17 ++++++++++++++++- src/conv-core/converse.h | 6 ++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index 4ecd5f5442..e98e0c5df9 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1656,6 +1656,7 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) #if CMK_NODE_QUEUE_AVAILABLE s->nodeQ=CsvAccess(CsdNodeQueue); s->nodeLock=CsvAccess(CsdNodeQueueLock); + s->nodeGrpFreq =2; // check node queue once in 4 (2^2) iterations. #endif #if CMK_GRID_QUEUE_AVAILABLE s->gridQ=CpvAccess(CsdGridQueue); @@ -1664,6 +1665,7 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) s->taskQ = CpvAccess(CsdTaskQueue); s->suspendedTaskQ = CpvAccess(CmiSuspendedTaskQueue); #endif + } @@ -1719,7 +1721,20 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) */ void *CsdNextMessage(CsdSchedulerState_t *s) { void *msg; - if((*(s->localCounter))-- >0) + +s->iter++; + +#if CMK_NODE_QUEUE_AVAILABLE + if (1 == (s->iter & (1 << s->nodeGrpFreq)) // since we use nodeGrpFreq == 0 to mean + // don't check NodeQ with high priority, i + // value of 1 serves well as when to check it.i but we sshould avoid "%" operator + // note: s->nodeGrpFreq is raised to a power of 2 + { + msg = CmiGetNonLocalNodeQ(); + if (NULL != msg) return msg; + } +#endif + if((*(s->localCounter))-- >0) { /* This avoids a race condition with migration detected by megatest*/ msg=CdsFifo_Dequeue(s->localQ); diff --git a/src/conv-core/converse.h b/src/conv-core/converse.h index 7939140640..db87729531 100644 --- a/src/conv-core/converse.h +++ b/src/conv-core/converse.h @@ -1145,6 +1145,12 @@ typedef struct { void *localQ; Queue nodeQ; Queue schedQ; + unsigned short iter; // counting number of sched iterations (hopefully of for it to roll over + unsigned short nodeGrpFreq; // call nodegroup queue once every 2^nodeGrpFreq iterations with high prio + // should add a function to change this from the program for advanced users. One obstacle: + // it is inside a struct that is on stack, and so not accessible for standalone functions. Need to + // resolve this by making a schedule a c++ object, but even then we need a ptr to the currently-running scheduler + // 0 means do not check nodegroup queue with high prio.. will be checked with low prio after other Qs int *localCounter; #if CMK_OBJECT_QUEUE_AVAILABLE Queue objQ; From 800aa6bdaecb662e03c9355f394f16403856c058 Mon Sep 17 00:00:00 2001 From: Laxmikant Kale Date: Sun, 4 Dec 2022 07:36:04 -0800 Subject: [PATCH 02/10] fixed syntax error (missing right parent). --- src/conv-core/convcore.C | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index e98e0c5df9..f3f2847f66 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1722,13 +1722,14 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) void *CsdNextMessage(CsdSchedulerState_t *s) { void *msg; -s->iter++; + s->iter++; #if CMK_NODE_QUEUE_AVAILABLE - if (1 == (s->iter & (1 << s->nodeGrpFreq)) // since we use nodeGrpFreq == 0 to mean + if (1 == (s->iter & (1 << s->nodeGrpFreq))) // since we use nodeGrpFreq == 0 to mean // don't check NodeQ with high priority, i - // value of 1 serves well as when to check it.i but we sshould avoid "%" operator - // note: s->nodeGrpFreq is raised to a power of 2 + // value of 1 serves well as when to check it. + // note: s->nodeGrpFreq is raised to a power of 2, to avoid modulo operator + //note: maybe nodeGrpFreq should be a global readonly (or member of some globalParams group), to avoid "s->" { msg = CmiGetNonLocalNodeQ(); if (NULL != msg) return msg; From 62b05692800aa2cc3a362f1cceb8b9c88a82bd0a Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Tue, 9 Apr 2024 13:03:09 -0500 Subject: [PATCH 03/10] Addressing changes --- src/conv-core/convcore.C | 10 ++++------ src/conv-core/converse.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index a2559c42da..16a43973bd 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1657,7 +1657,7 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) #if CMK_NODE_QUEUE_AVAILABLE s->nodeQ=CsvAccess(CsdNodeQueue); s->nodeLock=CsvAccess(CsdNodeQueueLock); - s->nodeGrpFreq =2; // check node queue once in 4 (2^2) iterations. + s->nodeGrpFreq=4; #endif #if CMK_GRID_QUEUE_AVAILABLE s->gridQ=CpvAccess(CsdGridQueue); @@ -1726,11 +1726,9 @@ void *CsdNextMessage(CsdSchedulerState_t *s) { s->iter++; #if CMK_NODE_QUEUE_AVAILABLE - if (1 == (s->iter & (1 << s->nodeGrpFreq))) // since we use nodeGrpFreq == 0 to mean - // don't check NodeQ with high priority, i - // value of 1 serves well as when to check it. - // note: s->nodeGrpFreq is raised to a power of 2, to avoid modulo operator - //note: maybe nodeGrpFreq should be a global readonly (or member of some globalParams group), to avoid "s->" + // we use nodeGrpFreq == 0 to mean + // don't check NodeQ with high priority + if (s->nodeGrpFreq && (0 == (s->iter % s->nodeGrpFreq))) { msg = CmiGetNonLocalNodeQ(); if (NULL != msg) return msg; diff --git a/src/conv-core/converse.h b/src/conv-core/converse.h index 52f9a1ded9..6cfa01388c 100644 --- a/src/conv-core/converse.h +++ b/src/conv-core/converse.h @@ -1130,7 +1130,7 @@ typedef struct { Queue nodeQ; Queue schedQ; unsigned short iter; // counting number of sched iterations (hopefully of for it to roll over - unsigned short nodeGrpFreq; // call nodegroup queue once every 2^nodeGrpFreq iterations with high prio + unsigned short nodeGrpFreq; // call nodegroup queue once every nodeGrpFreq iterations with high prio // should add a function to change this from the program for advanced users. One obstacle: // it is inside a struct that is on stack, and so not accessible for standalone functions. Need to // resolve this by making a schedule a c++ object, but even then we need a ptr to the currently-running scheduler From 0f31dd0361002eff7aad0fb8bb2304badb0e6ed6 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Fri, 3 May 2024 14:20:42 -0500 Subject: [PATCH 04/10] initialize scheduler iter --- src/conv-core/convcore.C | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index 16a43973bd..107b914258 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1655,9 +1655,10 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) s->schedQ=CpvAccess(CsdSchedQueue); s->localCounter=&(CpvAccess(CsdLocalCounter)); #if CMK_NODE_QUEUE_AVAILABLE - s->nodeQ=CsvAccess(CsdNodeQueue); - s->nodeLock=CsvAccess(CsdNodeQueueLock); - s->nodeGrpFreq=4; + s->nodeQ=CsvAccess(CsdNodeQueue); + s->nodeLock=CsvAccess(CsdNodeQueueLock); + s->nodeGrpFreq=4; + s->iter = 0; #endif #if CMK_GRID_QUEUE_AVAILABLE s->gridQ=CpvAccess(CsdGridQueue); From 5f3e5b20993aa25b37b2876ea4b01b1288862d3a Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Fri, 3 May 2024 15:08:41 -0500 Subject: [PATCH 05/10] remove extra whitespace --- src/conv-core/convcore.C | 1 - 1 file changed, 1 deletion(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index 107b914258..cad74f7564 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1667,7 +1667,6 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) s->taskQ = CpvAccess(CsdTaskQueue); s->suspendedTaskQ = CpvAccess(CmiSuspendedTaskQueue); #endif - } From 734222142f40e89c4a7a214e16e5005af2a0a475 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Fri, 3 May 2024 15:09:57 -0500 Subject: [PATCH 06/10] Remove text after the parenthetical --- src/conv-core/converse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/conv-core/converse.h b/src/conv-core/converse.h index 6cfa01388c..defdabc766 100644 --- a/src/conv-core/converse.h +++ b/src/conv-core/converse.h @@ -1129,7 +1129,7 @@ typedef struct { void *localQ; Queue nodeQ; Queue schedQ; - unsigned short iter; // counting number of sched iterations (hopefully of for it to roll over + unsigned short iter; // counting number of sched iterations unsigned short nodeGrpFreq; // call nodegroup queue once every nodeGrpFreq iterations with high prio // should add a function to change this from the program for advanced users. One obstacle: // it is inside a struct that is on stack, and so not accessible for standalone functions. Need to From 22fdc8354018b746f73ede67a5b21c28f80db520 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Tue, 28 May 2024 17:19:48 -0500 Subject: [PATCH 07/10] Remove long comment --- src/conv-core/converse.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/conv-core/converse.h b/src/conv-core/converse.h index 6cfa01388c..3a1ea78065 100644 --- a/src/conv-core/converse.h +++ b/src/conv-core/converse.h @@ -1130,11 +1130,7 @@ typedef struct { Queue nodeQ; Queue schedQ; unsigned short iter; // counting number of sched iterations (hopefully of for it to roll over - unsigned short nodeGrpFreq; // call nodegroup queue once every nodeGrpFreq iterations with high prio - // should add a function to change this from the program for advanced users. One obstacle: - // it is inside a struct that is on stack, and so not accessible for standalone functions. Need to - // resolve this by making a schedule a c++ object, but even then we need a ptr to the currently-running scheduler - // 0 means do not check nodegroup queue with high prio.. will be checked with low prio after other Qs + unsigned short nodeGrpFreq; // call nodegroup queue once every nodeGrpFreq iterations with high prio int *localCounter; #if CMK_OBJECT_QUEUE_AVAILABLE Queue objQ; From 83e904f991df5943fb89b030a392436476e4c8d5 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Tue, 28 May 2024 17:32:34 -0500 Subject: [PATCH 08/10] Re-organize, fix whitespace --- src/conv-core/convcore.C | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index 16a43973bd..ab3ddd1811 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1723,34 +1723,32 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) void *CsdNextMessage(CsdSchedulerState_t *s) { void *msg; - s->iter++; + if ((*(s->localCounter))-- > 0) { + /* This avoids a race condition with migration detected by megatest*/ + msg = CdsFifo_Dequeue(s->localQ); + if (msg != NULL) { +#if CMI_QD + CpvAccess(cQdState)->mProcessed++; +#endif + return msg; + } + CqsDequeue(s->schedQ, (void**)&msg); + if (msg != NULL) + return msg; + } #if CMK_NODE_QUEUE_AVAILABLE - // we use nodeGrpFreq == 0 to mean - // don't check NodeQ with high priority - if (s->nodeGrpFreq && (0 == (s->iter % s->nodeGrpFreq))) - { + s->iter++; + // we use nodeGrpFreq == 0 to mean + // don't check NodeQ with high priority + if (s->nodeGrpFreq && (0 == (s->iter % s->nodeGrpFreq))) { msg = CmiGetNonLocalNodeQ(); if (NULL != msg) return msg; } #endif - if((*(s->localCounter))-- >0) - { - /* This avoids a race condition with migration detected by megatest*/ - msg=CdsFifo_Dequeue(s->localQ); - if (msg!=NULL) - { -#if CMI_QD - CpvAccess(cQdState)->mProcessed++; -#endif - return msg; - } - CqsDequeue(s->schedQ,(void **)&msg); - if (msg!=NULL) return msg; - } - + *(s->localCounter)=CsdLocalMax; - if ( NULL!=(msg=CmiGetNonLocal()) || + if ( NULL!=(msg=CmiGetNonLocal()) || NULL!=(msg=CdsFifo_Dequeue(s->localQ)) ) { #if CMI_QD CpvAccess(cQdState)->mProcessed++; From bc5f37c2894346cc39077c94ecfacd370a1079e3 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Wed, 29 May 2024 11:26:59 -0500 Subject: [PATCH 09/10] Normalize indentation --- src/conv-core/convcore.C | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index e0322deff4..1f72bf7e67 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1655,10 +1655,10 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s) s->schedQ=CpvAccess(CsdSchedQueue); s->localCounter=&(CpvAccess(CsdLocalCounter)); #if CMK_NODE_QUEUE_AVAILABLE - s->nodeQ=CsvAccess(CsdNodeQueue); - s->nodeLock=CsvAccess(CsdNodeQueueLock); - s->nodeGrpFreq=4; - s->iter = 0; + s->nodeQ=CsvAccess(CsdNodeQueue); + s->nodeLock=CsvAccess(CsdNodeQueueLock); + s->nodeGrpFreq=4; + s->iter = 0; #endif #if CMK_GRID_QUEUE_AVAILABLE s->gridQ=CpvAccess(CsdGridQueue); From 3b0ebbdbf41e91bc2449328f2a60680df63ae773 Mon Sep 17 00:00:00 2001 From: Zane Fink Date: Fri, 31 May 2024 14:56:37 -0500 Subject: [PATCH 10/10] More whitespace --- src/conv-core/convcore.C | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/conv-core/convcore.C b/src/conv-core/convcore.C index 1f72bf7e67..8e734e2939 100644 --- a/src/conv-core/convcore.C +++ b/src/conv-core/convcore.C @@ -1751,10 +1751,10 @@ void *CsdNextMessage(CsdSchedulerState_t *s) { if ( NULL!=(msg=CmiGetNonLocal()) || NULL!=(msg=CdsFifo_Dequeue(s->localQ)) ) { #if CMI_QD - CpvAccess(cQdState)->mProcessed++; + CpvAccess(cQdState)->mProcessed++; #endif - return msg; - } + return msg; + } #if CMK_GRID_QUEUE_AVAILABLE /*#warning "CsdNextMessage: CMK_GRID_QUEUE_AVAILABLE" */ CqsDequeue (s->gridQ, (void **) &msg);