-
Couldn't load subscription status.
- Fork 14.8k
KAFKA-19829: Implement group-level initial rebalance delay #20755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from 2 commits
fc586ac
33f9667
e57d4a1
164143f
e2410bc
3c163b7
b0814e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1985,30 +1985,54 @@ private CoordinatorResult<StreamsGroupHeartbeatResult, CoordinatorRecord> stream | |
|
|
||
| // Actually bump the group epoch | ||
| int groupEpoch = group.groupEpoch(); | ||
| boolean isInitialRebalance = group.isEmpty(); | ||
|
||
| if (bumpGroupEpoch) { | ||
| groupEpoch += 1; | ||
| if (isInitialRebalance) { | ||
| groupEpoch = 2; | ||
|
||
| } else { | ||
| groupEpoch += 1; | ||
| } | ||
| records.add(newStreamsGroupMetadataRecord(groupId, groupEpoch, metadataHash, validatedTopologyEpoch, currentAssignmentConfigs)); | ||
| log.info("[GroupId {}][MemberId {}] Bumped streams group epoch to {} with metadata hash {} and validated topic epoch {}.", groupId, memberId, groupEpoch, metadataHash, validatedTopologyEpoch); | ||
| metrics.record(STREAMS_GROUP_REBALANCES_SENSOR_NAME); | ||
| group.setMetadataRefreshDeadline(currentTimeMs + METADATA_REFRESH_INTERVAL_MS, groupEpoch); | ||
| } | ||
|
|
||
| // Schedule initial rebalance delay for new streams groups to coalesce joins. | ||
| int initialDelayMs = streamsGroupInitialRebalanceDelayMs(groupId); | ||
| if (isInitialRebalance && initialDelayMs > 0) { ||
| timer.scheduleIfAbsent( | ||
| streamsInitialRebalanceKey(groupId), | ||
| initialDelayMs, | ||
| TimeUnit.MILLISECONDS, | ||
| false, | ||
| () -> fireStreamsInitialRebalance(groupId) | ||
| ); | ||
| } | ||
|
|
||
| // 4. Update the target assignment if the group epoch is larger than the target assignment epoch or a static member | ||
| // replaces an existing static member. | ||
| // The delta between the existing and the new target assignment is persisted to the partition. | ||
| int targetAssignmentEpoch; | ||
| TasksTuple targetAssignment; | ||
| if (groupEpoch > group.assignmentEpoch()) { | ||
| targetAssignment = updateStreamsTargetAssignment( | ||
| group, | ||
| groupEpoch, | ||
| updatedMember, | ||
| updatedConfiguredTopology, | ||
| metadataImage, | ||
| records, | ||
| currentAssignmentConfigs | ||
| ); | ||
| targetAssignmentEpoch = groupEpoch; | ||
| boolean initialDelayActive = timer.isScheduled(streamsInitialRebalanceKey(groupId)); | ||
| if (initialDelayActive && group.assignmentEpoch() == 0) { | ||
| // During initial rebalance delay, return empty assignment to first joining members. | ||
| targetAssignmentEpoch = group.assignmentEpoch(); | ||
| targetAssignment = TasksTuple.EMPTY; | ||
| } else { | ||
| targetAssignment = updateStreamsTargetAssignment( | ||
| group, | ||
| groupEpoch, | ||
| updatedMember, | ||
| updatedConfiguredTopology, | ||
| metadataImage, | ||
| records, | ||
| currentAssignmentConfigs | ||
| ); | ||
| targetAssignmentEpoch = groupEpoch; | ||
| } | ||
| } else { | ||
| targetAssignmentEpoch = group.assignmentEpoch(); | ||
| targetAssignment = group.targetAssignment(updatedMember.memberId()); | ||
|
|
@@ -8570,6 +8594,10 @@ private boolean maybeDeleteEmptyStreamsGroup(String groupId, List<CoordinatorRec | |
| // Add tombstones for the previous streams group. The tombstones won't actually be | ||
| // replayed because its coordinator result has a non-null appendFuture. | ||
| createGroupTombstoneRecords(group, records); | ||
| // Cancel any pending initial rebalance timer. | ||
| if (timer.isScheduled(streamsInitialRebalanceKey(groupId))) { | ||
| timer.cancel(streamsInitialRebalanceKey(groupId)); | ||
| } | ||
| removeGroup(groupId); | ||
| return true; | ||
| } | ||
|
|
@@ -8659,6 +8687,15 @@ private int streamsGroupHeartbeatIntervalMs(String groupId) { | |
| .orElse(config.streamsGroupHeartbeatIntervalMs()); | ||
| } | ||
|
|
||
| /** | ||
| * Get the initial rebalance delay of the provided streams group. | ||
| */ | ||
| private int streamsGroupInitialRebalanceDelayMs(String groupId) { | ||
| Optional<GroupConfig> groupConfig = groupConfigManager.groupConfig(groupId); | ||
| return groupConfig.map(GroupConfig::streamsInitialRebalanceDelayMs) | ||
| .orElse(config.streamsGroupInitialRebalanceDelayMs()); | ||
| } | ||
|
|
||
| /** | ||
| * Get the assignor of the provided streams group. | ||
| */ | ||
|
|
@@ -8716,6 +8753,31 @@ static String classicGroupSyncKey(String groupId) { | |
| return "sync-" + groupId; | ||
| } | ||
|
|
||
| /** | ||
| * Callback when the initial rebalance delay timer expires. | ||
| * This is a no-op as the actual assignment computation happens on the next heartbeat. | ||
| * | ||
| * @param groupId The group id. | ||
| * | ||
| * @return An empty result. | ||
| */ | ||
| private CoordinatorResult<Void, CoordinatorRecord> fireStreamsInitialRebalance(String groupId) { | ||
|
||
| return EMPTY_RESULT; | ||
| } | ||
|
|
||
| /** | ||
| * Generate a streams group initial rebalance key for the timer. | ||
| * | ||
| * Package private for testing. | ||
| * | ||
| * @param groupId The group id. | ||
| * | ||
| * @return the initial rebalance key. | ||
| */ | ||
| static String streamsInitialRebalanceKey(String groupId) { | ||
| return "initial-rebalance-timeout-" + groupId; | ||
| } | ||
|
|
||
| /** | ||
| * Generate a consumer group join key for the timer. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mjsax Are we good with a default of 3 seconds for delaying the initial rebalance?