diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 517d3abd2a472..3885befc471eb 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -77,6 +77,7 @@ import org.opensearch.cluster.routing.allocation.decider.ResizeAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.RestoreInProgressAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.ReplicaOnlyAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; @@ -396,6 +397,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new SearchReplicaAllocationDecider()); + addAllocationDecider(deciders, new ReplicaOnlyAllocationDecider()); addAllocationDecider(deciders, new SameShardAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new DiskThresholdDecider(settings, clusterSettings)); addAllocationDecider(deciders, new WarmDiskThresholdDecider(settings, clusterSettings)); diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 1bb26478af08e..d9e9c2404ca48 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -130,6 +130,10 @@ public static boolean isDedicatedWarmNode(Settings settings) { return getRolesFromSettings(settings).stream().allMatch(DiscoveryNodeRole.WARM_ROLE::equals); } + public static boolean isReplicaOnlyNode(Settings settings) { + return hasRole(settings, DiscoveryNodeRole.REPLICA_ONLY_ROLE); + } + private final String nodeName; private final String nodeId; private final String ephemeralId; @@ -542,6 +546,15 @@ public boolean isSearchNode() { return roles.contains(DiscoveryNodeRole.SEARCH_ROLE); } + /** + * Returns whether the node is a replica-only node. + * + * @return true if the node contains a replica_only role, false otherwise + */ + public boolean isReplicaOnlyNode() { + return roles.contains(DiscoveryNodeRole.REPLICA_ONLY_ROLE); + } + /** * Returns whether the node is a remote store node. * diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodeRole.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodeRole.java index ac4135686486e..ff27e85274887 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodeRole.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodeRole.java @@ -337,11 +337,48 @@ public void validateRole(List roles) { }; + /** + * Represents the role for a replica-only node, which hosts replica shards from auto-expand indices + * without participating in primary shard hosting. Replica-only nodes: + * - Never host primary shards or promote replicas to primaries + * - Only accept replica shards from indices with {@code index.auto_expand_replicas: 0-all} + * - Do not trigger rebalancing on regular data nodes when joining/leaving the cluster + * - Must be a dedicated role (cannot coexist with any other role) + */ + public static final DiscoveryNodeRole REPLICA_ONLY_ROLE = new DiscoveryNodeRole("replica_only", "ro", true) { + + @Override + public Setting legacySetting() { + // replica_only role is added in 3.5 so doesn't need to configure legacy setting + return null; + } + + @Override + public void validateRole(List roles) { + // replica_only role must be the only role on a node (dedicated) + for (DiscoveryNodeRole role : roles) { + if (role.equals(DiscoveryNodeRole.REPLICA_ONLY_ROLE) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "%s role must be the only role on a node. Cannot be combined with: %s", + DiscoveryNodeRole.REPLICA_ONLY_ROLE.roleName(), + role.roleName() + ) + ); + } + } + } + + }; + /** * The built-in node roles. */ public static SortedSet BUILT_IN_ROLES = Collections.unmodifiableSortedSet( - new TreeSet<>(Arrays.asList(DATA_ROLE, INGEST_ROLE, CLUSTER_MANAGER_ROLE, REMOTE_CLUSTER_CLIENT_ROLE, WARM_ROLE)) + new TreeSet<>( + Arrays.asList(DATA_ROLE, INGEST_ROLE, CLUSTER_MANAGER_ROLE, REMOTE_CLUSTER_CLIENT_ROLE, WARM_ROLE, REPLICA_ONLY_ROLE) + ) ); /** diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java index 4d2232afd9a5e..1760001b3d61f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java @@ -32,6 +32,7 @@ package org.opensearch.cluster.routing; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.util.CollectionUtil; import org.opensearch.cluster.ClusterState; @@ -85,6 +86,8 @@ */ @PublicApi(since = "1.0.0") public class RoutingNodes implements Iterable { + private static final Logger logger = LogManager.getLogger(RoutingNodes.class); + private final Metadata metadata; private final Map nodesToShards = new HashMap<>(); @@ -812,6 +815,21 @@ private void promoteReplicaToPrimary(ShardRouting activeReplica, RoutingChangesO // if the activeReplica was relocating before this call to failShard, its relocation was cancelled earlier when we // failed initializing replica shards (and moved replica relocation source back to started) assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica; + + // CRITICAL: Never promote replicas on replica-only nodes + RoutingNode routingNode = node(activeReplica.currentNodeId()); + if (routingNode != null && routingNode.node().isReplicaOnlyNode()) { + logger.warn( + "Cannot promote replica shard [{}] to primary on replica-only node [{}]. " + + "Shard will remain as replica. Primary must be allocated to a regular data node.", + activeReplica.shardId(), + routingNode.nodeId() + ); + // Do NOT call promoteActiveReplicaShardToPrimary - just return + // The primary will remain unassigned, triggering allocation to a data node + return; + } + promoteActiveReplicaShardToPrimary(activeReplica); routingChangesObserver.replicaPromoted(activeReplica); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java index 2fcf51f102fd4..c521d2f9af080 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java @@ -765,6 +765,15 @@ MoveDecision decideMove(final ShardRouting shardRouting) { private Map buildModelFromAssigned() { Map nodes = new HashMap<>(); for (RoutingNode rn : routingNodes) { + // EXCLUDE replica-only nodes from rebalancing calculations + // These nodes are managed solely by auto-expand replica logic + if (rn.node().isReplicaOnlyNode()) { + if (logger.isTraceEnabled()) { + logger.trace("Excluding replica-only node [{}] from rebalancing model", rn.nodeId()); + } + continue; + } + BalancedShardsAllocator.ModelNode node = new BalancedShardsAllocator.ModelNode(rn); nodes.put(rn.nodeId(), node); for (ShardRouting shard : rn) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDecider.java new file mode 100644 index 0000000000000..0dd423b90ff66 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDecider.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.cluster.metadata.AutoExpandReplicas; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; + +/** + * This allocation decider ensures that replica-only nodes follow strict allocation rules: + *
    + *
  • Replica-only nodes never host primary shards
  • + *
  • Replica-only nodes only host replica shards from indices with auto_expand_replicas: 0-all
  • + *
  • Primary shards are never promoted on replica-only nodes
  • + *
  • Regular data nodes are not affected by replica-only node presence
  • + *
+ * + * @opensearch.internal + */ +public class ReplicaOnlyAllocationDecider extends AllocationDecider { + + public static final String NAME = "replica_only"; + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node.node(), allocation); + } + + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, node.node(), allocation); + } + + @Override + public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + // CRITICAL: Never allow primary allocation to replica-only nodes, even with force + if (node.node().isReplicaOnlyNode()) { + return allocation.decision( + Decision.NO, + NAME, + "primary shard [%s] cannot be force allocated to replica-only node [%s]", + shardRouting.shardId(), + node.nodeId() + ); + } + return allocation.decision(Decision.YES, NAME, "node is not a replica-only node"); + } + + @Override + public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { + if (!node.isReplicaOnlyNode()) { + // Regular data nodes participate in auto-expand for all indices + return allocation.decision(Decision.YES, NAME, "node [%s] is a data node, eligible for auto-expand", node.getId()); + } + + // Replica-only nodes only participate in 0-all auto-expand + AutoExpandReplicas autoExpandReplicas = AutoExpandReplicas.SETTING.get(indexMetadata.getSettings()); + boolean isAutoExpandAll = autoExpandReplicas.isEnabled() + && autoExpandReplicas.getMaxReplicas() == Integer.MAX_VALUE + && autoExpandReplicas.toString().startsWith("0-"); + + if (isAutoExpandAll) { + return allocation.decision( + Decision.YES, + NAME, + "replica-only node [%s] is eligible for auto-expand replicas from index [%s] with auto_expand_replicas: 0-all", + node.getId(), + indexMetadata.getIndex().getName() + ); + } else { + return allocation.decision( + Decision.NO, + NAME, + "replica-only node [%s] is not eligible for index [%s] without auto_expand_replicas: 0-all", + node.getId(), + indexMetadata.getIndex().getName() + ); + } + } + + private Decision canAllocate(ShardRouting shardRouting, DiscoveryNode node, RoutingAllocation allocation) { + boolean isReplicaOnlyNode = node.isReplicaOnlyNode(); + + // Case 1: Primary shard allocation + if (shardRouting.primary()) { + if (isReplicaOnlyNode) { + return allocation.decision( + Decision.NO, + NAME, + "primary shard [%s] cannot be allocated to replica-only node [%s]", + shardRouting.shardId(), + node.getId() + ); + } + // Allow primaries on regular data nodes + return allocation.decision(Decision.YES, NAME, "node [%s] is a data node, can host primary shard", node.getId()); + } + + // Case 2: Replica shard allocation + IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); + AutoExpandReplicas autoExpandReplicas = AutoExpandReplicas.SETTING.get(indexMetadata.getSettings()); + boolean isAutoExpandAll = autoExpandReplicas.isEnabled() + && autoExpandReplicas.getMaxReplicas() == Integer.MAX_VALUE + && autoExpandReplicas.toString().startsWith("0-"); + + if (isReplicaOnlyNode) { + if (isAutoExpandAll) { + return allocation.decision( + Decision.YES, + NAME, + "replica shard [%s] from auto-expand (0-all) index can be allocated to replica-only node [%s]", + shardRouting.shardId(), + node.getId() + ); + } else { + return allocation.decision( + Decision.NO, + NAME, + "replica shard [%s] cannot be allocated to replica-only node [%s] " + + "because index [%s] does not have auto_expand_replicas: 0-all", + shardRouting.shardId(), + node.getId(), + indexMetadata.getIndex().getName() + ); + } + } + + // Regular data nodes can host any replica + return allocation.decision(Decision.YES, NAME, "node [%s] is a data node, can host replica shard", node.getId()); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeRoleTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeRoleTests.java index 5f0658f7f110b..b707d37d0f387 100644 --- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeRoleTests.java +++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeRoleTests.java @@ -159,4 +159,41 @@ public void testRoleNameIsCaseInsensitive() { assertEquals(roleName.toLowerCase(Locale.ROOT), dynamicRole.roleName()); assertEquals(roleNameAbbreviation.toLowerCase(Locale.ROOT), dynamicRole.roleNameAbbreviation()); } + + public void testReplicaOnlyRoleIsDedicated() { + // replica_only role cannot coexist with any other role + final IllegalArgumentException e1 = expectThrows( + IllegalArgumentException.class, + () -> DiscoveryNodeRole.REPLICA_ONLY_ROLE.validateRole(Arrays.asList(DiscoveryNodeRole.REPLICA_ONLY_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); + assertThat(e1, hasToString(containsString("replica_only role must be the only role"))); + + final IllegalArgumentException e2 = expectThrows( + IllegalArgumentException.class, + () -> DiscoveryNodeRole.REPLICA_ONLY_ROLE.validateRole( + Arrays.asList(DiscoveryNodeRole.REPLICA_ONLY_ROLE, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE) + ) + ); + assertThat(e2, hasToString(containsString("replica_only role must be the only role"))); + + final IllegalArgumentException e3 = expectThrows( + IllegalArgumentException.class, + () -> DiscoveryNodeRole.REPLICA_ONLY_ROLE.validateRole(Arrays.asList(DiscoveryNodeRole.REPLICA_ONLY_ROLE, DiscoveryNodeRole.INGEST_ROLE)) + ); + assertThat(e3, hasToString(containsString("replica_only role must be the only role"))); + + // replica_only role by itself should not throw + DiscoveryNodeRole.REPLICA_ONLY_ROLE.validateRole(Arrays.asList(DiscoveryNodeRole.REPLICA_ONLY_ROLE)); + } + + public void testReplicaOnlyRoleProperties() { + assertEquals("replica_only", DiscoveryNodeRole.REPLICA_ONLY_ROLE.roleName()); + assertEquals("ro", DiscoveryNodeRole.REPLICA_ONLY_ROLE.roleNameAbbreviation()); + assertTrue(DiscoveryNodeRole.REPLICA_ONLY_ROLE.canContainData()); + assertNull(DiscoveryNodeRole.REPLICA_ONLY_ROLE.legacySetting()); + } + + public void testReplicaOnlyRoleInBuiltInRoles() { + assertTrue(DiscoveryNodeRole.BUILT_IN_ROLES.contains(DiscoveryNodeRole.REPLICA_ONLY_ROLE)); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDeciderTests.java new file mode 100644 index 0000000000000..9fcd744959872 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/ReplicaOnlyAllocationDeciderTests.java @@ -0,0 +1,343 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.EmptyClusterInfoService; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.snapshots.EmptySnapshotsInfoService; +import org.opensearch.test.gateway.TestGatewayAllocator; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS; + +public class ReplicaOnlyAllocationDeciderTests extends OpenSearchAllocationTestCase { + + private ReplicaOnlyAllocationDecider decider; + private AllocationDeciders allocationDeciders; + private ClusterState clusterState; + + @Override + public void setUp() throws Exception { + super.setUp(); + decider = new ReplicaOnlyAllocationDecider(); + + Set> settings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), settings); + + allocationDeciders = new AllocationDeciders( + Arrays.asList(decider, new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), new ReplicaAfterPrimaryActiveAllocationDecider()) + ); + + AllocationService service = new AllocationService( + allocationDeciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + + // Create a cluster with 2 nodes: one regular data node, one replica-only node + DiscoveryNode dataNode = newNode("data-node", Collections.singleton(DiscoveryNodeRole.DATA_ROLE)); + DiscoveryNode replicaOnlyNode = newNode("replica-only-node", Collections.singleton(DiscoveryNodeRole.REPLICA_ONLY_ROLE)); + + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder("auto-expand-index") + .settings( + settings(Version.CURRENT).put(SETTING_AUTO_EXPAND_REPLICAS, "0-all") + ) + .numberOfShards(1) + .numberOfReplicas(1) + ) + .put(IndexMetadata.builder("regular-index").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + + clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .nodes(org.opensearch.cluster.node.DiscoveryNodes.builder().add(dataNode).add(replicaOnlyNode)) + .build(); + + clusterState = ClusterState.builder(clusterState) + .routingTable(org.opensearch.cluster.routing.RoutingTable.builder().addAsNew(metadata.index("auto-expand-index")).addAsNew(metadata.index("regular-index")).build()) + .build(); + } + + public void testPrimaryCannotAllocateToReplicaOnlyNode() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + ShardRouting primaryShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + RoutingNode replicaOnlyNode = clusterState.getRoutingNodes().node("replica-only-node"); + RoutingNode dataNode = clusterState.getRoutingNodes().node("data-node"); + + // Primary cannot be allocated to replica-only node + Decision decision = decider.canAllocate(primaryShard, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("primary shard")); + assertTrue(decision.toString(), decision.toString().contains("replica-only")); + + // Primary can be allocated to data node + decision = decider.canAllocate(primaryShard, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testReplicaCanAllocateToReplicaOnlyNodeWithAutoExpandAll() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + ShardRouting replicaShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + RoutingNode replicaOnlyNode = clusterState.getRoutingNodes().node("replica-only-node"); + + // Replica from auto-expand 0-all index CAN be allocated to replica-only node + Decision decision = decider.canAllocate(replicaShard, replicaOnlyNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("auto-expand")); + } + + public void testReplicaCannotAllocateToReplicaOnlyNodeWithoutAutoExpandAll() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + ShardRouting replicaShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("regular-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + RoutingNode replicaOnlyNode = clusterState.getRoutingNodes().node("replica-only-node"); + RoutingNode dataNode = clusterState.getRoutingNodes().node("data-node"); + + // Replica from regular index CANNOT be allocated to replica-only node + Decision decision = decider.canAllocate(replicaShard, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("does not have auto_expand_replicas")); + + // Replica from regular index CAN be allocated to data node + decision = decider.canAllocate(replicaShard, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testForceAllocatePrimaryBlocked() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + ShardRouting primaryShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + RoutingNode replicaOnlyNode = clusterState.getRoutingNodes().node("replica-only-node"); + RoutingNode dataNode = clusterState.getRoutingNodes().node("data-node"); + + // Force allocation of primary is STILL blocked on replica-only node + Decision decision = decider.canForceAllocatePrimary(primaryShard, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("cannot be force allocated")); + + // Force allocation allowed on data node + decision = decider.canForceAllocatePrimary(primaryShard, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testCanRemainFollowsSameLogic() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + RoutingNode replicaOnlyNode = clusterState.getRoutingNodes().node("replica-only-node"); + RoutingNode dataNode = clusterState.getRoutingNodes().node("data-node"); + + // Primary cannot remain on replica-only node + ShardRouting primaryShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + Decision decision = decider.canRemain(primaryShard, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + + // Replica from auto-expand index CAN remain on replica-only node + ShardRouting autoExpandReplica = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + decision = decider.canRemain(autoExpandReplica, replicaOnlyNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + + // Replica from regular index CANNOT remain on replica-only node + ShardRouting regularReplica = ShardRouting.newUnassigned( + clusterState.routingTable().index("regular-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + decision = decider.canRemain(regularReplica, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + } + + public void testShouldAutoExpandToNodeForAutoExpandAllIndex() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + IndexMetadata autoExpandIndex = clusterState.metadata().index("auto-expand-index"); + DiscoveryNode replicaOnlyNode = clusterState.nodes().get("replica-only-node"); + DiscoveryNode dataNode = clusterState.nodes().get("data-node"); + + // Replica-only node SHOULD be included for auto-expand 0-all index + Decision decision = decider.shouldAutoExpandToNode(autoExpandIndex, replicaOnlyNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("eligible for auto-expand")); + + // Data node always included + decision = decider.shouldAutoExpandToNode(autoExpandIndex, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testShouldAutoExpandToNodeForNonAutoExpandIndex() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + IndexMetadata regularIndex = clusterState.metadata().index("regular-index"); + DiscoveryNode replicaOnlyNode = clusterState.nodes().get("replica-only-node"); + DiscoveryNode dataNode = clusterState.nodes().get("data-node"); + + // Replica-only node should NOT be included for non-auto-expand index + Decision decision = decider.shouldAutoExpandToNode(regularIndex, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + assertTrue(decision.toString(), decision.toString().contains("without auto_expand_replicas: 0-all")); + + // Data node still included + decision = decider.shouldAutoExpandToNode(regularIndex, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testRegularDataNodeAcceptsAllShards() { + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, clusterState.getRoutingNodes(), clusterState, null, null, 0); + allocation.debugDecision(true); + + RoutingNode dataNode = clusterState.getRoutingNodes().node("data-node"); + + // Regular data node can accept primaries + ShardRouting primaryShard = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + Decision decision = decider.canAllocate(primaryShard, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + + // Regular data node can accept replicas from auto-expand index + ShardRouting autoExpandReplica = ShardRouting.newUnassigned( + clusterState.routingTable().index("auto-expand-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + decision = decider.canAllocate(autoExpandReplica, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + + // Regular data node can accept replicas from regular index + ShardRouting regularReplica = ShardRouting.newUnassigned( + clusterState.routingTable().index("regular-index").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + decision = decider.canAllocate(regularReplica, dataNode, allocation); + assertEquals(Decision.Type.YES, decision.type()); + } + + public void testAutoExpandWithDifferentSettings() { + // Test auto-expand with different settings (0-5, 1-all, etc.) + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder("auto-expand-0-5") + .settings(settings(Version.CURRENT).put(SETTING_AUTO_EXPAND_REPLICAS, "0-5")) + .numberOfShards(1) + .numberOfReplicas(1) + ) + .put( + IndexMetadata.builder("auto-expand-1-all") + .settings(settings(Version.CURRENT).put(SETTING_AUTO_EXPAND_REPLICAS, "1-all")) + .numberOfShards(1) + .numberOfReplicas(1) + ) + .build(); + + ClusterState testState = ClusterState.builder(clusterState).metadata(metadata).build(); + testState = ClusterState.builder(testState) + .routingTable( + org.opensearch.cluster.routing.RoutingTable.builder() + .addAsNew(metadata.index("auto-expand-0-5")) + .addAsNew(metadata.index("auto-expand-1-all")) + .build() + ) + .build(); + + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, testState.getRoutingNodes(), testState, null, null, 0); + RoutingNode replicaOnlyNode = testState.getRoutingNodes().node("replica-only-node"); + + // auto-expand 0-5: NOT 0-all, should be blocked + ShardRouting replica05 = ShardRouting.newUnassigned( + testState.routingTable().index("auto-expand-0-5").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + Decision decision = decider.canAllocate(replica05, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + + // auto-expand 1-all: NOT 0-all, should be blocked + ShardRouting replica1all = ShardRouting.newUnassigned( + testState.routingTable().index("auto-expand-1-all").shard(0).shardId(), + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + decision = decider.canAllocate(replica1all, replicaOnlyNode, allocation); + assertEquals(Decision.Type.NO, decision.type()); + } +}