diff --git a/packages/shell-api/src/collection.spec.ts b/packages/shell-api/src/collection.spec.ts index 205da41f37..3f218d662c 100644 --- a/packages/shell-api/src/collection.spec.ts +++ b/packages/shell-api/src/collection.spec.ts @@ -2276,6 +2276,70 @@ describe('Collection', function () { ShellApiErrors.NotConnectedToShardedCluster ); }); + + describe('with orphan documents', function () { + const mockedNumChunks = 2; + const mockedCollectionConfigInfo = {}; + const mockedShardStats = { + shard: 'test-shard', + storageStats: { + size: 1000, + numOrphanDocs: 10, + avgObjSize: 7, + count: 15, + }, + }; + const mockedShardInfo = { + host: 'dummy-host', + }; + + beforeEach(function () { + const serviceProviderCursor = stubInterface(); + + // Make find and limit have no effect so the value of findOne is determined by tryNext. + serviceProviderCursor.limit.returns(serviceProviderCursor); + serviceProvider.find.returns(serviceProviderCursor); + + // Mock according to the order of findOne calls getShardDistribution uses. + serviceProviderCursor.tryNext + .onCall(0) + .resolves(mockedCollectionConfigInfo); + serviceProviderCursor.tryNext.onCall(1).resolves(mockedShardInfo); + serviceProvider.countDocuments.returns( + Promise.resolve(mockedNumChunks) + ); + + const aggregateTryNext = sinon.stub(); + aggregateTryNext.onCall(0).resolves(mockedShardStats); + aggregateTryNext.onCall(1).resolves(null); + + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProvider.aggregate.returns({ + tryNext: aggregateTryNext, + } as any); + }); + + it('should account for numOrphanDocs when calculating size', async function () { + const shardDistribution = await collection.getShardDistribution(); + + const { storageStats } = mockedShardStats; + expect(shardDistribution.type).equals('StatsResult'); + const adjustedSize = + storageStats.size - + storageStats.numOrphanDocs * storageStats.avgObjSize; + expect(shardDistribution.value.Totals.data).equals( + `${adjustedSize}B` + ); + const shardField = Object.keys(shardDistribution.value).find( + (field) => field !== 'Totals' + ) as `Shard ${string} at ${string}`; + + expect(shardField).not.undefined; + expect( + shardDistribution.value[shardField]['estimated data per chunk'] + ).equals(`${adjustedSize / mockedNumChunks}B`); + }); + }); }); describe('analyzeShardKey', function () { diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index cacfdd8f69..1723425bb1 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2135,12 +2135,14 @@ export default class Collection extends ShellApiWithMongoClass { @returnsPromise @topologies([Topologies.Sharded]) @apiVersions([]) - async getShardDistribution(): Promise { + async getShardDistribution(): Promise< + CommandResult + > { this._emitCollectionApiCall('getShardDistribution', {}); await getConfigDB(this._database); // Warns if not connected to mongos - const result = {} as Document; + const result = {} as GetShardDistributionResult; const config = this._mongo.getDB('config'); const collStats = await ( @@ -2179,17 +2181,24 @@ export default class Collection extends ShellApiWithMongoClass { .findOne({ _id: extractedShardStats.shard }), config.getCollection('chunks').countDocuments(countChunksQuery), ]); + + // Since 6.0, there can be orphan documents indicated by numOrphanDocs. + // These orphan documents need to be accounted for in the size calculation. + const orphanDocumentsSize = + (extractedShardStats.storageStats.numOrphanDocs ?? 0) * + (extractedShardStats.storageStats.avgObjSize ?? 0); + const ownedSize = + extractedShardStats.storageStats.size - orphanDocumentsSize; + const shardStats = { shardId: shard, host: host !== null ? host.host : null, - size: extractedShardStats.storageStats.size, + size: ownedSize, count: extractedShardStats.storageStats.count, numChunks: numChunks, avgObjSize: extractedShardStats.storageStats.avgObjSize, }; - const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - // In sharded timeseries collections we do not have a count // so we intentionally pass NaN as a result to the client. const shardStatsCount: number = shardStats.count ?? NaN; @@ -2203,7 +2212,7 @@ export default class Collection extends ShellApiWithMongoClass { ? 0 : Math.floor(shardStatsCount / shardStats.numChunks); - result[key] = { + result[`Shard ${shardStats.shardId} at ${shardStats.host}`] = { data: dataFormat(coerceToJSNumber(shardStats.size)), docs: shardStatsCount, chunks: shardStats.numChunks, @@ -2211,7 +2220,7 @@ export default class Collection extends ShellApiWithMongoClass { 'estimated docs per chunk': estimatedDocsPerChunk, }; - totals.size += coerceToJSNumber(shardStats.size); + totals.size += coerceToJSNumber(ownedSize); totals.count += coerceToJSNumber(shardStatsCount); totals.numChunks += coerceToJSNumber(shardStats.numChunks); @@ -2224,7 +2233,7 @@ export default class Collection extends ShellApiWithMongoClass { data: dataFormat(totals.size), docs: totals.count, chunks: totals.numChunks, - } as Document; + } as GetShardDistributionResult['Totals']; for (const shardStats of conciseShardsStats) { const estDataPercent = @@ -2243,7 +2252,8 @@ export default class Collection extends ShellApiWithMongoClass { ]; } result.Totals = totalValue; - return new CommandResult('StatsResult', result); + + return new CommandResult('StatsResult', result); } @serverVersions(['3.1.0', ServerVersions.latest]) @@ -2467,3 +2477,24 @@ export default class Collection extends ShellApiWithMongoClass { ); } } + +export type GetShardDistributionResult = { + Totals: { + data: string; + docs: number; + chunks: number; + } & { + [individualShardDistribution: `Shard ${string}`]: [ + `${number} % data`, + `${number} % docs in cluster`, + `${string} avg obj size on shard` + ]; + }; + [individualShardResult: `Shard ${string} at ${string}`]: { + data: string; + docs: number; + chunks: number; + 'estimated data per chunk': string; + 'estimated docs per chunk': number; + }; +};