@@ -13,8 +13,21 @@ import { evalTest, TEST_AGENTS } from './test-helper.js';
1313
1414const INDEX_TS = 'export const add = (a: number, b: number) => a + b;\n' ;
1515
/**
 * Serialized minimal `package.json` placed into eval workspaces.
 *
 * It provides a realistic workspace anchor: this prevents the agent from
 * making incorrect assumptions about the environment and helps it navigate or
 * act as if it is in a standard Node.js project.
 */
const MOCK_PACKAGE_JSON = JSON.stringify(
  {
    name: 'subagent-eval-project',
    version: '1.0.0',
    type: 'module',
  },
  null,
  2, // pretty-print with two-space indentation
);
28+
1629function readProjectFile (
17- rig : { testDir ? : string } ,
30+ rig : { testDir : string | null } ,
1831 relativePath : string ,
1932) : string {
2033 return fs . readFileSync ( path . join ( rig . testDir ! , relativePath ) , 'utf8' ) ;
@@ -117,15 +130,7 @@ describe('subagent eval test cases', () => {
117130 files : {
118131 ...TEST_AGENTS . TESTING_AGENT . asFile ( ) ,
119132 'index.ts' : INDEX_TS ,
120- 'package.json' : JSON . stringify (
121- {
122- name : 'subagent-eval-project' ,
123- version : '1.0.0' ,
124- type : 'module' ,
125- } ,
126- null ,
127- 2 ,
128- ) ,
133+ 'package.json' : MOCK_PACKAGE_JSON ,
129134 } ,
130135 assert : async ( rig , _result ) => {
131136 const toolLogs = rig . readToolLogs ( ) as Array < {
@@ -164,15 +169,7 @@ describe('subagent eval test cases', () => {
164169 ...TEST_AGENTS . TESTING_AGENT . asFile ( ) ,
165170 'index.ts' : INDEX_TS ,
166171 'README.md' : 'TODO: update the README.\n' ,
167- 'package.json' : JSON . stringify (
168- {
169- name : 'subagent-eval-project' ,
170- version : '1.0.0' ,
171- type : 'module' ,
172- } ,
173- null ,
174- 2 ,
175- ) ,
172+ 'package.json' : MOCK_PACKAGE_JSON ,
176173 } ,
177174 assert : async ( rig , _result ) => {
178175 const toolLogs = rig . readToolLogs ( ) as Array < {
@@ -190,4 +187,105 @@ describe('subagent eval test cases', () => {
190187 ) ;
191188 } ,
192189 } ) ;
190+
/**
 * Checks that the main agent can correctly select the appropriate subagent
 * from a large pool of available subagents (10 total).
 *
 * The prompt asks for a SQL table migration. The test requires a successful
 * `database-agent` tool call and requires that neither the generalist nor any
 * of the other nine specialists appears in the tool logs.
 */
evalTest('USUALLY_PASSES', {
  name: 'should select the correct subagent from a pool of 10 different agents',
  prompt: 'Please add a new SQL table migration for a user profile.',
  files: {
    ...TEST_AGENTS.DOCS_AGENT.asFile(),
    ...TEST_AGENTS.TESTING_AGENT.asFile(),
    ...TEST_AGENTS.DATABASE_AGENT.asFile(),
    ...TEST_AGENTS.CSS_AGENT.asFile(),
    ...TEST_AGENTS.I18N_AGENT.asFile(),
    ...TEST_AGENTS.SECURITY_AGENT.asFile(),
    ...TEST_AGENTS.DEVOPS_AGENT.asFile(),
    ...TEST_AGENTS.ANALYTICS_AGENT.asFile(),
    ...TEST_AGENTS.ACCESSIBILITY_AGENT.asFile(),
    ...TEST_AGENTS.MOBILE_AGENT.asFile(),
    'package.json': MOCK_PACKAGE_JSON,
  },
  assert: async (rig, _result) => {
    const toolLogs = rig.readToolLogs() as Array<{
      toolRequest: { name: string };
    }>;
    // NOTE(review): this literal presumably equals
    // TEST_AGENTS.DATABASE_AGENT.name — confirm before swapping in the constant.
    await rig.expectToolCallSuccess(['database-agent']);

    // Ensure the generalist and other irrelevant specialists were not invoked
    const uncalledAgents = [
      'generalist',
      TEST_AGENTS.DOCS_AGENT.name,
      TEST_AGENTS.TESTING_AGENT.name,
      TEST_AGENTS.CSS_AGENT.name,
      TEST_AGENTS.I18N_AGENT.name,
      TEST_AGENTS.SECURITY_AGENT.name,
      TEST_AGENTS.DEVOPS_AGENT.name,
      TEST_AGENTS.ANALYTICS_AGENT.name,
      TEST_AGENTS.ACCESSIBILITY_AGENT.name,
      TEST_AGENTS.MOBILE_AGENT.name,
    ];

    // Collect the offenders instead of asserting a bare boolean per agent, so
    // a failure reports which agents were wrongly invoked.
    const invokedNames = new Set(toolLogs.map((log) => log.toolRequest.name));
    const unexpectedlyCalled = uncalledAgents.filter((agentName) =>
      invokedNames.has(agentName),
    );
    expect(unexpectedlyCalled).toEqual([]);
  },
});
238+
/**
 * Checks that the main agent can correctly select the appropriate subagent
 * from a large pool of available subagents, even when many irrelevant MCP
 * tools are present.
 *
 * This stress-tests subagent delegation with ~80 tools available. The setup
 * step registers a test MCP server before the prompt runs; the assertions are
 * the same as for the plain 10-agent pool: `database-agent` must be called
 * successfully and no other listed agent may appear in the tool logs.
 */
evalTest('USUALLY_PASSES', {
  name: 'should select the correct subagent from a pool of 10 different agents with MCP tools present',
  prompt: 'Please add a new SQL table migration for a user profile.',
  setup: async (rig) => {
    rig.addTestMcpServer('workspace-server', 'google-workspace');
  },
  files: {
    ...TEST_AGENTS.DOCS_AGENT.asFile(),
    ...TEST_AGENTS.TESTING_AGENT.asFile(),
    ...TEST_AGENTS.DATABASE_AGENT.asFile(),
    ...TEST_AGENTS.CSS_AGENT.asFile(),
    ...TEST_AGENTS.I18N_AGENT.asFile(),
    ...TEST_AGENTS.SECURITY_AGENT.asFile(),
    ...TEST_AGENTS.DEVOPS_AGENT.asFile(),
    ...TEST_AGENTS.ANALYTICS_AGENT.asFile(),
    ...TEST_AGENTS.ACCESSIBILITY_AGENT.asFile(),
    ...TEST_AGENTS.MOBILE_AGENT.asFile(),
    'package.json': MOCK_PACKAGE_JSON,
  },
  assert: async (rig, _result) => {
    const toolLogs = rig.readToolLogs() as Array<{
      toolRequest: { name: string };
    }>;
    // NOTE(review): this literal presumably equals
    // TEST_AGENTS.DATABASE_AGENT.name — confirm before swapping in the constant.
    await rig.expectToolCallSuccess(['database-agent']);

    // Ensure the generalist and other irrelevant specialists were not invoked
    const uncalledAgents = [
      'generalist',
      TEST_AGENTS.DOCS_AGENT.name,
      TEST_AGENTS.TESTING_AGENT.name,
      TEST_AGENTS.CSS_AGENT.name,
      TEST_AGENTS.I18N_AGENT.name,
      TEST_AGENTS.SECURITY_AGENT.name,
      TEST_AGENTS.DEVOPS_AGENT.name,
      TEST_AGENTS.ANALYTICS_AGENT.name,
      TEST_AGENTS.ACCESSIBILITY_AGENT.name,
      TEST_AGENTS.MOBILE_AGENT.name,
    ];

    // Collect the offenders instead of asserting a bare boolean per agent, so
    // a failure reports which agents were wrongly invoked.
    const invokedNames = new Set(toolLogs.map((log) => log.toolRequest.name));
    const unexpectedlyCalled = uncalledAgents.filter((agentName) =>
      invokedNames.has(agentName),
    );
    expect(unexpectedlyCalled).toEqual([]);
  },
});
193291} ) ;
0 commit comments