diff --git a/.github/skills/azure-typespec-author/evaluate/.vally.yaml b/.github/skills/azure-typespec-author/evaluate/.vally.yaml
index e0308b86996..aac362f065f 100644
--- a/.github/skills/azure-typespec-author/evaluate/.vally.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/.vally.yaml
@@ -14,7 +14,7 @@ environments:
         type: stdio
         command: dotnet
         args: ["run", "--project", "../../../../tools/azsdk-cli/Azure.Sdk.Tools.Cli", "--", "start"]
-        timeout: 300000
+        timeout: "10m"
         env:
           AZSDKTOOLS_AGENT_TESTING: "false"
           AZSDKTOOLS_COLLECT_TELEMETRY: "false"
@@ -25,7 +25,7 @@ environments:
         type: stdio
         command: dotnet
         args: ["run", "--project", "../../../../tools/azsdk-cli/Azure.Sdk.Tools.Cli", "--", "start"]
-        timeout: 300000
+        timeout: "10m"
         env:
           AZSDKTOOLS_AGENT_TESTING: "false"
           AZSDKTOOLS_COLLECT_TELEMETRY: "false"
@@ -44,4 +44,4 @@ suites:
   warning:
     evals: ["evals/005001.eval.yaml"]
   all:
-    evals: ["evals/*.eval.yaml"]
\ No newline at end of file
+    evals: ["evals/*.eval.yaml"]
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001001.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001001.eval.yaml
index b81d05f1586..00e66ba8f1b 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001001.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001001.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001002.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001002.eval.yaml
index a7ab314a0b3..b2642759cc3 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001002.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001002.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001003.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001003.eval.yaml
index 5a170070fc0..18c60e02666 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001003.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001003.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001004.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001004.eval.yaml
index 0940cca560d..d2fb436654f 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001004.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001004.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001005.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001005.eval.yaml
index a07a68a97f4..afcaeacf921 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001005.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001005.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
@@ -23,57 +23,58 @@ stimuli:
     max_tokens: 5000
   environment:
     files:
-    - src: ../fixtures/001005-version-add-preview-after-preview\employee.tsp
+    - src: ../fixtures/001005-version-add-preview-after-preview/employee.tsp
       dest: employee.tsp
-    - src: ../fixtures/001005-version-add-preview-after-preview\main.tsp
+    - src: ../fixtures/001005-version-add-preview-after-preview/main.tsp
       dest: main.tsp
-    - src: ../fixtures/001005-version-add-preview-after-preview\shared.tsp
+    - src: ../fixtures/001005-version-add-preview-after-preview/shared.tsp
       dest: shared.tsp
-    - src: ../fixtures/001005-version-add-preview-after-preview\tspconfig.yaml
+    - src: ../fixtures/001005-version-add-preview-after-preview/tspconfig.yaml
       dest: tspconfig.yaml
-    - src: ../fixtures/001005-version-add-preview-after-preview\package.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/package.json
       dest: package.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_Get_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Employees_Update_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Operations_List_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2024-10-01-preview\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2024-10-01-preview/Operations_List_MinimumSet_Gen.json
       dest: examples/2024-10-01-preview/Operations_List_MinimumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001005-version-add-preview-after-preview\examples\2021-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001005-version-add-preview-after-preview/examples/2021-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MinimumSet_Gen.json
   graders:
   - type: tool-calls
     config:
       required:
         - edit
+        - web_fetch
         - azure-sdk-mcp-azsdk_run_typespec_validation
   - type: skill-invocation
     config:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001006.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001006.eval.yaml
index 13e796fc3ea..ce8fb183df9 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001006.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001006.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
@@ -23,57 +23,58 @@ stimuli:
     max_tokens: 5000
   environment:
     files:
-    - src: ../fixtures/001006-version-add-preview-after-stable\employee.tsp
+    - src: ../fixtures/001006-version-add-preview-after-stable/employee.tsp
       dest: employee.tsp
-    - src: ../fixtures/001006-version-add-preview-after-stable\main.tsp
+    - src: ../fixtures/001006-version-add-preview-after-stable/main.tsp
       dest: main.tsp
-    - src: ../fixtures/001006-version-add-preview-after-stable\shared.tsp
+    - src: ../fixtures/001006-version-add-preview-after-stable/shared.tsp
       dest: shared.tsp
-    - src: ../fixtures/001006-version-add-preview-after-stable\tspconfig.yaml
+    - src: ../fixtures/001006-version-add-preview-after-stable/tspconfig.yaml
       dest: tspconfig.yaml
-    - src: ../fixtures/001006-version-add-preview-after-stable\package.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/package.json
       dest: package.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2021-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2021-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MinimumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2024-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001006-version-add-preview-after-stable\examples\2024-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001006-version-add-preview-after-stable/examples/2024-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2024-10-01/Operations_List_MinimumSet_Gen.json
   graders:
   - type: tool-calls
     config:
       required:
         - edit
+        - web_fetch
         - azure-sdk-mcp-azsdk_run_typespec_validation
   - type: skill-invocation
     config:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001007.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001007.eval.yaml
index 87c42bc2b1e..b474e77add5 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001007.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001007.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
@@ -23,63 +23,64 @@ stimuli:
     max_tokens: 5000
   environment:
     files:
-    - src: ../fixtures/001007-version-add-stable-after-preview\employee.tsp
+    - src: ../fixtures/001007-version-add-stable-after-preview/employee.tsp
       dest: employee.tsp
-    - src: ../fixtures/001007-version-add-stable-after-preview\main.tsp
+    - src: ../fixtures/001007-version-add-stable-after-preview/main.tsp
       dest: main.tsp
-    - src: ../fixtures/001007-version-add-stable-after-preview\shared.tsp
+    - src: ../fixtures/001007-version-add-stable-after-preview/shared.tsp
       dest: shared.tsp
-    - src: ../fixtures/001007-version-add-stable-after-preview\tspconfig.yaml
+    - src: ../fixtures/001007-version-add-stable-after-preview/tspconfig.yaml
       dest: tspconfig.yaml
-    - src: ../fixtures/001007-version-add-stable-after-preview\package.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/package.json
       dest: package.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\readme.md
+    - src: ../fixtures/001007-version-add-stable-after-preview/readme.md
       dest: readme.md
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2021-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2021-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MinimumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_Get_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Employees_Update_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Operations_List_MaximumSet_Gen.json
       dest: examples/2024-10-01-preview/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001007-version-add-stable-after-preview\examples\2024-10-01-preview\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001007-version-add-stable-after-preview/examples/2024-10-01-preview/Operations_List_MinimumSet_Gen.json
       dest: examples/2024-10-01-preview/Operations_List_MinimumSet_Gen.json
   graders:
   - type: tool-calls
     config:
       required:
         - edit
+        - web_fetch
         - azure-sdk-mcp-azsdk_run_typespec_validation
   - type: skill-invocation
     config:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001008.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001008.eval.yaml
index a7900d3f387..2a931ad453b 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001008.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001008.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial time limit as a duration string (660s = 11 minutes)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
@@ -23,63 +23,64 @@ stimuli:
     max_tokens: 5000
   environment:
     files:
-    - src: ../fixtures/001008-version-add-stable-after-stable\employee.tsp
+    - src: ../fixtures/001008-version-add-stable-after-stable/employee.tsp
       dest: employee.tsp
-    - src: ../fixtures/001008-version-add-stable-after-stable\main.tsp
+    - src: ../fixtures/001008-version-add-stable-after-stable/main.tsp
       dest: main.tsp
-    - src: ../fixtures/001008-version-add-stable-after-stable\shared.tsp
+    - src: ../fixtures/001008-version-add-stable-after-stable/shared.tsp
       dest: shared.tsp
-    - src: ../fixtures/001008-version-add-stable-after-stable\tspconfig.yaml
+    - src: ../fixtures/001008-version-add-stable-after-stable/tspconfig.yaml
       dest: tspconfig.yaml
-    - src: ../fixtures/001008-version-add-stable-after-stable\package.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/package.json
       dest: package.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\readme.md
+    - src: ../fixtures/001008-version-add-stable-after-stable/readme.md
       dest: readme.md
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2021-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2021-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2021-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2021-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2021-10-01/Operations_List_MinimumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_CreateOrUpdate_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_CreateOrUpdate_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_Delete_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_Delete_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Delete_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_Get_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_Get_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Get_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_ListByResourceGroup_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListByResourceGroup_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_ListByResourceGroup_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListByResourceGroup_MinimumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_ListBySubscription_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListBySubscription_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_ListBySubscription_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
       dest: examples/2024-10-01/Employees_ListBySubscription_MinimumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Employees_Update_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Employees_Update_MaximumSet_Gen.json
       dest: examples/2024-10-01/Employees_Update_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Operations_List_MaximumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Operations_List_MaximumSet_Gen.json
       dest: examples/2024-10-01/Operations_List_MaximumSet_Gen.json
-    - src: ../fixtures/001008-version-add-stable-after-stable\examples\2024-10-01\Operations_List_MinimumSet_Gen.json
+    - src: ../fixtures/001008-version-add-stable-after-stable/examples/2024-10-01/Operations_List_MinimumSet_Gen.json
       dest: examples/2024-10-01/Operations_List_MinimumSet_Gen.json
   graders:
   - type: tool-calls
     config:
       required:
         - edit
+        - web_fetch
         - azure-sdk-mcp-azsdk_run_typespec_validation
   - type: skill-invocation
     config:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001009.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001009.eval.yaml
index a8d6b64e0ac..4f300d120bf 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001009.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001009.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001010.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001010.eval.yaml
index 078f85448d4..f07692373b6 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001010.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001010.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001011.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001011.eval.yaml
index 05512728bd2..e658dbe3275 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001011.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001011.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001012.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001012.eval.yaml
index 223f339b0c1..f1110c53445 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001012.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001012.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/001013.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/001013.eval.yaml
index ccefd24795b..e7ff82bfda2 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/001013.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/001013.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6       #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002001.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002001.eval.yaml
index 3104bd51d32..4e82c8460ad 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002001.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002001.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002002.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002002.eval.yaml
index bc2347f6392..a0ab2ea4910 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002002.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002002.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002003.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002003.eval.yaml
index d5b2e7d6ba8..ae679640d74 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002003.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002003.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002004.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002004.eval.yaml
index 34d569946f8..654a6326b8c 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002004.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002004.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002005.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002005.eval.yaml
index 0d62311c28b..4b99638ff29 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002005.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002005.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002006.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002006.eval.yaml
index d5014ddbe0f..4f506cfdbf1 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002006.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002006.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002007.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002007.eval.yaml
index bb239bb5e4b..52ada2b010d 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002007.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002007.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002008.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002008.eval.yaml
index 9f17bc7a107..17652dc75b8 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002008.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002008.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002009.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002009.eval.yaml
index 6d92881c639..f934d9e717a 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002009.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002009.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/002010.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/002010.eval.yaml
index ebed4a60f67..34cc8ec3d1d 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/002010.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/002010.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/003001.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/003001.eval.yaml
index 464d49b6d2a..b58615b1015 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/003001.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/003001.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/003002.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/003002.eval.yaml
index f9c6e3db279..1a5553f9e7e 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/003002.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/003002.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/004001.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/004001.eval.yaml
index a30a48400ce..3ebe320c61e 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/004001.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/004001.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/004002.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/004002.eval.yaml
index 38c8cfebecf..6766ddef802 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/004002.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/004002.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/004003.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/004003.eval.yaml
index a1e94e44a3f..6475a92884f 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/004003.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/004003.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/005001.eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/005001.eval.yaml
index a9c30d6b466..e3cb5fdcb17 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/005001.eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/005001.eval.yaml
@@ -8,10 +8,10 @@ environment: azsdk-mcp
 
 # Execution configuration
 config:
-  runs: 1                      # Trials per stimulus
-  timeout: 1800                # Seconds per trial
-  model: claude-opus-4.6    #gpt-4o #claude-sonnet-4.6 # Model for agent execution
-  executor: copilot-sdk        # Which executor to use
+  runs: 1                        # Trials per stimulus
+  timeout: "660s"                # Per-trial timeout (duration string: 660 seconds)
+  model: claude-opus-4.6         # Model for agent execution (alternatives: gpt-4o, claude-sonnet-4.6)
+  executor: copilot-sdk          # Which executor to use
 
 # Test cases
 stimuli:
diff --git a/.github/skills/azure-typespec-author/evaluate/evals/eval.yaml b/.github/skills/azure-typespec-author/evaluate/evals/eval.yaml
index b02a0868e0a..3d9ba95824b 100644
--- a/.github/skills/azure-typespec-author/evaluate/evals/eval.yaml
+++ b/.github/skills/azure-typespec-author/evaluate/evals/eval.yaml
@@ -13,7 +13,7 @@ environment: azsdk-mcp
 # Execution configuration
 config:
   runs: 1
-  timeout: 1800
+  timeout: "1200s"  # 1200 seconds, written as a duration string with an explicit unit
   model: claude-opus-4.6
   executor: copilot-sdk
 # Cases are intentionally sorted by case id.
diff --git a/eng/pipelines/azure-typespec-author-benchmark.yml b/eng/pipelines/azure-typespec-author-benchmark.yml
new file mode 100644
index 00000000000..f766bd53637
--- /dev/null
+++ b/eng/pipelines/azure-typespec-author-benchmark.yml
@@ -0,0 +1,240 @@
+# NOTE: Please refer to https://aka.ms/azsdk/engsys/ci-yaml before editing this file.
+trigger: none
+pr: none
+
+parameters:
+- name: PythonVersion
+  type: string
+  default: '3.10'
+- name: SkillBranch
+  displayName: Branch containing the azure-typespec-author skill source (SKILL.md + references/) to evaluate. Leave empty to use the branch this pipeline was triggered from.
+  type: string
+  default: ''
+
+extends:
+  template: /eng/pipelines/templates/stages/1es-redirect.yml
+  parameters:
+    Use1ESOfficial: true
+    stages:
+      - stage: EvalTypeSpecAuthor
+        displayName: Azure TypeSpec Author Skill Evaluation
+        variables:
+          - template: /eng/pipelines/templates/variables/globals.yml
+          - template: /eng/pipelines/templates/variables/image.yml
+          - group: Azure SDK QA Bot Dev Variables
+          - group: AzSDK_Eval_Variable_group
+        pool:
+          name: $(LINUXPOOL)
+          image: $(LINUXVMIMAGE)
+          os: linux
+        jobs:
+          - job: RunEvals
+            displayName: Run Vally Evaluations
+            timeoutInMinutes: 120
+            steps:
+              - checkout: self
+                fetchDepth: 0
+
+              # Optionally overlay the azure-typespec-author skill source
+              # (SKILL.md + references/) from a different branch so we can
+              # evaluate skill changes that live on another branch while
+              # keeping the evaluate/ directory and pipeline config from
+              # the branch this pipeline was triggered from.
+              # The branch name reaches the script through an environment
+              # variable instead of being template-expanded into the script
+              # text, so an unusual value cannot change what the script runs.
+              - script: |
+                  set -e
+                  BRANCH="$SKILL_BRANCH"
+                  if [ -z "$BRANCH" ]; then
+                    echo "SkillBranch parameter not provided; using the source branch checked out by the pipeline."
+                    git rev-parse --abbrev-ref HEAD
+                    git rev-parse HEAD
+                    exit 0
+                  fi
+                  echo "Overlaying azure-typespec-author skill source from branch: $BRANCH"
+                  git fetch origin "$BRANCH"
+                  rm -rf .github/skills/azure-typespec-author/SKILL.md \
+                         .github/skills/azure-typespec-author/references
+                  git checkout "origin/$BRANCH" -- \
+                    .github/skills/azure-typespec-author/SKILL.md \
+                    .github/skills/azure-typespec-author/references
+                  echo "Skill source overlaid from origin/$BRANCH ($(git rev-parse "origin/$BRANCH"))"
+                  ls -R .github/skills/azure-typespec-author
+                displayName: Checkout skill branch
+                env:
+                  SKILL_BRANCH: ${{ parameters.SkillBranch }}
+
+              # Install Go and build/start the QA bot backend
+              # `set -e` plus `curl -f` make a failed download or extraction
+              # fail this step rather than surfacing later in the Go build.
+              - script: |
+                  set -e
+                  echo "Downloading Go 1.24.0..."
+                  curl -fLO https://go.dev/dl/go1.24.0.linux-amd64.tar.gz
+                  echo "Removing existing Go installation..."
+                  sudo rm -rf /usr/local/go
+                  echo "Extracting Go 1.24.0..."
+                  sudo tar -C /usr/local -xzf go1.24.0.linux-amd64.tar.gz
+                  echo "Setting environment variables..."
+                  echo "##vso[task.setvariable variable=GOROOT]/usr/local/go"
+                  echo "##vso[task.prependpath]/usr/local/go/bin"
+                displayName: Install Go
+
+              - script: go version
+                displayName: Check Go version
+
+              - task: AzureCLI@2
+                displayName: Start QA bot backend service
+                inputs:
+                  azureSubscription: 'azuresdkqabot-dev'
+                  scriptType: bash
+                  scriptLocation: inlineScript
+                  workingDirectory: $(Build.SourcesDirectory)/tools/sdk-ai-bots/azure-sdk-qa-bot-backend
+                  inlineScript: |
+                    export GOPROXY=https://proxy.golang.org,direct
+                    go build -o qa-bot-service
+                    chmod +x qa-bot-service
+                    nohup ./qa-bot-service > qa-bot-service.log 2>&1 &
+                    SERVICE_PID=$!
+                    echo "Service started with PID: $SERVICE_PID"
+                    sleep 20
+                    echo "=== Service log ==="
+                    cat qa-bot-service.log || true
+                    echo "==================="
+                    if ! kill -0 $SERVICE_PID 2>/dev/null; then
+                      echo "ERROR: Service process has exited unexpectedly"
+                      exit 1
+                    fi
+                    if ! lsof -ti:8088 > /dev/null 2>&1; then
+                      echo "ERROR: No process listening on port 8088"
+                      exit 1
+                    fi
+                    echo "Service is running on port 8088"
+                env:
+                  SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+                  AZURE_APPCONFIG_ENDPOINT: $(AZURE_APPCONFIG_ENDPOINT)
+
+              # Install .NET SDK for azsdk-cli MCP server.
+              # Use the version pinned in global.json so the build/restore works.
+              - task: UseDotNet@2
+                displayName: Install .NET SDK (global.json)
+                inputs:
+                  useGlobalJson: true
+                  workingDirectory: $(Build.SourcesDirectory)
+              - task: UseDotNet@2
+                displayName: Install .NET 8.0 SDK
+                inputs:
+                  version: '8.x'
+
+              # Authenticate to NuGet so the azsdk-cli MCP server can restore
+              # packages from the internal Azure Artifacts feed (the 1ES pool
+              # blocks direct egress to nuget.org).
+              - task: NuGetAuthenticate@1
+                displayName: Authenticate NuGet
+
+              # Pre-build the azsdk-cli (the MCP server). vally launches it
+              # via `dotnet run` (defaults to Debug, no -c flag), but doing
+              # the restore + build here surfaces any failure as a clear
+              # pipeline error instead of a silent MCP server startup
+              # failure during evaluations.
+              - script: |
+                  set -e
+                  dotnet restore tools/azsdk-cli/Azure.Sdk.Tools.Cli/Azure.Sdk.Tools.Cli.csproj
+                  dotnet build tools/azsdk-cli/Azure.Sdk.Tools.Cli/Azure.Sdk.Tools.Cli.csproj \
+                    --no-restore --nologo
+                displayName: Build azsdk-cli (MCP server)
+
+              # Install Node.js, Vally CLI, and Copilot SDK.
+              # Use the internal Azure Artifacts npm mirror (with auth) because
+              # the 1ES managed pool blocks direct egress to registry.npmjs.org.
+              - task: NodeTool@0
+                displayName: Use Node.js 22
+                inputs:
+                  versionSpec: '22.x'
+
+              - template: /eng/common/pipelines/templates/steps/create-authenticated-npmrc.yml
+                parameters:
+                  npmrcPath: $(Build.SourcesDirectory)/.npmrc
+                  registryUrl: https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-tools/npm/registry/
+
+              - script: npm install --global @microsoft/vally-cli@0.4.0 --userconfig $(Build.SourcesDirectory)/.npmrc
+                displayName: Install Vally CLI
+
+              - script: npm install --global @github/copilot-sdk --userconfig $(Build.SourcesDirectory)/.npmrc
+                displayName: Install Copilot SDK
+
+              # Pre-install TypeSpec compiler globally from the internal npm
+              # mirror so `azsdk_run_typespec_validation` (and any direct
+              # `npx tsp`/`npm install` the agent attempts) succeeds without
+              # network access to registry.npmjs.org.
+              - script: |
+                  set -e
+                  npm install --global @typespec/compiler --userconfig $(Build.SourcesDirectory)/.npmrc
+                  echo "tsp version:"
+                  tsp --version || true
+                displayName: Install TypeSpec compiler
+
+              # Smoke test (informational only): try to start MCP server and
+              # capture any startup errors. We don't fail the pipeline here
+              # because reliably driving an MCP stdio JSON-RPC handshake from
+              # a shell is fragile; vally/copilot-sdk drives the protocol
+              # properly during the eval step.
+              - script: |
+                  cd $(Build.SourcesDirectory)/tools/azsdk-cli/Azure.Sdk.Tools.Cli
+                  echo "--- CLI commands ---"
+                  dotnet run --no-build -- --help 2>&1 | head -40 || true
+                  echo "--- list registered MCP tools ---"
+                  dotnet run --no-build -- list 2>&1 | head -200 || true
+                displayName: MCP server smoke test (informational)
+                continueOnError: true
+                env:
+                  AZSDKTOOLS_AGENT_TESTING: "true"
+                  AZSDKTOOLS_COLLECT_TELEMETRY: "false"
+                  AZURE_SDK_KB_ENDPOINT: "http://localhost:8088"
+
+              # Run evaluations
+              # The vally config (.vally.yaml) lives in the skill's evaluate/
+              # directory; cd there so vally discovers evals/*.eval.yaml.
+              # Vally requires `timeout` to be a duration string (e.g. "30m").
+              # Any eval file that still has a bare integer (seconds) is rewritten
+              # at runtime so the source stays untouched; values that are already
+              # quoted durations do not match the pattern and are left as-is.
+              - script: |
+                  cd .github/skills/azure-typespec-author/evaluate
+                  mkdir -p results
+                  for f in evals/*.eval.yaml; do
+                    sed -i -E 's/^([[:space:]]*timeout:[[:space:]]+)([0-9]+)([[:space:]]*(#.*)?)$/\1"\2s"\3/' "$f"
+                  done
+                  vally eval --suite all --output-dir results --verbose
+                displayName: Run evaluations
+                continueOnError: true
+                env:
+                  GITHUB_TOKEN: $(azuresdk-copilot-github-pat)
+
+              # Second pass: run the eval spec eval.yaml with 3 workers, writing
+              # to results-2 so it is published separately from the suite run.
+              # NOTE(review): the path is relative to evaluate/; confirm that
+              # eval.yaml exists there (the repo also has evals/eval.yaml).
+              - script: |
+                  cd .github/skills/azure-typespec-author/evaluate
+                  mkdir -p results-2
+                  vally eval --eval-spec eval.yaml --output-dir results-2 --workers 3 --verbose
+                displayName: Run all evaluations
+                continueOnError: true
+                condition: succeededOrFailed()
+                env:
+                  GITHUB_TOKEN: $(azuresdk-copilot-github-pat)
+
+            templateContext:
+              outputs:
+                - output: pipelineArtifact
+                  path: $(Build.SourcesDirectory)/.github/skills/azure-typespec-author/evaluate/results
+                  artifact: eval-results-$(Build.BuildId)
+                  displayName: Upload eval results
+                  condition: always()
+                - output: pipelineArtifact
+                  path: $(Build.SourcesDirectory)/.github/skills/azure-typespec-author/evaluate/results-2
+                  artifact: eval-results2-$(Build.BuildId)
+                  displayName: Upload eval results 2
+                  condition: always()