toeverything · darkskygit · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024
diff --git a/.github/actions/copilot-test/action.yml b/.github/actions/copilot-test/action.yml
@@ -3,7 +3,7 @@ description: 'Run Copilot E2E Test'
 inputs:
   script:
     description: 'Script to run'
-    default: 'yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only'
+    default: 'yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only'
     required: false
   openai-key:
     description: 'OpenAI secret key'

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
@@ -425,6 +425,7 @@ jobs:
           filters: |
             backend:
               - 'packages/backend/server/src/**'
+              - 'tests/affine-cloud-copilot/**'
 
       - name: Setup Node.js
         if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
@@ -446,7 +447,7 @@ jobs:
 
       - name: Run server tests
         if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
-        run: yarn workspace @affine/server test:copilot:coverage --forbid-only
+        run: yarn workspace @affine/server test:copilot:spec:coverage --forbid-only
         env:
           CARGO_TARGET_DIR: '${{ github.workspace }}/target'
           COPILOT_OPENAI_API_KEY: ${{ secrets.COPILOT_OPENAI_API_KEY }}
@@ -502,26 +503,34 @@ jobs:
             echo "skip=true" >> $GITHUB_OUTPUT
           fi
 
+      - uses: dorny/paths-filter@v3
+        id: filter  # read as steps.filter.outputs.backend by the `if:` guards of the steps below
+        with:
+          filters: |
+            backend:
+              - 'packages/backend/server/src/**'
+              - 'tests/affine-cloud-copilot/**'
+
       - name: Setup Node.js
-        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
+        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
         uses: ./.github/actions/setup-node
         with:
           playwright-install: true
           electron-install: false
           hard-link-nm: false
 
       - name: Download server-native.node
-        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
+        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
         uses: actions/download-artifact@v4
         with:
           name: server-native.node
           path: ./packages/backend/server
 
       - name: Run Copilot E2E Test ${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
-        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
+        if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
         uses: ./.github/actions/copilot-test
         with:
-          script: yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
+          script: yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
           openai-key: ${{ secrets.COPILOT_OPENAI_API_KEY }}
           fal-key: ${{ secrets.COPILOT_FAL_API_KEY }}
 

diff --git a/.github/workflows/copilot-test.yml b/.github/workflows/copilot-test.yml
@@ -42,6 +42,16 @@ jobs:
       NODE_ENV: test
       DISTRIBUTION: web
       DATABASE_URL: postgresql://affine:affine@localhost:5432/affine
+    strategy:
+      fail-fast: false  # a failing matrix entry must not cancel the other one
+      matrix:
+        spec:
+          - name: e2e
+            package: '@affine-test/affine-cloud-copilot'
+            type: e2e
+          - name: spec
+            package: '@affine/server'
+            type: 'copilot:spec'
     services:
       postgres:
         image: postgres
@@ -78,12 +88,16 @@ jobs:
       - name: Prepare Server Test Environment
         uses: ./.github/actions/server-test-env
 
-      - name: Run server tests
-        run: yarn workspace @affine/server test:copilot:coverage --forbid-only
+      - name: Run copilot api ${{ matrix.spec.name }} tests  # script below expands to test:<matrix.spec.type>:coverage; NOTE(review): confirm the e2e package defines test:e2e:coverage
+        run: yarn workspace ${{ matrix.spec.package }} test:${{ matrix.spec.type }}:coverage --forbid-only
         env:
           CARGO_TARGET_DIR: '${{ github.workspace }}/target'
           COPILOT_OPENAI_API_KEY: ${{ secrets.COPILOT_OPENAI_API_KEY }}
           COPILOT_FAL_API_KEY: ${{ secrets.COPILOT_FAL_API_KEY }}
+          COPILOT_E2E_ENDPOINT: ${{ secrets.COPILOT_E2E_ENDPOINT }}
+          COPILOT_E2E_USER: ${{ secrets.COPILOT_E2E_USER }}
+          COPILOT_E2E_PASSWORD: ${{ secrets.COPILOT_E2E_PASSWORD }}
+          COPILOT_E2E_SECRET: ${{ secrets.COPILOT_E2E_SECRET }}
 
       - name: Upload server test coverage results
         uses: codecov/codecov-action@v4
@@ -139,7 +153,7 @@ jobs:
       - name: Run Copilot E2E Test ${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
         uses: ./.github/actions/copilot-test
         with:
-          script: yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
+          script: yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
           openai-key: ${{ secrets.COPILOT_OPENAI_API_KEY }}
           fal-key: ${{ secrets.COPILOT_FAL_API_KEY }}
 

diff --git a/packages/backend/server/package.json b/packages/backend/server/package.json
@@ -12,9 +12,9 @@
     "start": "node --loader ts-node/esm/transpile-only.mjs ./src/index.ts",
     "dev": "nodemon ./src/index.ts",
     "test": "ava --concurrency 1 --serial",
-    "test:copilot": "ava \"tests/**/copilot-*.spec.ts\"",
+    "test:copilot:spec": "ava \"tests/**/copilot-*.spec.ts\"",
     "test:coverage": "c8 ava --concurrency 1 --serial",
-    "test:copilot:coverage": "c8 ava --timeout=5m \"tests/**/copilot-*.spec.ts\"",
+    "test:copilot:spec:coverage": "c8 ava --timeout=5m \"tests/**/copilot-*.spec.ts\"",
     "postinstall": "prisma generate",
     "data-migration": "node --loader ts-node/esm/transpile-only.mjs ./src/data/index.ts",
     "predeploy": "yarn prisma migrate deploy && node --import ./scripts/register.js ./dist/data/index.js run",

diff --git a/packages/backend/server/tests/copilot-provider.spec.ts b/packages/backend/server/tests/copilot-provider.spec.ts
@@ -27,7 +27,11 @@ import {
   CopilotCheckJsonExecutor,
 } from '../src/plugins/copilot/workflow/executor';
 import { createTestingModule } from './utils';
-import { TestAssets } from './utils/copilot';
+import {
+  checkMDList,
+  ProviderActionTestCase,
+  ProviderWorkflowTestCase,
+} from './utils/copilot';
 
 type Tester = {
   auth: AuthService;
@@ -130,59 +134,6 @@ test.after(async t => {
   await t.context.module.close();
 });
 
-const assertNotWrappedInCodeBlock = (
-  t: ExecutionContext<Tester>,
-  result: string
-) => {
-  t.assert(
-    !result.replaceAll('\n', '').trim().startsWith('```') &&
-      !result.replaceAll('\n', '').trim().endsWith('```'),
-    'should not wrap in code block'
-  );
-};
-
-const checkMDList = (text: string) => {
-  const lines = text.split('\n');
-  const listItemRegex = /^( {2})*(-|\u2010-\u2015|\*|\+)? .+$/;
-  let prevIndent = null;
-
-  for (const line of lines) {
-    if (line.trim() === '') continue;
-    if (!listItemRegex.test(line)) {
-      return false;
-    }
-
-    // eslint-disable-next-line @typescript-eslint/no-non-null-asserted-optional-chain
-    const currentIndent = line.match(/^( *)/)?.[0].length!;
-    if (Number.isNaN(currentIndent) || currentIndent % 2 !== 0) {
-      return false;
-    }
-
-    if (prevIndent !== null && currentIndent > 0) {
-      const indentDiff = currentIndent - prevIndent;
-      // allow 1 level of indentation difference
-      if (indentDiff > 2) {
-        return false;
-      }
-    }
-
-    if (line.trim().startsWith('-')) {
-      prevIndent = currentIndent;
-    }
-  }
-
-  return true;
-};
-
-const checkUrl = (url: string) => {
-  try {
-    new URL(url);
-    return true;
-  } catch {
-    return false;
-  }
-};
-
 const retry = async (
   action: string,
   t: ExecutionContext<Tester>,
@@ -264,138 +215,7 @@ test('should validate markdown list', t => {
 
 // ==================== action ====================
 
-const actions = [
-  {
-    promptName: [
-      'Summary',
-      'Explain this',
-      'Write an article about this',
-      'Write a twitter about this',
-      'Write a poem about this',
-      'Write a blog post about this',
-      'Write outline',
-      'Change tone to',
-      'Improve writing for it',
-      'Improve grammar for it',
-      'Fix spelling for it',
-      'Create headings',
-      'Make it longer',
-      'Make it shorter',
-      'Continue writing',
-    ],
-    messages: [{ role: 'user' as const, content: TestAssets.SSOT }],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(
-        result.toLowerCase().includes('single source of truth'),
-        'should include original keyword'
-      );
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: ['Brainstorm ideas about this', 'Brainstorm mindmap'],
-    messages: [{ role: 'user' as const, content: TestAssets.SSOT }],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(checkMDList(result), 'should be a markdown list');
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: 'Expand mind map',
-    messages: [{ role: 'user' as const, content: '- Single source of truth' }],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(checkMDList(result), 'should be a markdown list');
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: 'Find action items from it',
-    messages: [{ role: 'user' as const, content: TestAssets.TODO }],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(checkMDList(result), 'should be a markdown list');
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: ['Explain this code', 'Check code error'],
-    messages: [{ role: 'user' as const, content: TestAssets.Code }],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(
-        result.toLowerCase().includes('distance'),
-        'explain code result should include keyword'
-      );
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: 'Translate to',
-    messages: [
-      {
-        role: 'user' as const,
-        content: TestAssets.SSOT,
-        params: { language: 'Simplified Chinese' },
-      },
-    ],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      t.assert(
-        result.toLowerCase().includes('单一事实来源'),
-        'explain code result should include keyword'
-      );
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: ['Generate a caption', 'Explain this image'],
-    messages: [
-      {
-        role: 'user' as const,
-        content: '',
-        attachments: [
-          'https://cdn.affine.pro/copilot-test/Qgqy9qZT3VGIEuMIotJYoCCH.jpg',
-        ],
-      },
-    ],
-    verifier: (t: ExecutionContext<Tester>, result: string) => {
-      assertNotWrappedInCodeBlock(t, result);
-      const content = result.toLowerCase();
-      t.assert(
-        content.includes('classroom') ||
-          content.includes('school') ||
-          content.includes('sky'),
-        'explain code result should include keyword'
-      );
-    },
-    type: 'text' as const,
-  },
-  {
-    promptName: [
-      'debug:action:fal-face-to-sticker',
-      'debug:action:fal-remove-bg',
-      'debug:action:fal-sd15',
-      'debug:action:fal-upscaler',
-    ],
-    messages: [
-      {
-        role: 'user' as const,
-        content: '',
-        attachments: [
-          'https://cdn.affine.pro/copilot-test/Zkas098lkjdf-908231.jpg',
-        ],
-      },
-    ],
-    verifier: (t: ExecutionContext<Tester>, link: string) => {
-      t.truthy(checkUrl(link), 'should be a valid url');
-    },
-    type: 'image' as const,
-  },
-];
-for (const { promptName, messages, verifier, type } of actions) {
+for (const { promptName, messages, verifier, type } of ProviderActionTestCase) {
   const prompts = Array.isArray(promptName) ? promptName : [promptName];
   for (const promptName of prompts) {
     test(
@@ -455,28 +275,7 @@ for (const { promptName, messages, verifier, type } of actions) {
 
 // ==================== workflow ====================
 
-const workflows = [
-  {
-    name: 'brainstorm',
-    content: 'apple company',
-    verifier: (t: ExecutionContext, result: string) => {
-      t.assert(checkMDList(result), 'should be a markdown list');
-    },
-  },
-  {
-    name: 'presentation',
-    content: 'apple company',
-    verifier: (t: ExecutionContext, result: string) => {
-      for (const l of result.split('\n')) {
-        t.notThrows(() => {
-          JSON.parse(l.trim());
-        }, 'should be valid json');
-      }
-    },
-  },
-];
-
-for (const { name, content, verifier } of workflows) {
+for (const { name, content, verifier } of ProviderWorkflowTestCase) {
   test(
     `should be able to run workflow: ${name}`,
     runIfCopilotConfigured,