Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: online copilot api test #8732

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/copilot-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ description: 'Run Copilot E2E Test'
inputs:
script:
description: 'Script to run'
default: 'yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only'
default: 'yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only'
required: false
openai-key:
description: 'OpenAI secret key'
Expand Down
19 changes: 14 additions & 5 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ jobs:
filters: |
backend:
- 'packages/backend/server/src/**'
- 'tests/affine-cloud-copilot/**'

- name: Setup Node.js
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
Expand All @@ -446,7 +447,7 @@ jobs:

- name: Run server tests
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
run: yarn workspace @affine/server test:copilot:coverage --forbid-only
run: yarn workspace @affine/server test:copilot:spec:coverage --forbid-only
env:
CARGO_TARGET_DIR: '${{ github.workspace }}/target'
COPILOT_OPENAI_API_KEY: ${{ secrets.COPILOT_OPENAI_API_KEY }}
Expand Down Expand Up @@ -502,26 +503,34 @@ jobs:
echo "skip=true" >> $GITHUB_OUTPUT
fi

- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
backend:
- 'packages/backend/server/src/**'
- 'tests/affine-cloud-copilot/**'

- name: Setup Node.js
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
uses: ./.github/actions/setup-node
with:
playwright-install: true
electron-install: false
hard-link-nm: false

- name: Download server-native.node
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
uses: actions/download-artifact@v4
with:
name: server-native.node
path: ./packages/backend/server

- name: Run Copilot E2E Test ${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' }}
if: ${{ steps.check-blocksuite-update.outputs.skip != 'true' || steps.filter.outputs.backend == 'true' }}
uses: ./.github/actions/copilot-test
with:
script: yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
script: yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
openai-key: ${{ secrets.COPILOT_OPENAI_API_KEY }}
fal-key: ${{ secrets.COPILOT_FAL_API_KEY }}

Expand Down
20 changes: 17 additions & 3 deletions .github/workflows/copilot-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ jobs:
NODE_ENV: test
DISTRIBUTION: web
DATABASE_URL: postgresql://affine:affine@localhost:5432/affine
strategy:
fail-fast: false
matrix:
spec:
- {
name: e2e,
package: '@affine-test/affine-cloud-copilot',
type: e2e,
}
- { name: spec, package: '@affine/server', type: copilot:spec }
services:
postgres:
image: postgres
Expand Down Expand Up @@ -78,12 +88,16 @@ jobs:
- name: Prepare Server Test Environment
uses: ./.github/actions/server-test-env

- name: Run server tests
run: yarn workspace @affine/server test:copilot:coverage --forbid-only
- name: Run copilot api ${{ matrix.spec.name }} tests
run: yarn workspace ${{ matrix.spec.package }} test:${{ matrix.spec.type }}:coverage --forbid-only
env:
CARGO_TARGET_DIR: '${{ github.workspace }}/target'
COPILOT_OPENAI_API_KEY: ${{ secrets.COPILOT_OPENAI_API_KEY }}
COPILOT_FAL_API_KEY: ${{ secrets.COPILOT_FAL_API_KEY }}
COPILOT_E2E_ENDPOINT: ${{ secrets.COPILOT_E2E_ENDPOINT }}
COPILOT_E2E_USER: ${{ secrets.COPILOT_E2E_USER }}
COPILOT_E2E_PASSWORD: ${{ secrets.COPILOT_E2E_PASSWORD }}
COPILOT_E2E_SECRET: ${{ secrets.COPILOT_E2E_SECRET }}

- name: Upload server test coverage results
uses: codecov/codecov-action@v4
Expand Down Expand Up @@ -139,7 +153,7 @@ jobs:
- name: Run Copilot E2E Test ${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
uses: ./.github/actions/copilot-test
with:
script: yarn workspace @affine-test/affine-cloud-copilot e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
script: yarn workspace @affine-test/affine-cloud-copilot test:e2e --forbid-only --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }}
openai-key: ${{ secrets.COPILOT_OPENAI_API_KEY }}
fal-key: ${{ secrets.COPILOT_FAL_API_KEY }}

Expand Down
4 changes: 2 additions & 2 deletions packages/backend/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
"start": "node --loader ts-node/esm/transpile-only.mjs ./src/index.ts",
"dev": "nodemon ./src/index.ts",
"test": "ava --concurrency 1 --serial",
"test:copilot": "ava \"tests/**/copilot-*.spec.ts\"",
"test:copilot:spec": "ava \"tests/**/copilot-*.spec.ts\"",
"test:coverage": "c8 ava --concurrency 1 --serial",
"test:copilot:coverage": "c8 ava --timeout=5m \"tests/**/copilot-*.spec.ts\"",
"test:copilot:spec:coverage": "c8 ava --timeout=5m \"tests/**/copilot-*.spec.ts\"",
"postinstall": "prisma generate",
"data-migration": "node --loader ts-node/esm/transpile-only.mjs ./src/data/index.ts",
"predeploy": "yarn prisma migrate deploy && node --import ./scripts/register.js ./dist/data/index.js run",
Expand Down
215 changes: 7 additions & 208 deletions packages/backend/server/tests/copilot-provider.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ import {
CopilotCheckJsonExecutor,
} from '../src/plugins/copilot/workflow/executor';
import { createTestingModule } from './utils';
import { TestAssets } from './utils/copilot';
import {
checkMDList,
ProviderActionTestCase,
ProviderWorkflowTestCase,
} from './utils/copilot';

type Tester = {
auth: AuthService;
Expand Down Expand Up @@ -130,59 +134,6 @@ test.after(async t => {
await t.context.module.close();
});

/**
 * Asserts that a model response is not fenced as a Markdown code block.
 *
 * Newlines are removed and surrounding whitespace is trimmed before the
 * check; the assertion fails when the remaining text starts OR ends with a
 * triple-backtick fence.
 *
 * @param t - test execution context used to record the assertion
 * @param result - raw text produced by the provider
 */
const assertNotWrappedInCodeBlock = (
  t: ExecutionContext<Tester>,
  result: string
) => {
  const fence = '```';
  // Collapse the response onto one line so a fence on its own line still
  // counts as the start/end of the text.
  const flattened = result.split('\n').join('').trim();
  const opensWithFence = flattened.startsWith(fence);
  const closesWithFence = flattened.endsWith(fence);
  t.assert(
    !(opensWithFence || closesWithFence),
    'should not wrap in code block'
  );
};

/**
 * Checks whether `text` looks like a (possibly nested) Markdown list.
 *
 * Rules applied to every non-blank line:
 * - it must consist of zero or more two-space indents, an optional bullet
 *   (`-`, `*`, `+`, or a single Unicode dash in U+2010..U+2015), one space,
 *   and at least one more character;
 * - its leading-space count must be even;
 * - if it is indented, it may be at most one level (two spaces) deeper than
 *   the most recent line whose trimmed text starts with `-`.
 *
 * Blank lines are ignored, so an empty string is considered valid.
 *
 * @param text - the text to validate
 * @returns `true` when every non-blank line satisfies the rules above
 */
const checkMDList = (text: string) => {
  // The Unicode dashes must be a character class; written bare as
  // `\u2010-\u2015` the pattern would only match that literal 3-char sequence.
  const listItemRegex = /^( {2})*(-|[\u2010-\u2015]|\*|\+)? .+$/;
  let prevIndent: number | null = null;

  for (const line of text.split('\n')) {
    if (line.trim() === '') continue;
    if (!listItemRegex.test(line)) {
      return false;
    }

    // Count leading spaces only (tabs are not treated as indentation).
    let currentIndent = 0;
    while (line[currentIndent] === ' ') {
      currentIndent++;
    }
    if (currentIndent % 2 !== 0) {
      return false;
    }

    if (prevIndent !== null && currentIndent > 0) {
      // allow 1 level of indentation difference
      if (currentIndent - prevIndent > 2) {
        return false;
      }
    }

    // Only `-` items move the reference indentation forward.
    if (line.trim().startsWith('-')) {
      prevIndent = currentIndent;
    }
  }

  return true;
};

/**
 * Reports whether `url` can be parsed by the `URL` constructor.
 *
 * @param url - candidate URL string
 * @returns `true` if construction succeeds, `false` if it throws
 */
const checkUrl = (url: string) => {
  let parsed = true;
  try {
    // Construction is the validation; the instance itself is not needed.
    void new URL(url);
  } catch {
    parsed = false;
  }
  return parsed;
};

const retry = async (
action: string,
t: ExecutionContext<Tester>,
Expand Down Expand Up @@ -264,138 +215,7 @@ test('should validate markdown list', t => {

// ==================== action ====================

/**
 * Provider action test cases.
 *
 * Each entry lists one or more prompt names, the messages sent with the
 * prompt, the expected output `type` ('text' or 'image'), and a `verifier`
 * that asserts on the produced result.
 */
const actions = [
  {
    promptName: [
      'Summary',
      'Explain this',
      'Write an article about this',
      'Write a twitter about this',
      'Write a poem about this',
      'Write a blog post about this',
      'Write outline',
      'Change tone to',
      'Improve writing for it',
      'Improve grammar for it',
      'Fix spelling for it',
      'Create headings',
      'Make it longer',
      'Make it shorter',
      'Continue writing',
    ],
    messages: [{ role: 'user' as const, content: TestAssets.SSOT }],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      t.assert(
        result.toLowerCase().includes('single source of truth'),
        'should include original keyword'
      );
    },
    type: 'text' as const,
  },
  {
    promptName: ['Brainstorm ideas about this', 'Brainstorm mindmap'],
    messages: [{ role: 'user' as const, content: TestAssets.SSOT }],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      t.assert(checkMDList(result), 'should be a markdown list');
    },
    type: 'text' as const,
  },
  {
    promptName: 'Expand mind map',
    messages: [{ role: 'user' as const, content: '- Single source of truth' }],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      t.assert(checkMDList(result), 'should be a markdown list');
    },
    type: 'text' as const,
  },
  {
    promptName: 'Find action items from it',
    messages: [{ role: 'user' as const, content: TestAssets.TODO }],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      t.assert(checkMDList(result), 'should be a markdown list');
    },
    type: 'text' as const,
  },
  {
    promptName: ['Explain this code', 'Check code error'],
    messages: [{ role: 'user' as const, content: TestAssets.Code }],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      t.assert(
        result.toLowerCase().includes('distance'),
        'explain code result should include keyword'
      );
    },
    type: 'text' as const,
  },
  {
    promptName: 'Translate to',
    messages: [
      {
        role: 'user' as const,
        content: TestAssets.SSOT,
        params: { language: 'Simplified Chinese' },
      },
    ],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      // Failure message names the translation case, not the code-explain one.
      t.assert(
        result.toLowerCase().includes('单一事实来源'),
        'translation result should include keyword'
      );
    },
    type: 'text' as const,
  },
  {
    promptName: ['Generate a caption', 'Explain this image'],
    messages: [
      {
        role: 'user' as const,
        content: '',
        attachments: [
          'https://cdn.affine.pro/copilot-test/Qgqy9qZT3VGIEuMIotJYoCCH.jpg',
        ],
      },
    ],
    verifier: (t: ExecutionContext<Tester>, result: string) => {
      assertNotWrappedInCodeBlock(t, result);
      const content = result.toLowerCase();
      // Any one of these keywords is accepted as a match.
      t.assert(
        content.includes('classroom') ||
          content.includes('school') ||
          content.includes('sky'),
        'image description should include keyword'
      );
    },
    type: 'text' as const,
  },
  {
    promptName: [
      'debug:action:fal-face-to-sticker',
      'debug:action:fal-remove-bg',
      'debug:action:fal-sd15',
      'debug:action:fal-upscaler',
    ],
    messages: [
      {
        role: 'user' as const,
        content: '',
        attachments: [
          'https://cdn.affine.pro/copilot-test/Zkas098lkjdf-908231.jpg',
        ],
      },
    ],
    verifier: (t: ExecutionContext<Tester>, link: string) => {
      t.truthy(checkUrl(link), 'should be a valid url');
    },
    type: 'image' as const,
  },
];
for (const { promptName, messages, verifier, type } of actions) {
for (const { promptName, messages, verifier, type } of ProviderActionTestCase) {
const prompts = Array.isArray(promptName) ? promptName : [promptName];
for (const promptName of prompts) {
test(
Expand Down Expand Up @@ -455,28 +275,7 @@ for (const { promptName, messages, verifier, type } of actions) {

// ==================== workflow ====================

/**
 * Workflow test cases: the workflow `name`, the `content` passed to it, and a
 * `verifier` that asserts on the text result.
 */
const workflows = [
  {
    name: 'brainstorm',
    content: 'apple company',
    verifier: (t: ExecutionContext, result: string) => {
      const isList = checkMDList(result);
      t.assert(isList, 'should be a markdown list');
    },
  },
  {
    name: 'presentation',
    content: 'apple company',
    verifier: (t: ExecutionContext, result: string) => {
      // Every line of the output is asserted to parse as JSON on its own.
      result.split('\n').forEach(row => {
        t.notThrows(() => {
          JSON.parse(row.trim());
        }, 'should be valid json');
      });
    },
  },
];

for (const { name, content, verifier } of workflows) {
for (const { name, content, verifier } of ProviderWorkflowTestCase) {
test(
`should be able to run workflow: ${name}`,
runIfCopilotConfigured,
Expand Down
Loading
Loading