agent-orchestrator/tests/e2e/specs/phase-c-llm.test.ts
Hibryda 6a8181f33a fix(e2e): cross-protocol browser.execute() — works with both WebDriver + CDP
Root cause: WebDriverIO devtools protocol wraps functions in a polyfill
that puts `return` inside eval() (not a function body) → "Illegal return".

Fix: exec() wrapper in helpers/execute.ts converts function args to IIFE
strings before passing to browser.execute(). Works identically on both
WebDriver (Tauri) and CDP/devtools (Electrobun CEF).

- 35 spec files updated (browser.execute → exec)
- 4 config files updated (string-form expressions)
- helpers/actions.ts + assertions.ts updated
- 560 vitest + 116 cargo passing
2026-03-22 06:33:55 +01:00

80 lines
3.3 KiB
TypeScript

import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
import { exec } from '../helpers/execute.ts';
// Phase C — LLM-Judged Tests (C10-C11)
// Settings completeness and status bar completeness via LLM judge.
// --- Scenario C10: LLM-Judged Settings Completeness ---
describe('Scenario C10 — LLM-Judged Settings Completeness', () => {
it('should have comprehensive settings panel', async function () {
if (!isJudgeAvailable()) {
console.log('Skipping — LLM judge not available (no CLI or API key)');
this.skip();
return;
}
// Open settings
await exec(() => {
const btn = document.querySelector('[data-testid="settings-btn"]');
if (btn) (btn as HTMLElement).click();
});
await browser.pause(500);
const settingsContent = await exec(() => {
const panel = document.querySelector('.sidebar-panel .settings-panel');
return panel?.textContent ?? '';
});
const verdict = await assertWithJudge(
'The settings panel should contain configuration options for: (1) theme/appearance, (2) font settings (UI and terminal), (3) default shell, and optionally (4) provider settings. It should look like a real settings UI, not an error message.',
settingsContent,
{ context: 'Agent Orchestrator v3 settings panel with Appearance category (theme dropdown, UI font, terminal font, cursor settings) and category sidebar (Appearance, Agents, Security, Projects, Orchestration, Advanced).' },
);
expect(verdict.pass).toBe(true);
if (!verdict.pass) {
console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
}
await exec(() => {
const btn = document.querySelector('.panel-close');
if (btn) (btn as HTMLElement).click();
});
await browser.pause(300);
});
});
// --- Scenario C11: LLM-Judged Status Bar ---
describe('Scenario C11 — LLM-Judged Status Bar Completeness', () => {
it('should render a comprehensive status bar', async function () {
if (!isJudgeAvailable()) {
console.log('Skipping — LLM judge not available (no CLI or API key)');
this.skip();
return;
}
const statusBarContent = await exec(() => {
const bar = document.querySelector('[data-testid="status-bar"]');
return bar?.textContent ?? '';
});
const statusBarHtml = await exec(() => {
const bar = document.querySelector('[data-testid="status-bar"]');
return bar?.innerHTML ?? '';
});
const verdict = await assertWithJudge(
'The status bar should display agent fleet information including: project count, and optionally agent status counts (idle/running/stalled with numbers), burn rate ($/hr), and cost tracking. It should look like a real monitoring dashboard status bar.',
`Text: ${statusBarContent}\n\nHTML structure: ${statusBarHtml.substring(0, 2000)}`,
{ context: 'Agent Orchestrator Mission Control status bar shows group name, project count, running/idle/stalled agent counts, total $/hr burn rate, attention queue, and total cost. Version text shows "Agent Orchestrator v3".' },
);
expect(verdict.pass).toBe(true);
if (!verdict.pass) {
console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
}
});
});