agent-orchestrator/tests/e2e/specs/phase-c-llm.test.ts
Hibryda f08c4b18cf refactor(e2e): split spec files under 300-line limit
- phase-c.test.ts (626 lines) → phase-c-ui.test.ts (279), phase-c-tabs.test.ts
  (272), phase-c-llm.test.ts (76) — all 11 scenarios preserved
- agor.test.ts (799 lines) → smoke.test.ts (47), workspace.test.ts (79),
  settings.test.ts (247), features.test.ts (488) — split in progress
- Reset-to-home-state hooks added to stateful before() blocks
- wdio.conf.js specs array updated for all new filenames
2026-03-18 03:09:29 +01:00

76 lines
3.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
// Phase C — LLM-Judged Tests (C10-C11)
// Settings completeness and status bar completeness via LLM judge.
// ─── Scenario C10: LLM-Judged Settings Completeness ──────────────────
describe('Scenario C10 — LLM-Judged Settings Completeness', () => {
  /**
   * Clicks the settings button, reads the text content of the first element
   * matching `.sidebar-panel, .settings-tab`, and asks the LLM judge whether
   * that text looks like a complete settings UI.
   *
   * The test is skipped when `isJudgeAvailable()` returns false.
   */
  it('should have comprehensive settings panel', async function () {
    // A regular `function` (not an arrow) is required so `this` is the test
    // context and `this.skip()` is available.
    if (!isJudgeAvailable()) {
      console.log('Skipping — LLM judge not available (no CLI or API key)');
      this.skip();
      return;
    }

    // Open settings with an in-page click; a missing button is tolerated here
    // and will surface as empty panel text handed to the judge.
    await browser.execute(() => {
      const btn = document.querySelector('[data-testid="settings-btn"]');
      if (btn) (btn as HTMLElement).click();
    });
    await browser.pause(500);

    try {
      const settingsContent = await browser.execute(() => {
        const panel = document.querySelector('.sidebar-panel, .settings-tab');
        return panel?.textContent ?? '';
      });

      const verdict = await assertWithJudge(
        'The settings panel should contain configuration options for: (1) theme/appearance, (2) font settings (UI and terminal), (3) default shell, and optionally (4) provider settings. It should look like a real settings UI, not an error message.',
        settingsContent,
        { context: 'BTerminal v3 settings panel with Appearance section (theme dropdown, UI font, terminal font) and Defaults section (shell, CWD). May also have Providers section.' },
      );

      // Log the judge's reasoning BEFORE asserting: a failing expect() throws,
      // so anything placed after it never runs on the failure path.
      if (!verdict.pass) {
        console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
      }
      expect(verdict.pass).toBe(true);
    } finally {
      // Always send Escape (presumably dismisses the settings panel) so a
      // failed verdict does not leave UI state behind for subsequent tests.
      await browser.keys('Escape');
      await browser.pause(300);
    }
  });
});
// ─── Scenario C11: LLM-Judged Status Bar ──────────────────────────────
describe('Scenario C11 — LLM-Judged Status Bar Completeness', () => {
  /**
   * Reads the text and inner HTML of the `[data-testid="status-bar"]` element
   * and asks the LLM judge whether it looks like a complete fleet status bar.
   *
   * The test is skipped when `isJudgeAvailable()` returns false.
   */
  it('should render a comprehensive status bar', async function () {
    // A regular `function` (not an arrow) is required so `this` is the test
    // context and `this.skip()` is available.
    if (!isJudgeAvailable()) {
      console.log('Skipping — LLM judge not available (no CLI or API key)');
      this.skip();
      return;
    }

    // Capture text and markup in a single round trip so both describe the
    // same DOM snapshot. A missing status bar yields empty strings.
    const statusBar = await browser.execute(() => {
      const bar = document.querySelector('[data-testid="status-bar"]');
      return {
        text: bar?.textContent ?? '',
        html: bar?.innerHTML ?? '',
      };
    });

    const verdict = await assertWithJudge(
      'The status bar should display agent fleet information including: agent status counts (idle/running/stalled with numbers), and optionally burn rate ($/hr) and cost tracking. It should look like a real monitoring dashboard status bar.',
      // Only the first 2000 characters of the HTML are sent to the judge.
      `Text: ${statusBar.text}\n\nHTML structure: ${statusBar.html.substring(0, 2000)}`,
      { context: 'BTerminal Mission Control status bar shows running/idle/stalled agent counts, total $/hr burn rate, attention queue, and total cost.' },
    );

    // Log the judge's reasoning BEFORE asserting: a failing expect() throws,
    // so anything placed after it never runs on the failure path.
    if (!verdict.pass) {
      console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
    }
    expect(verdict.pass).toBe(true);
  });
});