agent-orchestrator/tests/e2e/specs/phase-c-llm.test.ts

import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';

// Phase C — LLM-Judged Tests (C10–C11)
// Settings completeness and status bar completeness via LLM judge.

// ─── Scenario C10: LLM-Judged Settings Completeness ──────────────────

describe('Scenario C10 — LLM-Judged Settings Completeness', () => {
  /**
   * Opens the settings panel, scrapes its text content and asks the LLM judge
   * whether it reads like a complete settings UI.
   *
   * The test is skipped (not failed) when no judge backend is available.
   * The panel is always dismissed afterwards, even when the judge call or the
   * assertion throws, so an open panel does not leak into later specs.
   */
  it('should have comprehensive settings panel', async function () {
    if (!isJudgeAvailable()) {
      console.log('Skipping — LLM judge not available (no CLI or API key)');
      this.skip();
      return;
    }

    // Open settings. The click runs inside the page; a missing button is
    // tolerated here and surfaces later as empty content for the judge.
    await browser.execute(() => {
      document.querySelector<HTMLElement>('[data-testid="settings-btn"]')?.click();
    });
    await browser.pause(500);

    try {
      // Scrape whichever settings container is present; '' when none is found.
      const settingsContent = await browser.execute(() => {
        const panel = document.querySelector('.sidebar-panel, .settings-tab');
        return panel?.textContent ?? '';
      });

      const verdict = await assertWithJudge(
        'The settings panel should contain configuration options for: (1) theme/appearance, (2) font settings (UI and terminal), (3) default shell, and optionally (4) provider settings. It should look like a real settings UI, not an error message.',
        settingsContent,
        { context: 'BTerminal v3 settings panel with Appearance section (theme dropdown, UI font, terminal font) and Defaults section (shell, CWD). May also have Providers section.' },
      );

      // Log the judge's reasoning BEFORE asserting: expect() throws on failure,
      // so anything placed after it would never run for a failing verdict.
      if (!verdict.pass) {
        console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
      }
      expect(verdict.pass).toBe(true);
    } finally {
      // Close the settings panel regardless of the outcome.
      await browser.keys('Escape');
      await browser.pause(300);
    }
  });
});

// ─── Scenario C11: LLM-Judged Status Bar ──────────────────────────────

describe('Scenario C11 — LLM-Judged Status Bar Completeness', () => {
  /**
   * Captures the status bar's text and markup and asks the LLM judge whether
   * it looks like a complete agent-fleet status bar.
   *
   * The test is skipped (not failed) when no judge backend is available.
   */
  it('should render a comprehensive status bar', async function () {
    if (!isJudgeAvailable()) {
      console.log('Skipping — LLM judge not available (no CLI or API key)');
      this.skip();
      return;
    }

    // Read text and HTML in a single in-page call so both describe the same
    // render of the status bar; each falls back to '' when the element is absent.
    const statusBar = await browser.execute(() => {
      const bar = document.querySelector('[data-testid="status-bar"]');
      return {
        text: bar?.textContent ?? '',
        html: bar?.innerHTML ?? '',
      };
    });

    const verdict = await assertWithJudge(
      'The status bar should display agent fleet information including: agent status counts (idle/running/stalled with numbers), and optionally burn rate ($/hr) and cost tracking. It should look like a real monitoring dashboard status bar.',
      // The HTML is truncated to 2000 characters to keep the judge prompt bounded.
      `Text: ${statusBar.text}\n\nHTML structure: ${statusBar.html.substring(0, 2000)}`,
      { context: 'BTerminal Mission Control status bar shows running/idle/stalled agent counts, total $/hr burn rate, attention queue, and total cost.' },
    );

    // Log the judge's reasoning BEFORE asserting: expect() throws on failure,
    // so anything placed after it would never run for a failing verdict.
    if (!verdict.pass) {
      console.log(`LLM Judge: ${verdict.reasoning} (confidence: ${verdict.confidence})`);
    }
    expect(verdict.pass).toBe(true);
  });
});