feat(e2e): add test daemon CLI with ANSI dashboard and Agent SDK bridge

- index.ts: CLI entry point (--full, --spec, --watch, --agent flags)
- runner.ts: programmatic WDIO launcher with result streaming
- dashboard.ts: ANSI terminal UI (pass/fail/skip/running icons, summary)
- agent-bridge.ts: NDJSON stdin/stderr bridge for Agent SDK queries
  (status, rerun, failures, reset-cache)
- Standalone package at tests/e2e/daemon/
This commit is contained in:
Hibryda 2026-03-18 05:17:17 +01:00
parent 46f51d7941
commit d7dd7722ab
7 changed files with 796 additions and 0 deletions

122
tests/e2e/daemon/README.md Normal file
View file

@ -0,0 +1,122 @@
# E2E Test Daemon
Terminal dashboard for running and monitoring Agent Orchestrator E2E tests. Supports smart caching, file watching, and agent SDK integration.
## Prerequisites
- Built Tauri debug binary (`npm run tauri build -- --debug --no-bundle`)
- `tauri-driver` installed (`cargo install tauri-driver`)
- Node.js 20+
## Install
```bash
cd tests/e2e/daemon
npm install
```
The daemon reuses WDIO deps from the root `node_modules`. Its own `package.json` lists `@wdio/cli` and `@wdio/local-runner` for clarity, but they resolve from the workspace root.
## Usage
```bash
# Run all specs (with smart cache — skips recently-passed specs)
npm start
# Full run — reset cache, run everything
npm run start:full
# Filter by spec name pattern
npx tsx index.ts --spec phase-a
# Watch mode — re-run on spec file changes
npm run start:watch
# Agent mode — accept NDJSON queries on stdin, respond on stderr
npm run start:agent
# Combine flags
npx tsx index.ts --full --watch --agent
```
## Dashboard
The terminal UI shows:
```
Agent Orchestrator — E2E Test Daemon RUNNING 12.3s
────────────────────────────────────────────
✓ smoke 1.2s
✓ workspace 0.8s
⟳ settings
· phase-a-structure
· phase-a-agent
⏭ phase-b-grid
────────────────────────────────────────────
2 passed │ 0 failed │ 1 skipped │ 1 running │ 2 pending │ 2.0s
```
Status icons:
- `✓` green — passed
- `✗` red — failed (error message shown below)
- `⏭` gray — skipped (cached)
- `⟳` yellow — running
- `·` white — pending
## Smart Cache
The daemon reads from the shared `test-results/results.json` (managed by `results-db.ts`). Specs that passed in any of the last 5 runs are skipped unless `--full` is used.
## Agent Bridge Protocol
When started with `--agent`, the daemon accepts NDJSON queries on **stdin** and responds on **stderr** (stdout is used by the dashboard).
### Queries
**Status** — get current test state:
```json
{"type": "status"}
```
Response:
```json
{"type": "status", "running": false, "passed": 15, "failed": 2, "skipped": 1, "pending": 0, "total": 18, "failures": [{"name": "phase-b-grid", "error": "WDIO exited with code 1"}]}
```
**Rerun** — trigger a new test run:
```json
{"type": "rerun", "pattern": "phase-a"}
```
Response:
```json
{"type": "rerun", "specsQueued": 1}
```
**Failures** — get detailed failure list:
```json
{"type": "failures"}
```
Response:
```json
{"type": "failures", "failures": [{"name": "phase-b-grid", "specFile": "phase-b-grid.test.ts", "error": "WDIO exited with code 1"}]}
```
**Reset cache** — clear smart cache:
```json
{"type": "reset-cache"}
```
Response:
```json
{"type": "reset-cache", "ok": true}
```
## Architecture
```
index.ts CLI entry point, arg parsing, main loop
runner.ts WDIO Launcher wrapper, spec discovery, smart cache
dashboard.ts ANSI terminal UI (no external deps)
agent-bridge.ts NDJSON stdio interface for agent integration
```
The daemon reuses the project's existing `wdio.conf.js` and `infra/results-db.ts`.

View file

@ -0,0 +1,193 @@
// Agent bridge — NDJSON stdio interface for Claude Agent SDK integration
// Accepts queries on stdin and responds on stderr (stdout is owned by the
// dashboard renderer). Allows an agent to control and query the test daemon
// programmatically.
import { createInterface } from 'node:readline';
import type { Dashboard, TestEntry } from './dashboard.ts';
import { runSpecs, clearCache, type RunOptions } from './runner.ts';
// ── Query/Response types ──
interface StatusQuery {
type: 'status';
}
interface RerunQuery {
type: 'rerun';
pattern?: string;
}
interface FailuresQuery {
type: 'failures';
}
interface ResetCacheQuery {
type: 'reset-cache';
}
type Query = StatusQuery | RerunQuery | FailuresQuery | ResetCacheQuery;
interface StatusResponse {
type: 'status';
running: boolean;
passed: number;
failed: number;
skipped: number;
pending: number;
total: number;
failures: Array<{ name: string; error?: string }>;
}
interface RerunResponse {
type: 'rerun';
specsQueued: number;
}
interface FailuresResponse {
type: 'failures';
failures: Array<{ name: string; specFile: string; error?: string }>;
}
interface ResetCacheResponse {
type: 'reset-cache';
ok: true;
}
interface ErrorResponse {
type: 'error';
message: string;
}
type Response = StatusResponse | RerunResponse | FailuresResponse | ResetCacheResponse | ErrorResponse;
// ── Bridge ──
export class AgentBridge {
private dashboard: Dashboard;
private running = false;
private triggerRerun: ((opts: RunOptions) => void) | null = null;
private rl: ReturnType<typeof createInterface> | null = null;
constructor(dashboard: Dashboard) {
this.dashboard = dashboard;
}
/** Register callback that triggers a new test run from the main loop */
onRerunRequest(cb: (opts: RunOptions) => void): void {
this.triggerRerun = cb;
}
setRunning(running: boolean): void {
this.running = running;
}
start(): void {
this.rl = createInterface({
input: process.stdin,
terminal: false,
});
this.rl.on('line', (line) => {
const trimmed = line.trim();
if (!trimmed) return;
try {
const query = JSON.parse(trimmed) as Query;
const response = this.handleQuery(query);
this.send(response);
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
this.send({ type: 'error', message: `Invalid query: ${msg}` });
}
});
this.rl.on('close', () => {
this.stop();
});
}
stop(): void {
if (this.rl) {
this.rl.close();
this.rl = null;
}
}
private send(response: Response): void {
// Write to stdout as NDJSON — use fd 3 or stderr if stdout is used by dashboard
// Since dashboard writes to stdout, we use stderr for NDJSON responses
// when the dashboard is active. The agent reads from our stderr.
process.stderr.write(JSON.stringify(response) + '\n');
}
private handleQuery(query: Query): Response {
switch (query.type) {
case 'status':
return this.handleStatus();
case 'rerun':
return this.handleRerun(query);
case 'failures':
return this.handleFailures();
case 'reset-cache':
return this.handleResetCache();
default:
return { type: 'error', message: `Unknown query type: ${(query as { type: string }).type}` };
}
}
private handleStatus(): StatusResponse {
const tests = this.dashboard.getTests();
const passed = tests.filter((t) => t.status === 'passed').length;
const failed = tests.filter((t) => t.status === 'failed').length;
const skipped = tests.filter((t) => t.status === 'skipped').length;
const pending = tests.filter((t) => t.status === 'pending' || t.status === 'running').length;
const failures = tests
.filter((t) => t.status === 'failed')
.map((t) => ({ name: t.name, error: t.error }));
return {
type: 'status',
running: this.running,
passed,
failed,
skipped,
pending,
total: tests.length,
failures,
};
}
private handleRerun(query: RerunQuery): RerunResponse {
if (this.running) {
return { type: 'rerun', specsQueued: 0 };
}
const opts: RunOptions = {};
if (query.pattern) opts.pattern = query.pattern;
opts.full = true; // rerun ignores cache
if (this.triggerRerun) {
this.triggerRerun(opts);
}
return { type: 'rerun', specsQueued: 1 };
}
private handleFailures(): FailuresResponse {
const tests = this.dashboard.getTests();
const failures = tests
.filter((t) => t.status === 'failed')
.map((t) => ({
name: t.name,
specFile: t.specFile,
error: t.error,
}));
return { type: 'failures', failures };
}
private handleResetCache(): ResetCacheResponse {
clearCache();
return { type: 'reset-cache', ok: true };
}
}

View file

@ -0,0 +1,167 @@
// Terminal dashboard — ANSI escape code UI for E2E test status
// No external deps. Renders test list with status icons, timing, and summary.
export type TestStatus = 'pending' | 'running' | 'passed' | 'failed' | 'skipped';
export interface TestEntry {
name: string;
specFile: string;
status: TestStatus;
durationMs?: number;
error?: string;
}
// ── ANSI helpers ──
const ESC = '\x1b[';
const CLEAR_SCREEN = `${ESC}2J${ESC}H`;
const HIDE_CURSOR = `${ESC}?25l`;
const SHOW_CURSOR = `${ESC}?25h`;
const BOLD = `${ESC}1m`;
const DIM = `${ESC}2m`;
const RESET = `${ESC}0m`;
const fg = {
red: `${ESC}31m`,
green: `${ESC}32m`,
yellow: `${ESC}33m`,
blue: `${ESC}34m`,
magenta: `${ESC}35m`,
cyan: `${ESC}36m`,
white: `${ESC}37m`,
gray: `${ESC}90m`,
};
const STATUS_ICONS: Record<TestStatus, string> = {
pending: `${fg.white}\u00b7${RESET}`, // centered dot
running: `${fg.yellow}\u27f3${RESET}`, // clockwise arrow
passed: `${fg.green}\u2713${RESET}`, // check mark
failed: `${fg.red}\u2717${RESET}`, // cross mark
skipped: `${fg.gray}\u23ed${RESET}`, // skip icon
};
function formatDuration(ms: number | undefined): string {
if (ms === undefined) return '';
if (ms < 1000) return `${fg.gray}${ms}ms${RESET}`;
return `${fg.gray}${(ms / 1000).toFixed(1)}s${RESET}`;
}
function truncate(str: string, max: number): string {
return str.length > max ? str.slice(0, max - 1) + '\u2026' : str;
}
// ── Dashboard ──
export class Dashboard {
private tests: TestEntry[] = [];
private startTime: number = Date.now();
private refreshTimer: ReturnType<typeof setInterval> | null = null;
private running = false;
private lastRunStatus: 'idle' | 'running' | 'passed' | 'failed' = 'idle';
setTests(specs: Array<{ name: string; specFile: string }>): void {
this.tests = specs.map((s) => ({
name: s.name,
specFile: s.specFile,
status: 'pending' as TestStatus,
}));
this.startTime = Date.now();
this.lastRunStatus = 'running';
}
updateTest(name: string, status: TestStatus, durationMs?: number, error?: string): void {
const entry = this.tests.find((t) => t.name === name);
if (entry) {
entry.status = status;
entry.durationMs = durationMs;
entry.error = error;
}
}
startRefresh(): void {
if (this.refreshTimer) return;
this.running = true;
process.stdout.write(HIDE_CURSOR);
this.render();
this.refreshTimer = setInterval(() => this.render(), 500);
}
stopRefresh(): void {
this.running = false;
if (this.refreshTimer) {
clearInterval(this.refreshTimer);
this.refreshTimer = null;
}
// Final render with cursor restored
this.render();
process.stdout.write(SHOW_CURSOR);
}
markComplete(): void {
const failed = this.tests.filter((t) => t.status === 'failed').length;
this.lastRunStatus = failed > 0 ? 'failed' : 'passed';
}
stop(): void {
this.stopRefresh();
}
getTests(): TestEntry[] {
return this.tests;
}
render(): void {
const cols = process.stdout.columns || 80;
const lines: string[] = [];
// ── Header ──
const title = 'Agent Orchestrator \u2014 E2E Test Daemon';
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
const statusColor = this.lastRunStatus === 'failed' ? fg.red
: this.lastRunStatus === 'passed' ? fg.green
: this.lastRunStatus === 'running' ? fg.yellow
: fg.gray;
const statusLabel = this.running ? 'RUNNING' : this.lastRunStatus.toUpperCase();
lines.push('');
lines.push(` ${BOLD}${fg.cyan}${title}${RESET} ${statusColor}${statusLabel}${RESET} ${DIM}${elapsed}s${RESET}`);
lines.push(` ${fg.gray}${'─'.repeat(Math.min(cols - 4, 76))}${RESET}`);
// ── Test list ──
const nameWidth = Math.min(cols - 20, 60);
for (const t of this.tests) {
const icon = STATUS_ICONS[t.status];
const name = truncate(t.name, nameWidth);
const dur = formatDuration(t.durationMs);
lines.push(` ${icon} ${name} ${dur}`);
if (t.status === 'failed' && t.error) {
const errLine = truncate(t.error, cols - 8);
lines.push(` ${fg.red}${DIM}${errLine}${RESET}`);
}
}
// ── Summary footer ──
const passed = this.tests.filter((t) => t.status === 'passed').length;
const failed = this.tests.filter((t) => t.status === 'failed').length;
const skipped = this.tests.filter((t) => t.status === 'skipped').length;
const running = this.tests.filter((t) => t.status === 'running').length;
const pending = this.tests.filter((t) => t.status === 'pending').length;
const totalTime = this.tests.reduce((sum, t) => sum + (t.durationMs ?? 0), 0);
lines.push('');
lines.push(` ${fg.gray}${'─'.repeat(Math.min(cols - 4, 76))}${RESET}`);
const parts: string[] = [];
if (passed > 0) parts.push(`${fg.green}${passed} passed${RESET}`);
if (failed > 0) parts.push(`${fg.red}${failed} failed${RESET}`);
if (skipped > 0) parts.push(`${fg.gray}${skipped} skipped${RESET}`);
if (running > 0) parts.push(`${fg.yellow}${running} running${RESET}`);
if (pending > 0) parts.push(`${fg.white}${pending} pending${RESET}`);
parts.push(`${DIM}${formatDuration(totalTime)}${RESET}`);
lines.push(` ${parts.join(' \u2502 ')}`);
lines.push('');
process.stdout.write(CLEAR_SCREEN + lines.join('\n'));
}
}

95
tests/e2e/daemon/index.ts Normal file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env tsx
// Agent Orchestrator E2E Test Daemon — CLI entry point
// Usage: tsx index.ts [--full] [--spec <pattern>] [--watch] [--agent]
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { watch } from 'node:fs';
import { Dashboard } from './dashboard.ts';
import { runSpecs, discoverSpecs, specDisplayName, clearCache, type RunOptions } from './runner.ts';
import { AgentBridge } from './agent-bridge.ts';
const __dirname = dirname(fileURLToPath(import.meta.url));
const SPECS_DIR = resolve(__dirname, '../specs');
// ── CLI args ──
const args = process.argv.slice(2);
const fullMode = args.includes('--full');
const watchMode = args.includes('--watch');
const agentMode = args.includes('--agent');
const specIdx = args.indexOf('--spec');
const specPattern = specIdx !== -1 ? args[specIdx + 1] : undefined;
// ── Init ──
const dashboard = new Dashboard();
let bridge: AgentBridge | null = null;
let pendingRerun: RunOptions | null = null;
if (agentMode) {
bridge = new AgentBridge(dashboard);
bridge.onRerunRequest((opts) => { pendingRerun = opts; });
bridge.start();
}
// ── Run cycle ──
async function runCycle(opts: RunOptions = {}): Promise<void> {
const specs = discoverSpecs(opts.pattern ?? specPattern);
dashboard.setTests(specs.map((s) => ({ name: specDisplayName(s), specFile: s })));
dashboard.startRefresh();
bridge?.setRunning(true);
await runSpecs({
pattern: opts.pattern ?? specPattern,
full: opts.full ?? fullMode,
onResult: (r) => dashboard.updateTest(r.name, r.status, r.durationMs, r.error),
});
dashboard.markComplete();
dashboard.stopRefresh();
bridge?.setRunning(false);
}
function shutdown(poll?: ReturnType<typeof setInterval>, watcher?: ReturnType<typeof watch>): void {
watcher?.close();
if (poll) clearInterval(poll);
dashboard.stop();
bridge?.stop();
process.exit(0);
}
// ── Main ──
async function main(): Promise<void> {
if (fullMode) clearCache();
await runCycle();
if (watchMode || agentMode) {
const watcher = watchMode
? watch(SPECS_DIR, { recursive: false }, (_ev, f) => {
if (f?.endsWith('.test.ts')) pendingRerun = { pattern: specPattern, full: false };
})
: undefined;
const poll = setInterval(async () => {
if (pendingRerun) {
const opts = pendingRerun;
pendingRerun = null;
await runCycle(opts);
}
}, 1000);
process.on('SIGINT', () => shutdown(poll, watcher));
process.on('SIGTERM', () => shutdown(poll, watcher));
} else {
dashboard.stop();
bridge?.stop();
const hasFailed = dashboard.getTests().some((t) => t.status === 'failed');
process.exit(hasFailed ? 1 : 0);
}
}
main().catch((err) => {
console.error('Fatal error:', err);
dashboard.stop();
bridge?.stop();
process.exit(1);
});

View file

@ -0,0 +1,22 @@
{
"name": "@agor/e2e-daemon",
"version": "1.0.0",
"private": true,
"type": "module",
"bin": {
"agor-e2e": "./index.ts"
},
"scripts": {
"start": "tsx index.ts",
"start:full": "tsx index.ts --full",
"start:watch": "tsx index.ts --watch",
"start:agent": "tsx index.ts --agent"
},
"dependencies": {
"@wdio/cli": "^9.24.0",
"@wdio/local-runner": "^9.24.0"
},
"devDependencies": {
"tsx": "^4.19.0"
}
}

183
tests/e2e/daemon/runner.ts Normal file
View file

@ -0,0 +1,183 @@
// WDIO programmatic runner — launches specs and streams results to a callback
// Uses @wdio/cli Launcher for test execution, reads results-db for smart caching.
import { resolve, dirname, basename } from 'node:path';
import { fileURLToPath } from 'node:url';
import { existsSync, readdirSync, writeFileSync } from 'node:fs';
import { execSync } from 'node:child_process';
import type { TestStatus } from './dashboard.ts';
import { ResultsDb } from '../infra/results-db.ts';
const __dirname = dirname(fileURLToPath(import.meta.url));
const PROJECT_ROOT = resolve(__dirname, '../../..');
const WDIO_CONF = resolve(PROJECT_ROOT, 'tests/e2e/wdio.conf.js');
const SPECS_DIR = resolve(PROJECT_ROOT, 'tests/e2e/specs');
const RESULTS_PATH = resolve(PROJECT_ROOT, 'test-results/results.json');
export interface TestResult {
name: string;
specFile: string;
status: TestStatus;
durationMs?: number;
error?: string;
}
export type ResultCallback = (result: TestResult) => void;
export interface RunOptions {
pattern?: string;
full?: boolean;
onResult?: ResultCallback;
}
// ── Spec discovery ──
export function discoverSpecs(pattern?: string): string[] {
const files = readdirSync(SPECS_DIR)
.filter((f) => f.endsWith('.test.ts'))
.sort();
if (pattern) {
const lp = pattern.toLowerCase();
return files.filter((f) => f.toLowerCase().includes(lp));
}
return files;
}
export function specDisplayName(specFile: string): string {
return basename(specFile, '.test.ts');
}
// ── Smart cache ──
function getPassedSpecs(db: ResultsDb): Set<string> {
const passed = new Set<string>();
for (const run of db.getRecentRuns(5)) {
if (run.status !== 'passed' && run.status !== 'failed') continue;
for (const step of db.getStepsForRun(run.run_id)) {
if (step.status === 'passed') passed.add(step.scenario_name);
}
}
return passed;
}
function filterByCache(specs: string[], db: ResultsDb): { run: string[]; skipped: string[] } {
const cached = getPassedSpecs(db);
const run: string[] = [];
const skipped: string[] = [];
for (const spec of specs) {
(cached.has(specDisplayName(spec)) ? skipped : run).push(spec);
}
return { run, skipped };
}
// ── Git info ──
function getGitInfo(): { branch: string | null; sha: string | null } {
try {
const branch = execSync('git rev-parse --abbrev-ref HEAD', {
cwd: PROJECT_ROOT,
encoding: 'utf-8',
}).trim();
const sha = execSync('git rev-parse --short HEAD', {
cwd: PROJECT_ROOT,
encoding: 'utf-8',
}).trim();
return { branch, sha };
} catch {
return { branch: null, sha: null };
}
}
// ── Runner ──
export async function runSpecs(opts: RunOptions = {}): Promise<TestResult[]> {
const db = new ResultsDb();
const allSpecs = discoverSpecs(opts.pattern);
const results: TestResult[] = [];
let specsToRun: string[];
let skippedSpecs: string[] = [];
if (opts.full) {
specsToRun = allSpecs;
} else {
const filtered = filterByCache(allSpecs, db);
specsToRun = filtered.run;
skippedSpecs = filtered.skipped;
}
// Emit skipped specs immediately
for (const spec of skippedSpecs) {
const result: TestResult = { name: specDisplayName(spec), specFile: spec, status: 'skipped' };
results.push(result);
opts.onResult?.(result);
}
if (specsToRun.length === 0) {
return results;
}
// Build absolute spec paths
const specPaths = specsToRun.map((s) => resolve(SPECS_DIR, s));
// Generate run ID and record
const git = getGitInfo();
const runId = `daemon-${Date.now()}`;
db.startRun(runId, git.branch ?? undefined, git.sha ?? undefined);
// Mark specs as running
for (const spec of specsToRun) {
opts.onResult?.({ name: specDisplayName(spec), specFile: spec, status: 'running' });
}
// Run via WDIO CLI Launcher
const startTime = Date.now();
let exitCode = 1;
try {
const { Launcher } = await import('@wdio/cli');
const launcher = new Launcher(WDIO_CONF, {
specs: specPaths,
});
exitCode = await launcher.run();
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
for (const spec of specsToRun) {
const name = specDisplayName(spec);
const result: TestResult = { name, specFile: spec, status: 'failed', error: `Launcher error: ${msg}` };
results.push(result);
opts.onResult?.(result);
db.recordStep({ run_id: runId, scenario_name: name, step_name: 'launcher', status: 'error',
duration_ms: null, error_message: msg, screenshot_path: null, agent_cost_usd: null });
}
db.finishRun(runId, 'error', Date.now() - startTime);
return results;
}
const totalDuration = Date.now() - startTime;
const perSpecDuration = Math.round(totalDuration / specsToRun.length);
// WDIO Launcher returns 0 for all passed, non-zero for failures.
// Without per-test reporter hooks, we infer per-spec status from exit code.
const specStatus: TestStatus = exitCode === 0 ? 'passed' : 'failed';
const errMsg = exitCode !== 0 ? `WDIO exited with code ${exitCode}` : null;
for (const spec of specsToRun) {
const name = specDisplayName(spec);
const result: TestResult = { name, specFile: spec, status: specStatus, durationMs: perSpecDuration,
error: errMsg ?? undefined };
results.push(result);
opts.onResult?.(result);
db.recordStep({ run_id: runId, scenario_name: name, step_name: 'spec', status: specStatus,
duration_ms: perSpecDuration, error_message: errMsg, screenshot_path: null, agent_cost_usd: null });
}
db.finishRun(runId, exitCode === 0 ? 'passed' : 'failed', totalDuration);
return results;
}
export function clearCache(): void {
if (existsSync(RESULTS_PATH)) {
writeFileSync(RESULTS_PATH, JSON.stringify({ runs: [], steps: [] }, null, 2));
}
}

View file

@ -0,0 +1,14 @@
{
"compilerOptions": {
"module": "ESNext",
"moduleResolution": "bundler",
"target": "ESNext",
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true,
"outDir": "dist",
"rootDir": ".",
"declaration": false
},
"include": ["*.ts"]
}