feat(e2e): add test daemon CLI with ANSI dashboard and Agent SDK bridge

- index.ts: CLI entry point (--full, --spec, --watch, --agent flags)
- runner.ts: programmatic WDIO launcher with result streaming
- dashboard.ts: ANSI terminal UI (pass/fail/skip/running icons, summary)
- agent-bridge.ts: NDJSON stdin/stderr bridge for Agent SDK queries
  (status, rerun, failures, reset-cache)
- Standalone package at tests/e2e/daemon/
This commit is contained in:
Hibryda 2026-03-18 05:17:17 +01:00
parent 46f51d7941
commit d7dd7722ab
7 changed files with 796 additions and 0 deletions

122
tests/e2e/daemon/README.md Normal file
View file

@ -0,0 +1,122 @@
# E2E Test Daemon
Terminal dashboard for running and monitoring Agent Orchestrator E2E tests. Supports smart caching, file watching, and agent SDK integration.
## Prerequisites
- Built Tauri debug binary (`npm run tauri build -- --debug --no-bundle`)
- `tauri-driver` installed (`cargo install tauri-driver`)
- Node.js 20+
## Install
```bash
cd tests/e2e/daemon
npm install
```
The daemon reuses WDIO deps from the root `node_modules`. Its own `package.json` lists `@wdio/cli` and `@wdio/local-runner` for clarity, but they resolve from the workspace root.
## Usage
```bash
# Run all specs (with smart cache — skips recently-passed specs)
npm start
# Full run — reset cache, run everything
npm run start:full
# Filter by spec name pattern
npx tsx index.ts --spec phase-a
# Watch mode — re-run on spec file changes
npm run start:watch
# Agent mode — accept NDJSON queries on stdin, respond on stderr
npm run start:agent
# Combine flags
npx tsx index.ts --full --watch --agent
```
## Dashboard
The terminal UI shows:
```
Agent Orchestrator — E2E Test Daemon RUNNING 12.3s
────────────────────────────────────────────
✓ smoke 1.2s
✓ workspace 0.8s
⟳ settings
· phase-a-structure
· phase-a-agent
⏭ phase-b-grid
────────────────────────────────────────────
2 passed │ 0 failed │ 1 skipped │ 1 running │ 2 pending │ 2.0s
```
Status icons:
- `✓` green — passed
- `✗` red — failed (error message shown below)
- `⏭` gray — skipped (cached)
- `⟳` yellow — running
- `·` white — pending
## Smart Cache
The daemon reads from the shared `test-results/results.json` (managed by `results-db.ts`). Specs that passed in any of the last 5 runs are skipped unless `--full` is used.
## Agent Bridge Protocol
When started with `--agent`, the daemon accepts NDJSON queries on **stdin** and responds on **stderr** (stdout is used by the dashboard).
### Queries
**Status** — get current test state:
```json
{"type": "status"}
```
Response:
```json
{"type": "status", "running": false, "passed": 15, "failed": 2, "skipped": 1, "pending": 0, "total": 18, "failures": [{"name": "phase-b-grid", "error": "WDIO exited with code 1"}]}
```
**Rerun** — trigger a new test run:
```json
{"type": "rerun", "pattern": "phase-a"}
```
Response:
```json
{"type": "rerun", "specsQueued": 1}
```
**Failures** — get detailed failure list:
```json
{"type": "failures"}
```
Response:
```json
{"type": "failures", "failures": [{"name": "phase-b-grid", "specFile": "phase-b-grid.test.ts", "error": "WDIO exited with code 1"}]}
```
**Reset cache** — clear smart cache:
```json
{"type": "reset-cache"}
```
Response:
```json
{"type": "reset-cache", "ok": true}
```
## Architecture
```
index.ts CLI entry point, arg parsing, main loop
runner.ts WDIO Launcher wrapper, spec discovery, smart cache
dashboard.ts ANSI terminal UI (no external deps)
agent-bridge.ts NDJSON stdio interface for agent integration
```
The daemon reuses the project's existing `wdio.conf.js` and `infra/results-db.ts`.

View file

@ -0,0 +1,193 @@
// Agent bridge — NDJSON stdio interface for Claude Agent SDK integration
// Accepts queries on stdin and responds on stderr (stdout is owned by the
// dashboard renderer). Allows an agent to control and query the test daemon
// programmatically.
import { createInterface } from 'node:readline';
import type { Dashboard, TestEntry } from './dashboard.ts';
import { runSpecs, clearCache, type RunOptions } from './runner.ts';
// ── Query/Response types ──
interface StatusQuery {
type: 'status';
}
interface RerunQuery {
type: 'rerun';
pattern?: string;
}
interface FailuresQuery {
type: 'failures';
}
interface ResetCacheQuery {
type: 'reset-cache';
}
type Query = StatusQuery | RerunQuery | FailuresQuery | ResetCacheQuery;
interface StatusResponse {
type: 'status';
running: boolean;
passed: number;
failed: number;
skipped: number;
pending: number;
total: number;
failures: Array<{ name: string; error?: string }>;
}
interface RerunResponse {
type: 'rerun';
specsQueued: number;
}
interface FailuresResponse {
type: 'failures';
failures: Array<{ name: string; specFile: string; error?: string }>;
}
interface ResetCacheResponse {
type: 'reset-cache';
ok: true;
}
interface ErrorResponse {
type: 'error';
message: string;
}
type Response = StatusResponse | RerunResponse | FailuresResponse | ResetCacheResponse | ErrorResponse;
// ── Bridge ──
export class AgentBridge {
private dashboard: Dashboard;
private running = false;
private triggerRerun: ((opts: RunOptions) => void) | null = null;
private rl: ReturnType<typeof createInterface> | null = null;
constructor(dashboard: Dashboard) {
this.dashboard = dashboard;
}
/** Register callback that triggers a new test run from the main loop */
onRerunRequest(cb: (opts: RunOptions) => void): void {
this.triggerRerun = cb;
}
setRunning(running: boolean): void {
this.running = running;
}
start(): void {
this.rl = createInterface({
input: process.stdin,
terminal: false,
});
this.rl.on('line', (line) => {
const trimmed = line.trim();
if (!trimmed) return;
try {
const query = JSON.parse(trimmed) as Query;
const response = this.handleQuery(query);
this.send(response);
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
this.send({ type: 'error', message: `Invalid query: ${msg}` });
}
});
this.rl.on('close', () => {
this.stop();
});
}
stop(): void {
if (this.rl) {
this.rl.close();
this.rl = null;
}
}
private send(response: Response): void {
// Write to stdout as NDJSON — use fd 3 or stderr if stdout is used by dashboard
// Since dashboard writes to stdout, we use stderr for NDJSON responses
// when the dashboard is active. The agent reads from our stderr.
process.stderr.write(JSON.stringify(response) + '\n');
}
private handleQuery(query: Query): Response {
switch (query.type) {
case 'status':
return this.handleStatus();
case 'rerun':
return this.handleRerun(query);
case 'failures':
return this.handleFailures();
case 'reset-cache':
return this.handleResetCache();
default:
return { type: 'error', message: `Unknown query type: ${(query as { type: string }).type}` };
}
}
private handleStatus(): StatusResponse {
const tests = this.dashboard.getTests();
const passed = tests.filter((t) => t.status === 'passed').length;
const failed = tests.filter((t) => t.status === 'failed').length;
const skipped = tests.filter((t) => t.status === 'skipped').length;
const pending = tests.filter((t) => t.status === 'pending' || t.status === 'running').length;
const failures = tests
.filter((t) => t.status === 'failed')
.map((t) => ({ name: t.name, error: t.error }));
return {
type: 'status',
running: this.running,
passed,
failed,
skipped,
pending,
total: tests.length,
failures,
};
}
private handleRerun(query: RerunQuery): RerunResponse {
if (this.running) {
return { type: 'rerun', specsQueued: 0 };
}
const opts: RunOptions = {};
if (query.pattern) opts.pattern = query.pattern;
opts.full = true; // rerun ignores cache
if (this.triggerRerun) {
this.triggerRerun(opts);
}
return { type: 'rerun', specsQueued: 1 };
}
private handleFailures(): FailuresResponse {
const tests = this.dashboard.getTests();
const failures = tests
.filter((t) => t.status === 'failed')
.map((t) => ({
name: t.name,
specFile: t.specFile,
error: t.error,
}));
return { type: 'failures', failures };
}
private handleResetCache(): ResetCacheResponse {
clearCache();
return { type: 'reset-cache', ok: true };
}
}

View file

@ -0,0 +1,167 @@
// Terminal dashboard — ANSI escape code UI for E2E test status
// No external deps. Renders test list with status icons, timing, and summary.
export type TestStatus = 'pending' | 'running' | 'passed' | 'failed' | 'skipped';
export interface TestEntry {
name: string;
specFile: string;
status: TestStatus;
durationMs?: number;
error?: string;
}
// ── ANSI helpers ──
const ESC = '\x1b[';
const CLEAR_SCREEN = `${ESC}2J${ESC}H`;
const HIDE_CURSOR = `${ESC}?25l`;
const SHOW_CURSOR = `${ESC}?25h`;
const BOLD = `${ESC}1m`;
const DIM = `${ESC}2m`;
const RESET = `${ESC}0m`;
const fg = {
red: `${ESC}31m`,
green: `${ESC}32m`,
yellow: `${ESC}33m`,
blue: `${ESC}34m`,
magenta: `${ESC}35m`,
cyan: `${ESC}36m`,
white: `${ESC}37m`,
gray: `${ESC}90m`,
};
const STATUS_ICONS: Record<TestStatus, string> = {
pending: `${fg.white}\u00b7${RESET}`, // centered dot
running: `${fg.yellow}\u27f3${RESET}`, // clockwise arrow
passed: `${fg.green}\u2713${RESET}`, // check mark
failed: `${fg.red}\u2717${RESET}`, // cross mark
skipped: `${fg.gray}\u23ed${RESET}`, // skip icon
};
function formatDuration(ms: number | undefined): string {
if (ms === undefined) return '';
if (ms < 1000) return `${fg.gray}${ms}ms${RESET}`;
return `${fg.gray}${(ms / 1000).toFixed(1)}s${RESET}`;
}
function truncate(str: string, max: number): string {
return str.length > max ? str.slice(0, max - 1) + '\u2026' : str;
}
// ── Dashboard ──
export class Dashboard {
private tests: TestEntry[] = [];
private startTime: number = Date.now();
private refreshTimer: ReturnType<typeof setInterval> | null = null;
private running = false;
private lastRunStatus: 'idle' | 'running' | 'passed' | 'failed' = 'idle';
setTests(specs: Array<{ name: string; specFile: string }>): void {
this.tests = specs.map((s) => ({
name: s.name,
specFile: s.specFile,
status: 'pending' as TestStatus,
}));
this.startTime = Date.now();
this.lastRunStatus = 'running';
}
updateTest(name: string, status: TestStatus, durationMs?: number, error?: string): void {
const entry = this.tests.find((t) => t.name === name);
if (entry) {
entry.status = status;
entry.durationMs = durationMs;
entry.error = error;
}
}
startRefresh(): void {
if (this.refreshTimer) return;
this.running = true;
process.stdout.write(HIDE_CURSOR);
this.render();
this.refreshTimer = setInterval(() => this.render(), 500);
}
stopRefresh(): void {
this.running = false;
if (this.refreshTimer) {
clearInterval(this.refreshTimer);
this.refreshTimer = null;
}
// Final render with cursor restored
this.render();
process.stdout.write(SHOW_CURSOR);
}
markComplete(): void {
const failed = this.tests.filter((t) => t.status === 'failed').length;
this.lastRunStatus = failed > 0 ? 'failed' : 'passed';
}
stop(): void {
this.stopRefresh();
}
getTests(): TestEntry[] {
return this.tests;
}
render(): void {
const cols = process.stdout.columns || 80;
const lines: string[] = [];
// ── Header ──
const title = 'Agent Orchestrator \u2014 E2E Test Daemon';
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
const statusColor = this.lastRunStatus === 'failed' ? fg.red
: this.lastRunStatus === 'passed' ? fg.green
: this.lastRunStatus === 'running' ? fg.yellow
: fg.gray;
const statusLabel = this.running ? 'RUNNING' : this.lastRunStatus.toUpperCase();
lines.push('');
lines.push(` ${BOLD}${fg.cyan}${title}${RESET} ${statusColor}${statusLabel}${RESET} ${DIM}${elapsed}s${RESET}`);
lines.push(` ${fg.gray}${'─'.repeat(Math.min(cols - 4, 76))}${RESET}`);
// ── Test list ──
const nameWidth = Math.min(cols - 20, 60);
for (const t of this.tests) {
const icon = STATUS_ICONS[t.status];
const name = truncate(t.name, nameWidth);
const dur = formatDuration(t.durationMs);
lines.push(` ${icon} ${name} ${dur}`);
if (t.status === 'failed' && t.error) {
const errLine = truncate(t.error, cols - 8);
lines.push(` ${fg.red}${DIM}${errLine}${RESET}`);
}
}
// ── Summary footer ──
const passed = this.tests.filter((t) => t.status === 'passed').length;
const failed = this.tests.filter((t) => t.status === 'failed').length;
const skipped = this.tests.filter((t) => t.status === 'skipped').length;
const running = this.tests.filter((t) => t.status === 'running').length;
const pending = this.tests.filter((t) => t.status === 'pending').length;
const totalTime = this.tests.reduce((sum, t) => sum + (t.durationMs ?? 0), 0);
lines.push('');
lines.push(` ${fg.gray}${'─'.repeat(Math.min(cols - 4, 76))}${RESET}`);
const parts: string[] = [];
if (passed > 0) parts.push(`${fg.green}${passed} passed${RESET}`);
if (failed > 0) parts.push(`${fg.red}${failed} failed${RESET}`);
if (skipped > 0) parts.push(`${fg.gray}${skipped} skipped${RESET}`);
if (running > 0) parts.push(`${fg.yellow}${running} running${RESET}`);
if (pending > 0) parts.push(`${fg.white}${pending} pending${RESET}`);
parts.push(`${DIM}${formatDuration(totalTime)}${RESET}`);
lines.push(` ${parts.join(' \u2502 ')}`);
lines.push('');
process.stdout.write(CLEAR_SCREEN + lines.join('\n'));
}
}

95
tests/e2e/daemon/index.ts Normal file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env tsx
// Agent Orchestrator E2E Test Daemon — CLI entry point
// Usage: tsx index.ts [--full] [--spec <pattern>] [--watch] [--agent]
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { watch } from 'node:fs';
import { Dashboard } from './dashboard.ts';
import { runSpecs, discoverSpecs, specDisplayName, clearCache, type RunOptions } from './runner.ts';
import { AgentBridge } from './agent-bridge.ts';
const __dirname = dirname(fileURLToPath(import.meta.url));
const SPECS_DIR = resolve(__dirname, '../specs');
// ── CLI args ──
const args = process.argv.slice(2);
const fullMode = args.includes('--full');
const watchMode = args.includes('--watch');
const agentMode = args.includes('--agent');
const specIdx = args.indexOf('--spec');
const specPattern = specIdx !== -1 ? args[specIdx + 1] : undefined;
// ── Init ──
const dashboard = new Dashboard();
let bridge: AgentBridge | null = null;
let pendingRerun: RunOptions | null = null;
if (agentMode) {
bridge = new AgentBridge(dashboard);
bridge.onRerunRequest((opts) => { pendingRerun = opts; });
bridge.start();
}
// ── Run cycle ──
async function runCycle(opts: RunOptions = {}): Promise<void> {
const specs = discoverSpecs(opts.pattern ?? specPattern);
dashboard.setTests(specs.map((s) => ({ name: specDisplayName(s), specFile: s })));
dashboard.startRefresh();
bridge?.setRunning(true);
await runSpecs({
pattern: opts.pattern ?? specPattern,
full: opts.full ?? fullMode,
onResult: (r) => dashboard.updateTest(r.name, r.status, r.durationMs, r.error),
});
dashboard.markComplete();
dashboard.stopRefresh();
bridge?.setRunning(false);
}
function shutdown(poll?: ReturnType<typeof setInterval>, watcher?: ReturnType<typeof watch>): void {
watcher?.close();
if (poll) clearInterval(poll);
dashboard.stop();
bridge?.stop();
process.exit(0);
}
// ── Main ──
async function main(): Promise<void> {
if (fullMode) clearCache();
await runCycle();
if (watchMode || agentMode) {
const watcher = watchMode
? watch(SPECS_DIR, { recursive: false }, (_ev, f) => {
if (f?.endsWith('.test.ts')) pendingRerun = { pattern: specPattern, full: false };
})
: undefined;
const poll = setInterval(async () => {
if (pendingRerun) {
const opts = pendingRerun;
pendingRerun = null;
await runCycle(opts);
}
}, 1000);
process.on('SIGINT', () => shutdown(poll, watcher));
process.on('SIGTERM', () => shutdown(poll, watcher));
} else {
dashboard.stop();
bridge?.stop();
const hasFailed = dashboard.getTests().some((t) => t.status === 'failed');
process.exit(hasFailed ? 1 : 0);
}
}
main().catch((err) => {
console.error('Fatal error:', err);
dashboard.stop();
bridge?.stop();
process.exit(1);
});

View file

@ -0,0 +1,22 @@
{
"name": "@agor/e2e-daemon",
"version": "1.0.0",
"private": true,
"type": "module",
"bin": {
"agor-e2e": "./index.ts"
},
"scripts": {
"start": "tsx index.ts",
"start:full": "tsx index.ts --full",
"start:watch": "tsx index.ts --watch",
"start:agent": "tsx index.ts --agent"
},
"dependencies": {
"@wdio/cli": "^9.24.0",
"@wdio/local-runner": "^9.24.0"
},
"devDependencies": {
"tsx": "^4.19.0"
}
}

183
tests/e2e/daemon/runner.ts Normal file
View file

@ -0,0 +1,183 @@
// WDIO programmatic runner — launches specs and streams results to a callback
// Uses @wdio/cli Launcher for test execution, reads results-db for smart caching.
import { resolve, dirname, basename } from 'node:path';
import { fileURLToPath } from 'node:url';
import { existsSync, readdirSync, writeFileSync } from 'node:fs';
import { execSync } from 'node:child_process';
import type { TestStatus } from './dashboard.ts';
import { ResultsDb } from '../infra/results-db.ts';
const __dirname = dirname(fileURLToPath(import.meta.url));
const PROJECT_ROOT = resolve(__dirname, '../../..');
const WDIO_CONF = resolve(PROJECT_ROOT, 'tests/e2e/wdio.conf.js');
const SPECS_DIR = resolve(PROJECT_ROOT, 'tests/e2e/specs');
const RESULTS_PATH = resolve(PROJECT_ROOT, 'test-results/results.json');
export interface TestResult {
name: string;
specFile: string;
status: TestStatus;
durationMs?: number;
error?: string;
}
export type ResultCallback = (result: TestResult) => void;
export interface RunOptions {
pattern?: string;
full?: boolean;
onResult?: ResultCallback;
}
// ── Spec discovery ──
export function discoverSpecs(pattern?: string): string[] {
const files = readdirSync(SPECS_DIR)
.filter((f) => f.endsWith('.test.ts'))
.sort();
if (pattern) {
const lp = pattern.toLowerCase();
return files.filter((f) => f.toLowerCase().includes(lp));
}
return files;
}
export function specDisplayName(specFile: string): string {
return basename(specFile, '.test.ts');
}
// ── Smart cache ──
function getPassedSpecs(db: ResultsDb): Set<string> {
const passed = new Set<string>();
for (const run of db.getRecentRuns(5)) {
if (run.status !== 'passed' && run.status !== 'failed') continue;
for (const step of db.getStepsForRun(run.run_id)) {
if (step.status === 'passed') passed.add(step.scenario_name);
}
}
return passed;
}
function filterByCache(specs: string[], db: ResultsDb): { run: string[]; skipped: string[] } {
const cached = getPassedSpecs(db);
const run: string[] = [];
const skipped: string[] = [];
for (const spec of specs) {
(cached.has(specDisplayName(spec)) ? skipped : run).push(spec);
}
return { run, skipped };
}
// ── Git info ──
function getGitInfo(): { branch: string | null; sha: string | null } {
try {
const branch = execSync('git rev-parse --abbrev-ref HEAD', {
cwd: PROJECT_ROOT,
encoding: 'utf-8',
}).trim();
const sha = execSync('git rev-parse --short HEAD', {
cwd: PROJECT_ROOT,
encoding: 'utf-8',
}).trim();
return { branch, sha };
} catch {
return { branch: null, sha: null };
}
}
// ── Runner ──
export async function runSpecs(opts: RunOptions = {}): Promise<TestResult[]> {
const db = new ResultsDb();
const allSpecs = discoverSpecs(opts.pattern);
const results: TestResult[] = [];
let specsToRun: string[];
let skippedSpecs: string[] = [];
if (opts.full) {
specsToRun = allSpecs;
} else {
const filtered = filterByCache(allSpecs, db);
specsToRun = filtered.run;
skippedSpecs = filtered.skipped;
}
// Emit skipped specs immediately
for (const spec of skippedSpecs) {
const result: TestResult = { name: specDisplayName(spec), specFile: spec, status: 'skipped' };
results.push(result);
opts.onResult?.(result);
}
if (specsToRun.length === 0) {
return results;
}
// Build absolute spec paths
const specPaths = specsToRun.map((s) => resolve(SPECS_DIR, s));
// Generate run ID and record
const git = getGitInfo();
const runId = `daemon-${Date.now()}`;
db.startRun(runId, git.branch ?? undefined, git.sha ?? undefined);
// Mark specs as running
for (const spec of specsToRun) {
opts.onResult?.({ name: specDisplayName(spec), specFile: spec, status: 'running' });
}
// Run via WDIO CLI Launcher
const startTime = Date.now();
let exitCode = 1;
try {
const { Launcher } = await import('@wdio/cli');
const launcher = new Launcher(WDIO_CONF, {
specs: specPaths,
});
exitCode = await launcher.run();
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
for (const spec of specsToRun) {
const name = specDisplayName(spec);
const result: TestResult = { name, specFile: spec, status: 'failed', error: `Launcher error: ${msg}` };
results.push(result);
opts.onResult?.(result);
db.recordStep({ run_id: runId, scenario_name: name, step_name: 'launcher', status: 'error',
duration_ms: null, error_message: msg, screenshot_path: null, agent_cost_usd: null });
}
db.finishRun(runId, 'error', Date.now() - startTime);
return results;
}
const totalDuration = Date.now() - startTime;
const perSpecDuration = Math.round(totalDuration / specsToRun.length);
// WDIO Launcher returns 0 for all passed, non-zero for failures.
// Without per-test reporter hooks, we infer per-spec status from exit code.
const specStatus: TestStatus = exitCode === 0 ? 'passed' : 'failed';
const errMsg = exitCode !== 0 ? `WDIO exited with code ${exitCode}` : null;
for (const spec of specsToRun) {
const name = specDisplayName(spec);
const result: TestResult = { name, specFile: spec, status: specStatus, durationMs: perSpecDuration,
error: errMsg ?? undefined };
results.push(result);
opts.onResult?.(result);
db.recordStep({ run_id: runId, scenario_name: name, step_name: 'spec', status: specStatus,
duration_ms: perSpecDuration, error_message: errMsg, screenshot_path: null, agent_cost_usd: null });
}
db.finishRun(runId, exitCode === 0 ? 'passed' : 'failed', totalDuration);
return results;
}
export function clearCache(): void {
if (existsSync(RESULTS_PATH)) {
writeFileSync(RESULTS_PATH, JSON.stringify({ runs: [], steps: [] }, null, 2));
}
}

View file

@ -0,0 +1,14 @@
{
"compilerOptions": {
"module": "ESNext",
"moduleResolution": "bundler",
"target": "ESNext",
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true,
"outDir": "dist",
"rootDir": ".",
"declaration": false
},
"include": ["*.ts"]
}