From e76bc341f24204d9a6770afb0e82f60ed335b851 Mon Sep 17 00:00:00 2001 From: Hibryda Date: Wed, 18 Mar 2026 03:06:57 +0100 Subject: [PATCH] refactor(e2e): extract infrastructure into tests/e2e/infra/ module - Move fixtures.ts, llm-judge.ts, results-db.ts to tests/e2e/infra/ - Deduplicate wdio.conf.js: use createTestFixture() instead of inline copy - Replace __dirname paths with projectRoot-anchored paths - Create test-mode-constants.ts (typed env var names, flag registry) - Create scripts/preflight-check.sh (validates tauri-driver, display, Claude CLI) - Create scripts/check-test-flags.sh (CI lint for AGOR_TEST flag drift) - Rewrite tests/e2e/README.md with full documentation - Update spec imports for moved infra files --- scripts/check-test-flags.sh | 50 +++++++ scripts/preflight-check.sh | 68 +++++++++ tests/e2e/README.md | 197 +++++++++---------------- tests/e2e/{ => infra}/fixtures.ts | 0 tests/e2e/{ => infra}/llm-judge.ts | 0 tests/e2e/{ => infra}/results-db.ts | 0 tests/e2e/infra/test-mode-constants.ts | 23 +++ tests/e2e/specs/phase-b.test.ts | 2 +- tests/e2e/specs/phase-c.test.ts | 2 +- tests/e2e/wdio.conf.js | 84 +++-------- 10 files changed, 235 insertions(+), 191 deletions(-) create mode 100755 scripts/check-test-flags.sh create mode 100755 scripts/preflight-check.sh rename tests/e2e/{ => infra}/fixtures.ts (100%) rename tests/e2e/{ => infra}/llm-judge.ts (100%) rename tests/e2e/{ => infra}/results-db.ts (100%) create mode 100644 tests/e2e/infra/test-mode-constants.ts diff --git a/scripts/check-test-flags.sh b/scripts/check-test-flags.sh new file mode 100755 index 0000000..2748e83 --- /dev/null +++ b/scripts/check-test-flags.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Lint check: verify all AGOR_TEST references are documented. +# Run in CI to catch flag drift between code and documentation. + +set -euo pipefail + +echo "Checking AGOR_TEST flag references..." 
+ +# Known files that should reference AGOR_TEST (from test-mode-constants.ts) +KNOWN_FILES=( + "agor-core/src/config.rs" + "src-tauri/src/commands/misc.rs" + "src-tauri/src/lib.rs" + "src-tauri/src/watcher.rs" + "src-tauri/src/fs_watcher.rs" + "src-tauri/src/telemetry.rs" + "src/App.svelte" + "tests/e2e/wdio.conf.js" + "tests/e2e/infra/fixtures.ts" + "tests/e2e/infra/test-mode-constants.ts" +) + +# Find all files referencing AGOR_TEST (excluding node_modules, target, .git) +FOUND=$(grep -rl 'AGOR_TEST' --include='*.rs' --include='*.ts' --include='*.js' --include='*.svelte' \ + --exclude-dir=node_modules --exclude-dir=target --exclude-dir=.git . 2>/dev/null | \ + sed 's|^\./||' | sort) + +UNKNOWN="" +for f in $FOUND; do + MATCH=0 + for k in "${KNOWN_FILES[@]}"; do + if [[ "$f" == "$k" ]]; then + MATCH=1 + break + fi + done + if [[ $MATCH -eq 0 ]]; then + UNKNOWN="$UNKNOWN $f\n" + fi +done + +if [[ -n "$UNKNOWN" ]]; then + echo "" + echo "WARNING: AGOR_TEST referenced in files not in the known registry:" + echo -e "$UNKNOWN" + echo "Update tests/e2e/infra/test-mode-constants.ts and this script." + exit 1 +else + echo "All AGOR_TEST references are documented. ✓" +fi diff --git a/scripts/preflight-check.sh b/scripts/preflight-check.sh new file mode 100755 index 0000000..538054b --- /dev/null +++ b/scripts/preflight-check.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# Preflight check for E2E tests — validates system dependencies. +# Run before E2E suite to catch missing tools early. + +set -euo pipefail + +OK=0 +WARN=0 + +check() { + local name="$1" cmd="$2" + if eval "$cmd" >/dev/null 2>&1; then + echo " ✓ $name" + else + echo " ✗ $name" + return 1 + fi +} + +echo "E2E Preflight Check" +echo "===================" + +echo "" +echo "Required:" + +if ! check "tauri-driver" "command -v tauri-driver"; then + echo " Install: cargo install tauri-driver" + OK=1 +fi + +if ! 
check "debug binary" "test -f target/debug/agent-orchestrator"; then + echo " Build: cargo tauri build --debug --no-bundle" + OK=1 +fi + +# Display server (Linux only); expansions use ${VAR:-} because set -u aborts on an unset DISPLAY (Wayland-only sessions) +if [[ "$(uname)" == "Linux" ]]; then + if [[ -n "${DISPLAY:-}" ]] || [[ -n "${WAYLAND_DISPLAY:-}" ]]; then + echo " ✓ display server (DISPLAY=${DISPLAY:-unset}, WAYLAND_DISPLAY=${WAYLAND_DISPLAY:-unset})" + else + echo " ✗ display server — no DISPLAY or WAYLAND_DISPLAY set" + echo " Use: xvfb-run --auto-servernum npm run test:e2e" + OK=1 + fi +fi + +echo "" +echo "Optional (LLM judge):" + +if command -v claude >/dev/null 2>&1; then + echo " ✓ Claude CLI ($(command -v claude))" +elif [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then + echo " ✓ ANTHROPIC_API_KEY set" +else + echo " ~ Claude CLI not found, ANTHROPIC_API_KEY not set" + echo " LLM-judged tests will be skipped" + WARN=1 +fi + +echo "" +if [[ $OK -ne 0 ]]; then + echo "FAILED — missing required dependencies" + exit 1 +elif [[ $WARN -ne 0 ]]; then + echo "PASSED with warnings" +else + echo "ALL CHECKS PASSED" +fi diff --git a/tests/e2e/README.md b/tests/e2e/README.md index b959e29..b46308f 100644 --- a/tests/e2e/README.md +++ b/tests/e2e/README.md @@ -1,143 +1,88 @@ -# E2E Tests (WebDriver) +# E2E Testing Module -Tauri apps use the WebDriver protocol for E2E testing (not Playwright directly). -The app runs inside WebKit2GTK on Linux, so tests interact with the real WebView. +Browser automation tests for Agent Orchestrator using WebDriverIO + tauri-driver. 
-## Prerequisites - -- Rust toolchain (for building the Tauri app) -- Display server (X11 or Wayland) — headless Xvfb works for CI -- `tauri-driver` installed: `cargo install tauri-driver` -- `webkit2gtk-driver` system package: `sudo apt install webkit2gtk-driver` -- npm devDeps already in package.json (`@wdio/cli`, `@wdio/local-runner`, `@wdio/mocha-framework`, `@wdio/spec-reporter`) - -## Running +## Quick Start ```bash -# From v2/ directory — builds debug binary automatically, spawns tauri-driver -npm run test:e2e +# Preflight check (validates dependencies) +./scripts/preflight-check.sh -# Skip rebuild (use existing binary) +# Build debug binary + run E2E +npm run test:all:e2e + +# Run E2E only (skip build) SKIP_BUILD=1 npm run test:e2e -# With test isolation (custom data/config dirs) -AGOR_TEST_DATA_DIR=/tmp/bt-test/data AGOR_TEST_CONFIG_DIR=/tmp/bt-test/config npm run test:e2e +# Headless (CI) +xvfb-run --auto-servernum npm run test:e2e ``` -The `wdio.conf.js` handles: -1. Building the debug binary (`cargo tauri build --debug --no-bundle`) in `onPrepare` -2. Spawning `tauri-driver` before each session (TCP readiness probe, 10s deadline) -3. Killing `tauri-driver` after each session -4. 
Passing `AGOR_TEST=1` env var to the app for test mode isolation +## System Dependencies -## Test Mode (`AGOR_TEST=1`) +| Tool | Required | Install | +|------|----------|---------| +| tauri-driver | Yes | `cargo install tauri-driver` | +| Debug binary | Yes | `cargo tauri build --debug --no-bundle` | +| X11/Wayland | Yes (Linux) | Use `xvfb-run` in CI | +| Claude CLI | Optional | LLM-judged tests skip if absent | +| ANTHROPIC_API_KEY | Optional | Alternative to Claude CLI for LLM judge | -When `AGOR_TEST=1` is set: -- File watchers (watcher.rs, fs_watcher.rs) are disabled to avoid inotify noise -- Wake scheduler is disabled (no auto-wake timers) -- Data/config directories can be overridden via `AGOR_TEST_DATA_DIR` / `AGOR_TEST_CONFIG_DIR` - -## CI setup (headless) - -```bash -# Install virtual framebuffer + WebKit driver -sudo apt install xvfb webkit2gtk-driver - -# Run with Xvfb wrapper -xvfb-run npm run test:e2e -``` - -## Writing tests - -Tests use WebdriverIO with Mocha. Specs go in `specs/`: - -```typescript -import { browser, expect } from '@wdio/globals'; - -describe('BTerminal', () => { - it('should show the status bar', async () => { - const statusBar = await browser.$('[data-testid="status-bar"]'); - await expect(statusBar).toBeDisplayed(); - }); -}); -``` - -### Stable selectors - -Prefer `data-testid` attributes over CSS class selectors: - -| Element | Selector | -|---------|----------| -| Status bar | `[data-testid="status-bar"]` | -| Sidebar rail | `[data-testid="sidebar-rail"]` | -| Settings button | `[data-testid="settings-btn"]` | -| Project box | `[data-testid="project-box"]` | -| Project ID | `[data-project-id="..."]` | -| Project tabs | `[data-testid="project-tabs"]` | -| Agent session | `[data-testid="agent-session"]` | -| Agent pane | `[data-testid="agent-pane"]` | -| Agent status | `[data-agent-status="idle\|running\|..."]` | -| Agent messages | `[data-testid="agent-messages"]` | -| Agent prompt | `[data-testid="agent-prompt"]` | -| Agent 
submit | `[data-testid="agent-submit"]` | -| Agent stop | `[data-testid="agent-stop"]` | -| Terminal tabs | `[data-testid="terminal-tabs"]` | -| Add tab button | `[data-testid="tab-add"]` | -| Terminal toggle | `[data-testid="terminal-toggle"]` | -| Command palette | `[data-testid="command-palette"]` | -| Palette input | `[data-testid="palette-input"]` | - -### Key constraints - -- `maxInstances: 1` — Tauri doesn't support parallel WebDriver sessions -- Mocha timeout is 60s — the app needs time to initialize -- Tests interact with the real WebKit2GTK WebView, not a browser -- Use `browser.execute()` for JS clicks when WebDriver clicks don't trigger Svelte handlers -- Agent tests (Scenario 7) require a real Claude CLI install + API key — they skip gracefully if unavailable - -## Test infrastructure - -### Fixtures (`fixtures.ts`) - -Creates isolated test environments with temp data/config dirs and git repos: - -```typescript -import { createTestFixture, destroyTestFixture } from '../fixtures'; - -const fixture = createTestFixture('my-test'); -// fixture.dataDir, fixture.configDir, fixture.projectDir, fixture.env -destroyTestFixture(fixture); -``` - -### Results DB (`results-db.ts`) - -JSON-based test results store for tracking runs and steps: - -```typescript -import { ResultsDb } from '../results-db'; - -const db = new ResultsDb(); -db.startRun('run-001', 'v2-mission-control', 'abc123'); -db.recordStep({ run_id: 'run-001', scenario_name: 'Smoke', step_name: 'renders', status: 'passed', ... 
}); -db.finishRun('run-001', 'passed', 5000); -``` - -## File structure +## Directory Structure ``` tests/e2e/ -├── README.md # This file -├── wdio.conf.js # WebdriverIO config with tauri-driver lifecycle -├── tsconfig.json # TypeScript config for test specs -├── fixtures.ts # Test fixture generator (isolated environments) -├── results-db.ts # JSON test results store -└── specs/ - ├── agor.test.ts # Smoke tests (CSS class selectors, 50+ tests) - └── agent-scenarios.test.ts # Phase A scenarios (data-testid selectors, 22 tests) +├── wdio.conf.js # WebDriverIO config + tauri-driver lifecycle +├── tsconfig.json # TypeScript config for specs +├── README.md # This file +├── infra/ # Test infrastructure (not specs) +│ ├── fixtures.ts # Test fixture generator (isolated temp dirs) +│ ├── llm-judge.ts # LLM-based assertion engine (Claude CLI / API) +│ ├── results-db.ts # JSON test results store +│ └── test-mode-constants.ts # Typed env var names for test mode +└── specs/ # Test specifications + ├── agor.test.ts # Smoke + UI tests (50+ tests) + ├── agent-scenarios.test.ts # Phase A: agent interaction (22 tests) + ├── phase-b.test.ts # Phase B: multi-project + LLM judge + └── phase-c.test.ts # Phase C: hardening features (11 scenarios) ``` -## References +## Test Mode Environment Variables -- Tauri WebDriver docs: https://v2.tauri.app/develop/tests/webdriver/ -- WebdriverIO docs: https://webdriver.io/ -- tauri-driver: https://crates.io/crates/tauri-driver +| Variable | Purpose | Read By | +|----------|---------|---------| +| `AGOR_TEST=1` | Enable test isolation | config.rs, misc.rs, lib.rs, watcher.rs, fs_watcher.rs, telemetry.rs, App.svelte | +| `AGOR_TEST_DATA_DIR` | Override data dir | config.rs | +| `AGOR_TEST_CONFIG_DIR` | Override config dir | config.rs | + +**Effects when AGOR_TEST=1:** +- File watchers disabled (watcher.rs, fs_watcher.rs) +- OTLP telemetry export disabled (telemetry.rs) +- CLI tool installation skipped (lib.rs) +- Wake scheduler disabled (App.svelte) 
+- Test env vars forwarded to sidecar processes (lib.rs) + +## Test Phases + +| Phase | File | Tests | Type | +|-------|------|-------|------| +| Smoke | agor.test.ts | 50+ | Deterministic (CSS/DOM assertions) | +| A | agent-scenarios.test.ts | 22 | Deterministic (data-testid selectors) | +| B | phase-b.test.ts | 6+ | LLM-judged (multi-project, agent quality) | +| C | phase-c.test.ts | 11 scenarios | Mixed (deterministic + LLM-judged) | + +## Adding a New Spec + +1. Create `tests/e2e/specs/my-feature.test.ts` +2. Import from `@wdio/globals` for `browser` and `expect` +3. Use `data-testid` selectors (preferred) or CSS classes +4. Add to `wdio.conf.js` specs array +5. For LLM assertions: `import { assertWithJudge } from '../infra/llm-judge'` +6. Run `./scripts/check-test-flags.sh` if you added new AGOR_TEST references + +## CI Workflow + +See `.github/workflows/e2e.yml` — 3 jobs: +1. **unit-tests**: vitest frontend +2. **cargo-tests**: Rust backend +3. **e2e-tests**: WebDriverIO (xvfb-run, Phase A+B+C, LLM tests gated on secret) diff --git a/tests/e2e/fixtures.ts b/tests/e2e/infra/fixtures.ts similarity index 100% rename from tests/e2e/fixtures.ts rename to tests/e2e/infra/fixtures.ts diff --git a/tests/e2e/llm-judge.ts b/tests/e2e/infra/llm-judge.ts similarity index 100% rename from tests/e2e/llm-judge.ts rename to tests/e2e/infra/llm-judge.ts diff --git a/tests/e2e/results-db.ts b/tests/e2e/infra/results-db.ts similarity index 100% rename from tests/e2e/results-db.ts rename to tests/e2e/infra/results-db.ts diff --git a/tests/e2e/infra/test-mode-constants.ts b/tests/e2e/infra/test-mode-constants.ts new file mode 100644 index 0000000..62cf3c0 --- /dev/null +++ b/tests/e2e/infra/test-mode-constants.ts @@ -0,0 +1,23 @@ +// Typed constants for test-mode environment variables. +// Single source of truth for env var names — prevents string literal duplication. 
+// +// These env vars are read by: +// Rust: agor-core/src/config.rs (AppConfig::from_env) +// src-tauri/src/commands/misc.rs (is_test_mode) +// src-tauri/src/lib.rs (setup: skip CLI install, forward to sidecar) +// src-tauri/src/watcher.rs (disable file watcher) +// src-tauri/src/fs_watcher.rs (disable fs watcher) +// src-tauri/src/telemetry.rs (disable OTLP) +// Svelte: src/App.svelte (disable wake scheduler) + +/** Main test mode flag — set to '1' to enable test isolation */ +export const AGOR_TEST = 'AGOR_TEST'; + +/** Override data directory (sessions.db, btmsg.db, search.db) */ +export const AGOR_TEST_DATA_DIR = 'AGOR_TEST_DATA_DIR'; + +/** Override config directory (groups.json, plugins/) */ +export const AGOR_TEST_CONFIG_DIR = 'AGOR_TEST_CONFIG_DIR'; + +/** All test-mode env vars for iteration */ +export const TEST_ENV_VARS = [AGOR_TEST, AGOR_TEST_DATA_DIR, AGOR_TEST_CONFIG_DIR] as const; diff --git a/tests/e2e/specs/phase-b.test.ts b/tests/e2e/specs/phase-b.test.ts index 7d24901..77f2d76 100644 --- a/tests/e2e/specs/phase-b.test.ts +++ b/tests/e2e/specs/phase-b.test.ts @@ -1,5 +1,5 @@ import { browser, expect } from '@wdio/globals'; -import { isJudgeAvailable, assertWithJudge } from '../llm-judge'; +import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge'; // Phase B: Multi-project scenarios + LLM-judged assertions. // Extends Phase A with tests that exercise multiple project boxes simultaneously diff --git a/tests/e2e/specs/phase-c.test.ts b/tests/e2e/specs/phase-c.test.ts index 00771d0..4d4f431 100644 --- a/tests/e2e/specs/phase-c.test.ts +++ b/tests/e2e/specs/phase-c.test.ts @@ -1,5 +1,5 @@ import { browser, expect } from '@wdio/globals'; -import { isJudgeAvailable, assertWithJudge } from '../llm-judge'; +import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge'; // Phase C: Hardening feature tests. 
// Tests the v3 production-readiness features added in the hardening sprint: diff --git a/tests/e2e/wdio.conf.js b/tests/e2e/wdio.conf.js index a0d582d..41f3999 100644 --- a/tests/e2e/wdio.conf.js +++ b/tests/e2e/wdio.conf.js @@ -1,68 +1,30 @@ -import { spawn, execSync } from 'node:child_process'; +import { spawn } from 'node:child_process'; import { createConnection } from 'node:net'; -import { resolve, dirname, join } from 'node:path'; +import { resolve, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { mkdirSync, writeFileSync, rmSync } from 'node:fs'; -import { tmpdir } from 'node:os'; +import { rmSync } from 'node:fs'; +import { createTestFixture } from './infra/fixtures.ts'; const __dirname = dirname(fileURLToPath(import.meta.url)); const projectRoot = resolve(__dirname, '../..'); -// Debug binary path (built with `cargo tauri build --debug --no-bundle`) -// Cargo workspace target dir is at v2/target/, not v2/src-tauri/target/ +// Debug binary path (Cargo workspace target at repo root) const tauriBinary = resolve(projectRoot, 'target/debug/agent-orchestrator'); let tauriDriver; -// ── Test Fixture (created eagerly so env vars are available for capabilities) ── -const fixtureRoot = join(tmpdir(), `agor-e2e-${Date.now()}`); -const fixtureDataDir = join(fixtureRoot, 'data'); -const fixtureConfigDir = join(fixtureRoot, 'config'); -const fixtureProjectDir = join(fixtureRoot, 'test-project'); +// ── Test Fixture ── +// IMPORTANT: Must be created at module top-level (synchronously) because the +// capabilities object below references fixture.env at module evaluation time. +// tauri:options.env may not reliably set process-level env vars, so we also +// inject into process.env for tauri-driver inheritance. 
+const fixture = createTestFixture('agor-e2e'); -mkdirSync(fixtureDataDir, { recursive: true }); -mkdirSync(fixtureConfigDir, { recursive: true }); -mkdirSync(fixtureProjectDir, { recursive: true }); - -// Create a minimal git repo for agent testing -execSync('git init', { cwd: fixtureProjectDir, stdio: 'ignore' }); -execSync('git config user.email "test@agor.dev"', { cwd: fixtureProjectDir, stdio: 'ignore' }); -execSync('git config user.name "Agor Test"', { cwd: fixtureProjectDir, stdio: 'ignore' }); -writeFileSync(join(fixtureProjectDir, 'README.md'), '# Test Project\n\nA simple test project for Agor E2E tests.\n'); -writeFileSync(join(fixtureProjectDir, 'hello.py'), 'def greet(name: str) -> str:\n return f"Hello, {name}!"\n'); -execSync('git add -A && git commit -m "initial commit"', { cwd: fixtureProjectDir, stdio: 'ignore' }); - -// Write groups.json with one group containing the test project -writeFileSync( - join(fixtureConfigDir, 'groups.json'), - JSON.stringify({ - version: 1, - groups: [{ - id: 'test-group', - name: 'Test Group', - projects: [{ - id: 'test-project', - name: 'Test Project', - identifier: 'test-project', - description: 'E2E test project', - icon: '\uf120', - cwd: fixtureProjectDir, - profile: 'default', - enabled: true, - }], - agents: [], - }], - activeGroupId: 'test-group', - }, null, 2), -); - -// Inject env vars into process.env so tauri-driver inherits them -// (tauri:options.env may not reliably set process-level env vars) process.env.AGOR_TEST = '1'; -process.env.AGOR_TEST_DATA_DIR = fixtureDataDir; -process.env.AGOR_TEST_CONFIG_DIR = fixtureConfigDir; +process.env.AGOR_TEST_DATA_DIR = fixture.dataDir; +process.env.AGOR_TEST_CONFIG_DIR = fixture.configDir; -console.log(`Test fixture created at ${fixtureRoot}`); +console.log(`Test fixture created at ${fixture.rootDir}`); export const config = { // ── Runner ── @@ -78,10 +40,10 @@ export const config = { // Single spec file — Tauri launches one app instance per session, // and 
tauri-driver can't re-create sessions between spec files. specs: [ - resolve(__dirname, 'specs/agor.test.ts'), - resolve(__dirname, 'specs/agent-scenarios.test.ts'), - resolve(__dirname, 'specs/phase-b.test.ts'), - resolve(__dirname, 'specs/phase-c.test.ts'), + resolve(projectRoot, 'tests/e2e/specs/agor.test.ts'), + resolve(projectRoot, 'tests/e2e/specs/agent-scenarios.test.ts'), + resolve(projectRoot, 'tests/e2e/specs/phase-b.test.ts'), + resolve(projectRoot, 'tests/e2e/specs/phase-c.test.ts'), ], // ── Capabilities ── @@ -91,11 +53,7 @@ export const config = { 'tauri:options': { application: tauriBinary, // Test isolation: fixture-created data/config dirs, disable watchers/telemetry - env: { - AGOR_TEST: '1', - AGOR_TEST_DATA_DIR: fixtureDataDir, - AGOR_TEST_CONFIG_DIR: fixtureConfigDir, - }, + env: fixture.env, }, }], @@ -199,7 +157,7 @@ export const config = { } // Clean up test fixture try { - rmSync(fixtureRoot, { recursive: true, force: true }); + rmSync(fixture.rootDir, { recursive: true, force: true }); console.log('Test fixture cleaned up.'); } catch { /* best-effort cleanup */ } }, @@ -207,7 +165,7 @@ export const config = { // ── TypeScript (auto-compile via tsx) ── autoCompileOpts: { tsNodeOpts: { - project: resolve(__dirname, 'tsconfig.json'), + project: resolve(projectRoot, 'tests/e2e/tsconfig.json'), }, }, };