refactor(e2e): extract infrastructure into tests/e2e/infra/ module

- Move fixtures.ts, llm-judge.ts, results-db.ts to tests/e2e/infra/ - Deduplicate wdio.conf.js: use createTestFixture() instead of inline copy - Replace __dirname paths with projectRoot-anchored paths - Create test-mode-constants.ts (typed env var names, flag registry) - Create scripts/preflight-check.sh (validates tauri-driver, display, Claude CLI) - Create scripts/check-test-flags.sh (CI lint for AGOR_TEST flag drift) - Rewrite tests/e2e/README.md with full documentation - Update spec imports for moved infra files
2026-03-18 03:06:57 +01:00 · 2026-03-18 03:06:57 +01:00 · e76bc341f2
commit e76bc341f2
parent 538a31f85c
10 changed files with 235 additions and 191 deletions
--- a/scripts/check-test-flags.sh
+++ b/scripts/check-test-flags.sh
@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Lint check: verify all AGOR_TEST references are documented.
+# Run in CI (from the repository root) to catch flag drift between code and docs.
+
+set -euo pipefail
+
+echo "Checking AGOR_TEST flag references..."
+
+# Known files that should reference AGOR_TEST (from test-mode-constants.ts)
+KNOWN_FILES=(
+  "agor-core/src/config.rs"
+  "src-tauri/src/commands/misc.rs"
+  "src-tauri/src/lib.rs"
+  "src-tauri/src/watcher.rs"
+  "src-tauri/src/fs_watcher.rs"
+  "src-tauri/src/telemetry.rs"
+  "src/App.svelte"
+  "tests/e2e/wdio.conf.js"
+  "tests/e2e/infra/fixtures.ts"
+  "tests/e2e/infra/test-mode-constants.ts"
+)
+
+# Find all files referencing AGOR_TEST (excluding node_modules, target, .git).
+# grep exits non-zero when nothing matches or a path is unreadable; `|| true`
+# keeps errexit + pipefail from aborting the script silently at this point.
+FOUND=$({ grep -rl 'AGOR_TEST' --include='*.rs' --include='*.ts' --include='*.js' --include='*.svelte' \
+  --exclude-dir=node_modules --exclude-dir=target --exclude-dir=.git . 2>/dev/null || true; } | \
+  sed 's|^\./||' | sort)
+[[ -n "$FOUND" ]] || { echo "ERROR: no AGOR_TEST references found — run from the repository root." >&2; exit 1; }
+
+UNKNOWN=()
+# Read line by line so paths containing spaces are not word-split.
+while IFS= read -r f; do
+  MATCH=0
+  for k in "${KNOWN_FILES[@]}"; do
+    if [[ "$f" == "$k" ]]; then MATCH=1; break; fi
+  done
+  if [[ $MATCH -eq 0 ]]; then UNKNOWN+=("$f"); fi
+done <<< "$FOUND"
+
+if [[ ${#UNKNOWN[@]} -gt 0 ]]; then
+  echo ""
+  echo "WARNING: AGOR_TEST referenced in files not in the known registry:"
+  printf '  %s\n' "${UNKNOWN[@]}"
+  echo ""
+  echo "Update tests/e2e/infra/test-mode-constants.ts and this script."
+  exit 1
+else
+  echo "All AGOR_TEST references are documented. ✓"
+fi
--- a/scripts/preflight-check.sh
+++ b/scripts/preflight-check.sh
@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# Preflight check for E2E tests — validates system dependencies.
+# Run from the repository root before the E2E suite to catch missing tools early.
+
+set -euo pipefail
+
+OK=0    # becomes 1 when a required dependency is missing (script then exits 1)
+WARN=0  # becomes 1 when only an optional dependency is missing
+
+# check NAME CMD — run CMD with output discarded; print ✓/✗ NAME, return 1 on failure.
+check() {
+  local name="$1" cmd="$2"
+  if eval "$cmd" >/dev/null 2>&1; then
+    echo "  ✓ $name"
+  else
+    echo "  ✗ $name"
+    return 1
+  fi
+}
+
+echo "E2E Preflight Check"
+echo "==================="
+echo ""
+echo "Required:"
+
+if ! check "tauri-driver" "command -v tauri-driver"; then
+  echo "    Install: cargo install tauri-driver"
+  OK=1
+fi
+
+if ! check "debug binary" "test -f target/debug/agent-orchestrator"; then
+  echo "    Build: cargo tauri build --debug --no-bundle"
+  OK=1
+fi
+
+# Display server (Linux only); either variable may be unset, so use ${VAR:-…} under set -u
+if [[ "$(uname)" == "Linux" ]]; then
+  if [[ -n "${DISPLAY:-}" ]] || [[ -n "${WAYLAND_DISPLAY:-}" ]]; then
+    echo "  ✓ display server (DISPLAY=${DISPLAY:-unset}, WAYLAND_DISPLAY=${WAYLAND_DISPLAY:-unset})"
+  else
+    echo "  ✗ display server — no DISPLAY or WAYLAND_DISPLAY set"
+    echo "    Use: xvfb-run --auto-servernum npm run test:e2e"
+    OK=1
+  fi
+fi
+
+echo ""
+echo "Optional (LLM judge):"
+
+if command -v claude >/dev/null 2>&1; then
+  echo "  ✓ Claude CLI ($(command -v claude))"
+elif [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
+  echo "  ✓ ANTHROPIC_API_KEY set"
+else
+  echo "  ~ Claude CLI not found, ANTHROPIC_API_KEY not set"
+  echo "    LLM-judged tests will be skipped"
+  WARN=1
+fi
+
+echo ""
+if [[ $OK -ne 0 ]]; then
+  echo "FAILED — missing required dependencies"
+  exit 1
+elif [[ $WARN -ne 0 ]]; then
+  echo "PASSED with warnings"
+else
+  echo "ALL CHECKS PASSED"
+fi
--- a/tests/e2e/README.md
+++ b/tests/e2e/README.md
@ -1,143 +1,88 @@
-# E2E Tests (WebDriver)
+# E2E Testing Module

-Tauri apps use the WebDriver protocol for E2E testing (not Playwright directly).
-The app runs inside WebKit2GTK on Linux, so tests interact with the real WebView.
+Browser automation tests for Agent Orchestrator using WebDriverIO + tauri-driver.

-## Prerequisites
-
- Rust toolchain (for building the Tauri app)
- Display server (X11 or Wayland) — headless Xvfb works for CI
- `tauri-driver` installed: `cargo install tauri-driver`
- `webkit2gtk-driver` system package: `sudo apt install webkit2gtk-driver`
- npm devDeps already in package.json (`@wdio/cli`, `@wdio/local-runner`, `@wdio/mocha-framework`, `@wdio/spec-reporter`)
-
-## Running
+## Quick Start

 ```bash
-# From v2/ directory — builds debug binary automatically, spawns tauri-driver
-npm run test:e2e
+# Preflight check (validates dependencies)
+./scripts/preflight-check.sh

-# Skip rebuild (use existing binary)
+# Build debug binary + run E2E
+npm run test:all:e2e
+
+# Run E2E only (skip build)
 SKIP_BUILD=1 npm run test:e2e

-# With test isolation (custom data/config dirs)
-AGOR_TEST_DATA_DIR=/tmp/bt-test/data AGOR_TEST_CONFIG_DIR=/tmp/bt-test/config npm run test:e2e
+# Headless (CI)
+xvfb-run --auto-servernum npm run test:e2e
 ```

-The `wdio.conf.js` handles:
-1. Building the debug binary (`cargo tauri build --debug --no-bundle`) in `onPrepare`
-2. Spawning `tauri-driver` before each session (TCP readiness probe, 10s deadline)
-3. Killing `tauri-driver` after each session
-4. Passing `AGOR_TEST=1` env var to the app for test mode isolation
+## System Dependencies

-## Test Mode (`AGOR_TEST=1`)
+| Tool | Required | Install |
+|------|----------|---------|
+| tauri-driver | Yes | `cargo install tauri-driver` (Linux also needs the `webkit2gtk-driver` package: `sudo apt install webkit2gtk-driver`) |
+| Debug binary | Yes | `cargo tauri build --debug --no-bundle` |
+| X11/Wayland | Yes (Linux) | Use `xvfb-run` in CI |
+| Claude CLI | Optional | LLM-judged tests skip if absent |
+| ANTHROPIC_API_KEY | Optional | Alternative to Claude CLI for LLM judge |

-When `AGOR_TEST=1` is set:
- File watchers (watcher.rs, fs_watcher.rs) are disabled to avoid inotify noise
- Wake scheduler is disabled (no auto-wake timers)
- Data/config directories can be overridden via `AGOR_TEST_DATA_DIR` / `AGOR_TEST_CONFIG_DIR`
-
-## CI setup (headless)
-
-```bash
-# Install virtual framebuffer + WebKit driver
-sudo apt install xvfb webkit2gtk-driver
-
-# Run with Xvfb wrapper
-xvfb-run npm run test:e2e
-```
-
-## Writing tests
-
-Tests use WebdriverIO with Mocha. Specs go in `specs/`:
-
-```typescript
-import { browser, expect } from '@wdio/globals';
-
-describe('BTerminal', () => {
-  it('should show the status bar', async () => {
-    const statusBar = await browser.$('[data-testid="status-bar"]');
-    await expect(statusBar).toBeDisplayed();
-  });
-});
-```
-
-### Stable selectors
-
-Prefer `data-testid` attributes over CSS class selectors:
-
-| Element | Selector |
-|---------|----------|
-| Status bar | `[data-testid="status-bar"]` |
-| Sidebar rail | `[data-testid="sidebar-rail"]` |
-| Settings button | `[data-testid="settings-btn"]` |
-| Project box | `[data-testid="project-box"]` |
-| Project ID | `[data-project-id="..."]` |
-| Project tabs | `[data-testid="project-tabs"]` |
-| Agent session | `[data-testid="agent-session"]` |
-| Agent pane | `[data-testid="agent-pane"]` |
-| Agent status | `[data-agent-status="idle\|running\|..."]` |
-| Agent messages | `[data-testid="agent-messages"]` |
-| Agent prompt | `[data-testid="agent-prompt"]` |
-| Agent submit | `[data-testid="agent-submit"]` |
-| Agent stop | `[data-testid="agent-stop"]` |
-| Terminal tabs | `[data-testid="terminal-tabs"]` |
-| Add tab button | `[data-testid="tab-add"]` |
-| Terminal toggle | `[data-testid="terminal-toggle"]` |
-| Command palette | `[data-testid="command-palette"]` |
-| Palette input | `[data-testid="palette-input"]` |
-
-### Key constraints
-
- `maxInstances: 1` — Tauri doesn't support parallel WebDriver sessions
- Mocha timeout is 60s — the app needs time to initialize
- Tests interact with the real WebKit2GTK WebView, not a browser
- Use `browser.execute()` for JS clicks when WebDriver clicks don't trigger Svelte handlers
- Agent tests (Scenario 7) require a real Claude CLI install + API key — they skip gracefully if unavailable
-
-## Test infrastructure
-
-### Fixtures (`fixtures.ts`)
-
-Creates isolated test environments with temp data/config dirs and git repos:
-
-```typescript
-import { createTestFixture, destroyTestFixture } from '../fixtures';
-
-const fixture = createTestFixture('my-test');
-// fixture.dataDir, fixture.configDir, fixture.projectDir, fixture.env
-destroyTestFixture(fixture);
-```
-
-### Results DB (`results-db.ts`)
-
-JSON-based test results store for tracking runs and steps:
-
-```typescript
-import { ResultsDb } from '../results-db';
-
-const db = new ResultsDb();
-db.startRun('run-001', 'v2-mission-control', 'abc123');
-db.recordStep({ run_id: 'run-001', scenario_name: 'Smoke', step_name: 'renders', status: 'passed', ... });
-db.finishRun('run-001', 'passed', 5000);
-```
-
-## File structure
+## Directory Structure

 ```
 tests/e2e/
-├── README.md                         # This file
-├── wdio.conf.js                      # WebdriverIO config with tauri-driver lifecycle
-├── tsconfig.json                     # TypeScript config for test specs
-├── fixtures.ts                       # Test fixture generator (isolated environments)
-├── results-db.ts                     # JSON test results store
-└── specs/
-    ├── agor.test.ts             # Smoke tests (CSS class selectors, 50+ tests)
-    └── agent-scenarios.test.ts       # Phase A scenarios (data-testid selectors, 22 tests)
+├── wdio.conf.js          # WebDriverIO config + tauri-driver lifecycle
+├── tsconfig.json          # TypeScript config for specs
+├── README.md              # This file
+├── infra/                 # Test infrastructure (not specs)
+│   ├── fixtures.ts        # Test fixture generator (isolated temp dirs)
+│   ├── llm-judge.ts       # LLM-based assertion engine (Claude CLI / API)
+│   ├── results-db.ts      # JSON test results store
+│   └── test-mode-constants.ts  # Typed env var names for test mode
+└── specs/                 # Test specifications
+    ├── agor.test.ts       # Smoke + UI tests (50+ tests)
+    ├── agent-scenarios.test.ts  # Phase A: agent interaction (22 tests)
+    ├── phase-b.test.ts    # Phase B: multi-project + LLM judge
+    └── phase-c.test.ts    # Phase C: hardening features (11 scenarios)
 ```

-## References
+## Test Mode Environment Variables

- Tauri WebDriver docs: https://v2.tauri.app/develop/tests/webdriver/
- WebdriverIO docs: https://webdriver.io/
- tauri-driver: https://crates.io/crates/tauri-driver
+| Variable | Purpose | Read By |
+|----------|---------|---------|
+| `AGOR_TEST=1` | Enable test isolation | config.rs, misc.rs, lib.rs, watcher.rs, fs_watcher.rs, telemetry.rs, App.svelte |
+| `AGOR_TEST_DATA_DIR` | Override data dir | config.rs |
+| `AGOR_TEST_CONFIG_DIR` | Override config dir | config.rs |
+
+**Effects when AGOR_TEST=1:**
+- File watchers disabled (watcher.rs, fs_watcher.rs)
+- OTLP telemetry export disabled (telemetry.rs)
+- CLI tool installation skipped (lib.rs)
+- Wake scheduler disabled (App.svelte)
+- Test env vars forwarded to sidecar processes (lib.rs)
+
+## Test Phases
+
+| Phase | File | Tests | Type |
+|-------|------|-------|------|
+| Smoke | agor.test.ts | 50+ | Deterministic (CSS/DOM assertions) |
+| A | agent-scenarios.test.ts | 22 | Deterministic (data-testid selectors) |
+| B | phase-b.test.ts | 6+ | LLM-judged (multi-project, agent quality) |
+| C | phase-c.test.ts | 11 scenarios | Mixed (deterministic + LLM-judged) |
+
+## Adding a New Spec
+
+1. Create `tests/e2e/specs/my-feature.test.ts`
+2. Import from `@wdio/globals` for `browser` and `expect`
+3. Use `data-testid` selectors (preferred) or CSS classes
+4. Add to `wdio.conf.js` specs array
+5. For LLM assertions: `import { assertWithJudge } from '../infra/llm-judge'`
+6. Run `./scripts/check-test-flags.sh` if you added new AGOR_TEST references
+
+## CI Workflow
+
+See `.github/workflows/e2e.yml` — 3 jobs:
+1. **unit-tests**: vitest frontend
+2. **cargo-tests**: Rust backend
+3. **e2e-tests**: WebDriverIO (xvfb-run, Phase A+B+C, LLM tests gated on secret)
--- a/tests/e2e/infra/fixtures.ts
+++ b/tests/e2e/infra/fixtures.ts
--- a/tests/e2e/infra/llm-judge.ts
+++ b/tests/e2e/infra/llm-judge.ts
--- a/tests/e2e/infra/results-db.ts
+++ b/tests/e2e/infra/results-db.ts
--- a/tests/e2e/infra/test-mode-constants.ts
+++ b/tests/e2e/infra/test-mode-constants.ts
@ -0,0 +1,23 @@
+// Typed constants for test-mode environment variables.
+// Single source of truth for env var names — prevents string literal duplication.
+//
+// These env vars are read by (keep in sync with KNOWN_FILES in scripts/check-test-flags.sh):
+//   Rust:  agor-core/src/config.rs (AppConfig::from_env)
+//          src-tauri/src/commands/misc.rs (is_test_mode)
+//          src-tauri/src/lib.rs (setup: skip CLI install, forward to sidecar)
+//          src-tauri/src/watcher.rs (disable file watcher)
+//          src-tauri/src/fs_watcher.rs (disable fs watcher)
+//          src-tauri/src/telemetry.rs (disable OTLP)
+//   Svelte: src/App.svelte (disable wake scheduler)
+
+/** Name of the main test mode flag — set the env var to '1' to enable test isolation */
+export const AGOR_TEST = 'AGOR_TEST';
+
+/** Name of the env var that overrides the data directory (sessions.db, btmsg.db, search.db) */
+export const AGOR_TEST_DATA_DIR = 'AGOR_TEST_DATA_DIR';
+
+/** Name of the env var that overrides the config directory (groups.json, plugins/) */
+export const AGOR_TEST_CONFIG_DIR = 'AGOR_TEST_CONFIG_DIR';
+
+/** Names of all test-mode env vars, as a readonly tuple (`as const`), for iteration */
+export const TEST_ENV_VARS = [AGOR_TEST, AGOR_TEST_DATA_DIR, AGOR_TEST_CONFIG_DIR] as const;
--- a/tests/e2e/specs/phase-b.test.ts
+++ b/tests/e2e/specs/phase-b.test.ts
@ -1,5 +1,5 @@
 import { browser, expect } from '@wdio/globals';
-import { isJudgeAvailable, assertWithJudge } from '../llm-judge';
+import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';

 // Phase B: Multi-project scenarios + LLM-judged assertions.
 // Extends Phase A with tests that exercise multiple project boxes simultaneously
--- a/tests/e2e/specs/phase-c.test.ts
+++ b/tests/e2e/specs/phase-c.test.ts
@ -1,5 +1,5 @@
 import { browser, expect } from '@wdio/globals';
-import { isJudgeAvailable, assertWithJudge } from '../llm-judge';
+import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';

 // Phase C: Hardening feature tests.
 // Tests the v3 production-readiness features added in the hardening sprint:
--- a/tests/e2e/wdio.conf.js
+++ b/tests/e2e/wdio.conf.js
@ -1,68 +1,30 @@
-import { spawn, execSync } from 'node:child_process';
+import { spawn } from 'node:child_process';
 import { createConnection } from 'node:net';
-import { resolve, dirname, join } from 'node:path';
+import { resolve, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { mkdirSync, writeFileSync, rmSync } from 'node:fs';
-import { tmpdir } from 'node:os';
+import { rmSync } from 'node:fs';
+import { createTestFixture } from './infra/fixtures.ts';

 const __dirname = dirname(fileURLToPath(import.meta.url));
 const projectRoot = resolve(__dirname, '../..');

-// Debug binary path (built with `cargo tauri build --debug --no-bundle`)
-// Cargo workspace target dir is at v2/target/, not v2/src-tauri/target/
+// Debug binary path (Cargo workspace target at repo root)
 const tauriBinary = resolve(projectRoot, 'target/debug/agent-orchestrator');

 let tauriDriver;

-// ── Test Fixture (created eagerly so env vars are available for capabilities) ──
-const fixtureRoot = join(tmpdir(), `agor-e2e-${Date.now()}`);
-const fixtureDataDir = join(fixtureRoot, 'data');
-const fixtureConfigDir = join(fixtureRoot, 'config');
-const fixtureProjectDir = join(fixtureRoot, 'test-project');
+// ── Test Fixture ──
+// IMPORTANT: Must be created at module top-level (synchronously) because the
+// capabilities object below references fixture.env at eval time.
+// tauri:options.env may not reliably set process-level env vars, so we also
+// inject into process.env for tauri-driver inheritance.
+const fixture = createTestFixture('agor-e2e');

-mkdirSync(fixtureDataDir, { recursive: true });
-mkdirSync(fixtureConfigDir, { recursive: true });
-mkdirSync(fixtureProjectDir, { recursive: true });
-
-// Create a minimal git repo for agent testing
-execSync('git init', { cwd: fixtureProjectDir, stdio: 'ignore' });
-execSync('git config user.email "test@agor.dev"', { cwd: fixtureProjectDir, stdio: 'ignore' });
-execSync('git config user.name "Agor Test"', { cwd: fixtureProjectDir, stdio: 'ignore' });
-writeFileSync(join(fixtureProjectDir, 'README.md'), '# Test Project\n\nA simple test project for Agor E2E tests.\n');
-writeFileSync(join(fixtureProjectDir, 'hello.py'), 'def greet(name: str) -> str:\n    return f"Hello, {name}!"\n');
-execSync('git add -A && git commit -m "initial commit"', { cwd: fixtureProjectDir, stdio: 'ignore' });
-
-// Write groups.json with one group containing the test project
-writeFileSync(
-  join(fixtureConfigDir, 'groups.json'),
-  JSON.stringify({
-    version: 1,
-    groups: [{
-      id: 'test-group',
-      name: 'Test Group',
-      projects: [{
-        id: 'test-project',
-        name: 'Test Project',
-        identifier: 'test-project',
-        description: 'E2E test project',
-        icon: '\uf120',
-        cwd: fixtureProjectDir,
-        profile: 'default',
-        enabled: true,
-      }],
-      agents: [],
-    }],
-    activeGroupId: 'test-group',
-  }, null, 2),
-);
-
-// Inject env vars into process.env so tauri-driver inherits them
-// (tauri:options.env may not reliably set process-level env vars)
 process.env.AGOR_TEST = '1';
-process.env.AGOR_TEST_DATA_DIR = fixtureDataDir;
-process.env.AGOR_TEST_CONFIG_DIR = fixtureConfigDir;
+process.env.AGOR_TEST_DATA_DIR = fixture.dataDir;
+process.env.AGOR_TEST_CONFIG_DIR = fixture.configDir;

-console.log(`Test fixture created at ${fixtureRoot}`);
+console.log(`Test fixture created at ${fixture.rootDir}`);

 export const config = {
  // ── Runner ──
@ -78,10 +40,10 @@ export const config = {
  // Single spec file — Tauri launches one app instance per session,
  // and tauri-driver can't re-create sessions between spec files.
  specs: [
-    resolve(__dirname, 'specs/agor.test.ts'),
-    resolve(__dirname, 'specs/agent-scenarios.test.ts'),
-    resolve(__dirname, 'specs/phase-b.test.ts'),
-    resolve(__dirname, 'specs/phase-c.test.ts'),
+    resolve(projectRoot, 'tests/e2e/specs/agor.test.ts'),
+    resolve(projectRoot, 'tests/e2e/specs/agent-scenarios.test.ts'),
+    resolve(projectRoot, 'tests/e2e/specs/phase-b.test.ts'),
+    resolve(projectRoot, 'tests/e2e/specs/phase-c.test.ts'),
  ],

  // ── Capabilities ──
@ -91,11 +53,7 @@ export const config = {
    'tauri:options': {
      application: tauriBinary,
      // Test isolation: fixture-created data/config dirs, disable watchers/telemetry
-      env: {
-        AGOR_TEST: '1',
-        AGOR_TEST_DATA_DIR: fixtureDataDir,
-        AGOR_TEST_CONFIG_DIR: fixtureConfigDir,
-      },
+      env: fixture.env,
    },
  }],

@ -199,7 +157,7 @@ export const config = {
    }
    // Clean up test fixture
    try {
-      rmSync(fixtureRoot, { recursive: true, force: true });
+      rmSync(fixture.rootDir, { recursive: true, force: true });
      console.log('Test fixture cleaned up.');
    } catch { /* best-effort cleanup */ }
  },
@ -207,7 +165,7 @@ export const config = {
  // ── TypeScript (auto-compile via tsx) ──
  autoCompileOpts: {
    tsNodeOpts: {
-      project: resolve(__dirname, 'tsconfig.json'),
+      project: resolve(projectRoot, 'tests/e2e/tsconfig.json'),
    },
  },
 };