refactor(e2e): extract infrastructure into tests/e2e/infra/ module

- Move fixtures.ts, llm-judge.ts, results-db.ts to tests/e2e/infra/
- Deduplicate wdio.conf.js: use createTestFixture() instead of inline copy
- Replace __dirname paths with projectRoot-anchored paths
- Create test-mode-constants.ts (typed env var names, flag registry)
- Create scripts/preflight-check.sh (validates tauri-driver, display, Claude CLI)
- Create scripts/check-test-flags.sh (CI lint for AGOR_TEST flag drift)
- Rewrite tests/e2e/README.md with full documentation
- Update spec imports for moved infra files
This commit is contained in:
Hibryda 2026-03-18 03:06:57 +01:00
parent 538a31f85c
commit e76bc341f2
10 changed files with 235 additions and 191 deletions

50
scripts/check-test-flags.sh Executable file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Lint check: verify all AGOR_TEST references are documented.
# Run in CI to catch flag drift between code and documentation.
#
# Exit status:
#   0  every file that mentions AGOR_TEST is listed in KNOWN_FILES
#   1  at least one file mentions AGOR_TEST but is not listed
set -euo pipefail

# Scan from the repository root (the parent of scripts/) so the relative
# paths in KNOWN_FILES match no matter where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

echo "Checking AGOR_TEST flag references..."

# Known files that should reference AGOR_TEST (from test-mode-constants.ts)
KNOWN_FILES=(
  "agor-core/src/config.rs"
  "src-tauri/src/commands/misc.rs"
  "src-tauri/src/lib.rs"
  "src-tauri/src/watcher.rs"
  "src-tauri/src/fs_watcher.rs"
  "src-tauri/src/telemetry.rs"
  "src/App.svelte"
  "tests/e2e/wdio.conf.js"
  "tests/e2e/infra/fixtures.ts"
  "tests/e2e/infra/test-mode-constants.ts"
)

# Find all files referencing AGOR_TEST (excluding node_modules, target, .git).
# grep exits 1 when nothing matches; with `set -e` + `pipefail` that would
# abort the script silently, so an empty result is tolerated via `|| true`.
FOUND=$(grep -rl 'AGOR_TEST' --include='*.rs' --include='*.ts' --include='*.js' --include='*.svelte' \
  --exclude-dir=node_modules --exclude-dir=target --exclude-dir=.git . 2>/dev/null | \
  sed 's|^\./||' | sort || true)

# Collect every found path that is not in the registry. Reading line by line
# (instead of `for f in $FOUND`) keeps paths containing spaces or glob
# characters intact.
UNKNOWN=()
while IFS= read -r f; do
  # A here-string of an empty $FOUND still yields one empty line — skip it.
  if [[ -z "$f" ]]; then
    continue
  fi
  MATCH=0
  for k in "${KNOWN_FILES[@]}"; do
    if [[ "$f" == "$k" ]]; then
      MATCH=1
      break
    fi
  done
  if [[ $MATCH -eq 0 ]]; then
    UNKNOWN+=("$f")
  fi
done <<< "$FOUND"

if [[ ${#UNKNOWN[@]} -gt 0 ]]; then
  echo ""
  echo "WARNING: AGOR_TEST referenced in files not in the known registry:"
  # printf prints each path verbatim (echo -e would interpret backslashes).
  printf ' %s\n' "${UNKNOWN[@]}"
  echo ""
  echo "Update tests/e2e/infra/test-mode-constants.ts and this script."
  exit 1
else
  echo "All AGOR_TEST references are documented. ✓"
fi

68
scripts/preflight-check.sh Executable file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env bash
# Preflight check for E2E tests — validates system dependencies.
# Run before E2E suite to catch missing tools early.
set -euo pipefail

# Result flags. Note the inverted sense of OK: it starts at 0 and is set to 1
# as soon as a REQUIRED dependency is missing (non-zero => script exits 1).
OK=0
# Set to 1 when only an optional dependency is missing (reported as a warning).
WARN=0

# check NAME CMD
#   Evaluates the shell snippet CMD with all of its output discarded and
#   prints a one-line result for NAME: " ✓ NAME" on success, " ✗ NAME" on
#   failure. Returns 0 on success and 1 on failure, so callers can branch
#   with `if ! check ...`.
#   CMD goes through `eval`; pass only trusted, hard-coded snippets.
check() {
  local name="$1" cmd="$2"
  if eval "$cmd" >/dev/null 2>&1; then
    echo " ✓ $name"
  else
    echo " ✗ $name"
    return 1
  fi
}
# Resolve relative paths (target/debug/...) against the repository root — the
# parent of this script's directory — so the result does not depend on the
# caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

echo "E2E Preflight Check"
echo "==================="
echo ""

# ── Required dependencies: any miss sets OK=1 and fails the run ──
echo "Required:"
if ! check "tauri-driver" "command -v tauri-driver"; then
  echo " Install: cargo install tauri-driver"
  OK=1
fi
if ! check "debug binary" "test -f target/debug/agent-orchestrator"; then
  echo " Build: cargo tauri build --debug --no-bundle"
  OK=1
fi

# Display server (Linux only)
if [[ "$(uname)" == "Linux" ]]; then
  # Report whichever variable is actually set. Expanding a bare $DISPLAY on a
  # Wayland-only session would abort the script under `set -u`.
  if [[ -n "${DISPLAY:-}" ]]; then
    echo " ✓ display server (DISPLAY=$DISPLAY)"
  elif [[ -n "${WAYLAND_DISPLAY:-}" ]]; then
    echo " ✓ display server (WAYLAND_DISPLAY=$WAYLAND_DISPLAY)"
  else
    echo " ✗ display server — no DISPLAY or WAYLAND_DISPLAY set"
    echo " Use: xvfb-run --auto-servernum npm run test:e2e"
    OK=1
  fi
fi

# ── Optional dependencies: a miss only sets WARN=1 ──
echo ""
echo "Optional (LLM judge):"
if command -v claude >/dev/null 2>&1; then
  # `command -v` is the builtin already used for detection above; it avoids
  # depending on an external `which` binary.
  echo " ✓ Claude CLI ($(command -v claude))"
elif [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
  echo " ✓ ANTHROPIC_API_KEY set"
else
  echo " ~ Claude CLI not found, ANTHROPIC_API_KEY not set"
  echo " LLM-judged tests will be skipped"
  WARN=1
fi

# ── Summary: exit 1 only when a required dependency is missing ──
echo ""
if [[ $OK -ne 0 ]]; then
  echo "FAILED — missing required dependencies"
  exit 1
elif [[ $WARN -ne 0 ]]; then
  echo "PASSED with warnings"
else
  echo "ALL CHECKS PASSED"
fi

View file

@ -1,143 +1,88 @@
# E2E Tests (WebDriver)
# E2E Testing Module
Tauri apps use the WebDriver protocol for E2E testing (not Playwright directly).
The app runs inside WebKit2GTK on Linux, so tests interact with the real WebView.
Browser automation tests for Agent Orchestrator using WebdriverIO + tauri-driver.
## Prerequisites
- Rust toolchain (for building the Tauri app)
- Display server (X11 or Wayland) — headless Xvfb works for CI
- `tauri-driver` installed: `cargo install tauri-driver`
- `webkit2gtk-driver` system package: `sudo apt install webkit2gtk-driver`
- npm devDeps already in package.json (`@wdio/cli`, `@wdio/local-runner`, `@wdio/mocha-framework`, `@wdio/spec-reporter`)
## Running
## Quick Start
```bash
# From v2/ directory — builds debug binary automatically, spawns tauri-driver
npm run test:e2e
# Preflight check (validates dependencies)
./scripts/preflight-check.sh
# Skip rebuild (use existing binary)
# Build debug binary + run E2E
npm run test:all:e2e
# Run E2E only (skip build)
SKIP_BUILD=1 npm run test:e2e
# With test isolation (custom data/config dirs)
AGOR_TEST_DATA_DIR=/tmp/bt-test/data AGOR_TEST_CONFIG_DIR=/tmp/bt-test/config npm run test:e2e
# Headless (CI)
xvfb-run --auto-servernum npm run test:e2e
```
The `wdio.conf.js` handles:
1. Building the debug binary (`cargo tauri build --debug --no-bundle`) in `onPrepare`
2. Spawning `tauri-driver` before each session (TCP readiness probe, 10s deadline)
3. Killing `tauri-driver` after each session
4. Passing `AGOR_TEST=1` env var to the app for test mode isolation
## System Dependencies
## Test Mode (`AGOR_TEST=1`)
| Tool | Required | Install |
|------|----------|---------|
| tauri-driver | Yes | `cargo install tauri-driver` |
| Debug binary | Yes | `cargo tauri build --debug --no-bundle` |
| X11/Wayland | Yes (Linux) | Use `xvfb-run` in CI |
| Claude CLI | Optional | LLM-judged tests skip if absent |
| ANTHROPIC_API_KEY | Optional | Alternative to Claude CLI for LLM judge |
When `AGOR_TEST=1` is set:
- File watchers (watcher.rs, fs_watcher.rs) are disabled to avoid inotify noise
- Wake scheduler is disabled (no auto-wake timers)
- Data/config directories can be overridden via `AGOR_TEST_DATA_DIR` / `AGOR_TEST_CONFIG_DIR`
## CI setup (headless)
```bash
# Install virtual framebuffer + WebKit driver
sudo apt install xvfb webkit2gtk-driver
# Run with Xvfb wrapper
xvfb-run npm run test:e2e
```
## Writing tests
Tests use WebdriverIO with Mocha. Specs go in `specs/`:
```typescript
import { browser, expect } from '@wdio/globals';
describe('BTerminal', () => {
it('should show the status bar', async () => {
const statusBar = await browser.$('[data-testid="status-bar"]');
await expect(statusBar).toBeDisplayed();
});
});
```
### Stable selectors
Prefer `data-testid` attributes over CSS class selectors:
| Element | Selector |
|---------|----------|
| Status bar | `[data-testid="status-bar"]` |
| Sidebar rail | `[data-testid="sidebar-rail"]` |
| Settings button | `[data-testid="settings-btn"]` |
| Project box | `[data-testid="project-box"]` |
| Project ID | `[data-project-id="..."]` |
| Project tabs | `[data-testid="project-tabs"]` |
| Agent session | `[data-testid="agent-session"]` |
| Agent pane | `[data-testid="agent-pane"]` |
| Agent status | `[data-agent-status="idle\|running\|..."]` |
| Agent messages | `[data-testid="agent-messages"]` |
| Agent prompt | `[data-testid="agent-prompt"]` |
| Agent submit | `[data-testid="agent-submit"]` |
| Agent stop | `[data-testid="agent-stop"]` |
| Terminal tabs | `[data-testid="terminal-tabs"]` |
| Add tab button | `[data-testid="tab-add"]` |
| Terminal toggle | `[data-testid="terminal-toggle"]` |
| Command palette | `[data-testid="command-palette"]` |
| Palette input | `[data-testid="palette-input"]` |
### Key constraints
- `maxInstances: 1` — Tauri doesn't support parallel WebDriver sessions
- Mocha timeout is 60s — the app needs time to initialize
- Tests interact with the real WebKit2GTK WebView, not a browser
- Use `browser.execute()` for JS clicks when WebDriver clicks don't trigger Svelte handlers
- Agent tests (Scenario 7) require a real Claude CLI install + API key — they skip gracefully if unavailable
## Test infrastructure
### Fixtures (`fixtures.ts`)
Creates isolated test environments with temp data/config dirs and git repos:
```typescript
import { createTestFixture, destroyTestFixture } from '../fixtures';
const fixture = createTestFixture('my-test');
// fixture.dataDir, fixture.configDir, fixture.projectDir, fixture.env
destroyTestFixture(fixture);
```
### Results DB (`results-db.ts`)
JSON-based test results store for tracking runs and steps:
```typescript
import { ResultsDb } from '../results-db';
const db = new ResultsDb();
db.startRun('run-001', 'v2-mission-control', 'abc123');
db.recordStep({ run_id: 'run-001', scenario_name: 'Smoke', step_name: 'renders', status: 'passed', ... });
db.finishRun('run-001', 'passed', 5000);
```
## File structure
## Directory Structure
```
tests/e2e/
├── README.md # This file
├── wdio.conf.js # WebdriverIO config with tauri-driver lifecycle
├── tsconfig.json # TypeScript config for test specs
├── fixtures.ts # Test fixture generator (isolated environments)
├── results-db.ts # JSON test results store
└── specs/
├── agor.test.ts # Smoke tests (CSS class selectors, 50+ tests)
└── agent-scenarios.test.ts # Phase A scenarios (data-testid selectors, 22 tests)
├── wdio.conf.js # WebDriverIO config + tauri-driver lifecycle
├── tsconfig.json # TypeScript config for specs
├── README.md # This file
├── infra/ # Test infrastructure (not specs)
│ ├── fixtures.ts # Test fixture generator (isolated temp dirs)
│ ├── llm-judge.ts # LLM-based assertion engine (Claude CLI / API)
│ ├── results-db.ts # JSON test results store
│ └── test-mode-constants.ts # Typed env var names for test mode
└── specs/ # Test specifications
├── agor.test.ts # Smoke + UI tests (50+ tests)
├── agent-scenarios.test.ts # Phase A: agent interaction (22 tests)
├── phase-b.test.ts # Phase B: multi-project + LLM judge
└── phase-c.test.ts # Phase C: hardening features (11 scenarios)
```
## References
## Test Mode Environment Variables
- Tauri WebDriver docs: https://v2.tauri.app/develop/tests/webdriver/
- WebdriverIO docs: https://webdriver.io/
- tauri-driver: https://crates.io/crates/tauri-driver
| Variable | Purpose | Read By |
|----------|---------|---------|
| `AGOR_TEST=1` | Enable test isolation | config.rs, misc.rs, lib.rs, watcher.rs, fs_watcher.rs, telemetry.rs, App.svelte |
| `AGOR_TEST_DATA_DIR` | Override data dir | config.rs |
| `AGOR_TEST_CONFIG_DIR` | Override config dir | config.rs |
**Effects when AGOR_TEST=1:**
- File watchers disabled (watcher.rs, fs_watcher.rs)
- OTLP telemetry export disabled (telemetry.rs)
- CLI tool installation skipped (lib.rs)
- Wake scheduler disabled (App.svelte)
- Test env vars forwarded to sidecar processes (lib.rs)
## Test Phases
| Phase | File | Tests | Type |
|-------|------|-------|------|
| Smoke | agor.test.ts | 50+ | Deterministic (CSS/DOM assertions) |
| A | agent-scenarios.test.ts | 22 | Deterministic (data-testid selectors) |
| B | phase-b.test.ts | 6+ | LLM-judged (multi-project, agent quality) |
| C | phase-c.test.ts | 11 scenarios | Mixed (deterministic + LLM-judged) |
## Adding a New Spec
1. Create `tests/e2e/specs/my-feature.test.ts`
2. Import from `@wdio/globals` for `browser` and `expect`
3. Use `data-testid` selectors (preferred) or CSS classes
4. Add to `wdio.conf.js` specs array
5. For LLM assertions: `import { assertWithJudge } from '../infra/llm-judge'`
6. Run `./scripts/check-test-flags.sh` if you added new AGOR_TEST references
## CI Workflow
See `.github/workflows/e2e.yml` — 3 jobs:
1. **unit-tests**: vitest frontend
2. **cargo-tests**: Rust backend
3. **e2e-tests**: WebdriverIO (xvfb-run, Phase A+B+C, LLM tests gated on secret)

View file

@ -0,0 +1,23 @@
// Test-mode environment variable names, declared once as typed constants.
// Import these instead of repeating the raw string literals.
//
// Where each variable is consumed:
//   Rust:   agor-core/src/config.rs        (AppConfig::from_env)
//           src-tauri/src/commands/misc.rs (is_test_mode)
//           src-tauri/src/lib.rs           (setup: skip CLI install, forward to sidecar)
//           src-tauri/src/watcher.rs       (disable file watcher)
//           src-tauri/src/fs_watcher.rs    (disable fs watcher)
//           src-tauri/src/telemetry.rs     (disable OTLP)
//   Svelte: src/App.svelte                 (disable wake scheduler)

/** Name of the master switch; setting it to '1' turns on test isolation. */
export const AGOR_TEST = 'AGOR_TEST' as const;

/** Name of the data-directory override (sessions.db, btmsg.db, search.db). */
export const AGOR_TEST_DATA_DIR = 'AGOR_TEST_DATA_DIR' as const;

/** Name of the config-directory override (groups.json, plugins/). */
export const AGOR_TEST_CONFIG_DIR = 'AGOR_TEST_CONFIG_DIR' as const;

/** Every test-mode variable name, as a readonly tuple, for iteration. */
export const TEST_ENV_VARS = [
  AGOR_TEST,
  AGOR_TEST_DATA_DIR,
  AGOR_TEST_CONFIG_DIR,
] as const;

View file

@ -1,5 +1,5 @@
import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../llm-judge';
import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
// Phase B: Multi-project scenarios + LLM-judged assertions.
// Extends Phase A with tests that exercise multiple project boxes simultaneously

View file

@ -1,5 +1,5 @@
import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../llm-judge';
import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
// Phase C: Hardening feature tests.
// Tests the v3 production-readiness features added in the hardening sprint:

View file

@ -1,68 +1,30 @@
import { spawn, execSync } from 'node:child_process';
import { spawn } from 'node:child_process';
import { createConnection } from 'node:net';
import { resolve, dirname, join } from 'node:path';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { mkdirSync, writeFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { rmSync } from 'node:fs';
import { createTestFixture } from './infra/fixtures.ts';
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../..');
// Debug binary path (built with `cargo tauri build --debug --no-bundle`)
// Cargo workspace target dir is at v2/target/, not v2/src-tauri/target/
// Debug binary path (Cargo workspace target at repo root)
const tauriBinary = resolve(projectRoot, 'target/debug/agent-orchestrator');
let tauriDriver;
// ── Test Fixture (created eagerly so env vars are available for capabilities) ──
const fixtureRoot = join(tmpdir(), `agor-e2e-${Date.now()}`);
const fixtureDataDir = join(fixtureRoot, 'data');
const fixtureConfigDir = join(fixtureRoot, 'config');
const fixtureProjectDir = join(fixtureRoot, 'test-project');
// ── Test Fixture ──
// IMPORTANT: Must be created at module top-level (synchronously) because the
// capabilities object below references fixture.env at eval time.
// tauri:options.env may not reliably set process-level env vars, so we also
// inject into process.env for tauri-driver inheritance.
const fixture = createTestFixture('agor-e2e');
mkdirSync(fixtureDataDir, { recursive: true });
mkdirSync(fixtureConfigDir, { recursive: true });
mkdirSync(fixtureProjectDir, { recursive: true });
// Create a minimal git repo for agent testing
execSync('git init', { cwd: fixtureProjectDir, stdio: 'ignore' });
execSync('git config user.email "test@agor.dev"', { cwd: fixtureProjectDir, stdio: 'ignore' });
execSync('git config user.name "Agor Test"', { cwd: fixtureProjectDir, stdio: 'ignore' });
writeFileSync(join(fixtureProjectDir, 'README.md'), '# Test Project\n\nA simple test project for Agor E2E tests.\n');
writeFileSync(join(fixtureProjectDir, 'hello.py'), 'def greet(name: str) -> str:\n return f"Hello, {name}!"\n');
execSync('git add -A && git commit -m "initial commit"', { cwd: fixtureProjectDir, stdio: 'ignore' });
// Write groups.json with one group containing the test project
writeFileSync(
join(fixtureConfigDir, 'groups.json'),
JSON.stringify({
version: 1,
groups: [{
id: 'test-group',
name: 'Test Group',
projects: [{
id: 'test-project',
name: 'Test Project',
identifier: 'test-project',
description: 'E2E test project',
icon: '\uf120',
cwd: fixtureProjectDir,
profile: 'default',
enabled: true,
}],
agents: [],
}],
activeGroupId: 'test-group',
}, null, 2),
);
// Inject env vars into process.env so tauri-driver inherits them
// (tauri:options.env may not reliably set process-level env vars)
process.env.AGOR_TEST = '1';
process.env.AGOR_TEST_DATA_DIR = fixtureDataDir;
process.env.AGOR_TEST_CONFIG_DIR = fixtureConfigDir;
process.env.AGOR_TEST_DATA_DIR = fixture.dataDir;
process.env.AGOR_TEST_CONFIG_DIR = fixture.configDir;
console.log(`Test fixture created at ${fixtureRoot}`);
console.log(`Test fixture created at ${fixture.rootDir}`);
export const config = {
// ── Runner ──
@ -78,10 +40,10 @@ export const config = {
// Single spec file — Tauri launches one app instance per session,
// and tauri-driver can't re-create sessions between spec files.
specs: [
resolve(__dirname, 'specs/agor.test.ts'),
resolve(__dirname, 'specs/agent-scenarios.test.ts'),
resolve(__dirname, 'specs/phase-b.test.ts'),
resolve(__dirname, 'specs/phase-c.test.ts'),
resolve(projectRoot, 'tests/e2e/specs/agor.test.ts'),
resolve(projectRoot, 'tests/e2e/specs/agent-scenarios.test.ts'),
resolve(projectRoot, 'tests/e2e/specs/phase-b.test.ts'),
resolve(projectRoot, 'tests/e2e/specs/phase-c.test.ts'),
],
// ── Capabilities ──
@ -91,11 +53,7 @@ export const config = {
'tauri:options': {
application: tauriBinary,
// Test isolation: fixture-created data/config dirs, disable watchers/telemetry
env: {
AGOR_TEST: '1',
AGOR_TEST_DATA_DIR: fixtureDataDir,
AGOR_TEST_CONFIG_DIR: fixtureConfigDir,
},
env: fixture.env,
},
}],
@ -199,7 +157,7 @@ export const config = {
}
// Clean up test fixture
try {
rmSync(fixtureRoot, { recursive: true, force: true });
rmSync(fixture.rootDir, { recursive: true, force: true });
console.log('Test fixture cleaned up.');
} catch { /* best-effort cleanup */ }
},
@ -207,7 +165,7 @@ export const config = {
// ── TypeScript (auto-compile via tsx) ──
autoCompileOpts: {
tsNodeOpts: {
project: resolve(__dirname, 'tsconfig.json'),
project: resolve(projectRoot, 'tests/e2e/tsconfig.json'),
},
},
};