refactor(e2e): extract infrastructure into tests/e2e/infra/ module

- Move fixtures.ts, llm-judge.ts, results-db.ts to tests/e2e/infra/
- Deduplicate wdio.conf.js: use createTestFixture() instead of inline copy
- Replace __dirname paths with projectRoot-anchored paths
- Create test-mode-constants.ts (typed env var names, flag registry)
- Create scripts/preflight-check.sh (validates tauri-driver, display, Claude CLI)
- Create scripts/check-test-flags.sh (CI lint for AGOR_TEST flag drift)
- Rewrite tests/e2e/README.md with full documentation
- Update spec imports for moved infra files
This commit is contained in:
Hibryda 2026-03-18 03:06:57 +01:00
parent 538a31f85c
commit e76bc341f2
10 changed files with 235 additions and 191 deletions

50
scripts/check-test-flags.sh Executable file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Lint check: verify all AGOR_TEST references are documented.
# Run in CI to catch flag drift between code and documentation.
#
# Scans the current directory (expected: the repository root) for source files
# that mention AGOR_TEST and compares them against KNOWN_FILES.
#
# Exit status:
#   0   every referencing file is listed in KNOWN_FILES
#   1   an unlisted file references AGOR_TEST, or no reference was found at all
#   >1  grep itself failed (its exit status is passed through)
set -euo pipefail

echo "Checking AGOR_TEST flag references..."

# Known files that should reference AGOR_TEST (from test-mode-constants.ts)
KNOWN_FILES=(
  "agor-core/src/config.rs"
  "src-tauri/src/commands/misc.rs"
  "src-tauri/src/lib.rs"
  "src-tauri/src/watcher.rs"
  "src-tauri/src/fs_watcher.rs"
  "src-tauri/src/telemetry.rs"
  "src/App.svelte"
  "tests/e2e/wdio.conf.js"
  "tests/e2e/infra/fixtures.ts"
  "tests/e2e/infra/test-mode-constants.ts"
)

# Find all files referencing AGOR_TEST (excluding node_modules, target, .git).
# grep exits 1 on "no match" and >1 on error; capture the status explicitly so
# `set -e` / `pipefail` cannot terminate the script without any diagnostic.
GREP_STATUS=0
RAW=$(grep -rl 'AGOR_TEST' --include='*.rs' --include='*.ts' --include='*.js' --include='*.svelte' \
  --exclude-dir=node_modules --exclude-dir=target --exclude-dir=.git . 2>/dev/null) || GREP_STATUS=$?

if [[ $GREP_STATUS -gt 1 ]]; then
  echo "ERROR: grep failed while scanning for AGOR_TEST (exit $GREP_STATUS)" >&2
  exit "$GREP_STATUS"
fi

# Zero matches cannot be right while KNOWN_FILES is non-empty; the usual cause
# is running the script from somewhere other than the repository root.
if [[ -z "$RAW" ]]; then
  echo "ERROR: no AGOR_TEST references found. Run this script from the repository root." >&2
  exit 1
fi

FOUND=$(printf '%s\n' "$RAW" | sed 's|^\./||' | sort)

# Collect every referencing file that is missing from the registry.
# Reading line by line keeps paths containing spaces or glob characters intact.
UNKNOWN=""
while IFS= read -r f; do
  if [[ -z "$f" ]]; then
    continue
  fi
  MATCH=0
  for k in "${KNOWN_FILES[@]}"; do
    if [[ "$f" == "$k" ]]; then
      MATCH=1
      break
    fi
  done
  if [[ $MATCH -eq 0 ]]; then
    # A real newline (not a literal "\n") so no `echo -e` is needed later.
    UNKNOWN+=" $f"$'\n'
  fi
done <<< "$FOUND"

if [[ -n "$UNKNOWN" ]]; then
  echo ""
  echo "WARNING: AGOR_TEST referenced in files not in the known registry:"
  # printf prints the names verbatim; backslashes in a path are not interpreted.
  printf '%s\n' "$UNKNOWN"
  echo "Update tests/e2e/infra/test-mode-constants.ts and this script."
  exit 1
else
  echo "All AGOR_TEST references are documented. ✓"
fi

68
scripts/preflight-check.sh Executable file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env bash
# Preflight check for E2E tests — validates system dependencies.
# Run before E2E suite to catch missing tools early.
# Exits 1 when a required dependency is missing; a missing optional dependency
# only produces a warning and the script still exits 0.
set -euo pipefail
# Failure flag for required checks: stays 0 while everything passes and is set
# to 1 as soon as one required check fails (0 means "no failure").
OK=0
# Warning flag: set to 1 when an optional dependency is unavailable.
WARN=0
# Run a single dependency probe and print a one-line result.
#   $1  name shown in the report
#   $2  shell command string, run through eval with all output discarded;
#       it must succeed (exit 0) when the dependency is present
# Prints " ✓ <name>" on success and " ✗ <name>" on failure, matching the
# markers used by the other checks in this script.
# Returns 0 when the probe succeeds, 1 otherwise.
check() {
  local name="$1" cmd="$2"
  if eval "$cmd" >/dev/null 2>&1; then
    echo " ✓ $name"
  else
    echo " ✗ $name"
    return 1
  fi
}
# ── Report ──
# Runs each probe in turn, records failures in OK / WARN, then prints a
# summary. Only a failed required check makes the script exit non-zero.
echo "E2E Preflight Check"
echo "==================="
echo ""
echo "Required:"
if ! check "tauri-driver" "command -v tauri-driver"; then
  echo " Install: cargo install tauri-driver"
  OK=1
fi
# Path is relative to the current directory.
if ! check "debug binary" "test -f target/debug/agent-orchestrator"; then
  echo " Build: cargo tauri build --debug --no-bundle"
  OK=1
fi
# Display server (Linux only)
if [[ "$(uname)" == "Linux" ]]; then
  # Either variable may be unset. Each one is expanded only after its own
  # non-empty test, so `set -u` cannot abort on a Wayland-only session where
  # DISPLAY is undefined.
  if [[ -n "${DISPLAY:-}" ]]; then
    echo " ✓ display server (DISPLAY=$DISPLAY)"
  elif [[ -n "${WAYLAND_DISPLAY:-}" ]]; then
    echo " ✓ display server (WAYLAND_DISPLAY=$WAYLAND_DISPLAY)"
  else
    echo " ✗ display server — no DISPLAY or WAYLAND_DISPLAY set"
    echo " Use: xvfb-run --auto-servernum npm run test:e2e"
    OK=1
  fi
fi
echo ""
echo "Optional (LLM judge):"
# Either the Claude CLI or an API key is enough for the LLM judge.
if command -v claude >/dev/null 2>&1; then
  # `command -v` is a shell builtin, unlike `which`, so it is always available.
  echo " ✓ Claude CLI ($(command -v claude))"
elif [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
  echo " ✓ ANTHROPIC_API_KEY set"
else
  echo " ~ Claude CLI not found, ANTHROPIC_API_KEY not set"
  echo " LLM-judged tests will be skipped"
  WARN=1
fi
echo ""
# Summary: required failures are fatal, optional ones are reported only.
if [[ $OK -ne 0 ]]; then
  echo "FAILED — missing required dependencies"
  exit 1
elif [[ $WARN -ne 0 ]]; then
  echo "PASSED with warnings"
else
  echo "ALL CHECKS PASSED"
fi

View file

@ -1,143 +1,88 @@
# E2E Tests (WebDriver) # E2E Testing Module
Tauri apps use the WebDriver protocol for E2E testing (not Playwright directly). Browser automation tests for Agent Orchestrator using WebdriverIO + tauri-driver.
The app runs inside WebKit2GTK on Linux, so tests interact with the real WebView.
## Prerequisites ## Quick Start
- Rust toolchain (for building the Tauri app)
- Display server (X11 or Wayland) — headless Xvfb works for CI
- `tauri-driver` installed: `cargo install tauri-driver`
- `webkit2gtk-driver` system package: `sudo apt install webkit2gtk-driver`
- npm devDeps already in package.json (`@wdio/cli`, `@wdio/local-runner`, `@wdio/mocha-framework`, `@wdio/spec-reporter`)
## Running
```bash ```bash
# From v2/ directory — builds debug binary automatically, spawns tauri-driver # Preflight check (validates dependencies)
npm run test:e2e ./scripts/preflight-check.sh
# Skip rebuild (use existing binary) # Build debug binary + run E2E
npm run test:all:e2e
# Run E2E only (skip build)
SKIP_BUILD=1 npm run test:e2e SKIP_BUILD=1 npm run test:e2e
# With test isolation (custom data/config dirs) # Headless (CI)
AGOR_TEST_DATA_DIR=/tmp/bt-test/data AGOR_TEST_CONFIG_DIR=/tmp/bt-test/config npm run test:e2e xvfb-run --auto-servernum npm run test:e2e
``` ```
The `wdio.conf.js` handles: ## System Dependencies
1. Building the debug binary (`cargo tauri build --debug --no-bundle`) in `onPrepare`
2. Spawning `tauri-driver` before each session (TCP readiness probe, 10s deadline)
3. Killing `tauri-driver` after each session
4. Passing `AGOR_TEST=1` env var to the app for test mode isolation
## Test Mode (`AGOR_TEST=1`) | Tool | Required | Install |
|------|----------|---------|
| tauri-driver | Yes | `cargo install tauri-driver` |
| Debug binary | Yes | `cargo tauri build --debug --no-bundle` |
| X11/Wayland | Yes (Linux) | Use `xvfb-run` in CI |
| Claude CLI | Optional | LLM-judged tests skip if absent |
| ANTHROPIC_API_KEY | Optional | Alternative to Claude CLI for LLM judge |
When `AGOR_TEST=1` is set: ## Directory Structure
- File watchers (watcher.rs, fs_watcher.rs) are disabled to avoid inotify noise
- Wake scheduler is disabled (no auto-wake timers)
- Data/config directories can be overridden via `AGOR_TEST_DATA_DIR` / `AGOR_TEST_CONFIG_DIR`
## CI setup (headless)
```bash
# Install virtual framebuffer + WebKit driver
sudo apt install xvfb webkit2gtk-driver
# Run with Xvfb wrapper
xvfb-run npm run test:e2e
```
## Writing tests
Tests use WebdriverIO with Mocha. Specs go in `specs/`:
```typescript
import { browser, expect } from '@wdio/globals';
describe('BTerminal', () => {
it('should show the status bar', async () => {
const statusBar = await browser.$('[data-testid="status-bar"]');
await expect(statusBar).toBeDisplayed();
});
});
```
### Stable selectors
Prefer `data-testid` attributes over CSS class selectors:
| Element | Selector |
|---------|----------|
| Status bar | `[data-testid="status-bar"]` |
| Sidebar rail | `[data-testid="sidebar-rail"]` |
| Settings button | `[data-testid="settings-btn"]` |
| Project box | `[data-testid="project-box"]` |
| Project ID | `[data-project-id="..."]` |
| Project tabs | `[data-testid="project-tabs"]` |
| Agent session | `[data-testid="agent-session"]` |
| Agent pane | `[data-testid="agent-pane"]` |
| Agent status | `[data-agent-status="idle\|running\|..."]` |
| Agent messages | `[data-testid="agent-messages"]` |
| Agent prompt | `[data-testid="agent-prompt"]` |
| Agent submit | `[data-testid="agent-submit"]` |
| Agent stop | `[data-testid="agent-stop"]` |
| Terminal tabs | `[data-testid="terminal-tabs"]` |
| Add tab button | `[data-testid="tab-add"]` |
| Terminal toggle | `[data-testid="terminal-toggle"]` |
| Command palette | `[data-testid="command-palette"]` |
| Palette input | `[data-testid="palette-input"]` |
### Key constraints
- `maxInstances: 1` — Tauri doesn't support parallel WebDriver sessions
- Mocha timeout is 60s — the app needs time to initialize
- Tests interact with the real WebKit2GTK WebView, not a browser
- Use `browser.execute()` for JS clicks when WebDriver clicks don't trigger Svelte handlers
- Agent tests (Scenario 7) require a real Claude CLI install + API key — they skip gracefully if unavailable
## Test infrastructure
### Fixtures (`fixtures.ts`)
Creates isolated test environments with temp data/config dirs and git repos:
```typescript
import { createTestFixture, destroyTestFixture } from '../fixtures';
const fixture = createTestFixture('my-test');
// fixture.dataDir, fixture.configDir, fixture.projectDir, fixture.env
destroyTestFixture(fixture);
```
### Results DB (`results-db.ts`)
JSON-based test results store for tracking runs and steps:
```typescript
import { ResultsDb } from '../results-db';
const db = new ResultsDb();
db.startRun('run-001', 'v2-mission-control', 'abc123');
db.recordStep({ run_id: 'run-001', scenario_name: 'Smoke', step_name: 'renders', status: 'passed', ... });
db.finishRun('run-001', 'passed', 5000);
```
## File structure
``` ```
tests/e2e/ tests/e2e/
├── README.md # This file ├── wdio.conf.js # WebdriverIO config + tauri-driver lifecycle
├── wdio.conf.js # WebdriverIO config with tauri-driver lifecycle ├── tsconfig.json # TypeScript config for specs
├── tsconfig.json # TypeScript config for test specs ├── README.md # This file
├── fixtures.ts # Test fixture generator (isolated environments) ├── infra/ # Test infrastructure (not specs)
├── results-db.ts # JSON test results store │ ├── fixtures.ts # Test fixture generator (isolated temp dirs)
└── specs/ │ ├── llm-judge.ts # LLM-based assertion engine (Claude CLI / API)
├── agor.test.ts # Smoke tests (CSS class selectors, 50+ tests) │ ├── results-db.ts # JSON test results store
└── agent-scenarios.test.ts # Phase A scenarios (data-testid selectors, 22 tests) │ └── test-mode-constants.ts # Typed env var names for test mode
└── specs/ # Test specifications
├── agor.test.ts # Smoke + UI tests (50+ tests)
├── agent-scenarios.test.ts # Phase A: agent interaction (22 tests)
├── phase-b.test.ts # Phase B: multi-project + LLM judge
└── phase-c.test.ts # Phase C: hardening features (11 scenarios)
``` ```
## References ## Test Mode Environment Variables
- Tauri WebDriver docs: https://v2.tauri.app/develop/tests/webdriver/ | Variable | Purpose | Read By |
- WebdriverIO docs: https://webdriver.io/ |----------|---------|---------|
- tauri-driver: https://crates.io/crates/tauri-driver | `AGOR_TEST=1` | Enable test isolation | config.rs, misc.rs, lib.rs, watcher.rs, fs_watcher.rs, telemetry.rs, App.svelte |
| `AGOR_TEST_DATA_DIR` | Override data dir | config.rs |
| `AGOR_TEST_CONFIG_DIR` | Override config dir | config.rs |
**Effects when AGOR_TEST=1:**
- File watchers disabled (watcher.rs, fs_watcher.rs)
- OTLP telemetry export disabled (telemetry.rs)
- CLI tool installation skipped (lib.rs)
- Wake scheduler disabled (App.svelte)
- Test env vars forwarded to sidecar processes (lib.rs)
## Test Phases
| Phase | File | Tests | Type |
|-------|------|-------|------|
| Smoke | agor.test.ts | 50+ | Deterministic (CSS/DOM assertions) |
| A | agent-scenarios.test.ts | 22 | Deterministic (data-testid selectors) |
| B | phase-b.test.ts | 6+ | LLM-judged (multi-project, agent quality) |
| C | phase-c.test.ts | 11 scenarios | Mixed (deterministic + LLM-judged) |
## Adding a New Spec
1. Create `tests/e2e/specs/my-feature.test.ts`
2. Import from `@wdio/globals` for `browser` and `expect`
3. Use `data-testid` selectors (preferred) or CSS classes
4. Add to `wdio.conf.js` specs array
5. For LLM assertions: `import { assertWithJudge } from '../infra/llm-judge'`
6. Run `./scripts/check-test-flags.sh` if you added new AGOR_TEST references
## CI Workflow
See `.github/workflows/e2e.yml` — 3 jobs:
1. **unit-tests**: vitest frontend
2. **cargo-tests**: Rust backend
3. **e2e-tests**: WebdriverIO (xvfb-run, Phase A+B+C, LLM tests gated on secret)

View file

@ -0,0 +1,23 @@
// Typed constants for test-mode environment variables.
// Single source of truth for env var names — prevents string literal duplication.
//
// Each export below holds the NAME of an environment variable (a string),
// not the variable's current value.
//
// These env vars are read by:
// Rust: agor-core/src/config.rs (AppConfig::from_env)
// src-tauri/src/commands/misc.rs (is_test_mode)
// src-tauri/src/lib.rs (setup: skip CLI install, forward to sidecar)
// src-tauri/src/watcher.rs (disable file watcher)
// src-tauri/src/fs_watcher.rs (disable fs watcher)
// src-tauri/src/telemetry.rs (disable OTLP)
// Svelte: src/App.svelte (disable wake scheduler)
/** Name of the main test mode flag — the variable is set to '1' to enable test isolation */
export const AGOR_TEST = 'AGOR_TEST';
/** Name of the variable that overrides the data directory (sessions.db, btmsg.db, search.db) */
export const AGOR_TEST_DATA_DIR = 'AGOR_TEST_DATA_DIR';
/** Name of the variable that overrides the config directory (groups.json, plugins/) */
export const AGOR_TEST_CONFIG_DIR = 'AGOR_TEST_CONFIG_DIR';
/**
 * All test-mode env var names, for iteration.
 * `as const` types this as a readonly tuple of the three literal names
 * rather than a plain `string[]`.
 */
export const TEST_ENV_VARS = [AGOR_TEST, AGOR_TEST_DATA_DIR, AGOR_TEST_CONFIG_DIR] as const;

View file

@ -1,5 +1,5 @@
import { browser, expect } from '@wdio/globals'; import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../llm-judge'; import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
// Phase B: Multi-project scenarios + LLM-judged assertions. // Phase B: Multi-project scenarios + LLM-judged assertions.
// Extends Phase A with tests that exercise multiple project boxes simultaneously // Extends Phase A with tests that exercise multiple project boxes simultaneously

View file

@ -1,5 +1,5 @@
import { browser, expect } from '@wdio/globals'; import { browser, expect } from '@wdio/globals';
import { isJudgeAvailable, assertWithJudge } from '../llm-judge'; import { isJudgeAvailable, assertWithJudge } from '../infra/llm-judge';
// Phase C: Hardening feature tests. // Phase C: Hardening feature tests.
// Tests the v3 production-readiness features added in the hardening sprint: // Tests the v3 production-readiness features added in the hardening sprint:

View file

@ -1,68 +1,30 @@
import { spawn, execSync } from 'node:child_process'; import { spawn } from 'node:child_process';
import { createConnection } from 'node:net'; import { createConnection } from 'node:net';
import { resolve, dirname, join } from 'node:path'; import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
import { mkdirSync, writeFileSync, rmSync } from 'node:fs'; import { rmSync } from 'node:fs';
import { tmpdir } from 'node:os'; import { createTestFixture } from './infra/fixtures.ts';
const __dirname = dirname(fileURLToPath(import.meta.url)); const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../..'); const projectRoot = resolve(__dirname, '../..');
// Debug binary path (built with `cargo tauri build --debug --no-bundle`) // Debug binary path (Cargo workspace target at repo root)
// Cargo workspace target dir is at v2/target/, not v2/src-tauri/target/
const tauriBinary = resolve(projectRoot, 'target/debug/agent-orchestrator'); const tauriBinary = resolve(projectRoot, 'target/debug/agent-orchestrator');
let tauriDriver; let tauriDriver;
// ── Test Fixture (created eagerly so env vars are available for capabilities) ── // ── Test Fixture ──
const fixtureRoot = join(tmpdir(), `agor-e2e-${Date.now()}`); // IMPORTANT: Must be created at module top-level (synchronously) because the
const fixtureDataDir = join(fixtureRoot, 'data'); // capabilities object below references fixtureDataDir/fixtureConfigDir at eval time.
const fixtureConfigDir = join(fixtureRoot, 'config'); // tauri:options.env may not reliably set process-level env vars, so we also
const fixtureProjectDir = join(fixtureRoot, 'test-project'); // inject into process.env for tauri-driver inheritance.
const fixture = createTestFixture('agor-e2e');
mkdirSync(fixtureDataDir, { recursive: true });
mkdirSync(fixtureConfigDir, { recursive: true });
mkdirSync(fixtureProjectDir, { recursive: true });
// Create a minimal git repo for agent testing
execSync('git init', { cwd: fixtureProjectDir, stdio: 'ignore' });
execSync('git config user.email "test@agor.dev"', { cwd: fixtureProjectDir, stdio: 'ignore' });
execSync('git config user.name "Agor Test"', { cwd: fixtureProjectDir, stdio: 'ignore' });
writeFileSync(join(fixtureProjectDir, 'README.md'), '# Test Project\n\nA simple test project for Agor E2E tests.\n');
writeFileSync(join(fixtureProjectDir, 'hello.py'), 'def greet(name: str) -> str:\n return f"Hello, {name}!"\n');
execSync('git add -A && git commit -m "initial commit"', { cwd: fixtureProjectDir, stdio: 'ignore' });
// Write groups.json with one group containing the test project
writeFileSync(
join(fixtureConfigDir, 'groups.json'),
JSON.stringify({
version: 1,
groups: [{
id: 'test-group',
name: 'Test Group',
projects: [{
id: 'test-project',
name: 'Test Project',
identifier: 'test-project',
description: 'E2E test project',
icon: '\uf120',
cwd: fixtureProjectDir,
profile: 'default',
enabled: true,
}],
agents: [],
}],
activeGroupId: 'test-group',
}, null, 2),
);
// Inject env vars into process.env so tauri-driver inherits them
// (tauri:options.env may not reliably set process-level env vars)
process.env.AGOR_TEST = '1'; process.env.AGOR_TEST = '1';
process.env.AGOR_TEST_DATA_DIR = fixtureDataDir; process.env.AGOR_TEST_DATA_DIR = fixture.dataDir;
process.env.AGOR_TEST_CONFIG_DIR = fixtureConfigDir; process.env.AGOR_TEST_CONFIG_DIR = fixture.configDir;
console.log(`Test fixture created at ${fixtureRoot}`); console.log(`Test fixture created at ${fixture.rootDir}`);
export const config = { export const config = {
// ── Runner ── // ── Runner ──
@ -78,10 +40,10 @@ export const config = {
// Single spec file — Tauri launches one app instance per session, // Single spec file — Tauri launches one app instance per session,
// and tauri-driver can't re-create sessions between spec files. // and tauri-driver can't re-create sessions between spec files.
specs: [ specs: [
resolve(__dirname, 'specs/agor.test.ts'), resolve(projectRoot, 'tests/e2e/specs/agor.test.ts'),
resolve(__dirname, 'specs/agent-scenarios.test.ts'), resolve(projectRoot, 'tests/e2e/specs/agent-scenarios.test.ts'),
resolve(__dirname, 'specs/phase-b.test.ts'), resolve(projectRoot, 'tests/e2e/specs/phase-b.test.ts'),
resolve(__dirname, 'specs/phase-c.test.ts'), resolve(projectRoot, 'tests/e2e/specs/phase-c.test.ts'),
], ],
// ── Capabilities ── // ── Capabilities ──
@ -91,11 +53,7 @@ export const config = {
'tauri:options': { 'tauri:options': {
application: tauriBinary, application: tauriBinary,
// Test isolation: fixture-created data/config dirs, disable watchers/telemetry // Test isolation: fixture-created data/config dirs, disable watchers/telemetry
env: { env: fixture.env,
AGOR_TEST: '1',
AGOR_TEST_DATA_DIR: fixtureDataDir,
AGOR_TEST_CONFIG_DIR: fixtureConfigDir,
},
}, },
}], }],
@ -199,7 +157,7 @@ export const config = {
} }
// Clean up test fixture // Clean up test fixture
try { try {
rmSync(fixtureRoot, { recursive: true, force: true }); rmSync(fixture.rootDir, { recursive: true, force: true });
console.log('Test fixture cleaned up.'); console.log('Test fixture cleaned up.');
} catch { /* best-effort cleanup */ } } catch { /* best-effort cleanup */ }
}, },
@ -207,7 +165,7 @@ export const config = {
// ── TypeScript (auto-compile via tsx) ── // ── TypeScript (auto-compile via tsx) ──
autoCompileOpts: { autoCompileOpts: {
tsNodeOpts: { tsNodeOpts: {
project: resolve(__dirname, 'tsconfig.json'), project: resolve(projectRoot, 'tests/e2e/tsconfig.json'),
}, },
}, },
}; };