feat(e2e): add Phase A scenarios, fixtures, and results store

7 human-authored test scenarios (27 tests) using data-testid
selectors. Test fixture generator for isolated environments.
JSON results store (no native deps). WebDriverIO config updated
with TCP readiness probe and multi-spec support.
This commit is contained in:
Hibryda 2026-03-12 02:52:14 +01:00
parent 2746b34f83
commit c6c38b91c6
5 changed files with 701 additions and 10 deletions

142
v2/tests/e2e/fixtures.ts Normal file
View file

@ -0,0 +1,142 @@
// Test fixture generator — creates isolated test environments
// Used by E2E tests to set up temp data/config dirs with valid groups.json
import { mkdirSync, writeFileSync, rmSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { execSync } from 'node:child_process';
import { tmpdir } from 'node:os';
/**
 * Handle for one isolated E2E environment on disk, as returned by the
 * fixture factory functions in this file.
 */
export interface TestFixture {
/** Root temp directory for this test run; every other path lives beneath it. */
rootDir: string;
/** Isolated data dir; exposed to the app through BTERMINAL_TEST_DATA_DIR. */
dataDir: string;
/** Isolated config dir (holds groups.json); exposed through BTERMINAL_TEST_CONFIG_DIR. */
configDir: string;
/** Path to a minimal git repo for agent testing */
projectDir: string;
/** Environment variables to pass to the app so it uses the directories above. */
env: Record<string, string>;
}
// Per-process counter mixed into fixture directory names so that two fixtures
// created within the same millisecond never share a root directory.
let fixtureSequence = 0;

/**
 * Create an isolated test fixture with:
 * - Temp data dir (per the original note, sessions.db and btmsg.db are
 *   created there at runtime)
 * - Temp config dir with a minimal groups.json (one group, one project)
 * - A small committed git repo (README.md + hello.py) for agent testing
 *
 * @param name Prefix of the directory created under `os.tmpdir()`.
 * @returns Paths of the created directories plus the environment variables
 *   that point the app at them.
 * @throws Re-throws any filesystem or git failure after removing the
 *   partially built fixture directory.
 */
export function createTestFixture(name = 'bterminal-e2e'): TestFixture {
  // Timestamp alone is not unique: add the pid (parallel workers) and a
  // per-process sequence number (repeated calls within one millisecond).
  fixtureSequence += 1;
  const rootDir = join(tmpdir(), `${name}-${Date.now()}-${process.pid}-${fixtureSequence}`);
  const dataDir = join(rootDir, 'data');
  const configDir = join(rootDir, 'config');
  const projectDir = join(rootDir, 'test-project');

  try {
    // Create directory structure
    mkdirSync(dataDir, { recursive: true });
    mkdirSync(configDir, { recursive: true });
    mkdirSync(projectDir, { recursive: true });

    // Create a minimal git repo for agent testing
    const git = (command: string): void => {
      execSync(command, { cwd: projectDir, stdio: 'ignore' });
    };
    git('git init');
    git('git config user.email "test@bterminal.dev"');
    git('git config user.name "BTerminal Test"');
    // Keep the commit independent of the host's global git config: a globally
    // enforced signing requirement would otherwise make the commit fail or hang.
    git('git config commit.gpgsign false');
    writeFileSync(join(projectDir, 'README.md'), '# Test Project\n\nA simple test project for BTerminal E2E tests.\n');
    writeFileSync(join(projectDir, 'hello.py'), 'def greet(name: str) -> str:\n return f"Hello, {name}!"\n');
    git('git add -A && git commit -m "initial commit"');

    // Write groups.json with one group containing the test project
    const groupsJson = {
      version: 1,
      groups: [
        {
          id: 'test-group',
          name: 'Test Group',
          projects: [
            {
              id: 'test-project',
              name: 'Test Project',
              identifier: 'test-project',
              description: 'E2E test project',
              icon: '\uf120',
              cwd: projectDir,
              profile: 'default',
              enabled: true,
            },
          ],
          agents: [],
        },
      ],
      activeGroupId: 'test-group',
    };
    writeFileSync(
      join(configDir, 'groups.json'),
      JSON.stringify(groupsJson, null, 2),
    );
  } catch (err) {
    // Do not leave a half-built fixture behind in the temp directory.
    rmSync(rootDir, { recursive: true, force: true });
    throw err;
  }

  const env: Record<string, string> = {
    BTERMINAL_TEST: '1',
    BTERMINAL_TEST_DATA_DIR: dataDir,
    BTERMINAL_TEST_CONFIG_DIR: configDir,
  };
  return { rootDir, dataDir, configDir, projectDir, env };
}
/**
 * Remove everything a fixture created on disk.
 *
 * Deletes `fixture.rootDir` recursively; does nothing when that directory is
 * already gone, so it is safe to call more than once.
 */
export function destroyTestFixture(fixture: TestFixture): void {
  const { rootDir } = fixture;
  if (!existsSync(rootDir)) {
    return;
  }
  rmSync(rootDir, { force: true, recursive: true });
}
/**
 * Create a fixture whose groups.json lists several projects, for
 * multi-project testing.
 *
 * Builds a base fixture via `createTestFixture('bterminal-multi')`, adds
 * `projectCount` committed git repos named `project-0 … project-N` under the
 * fixture root, and overwrites groups.json with a single group
 * (`multi-group`) containing exactly those projects.
 *
 * Note: the returned `projectDir` still points at the base fixture's
 * `test-project` repo, which is NOT listed in the rewritten groups.json.
 *
 * @param projectCount Number of project repos to create (default 3).
 * @returns The base fixture, with its config now describing the new projects.
 * @throws Re-throws any filesystem or git failure after removing the whole
 *   fixture directory.
 */
export function createMultiProjectFixture(projectCount = 3): TestFixture {
  const fixture = createTestFixture('bterminal-multi');

  try {
    const projects: Array<{
      id: string;
      name: string;
      identifier: string;
      description: string;
      icon: string;
      cwd: string;
      profile: string;
      enabled: boolean;
    }> = [];

    for (let i = 0; i < projectCount; i++) {
      const projDir = join(fixture.rootDir, `project-${i}`);
      mkdirSync(projDir, { recursive: true });

      const git = (command: string): void => {
        execSync(command, { cwd: projDir, stdio: 'ignore' });
      };
      git('git init');
      git('git config user.email "test@bterminal.dev"');
      git('git config user.name "BTerminal Test"');
      // Keep the commit independent of the host's global git config: a
      // globally enforced signing requirement would make it fail or hang.
      git('git config commit.gpgsign false');
      writeFileSync(join(projDir, 'README.md'), `# Project ${i}\n`);
      git('git add -A && git commit -m "init"');

      projects.push({
        id: `project-${i}`,
        name: `Project ${i}`,
        identifier: `project-${i}`,
        description: `Test project ${i}`,
        icon: '\uf120',
        cwd: projDir,
        profile: 'default',
        enabled: true,
      });
    }

    const groupsJson = {
      version: 1,
      groups: [
        {
          id: 'multi-group',
          name: 'Multi Project Group',
          projects,
          agents: [],
        },
      ],
      activeGroupId: 'multi-group',
    };
    writeFileSync(
      join(fixture.configDir, 'groups.json'),
      JSON.stringify(groupsJson, null, 2),
    );
  } catch (err) {
    // The caller never receives the fixture on failure, so nobody else could
    // clean it up: remove it here before propagating the error.
    rmSync(fixture.rootDir, { recursive: true, force: true });
    throw err;
  }

  return fixture;
}

113
v2/tests/e2e/results-db.ts Normal file
View file

@ -0,0 +1,113 @@
// Test results store — persists test run outcomes as JSON for analysis
// No native deps needed — reads/writes a JSON file
import { resolve, dirname } from 'node:path';
import { mkdirSync, readFileSync, writeFileSync, existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
// Directory containing this module, derived from the module URL
// (import.meta.url) because this file is loaded as an ES module.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Default results file: test-results/results.json, resolved two directory
// levels above this module. Used when ResultsDb is constructed without a path.
const DEFAULT_PATH = resolve(__dirname, '../../test-results/results.json');
/** One test run as persisted in the results file. */
export interface TestRunRow {
/** Caller-supplied run identifier; steps refer to it through their own run_id. */
run_id: string;
/** ISO-8601 timestamp taken when startRun() recorded the run. */
started_at: string;
/** ISO-8601 timestamp set by finishRun(); null while the run is unfinished. */
finished_at: string | null;
/** 'running' from startRun() until finishRun() stores the final outcome. */
status: 'running' | 'passed' | 'failed' | 'error';
/** Number of steps recorded for this run; computed by finishRun(), 0 before that. */
total_tests: number;
/** Steps of this run whose status is 'passed'; computed by finishRun(). */
passed_tests: number;
/** Steps of this run whose status is 'failed' or 'error'; computed by finishRun(). */
failed_tests: number;
/** Duration passed to finishRun(); null while the run is unfinished. */
duration_ms: number | null;
/** Branch name passed to startRun(), or null when omitted. */
git_branch: string | null;
/** Commit hash passed to startRun(), or null when omitted. */
git_sha: string | null;
}
/** One recorded step (individual test outcome) belonging to a run. */
export interface TestStepRow {
/** Identifier of the run this step belongs to (matched against TestRunRow.run_id). */
run_id: string;
/** Name of the scenario the step is part of — presumably the describe() title; confirm with callers. */
scenario_name: string;
/** Name of the step itself — presumably the it() title; confirm with callers. */
step_name: string;
/** Outcome; finishRun() counts 'passed' as passed and 'failed'/'error' as failed. */
status: 'passed' | 'failed' | 'skipped' | 'error';
/** Step duration in milliseconds, or null when not measured. */
duration_ms: number | null;
/** Failure message, or null when there is none. */
error_message: string | null;
/** Path to a screenshot for this step, or null — NOTE(review): nothing in this file writes screenshots. */
screenshot_path: string | null;
/** Presumably the agent/API cost of the step in USD, or null — confirm with callers. */
agent_cost_usd: number | null;
/** ISO-8601 timestamp added by recordStep() when the step is stored. */
created_at: string;
}
/** Shape of the JSON document that ResultsDb reads from and writes to disk. */
interface ResultsStore {
/** All recorded runs, in the order startRun() appended them. */
runs: TestRunRow[];
/** All recorded steps across every run, in the order recordStep() appended them. */
steps: TestStepRow[];
}
/**
 * JSON-file-backed store for E2E test run results.
 *
 * The whole store is loaded into memory by the constructor and the file is
 * rewritten in full after every mutation (startRun, finishRun, recordStep).
 */
export class ResultsDb {
  private readonly filePath: string;
  private store: ResultsStore;

  /**
   * @param filePath Location of the JSON results file. Its parent directory
   *   is created if missing; the file itself is created on first write.
   */
  constructor(filePath = DEFAULT_PATH) {
    this.filePath = filePath;
    mkdirSync(dirname(filePath), { recursive: true });
    this.store = this.load();
  }

  /**
   * Read the store from disk.
   *
   * Falls back to an empty store when the file is missing, unreadable, or not
   * valid JSON. Valid JSON of the wrong shape (e.g. `{}`, `null`, `[]`) is
   * normalised so `runs` and `steps` are always arrays; otherwise later
   * `push` calls would crash.
   */
  private load(): ResultsStore {
    if (!existsSync(this.filePath)) {
      return { runs: [], steps: [] };
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(this.filePath, 'utf-8'));
    } catch {
      return { runs: [], steps: [] };
    }

    const candidate: Partial<ResultsStore> =
      typeof parsed === 'object' && parsed !== null ? (parsed as Partial<ResultsStore>) : {};
    return {
      runs: Array.isArray(candidate.runs) ? candidate.runs : [],
      steps: Array.isArray(candidate.steps) ? candidate.steps : [],
    };
  }

  /** Persist the in-memory store as pretty-printed JSON. */
  private save(): void {
    writeFileSync(this.filePath, JSON.stringify(this.store, null, 2));
  }

  /**
   * Append a new run in 'running' state and persist it.
   *
   * @param runId Identifier later passed to finishRun / recordStep.
   * @param gitBranch Optional branch name; stored as null when omitted.
   * @param gitSha Optional commit hash; stored as null when omitted.
   */
  startRun(runId: string, gitBranch?: string, gitSha?: string): void {
    this.store.runs.push({
      run_id: runId,
      started_at: new Date().toISOString(),
      finished_at: null,
      status: 'running',
      total_tests: 0,
      passed_tests: 0,
      failed_tests: 0,
      duration_ms: null,
      git_branch: gitBranch ?? null,
      git_sha: gitSha ?? null,
    });
    this.save();
  }

  /**
   * Mark a run as finished and fill in its totals from the recorded steps.
   * Steps with status 'error' count as failed; 'skipped' steps count only
   * towards the total. Does nothing when `runId` is unknown.
   */
  finishRun(runId: string, status: 'passed' | 'failed' | 'error', durationMs: number): void {
    const run = this.store.runs.find(r => r.run_id === runId);
    if (!run) return;

    const steps = this.store.steps.filter(s => s.run_id === runId);
    run.finished_at = new Date().toISOString();
    run.status = status;
    run.duration_ms = durationMs;
    run.total_tests = steps.length;
    run.passed_tests = steps.filter(s => s.status === 'passed').length;
    run.failed_tests = steps.filter(s => s.status === 'failed' || s.status === 'error').length;
    this.save();
  }

  /** Append one step (stamped with the current time) and persist it. */
  recordStep(step: Omit<TestStepRow, 'created_at'>): void {
    this.store.steps.push({
      ...step,
      created_at: new Date().toISOString(),
    });
    this.save();
  }

  /**
   * Return up to `limit` runs, newest first (by `started_at`).
   *
   * Sorts a copy: `Array.prototype.sort` works in place, so sorting the
   * stored array would silently reorder the data written by the next save.
   */
  getRecentRuns(limit = 20): TestRunRow[] {
    return [...this.store.runs]
      .sort((a, b) => b.started_at.localeCompare(a.started_at))
      .slice(0, limit);
  }

  /** Return all steps recorded for `runId`, in recording order. */
  getStepsForRun(runId: string): TestStepRow[] {
    return this.store.steps.filter(s => s.run_id === runId);
  }
}

View file

@ -0,0 +1,411 @@
import { browser, expect } from '@wdio/globals';
// Phase A: Human-authored E2E scenarios with deterministic assertions.
// These test the agent UI flow end-to-end using stable data-testid selectors.
// Agent-interaction tests require a real Claude CLI install + API key.
// ─── Helpers ──────────────────────────────────────────────────────────
/**
 * Wait until the agent pane reports the given status.
 *
 * Polls the `data-agent-status` attribute of the first
 * `[data-testid="agent-pane"]` element. A missing pane (or missing attribute)
 * never matches, so waiting for "idle" cannot succeed just because no agent
 * pane is rendered.
 *
 * @param status Expected value of `data-agent-status`.
 * @param timeout Maximum wait in milliseconds (default 30 s).
 * @throws Whatever `browser.waitUntil` throws on timeout; the message names
 *   the status and the timeout.
 */
async function waitForAgentStatus(
  status: string,
  timeout = 30_000,
): Promise<void> {
  await browser.waitUntil(
    async () => {
      const current = await browser.execute(() => {
        const pane = document.querySelector('[data-testid="agent-pane"]');
        // null = "no pane / no status yet"; deliberately not defaulted to a
        // real status value so it can never equal the awaited one.
        return pane?.getAttribute('data-agent-status') ?? null;
      });
      return current === status;
    },
    { timeout, timeoutMsg: `Agent did not reach status "${status}" within ${timeout}ms` },
  );
}
/**
 * Report whether an agent pane element is present in the DOM.
 * Only existence is checked — the pane may be present but not displayed.
 */
async function agentPaneExists(): Promise<boolean> {
  const pane = await browser.$('[data-testid="agent-pane"]');
  const present = await pane.isExisting();
  return present;
}
/**
 * Enter `text` into the agent prompt textarea and press the submit button.
 *
 * Waits up to 5 s for the textarea to be displayed. The submit button is
 * clicked from inside the page (DOM `click()`), not through a WebDriver click.
 */
async function sendAgentPrompt(text: string): Promise<void> {
  const promptBox = await browser.$('[data-testid="agent-prompt"]');
  await promptBox.waitForDisplayed({ timeout: 5000 });
  await promptBox.setValue(text);

  // Short settle time so the UI framework can react to the new value
  // (the original note attributes this to Svelte reactivity).
  await browser.pause(200);

  const submitButton = await browser.$('[data-testid="agent-submit"]');
  await browser.execute((button) => {
    (button as HTMLElement).click();
  }, submitButton);
}
// ─── Scenario 1: App renders with project grid and data-testid anchors ───
// Smoke checks that the main layout anchors carry the data-testid attributes
// the later scenarios rely on.
describe('Scenario 1 — App Structural Integrity', () => {
  /** Look up `selector` and assert that the matched element is displayed. */
  const expectDisplayed = async (selector: string): Promise<void> => {
    const target = await browser.$(selector);
    await expect(target).toBeDisplayed();
  };

  it('should render the status bar with data-testid', () =>
    expectDisplayed('[data-testid="status-bar"]'));

  it('should render the sidebar rail with data-testid', () =>
    expectDisplayed('[data-testid="sidebar-rail"]'));

  it('should render at least one project box with data-testid', async () => {
    const projectBoxes = await browser.$$('[data-testid="project-box"]');
    expect(projectBoxes.length).toBeGreaterThanOrEqual(1);
  });

  it('should have data-project-id on project boxes', async () => {
    // Only the first project box is inspected.
    const firstProjectId = await browser.execute(() => {
      const firstBox = document.querySelector('[data-testid="project-box"]');
      if (!firstBox) return null;
      return firstBox.getAttribute('data-project-id');
    });
    expect(firstProjectId).not.toBeNull();
    expect((firstProjectId as string).length).toBeGreaterThan(0);
  });

  it('should render project tabs with data-testid', () =>
    expectDisplayed('[data-testid="project-tabs"]'));

  it('should render agent session component', () =>
    expectDisplayed('[data-testid="agent-session"]'));
});
// ─── Scenario 2: Settings panel via data-testid ──────────────────────
// The two tests are order-dependent: the second closes the panel that the
// first one opened.
describe('Scenario 2 — Settings Panel (data-testid)', () => {
  it('should open settings via data-testid button', async () => {
    const settingsButton = await browser.$('[data-testid="settings-btn"]');
    await settingsButton.click();

    const settingsPanel = await browser.$('.sidebar-panel');
    await settingsPanel.waitForDisplayed({ timeout: 5000 });
    await expect(settingsPanel).toBeDisplayed();
  });

  it('should close settings with Escape', async () => {
    await browser.keys('Escape');

    // reverse: true waits for the panel to stop being displayed.
    const settingsPanel = await browser.$('.sidebar-panel');
    await settingsPanel.waitForDisplayed({ reverse: true, timeout: 3000 });
  });
});
// ─── Scenario 3: Agent pane initial state ────────────────────────────
// Verifies what a fresh agent pane looks like before any prompt is sent.
describe('Scenario 3 — Agent Pane Initial State', () => {
  it('should display agent pane in idle status', async () => {
    if (!(await agentPaneExists())) {
      // Agent pane might not be visible until Model tab is active:
      // activate the first project tab and give the UI a moment to render.
      await browser.execute(() => {
        const firstTab = document.querySelector('[data-testid="project-tabs"] .ptab');
        if (firstTab) (firstTab as HTMLElement).click();
      });
      await browser.pause(300);
    }

    const agentPane = await browser.$('[data-testid="agent-pane"]');
    await expect(agentPane).toBeExisting();

    // 'unknown' marks a missing pane/attribute so it can never pass as idle.
    const currentStatus = await browser.execute(() => {
      const paneEl = document.querySelector('[data-testid="agent-pane"]');
      return paneEl?.getAttribute('data-agent-status') ?? 'unknown';
    });
    expect(currentStatus).toBe('idle');
  });

  it('should show prompt textarea', async () => {
    const promptBox = await browser.$('[data-testid="agent-prompt"]');
    await expect(promptBox).toBeDisplayed();
  });

  it('should show submit button', async () => {
    const submitButton = await browser.$('[data-testid="agent-submit"]');
    await expect(submitButton).toBeExisting();
  });

  it('should have empty messages area initially', async () => {
    const messagesArea = await browser.$('[data-testid="agent-messages"]');
    await expect(messagesArea).toBeExisting();

    // No message bubbles should exist in a fresh session
    const bubbleCount = await browser.execute(() => {
      const container = document.querySelector('[data-testid="agent-messages"]');
      return container ? container.querySelectorAll('.message').length : 0;
    });
    expect(bubbleCount).toBe(0);
  });
});
// ─── Scenario 4: Terminal tab management ─────────────────────────────
// Opens the terminal section, adds a shell tab, closes all tabs again, and
// finally collapses the section. Tests are order-dependent.
describe('Scenario 4 — Terminal Tab Management (data-testid)', () => {
  before(async () => {
    // Ensure Model tab is active and terminal section visible
    await browser.execute(() => {
      const tab = document.querySelector('[data-testid="project-tabs"] .ptab');
      if (tab) (tab as HTMLElement).click();
    });
    await browser.pause(300);

    // Expand terminal section — but only when it is currently collapsed.
    // The toggle flips state on every click, so clicking blindly would
    // collapse a section that an earlier spec/test left expanded. The
    // `.toggle-chevron.expanded` marker is the same one the after() hook
    // uses to detect the expanded state.
    await browser.execute(() => {
      const toggle = document.querySelector('[data-testid="terminal-toggle"]');
      if (!toggle) return;
      const alreadyExpanded = toggle.querySelector('.toggle-chevron.expanded') !== null;
      if (!alreadyExpanded) (toggle as HTMLElement).click();
    });
    await browser.pause(500);
  });

  it('should display terminal tabs container', async () => {
    const tabs = await browser.$('[data-testid="terminal-tabs"]');
    await expect(tabs).toBeDisplayed();
  });

  it('should add a shell tab via data-testid button', async () => {
    await browser.execute(() => {
      const btn = document.querySelector('[data-testid="tab-add"]');
      if (btn) (btn as HTMLElement).click();
    });
    await browser.pause(500);

    // Only the first tab title is inspected.
    const tabTitle = await browser.execute(() => {
      const el = document.querySelector('.tab-bar .tab-title');
      return el?.textContent ?? '';
    });
    expect(tabTitle.toLowerCase()).toContain('shell');
  });

  it('should show active tab styling', async () => {
    const activeTab = await browser.$('.tab.active');
    await expect(activeTab).toBeExisting();
  });

  it('should close tab and show empty state', async () => {
    // Close all tabs
    await browser.execute(() => {
      const closeBtns = document.querySelectorAll('.tab-close');
      closeBtns.forEach(btn => (btn as HTMLElement).click());
    });
    await browser.pause(500);

    // Should show empty terminal area with "Open terminal" button
    const emptyBtn = await browser.$('.add-first');
    await expect(emptyBtn).toBeDisplayed();
  });

  after(async () => {
    // Collapse terminal section (only if it is currently expanded)
    await browser.execute(() => {
      const toggle = document.querySelector('[data-testid="terminal-toggle"]');
      const chevron = toggle?.querySelector('.toggle-chevron.expanded');
      if (chevron) (toggle as HTMLElement).click();
    });
    await browser.pause(300);
  });
});
// ─── Scenario 5: Command palette with data-testid ───────────────────
// Opens the palette with Ctrl+K, checks focus/contents/filtering, then closes
// it with Escape. Tests are order-dependent: the palette stays open between them.
describe('Scenario 5 — Command Palette (data-testid)', () => {
  it('should open palette and show data-testid input', async () => {
    // Move focus to the page body before sending the shortcut.
    await browser.execute(() => document.body.focus());
    await browser.pause(200);
    await browser.keys(['Control', 'k']);

    const paletteEl = await browser.$('[data-testid="command-palette"]');
    await paletteEl.waitForDisplayed({ timeout: 3000 });

    const paletteInput = await browser.$('[data-testid="palette-input"]');
    await expect(paletteInput).toBeDisplayed();
  });

  it('should have focused input', async () => {
    const inputHasFocus = await browser.execute(() => {
      const inputEl = document.querySelector('[data-testid="palette-input"]');
      return inputEl === document.activeElement;
    });
    expect(inputHasFocus).toBe(true);
  });

  it('should show at least one group item', async () => {
    const paletteItems = await browser.$$('.palette-item');
    expect(paletteItems.length).toBeGreaterThanOrEqual(1);
  });

  it('should filter and show no-results for nonsense query', async () => {
    const paletteInput = await browser.$('[data-testid="palette-input"]');
    await paletteInput.setValue('zzz_no_match_xyz');
    await browser.pause(300);

    const emptyState = await browser.$('.no-results');
    await expect(emptyState).toBeDisplayed();
  });

  it('should close on Escape', async () => {
    await browser.keys('Escape');

    const paletteEl = await browser.$('[data-testid="command-palette"]');
    const paletteHidden = async (): Promise<boolean> => {
      const shown = await paletteEl.isDisplayed();
      return !shown;
    };
    await browser.waitUntil(paletteHidden, { timeout: 3000 });
  });
});
// ─── Scenario 6: Project focus and tab switching ─────────────────────
// Checks that switching project tabs neither unmounts the agent session nor
// changes the agent status.
describe('Scenario 6 — Project Focus & Tab Switching', () => {
  /**
   * Click the project tab at `index` (0-based) if that many tabs exist, then
   * pause for `settleMs` so the UI can update.
   */
  const clickProjectTab = async (index: number, settleMs: number): Promise<void> => {
    await browser.execute((i) => {
      const allTabs = document.querySelectorAll('[data-testid="project-tabs"] .ptab');
      if (allTabs.length > i) (allTabs[i] as HTMLElement).click();
    }, index);
    await browser.pause(settleMs);
  };

  /** True when the agent session element is present in the DOM (displayed or not). */
  const agentSessionInDom = (): Promise<boolean> =>
    browser.execute(() => document.querySelector('[data-testid="agent-session"]') !== null);

  /** Current data-agent-status of the agent pane, or 'unknown' when unavailable. */
  const readAgentStatus = (): Promise<string> =>
    browser.execute(() => {
      const paneEl = document.querySelector('[data-testid="agent-pane"]');
      return paneEl?.getAttribute('data-agent-status') ?? 'unknown';
    });

  it('should focus project on header click', async () => {
    await browser.execute(() => {
      const projectHeader = document.querySelector('.project-header');
      if (projectHeader) (projectHeader as HTMLElement).click();
    });
    await browser.pause(300);

    const focusedBox = await browser.$('.project-box.active');
    await expect(focusedBox).toBeDisplayed();
  });

  it('should switch to Files tab and back without losing agent session', async () => {
    // The session element must exist before we start switching.
    expect(await agentSessionInDom()).toBe(true);

    // Switch to Files tab (second tab)
    await clickProjectTab(1, 500);

    // AgentSession should still exist in DOM (display:none, not unmounted)
    expect(await agentSessionInDom()).toBe(true);

    // Switch back to Model tab
    await clickProjectTab(0, 300);

    // Agent session should be visible again
    const sessionEl = await browser.$('[data-testid="agent-session"]');
    await expect(sessionEl).toBeDisplayed();
  });

  it('should preserve agent status across tab switches', async () => {
    const before = await readAgentStatus();

    // Switch to Context tab (third tab) and back
    await clickProjectTab(2, 300);
    await clickProjectTab(0, 300);

    const after = await readAgentStatus();
    expect(after).toBe(before);
  });
});
// ─── Scenario 7: Agent prompt interaction (requires Claude CLI) ──────
describe('Scenario 7 — Agent Prompt Submission', () => {
  // The last test requires a real Claude CLI + API key. Availability cannot
  // be detected up front from inside the page, so that test simply submits a
  // prompt and skips itself if the agent never reaches "running".

  it('should accept text in prompt textarea', async () => {
    const textarea = await browser.$('[data-testid="agent-prompt"]');
    await textarea.waitForDisplayed({ timeout: 5000 });
    await textarea.setValue('Say hello');
    await browser.pause(200);

    const value = await textarea.getValue();
    expect(value).toBe('Say hello');

    // Clear without submitting
    await textarea.clearValue();
  });

  it('should enable submit button when prompt has text', async () => {
    const textarea = await browser.$('[data-testid="agent-prompt"]');
    await textarea.setValue('Test prompt');
    await browser.pause(200);

    // Submit button should be interactable (not disabled).
    // A missing button is reported as disabled so the assertion fails.
    const isDisabled = await browser.execute(() => {
      const btn = document.querySelector('[data-testid="agent-submit"]');
      if (!btn) return true;
      return (btn as HTMLButtonElement).disabled;
    });
    expect(isDisabled).toBe(false);

    await textarea.clearValue();
  });

  // Uses `function` (not an arrow) because Mocha's this.skip() needs the test context.
  it('should show stop button during agent execution (if Claude available)', async function () {
    // Send a minimal prompt
    await sendAgentPrompt('Reply with exactly: BTERMINAL_TEST_OK');

    // Wait for running status (generous timeout for sidecar spin-up)
    try {
      await waitForAgentStatus('running', 15_000);
    } catch {
      // Claude CLI not available — skip remaining assertions
      console.log('Agent did not start — Claude CLI may not be available. Skipping.');
      this.skip();
      return;
    }

    // Stop button should appear while running.
    // NOTE(review): if the agent finishes very quickly the stop button may
    // already be gone by the time this assertion runs — potential flakiness.
    const stopBtn = await browser.$('[data-testid="agent-stop"]');
    await expect(stopBtn).toBeDisplayed();

    // Wait for completion
    await waitForAgentStatus('idle', 60_000);

    // Messages area should now have content
    const msgCount = await browser.execute(() => {
      const area = document.querySelector('[data-testid="agent-messages"]');
      if (!area) return 0;
      return area.children.length;
    });
    expect(msgCount).toBeGreaterThan(0);
  });
});

View file

@ -7,5 +7,5 @@
"esModuleInterop": true, "esModuleInterop": true,
"skipLibCheck": true "skipLibCheck": true
}, },
"include": ["specs/**/*.ts"] "include": ["specs/**/*.ts", "*.ts"]
} }

View file

@ -1,4 +1,5 @@
import { spawn } from 'node:child_process'; import { spawn } from 'node:child_process';
import { createConnection } from 'node:net';
import { resolve, dirname } from 'node:path'; import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
@ -24,7 +25,10 @@ export const config = {
// ── Specs ── // ── Specs ──
// Single spec file — Tauri launches one app instance per session, // Single spec file — Tauri launches one app instance per session,
// and tauri-driver can't re-create sessions between spec files. // and tauri-driver can't re-create sessions between spec files.
specs: [resolve(__dirname, 'specs/bterminal.test.ts')], specs: [
resolve(__dirname, 'specs/bterminal.test.ts'),
resolve(__dirname, 'specs/agent-scenarios.test.ts'),
],
// ── Capabilities ── // ── Capabilities ──
capabilities: [{ capabilities: [{
@ -32,6 +36,12 @@ export const config = {
'wdio:enforceWebDriverClassic': true, 'wdio:enforceWebDriverClassic': true,
'tauri:options': { 'tauri:options': {
application: tauriBinary, application: tauriBinary,
// Test isolation: separate data/config dirs, disable watchers/telemetry
env: {
BTERMINAL_TEST: '1',
...(process.env.BTERMINAL_TEST_DATA_DIR && { BTERMINAL_TEST_DATA_DIR: process.env.BTERMINAL_TEST_DATA_DIR }),
...(process.env.BTERMINAL_TEST_CONFIG_DIR && { BTERMINAL_TEST_CONFIG_DIR: process.env.BTERMINAL_TEST_CONFIG_DIR }),
},
}, },
}], }],
@ -85,9 +95,10 @@ export const config = {
/** /**
* Spawn tauri-driver before the session. * Spawn tauri-driver before the session.
* tauri-driver bridges WebDriver protocol to WebKit2GTK's inspector. * tauri-driver bridges WebDriver protocol to WebKit2GTK's inspector.
* Uses TCP probe to confirm port 4444 is accepting connections.
*/ */
beforeSession() { beforeSession() {
return new Promise((resolve, reject) => { return new Promise((res, reject) => {
tauriDriver = spawn('tauri-driver', [], { tauriDriver = spawn('tauri-driver', [], {
stdio: ['ignore', 'pipe', 'pipe'], stdio: ['ignore', 'pipe', 'pipe'],
}); });
@ -99,14 +110,28 @@ export const config = {
)); ));
}); });
// Wait for tauri-driver to be ready (listens on port 4444) // TCP readiness probe — poll port 4444 until it accepts a connection
const timeout = setTimeout(() => resolve(), 2000); const maxWaitMs = 10_000;
tauriDriver.stdout.on('data', (data) => { const intervalMs = 200;
if (data.toString().includes('4444')) { const deadline = Date.now() + maxWaitMs;
clearTimeout(timeout);
resolve(); function probe() {
if (Date.now() > deadline) {
reject(new Error('tauri-driver did not become ready within 10s'));
return;
} }
}); const sock = createConnection({ port: 4444, host: 'localhost' }, () => {
sock.destroy();
res();
});
sock.on('error', () => {
sock.destroy();
setTimeout(probe, intervalMs);
});
}
// Give it a moment before first probe
setTimeout(probe, 300);
}); });
}, },