feat(reviewer): add Tier 1 reviewer agent role with auto-channel notifications

Reviewer workflow in agent-prompts.ts (8-step process), Rust auto-post
to #review-queue on task->review transition, reviewQueueDepth in
attention scoring (10pts/task cap 50), Tasks tab for reviewer in
ProjectBox with 10s queue polling. 7 vitest + 4 cargo tests.
This commit is contained in:
Hibryda 2026-03-12 00:54:43 +01:00
parent 61f01e22b8
commit 323bb1b040
9 changed files with 397 additions and 0 deletions

View file

@ -56,3 +56,8 @@ export async function createTask(
/** Invokes the `bttask_delete` command with the given task id. */
export async function deleteTask(taskId: string): Promise<void> {
  const args = { taskId };
  return invoke('bttask_delete', args);
}
/**
 * Count the tasks of a group that are currently in 'review' status.
 *
 * @param groupId - Group whose review queue is counted.
 * @returns The count reported by the `bttask_review_queue_count` command.
 */
export async function reviewQueueCount(groupId: GroupId): Promise<number> {
  const pending = await invoke<number>('bttask_review_queue_count', { groupId });
  return pending;
}

View file

@ -22,6 +22,8 @@
import { ProjectId, type AgentId, type GroupId } from '../../types/ids';
import { notify, dismissNotification } from '../../stores/notifications.svelte';
import { registerManager, unregisterManager, updateManagerConfig } from '../../stores/wake-scheduler.svelte';
import { setReviewQueueDepth } from '../../stores/health.svelte';
import { reviewQueueCount } from '../../adapters/bttask-bridge';
interface Props {
project: ProjectConfig;
@ -93,6 +95,23 @@
};
});
// Poll review queue depth for reviewer agents (feeds into attention scoring).
// The cleanup stops the timer and discards any response still in flight, so a
// late reply cannot overwrite the depth after the effect has been torn down.
$effect(() => {
  if (!(project.isAgent && project.agentRole === 'reviewer')) return;
  const groupId = activeGroup?.id;
  if (!groupId) return;
  // Captured up front so a late response is attributed to the project that
  // issued the request, not whatever `project` points at when it resolves.
  const projectId = project.id;
  // Flipped by the cleanup below; guards against writes after teardown.
  let disposed = false;
  const pollReviewQueue = () => {
    reviewQueueCount(groupId)
      .then(count => {
        if (!disposed) setReviewQueueDepth(projectId, count);
      })
      .catch((err: unknown) => {
        // Best-effort: the next tick retries, but the failure is surfaced
        // instead of being silently dropped.
        if (!disposed) console.warn('Review queue poll failed:', err);
      });
  };
  pollReviewQueue(); // immediate first poll
  const timer = setInterval(pollReviewQueue, 10_000); // 10s poll
  return () => {
    disposed = true;
    clearInterval(timer);
  };
});
// S-1 Phase 2: start filesystem watcher for this project's CWD
$effect(() => {
const cwd = project.cwd;
@ -195,6 +214,9 @@
{#if isAgent && agentRole === 'architect'}
<button class="ptab ptab-role" class:active={activeTab === 'architecture'} onclick={() => switchTab('architecture')}>Arch</button>
{/if}
{#if isAgent && agentRole === 'reviewer'}
<button class="ptab ptab-role" class:active={activeTab === 'tasks'} onclick={() => switchTab('tasks')}>Tasks</button>
{/if}
{#if isAgent && agentRole === 'tester'}
<button class="ptab ptab-role" class:active={activeTab === 'selenium'} onclick={() => switchTab('selenium')}>Selenium</button>
<button class="ptab ptab-role" class:active={activeTab === 'tests'} onclick={() => switchTab('tests')}>Tests</button>

View file

@ -66,6 +66,8 @@ interface ProjectTracker {
tokenSnapshots: Array<[number, number]>;
/** Cost snapshots for $/hr: [timestamp, costUsd] */
costSnapshots: Array<[number, number]>;
/** Number of tasks in 'review' status (for reviewer agents) */
reviewQueueDepth: number;
}
let trackers = $state<Map<ProjectIdType, ProjectTracker>>(new Map());
@ -90,6 +92,7 @@ export function trackProject(projectId: ProjectIdType, sessionId: SessionIdType
toolInFlight: false,
tokenSnapshots: [],
costSnapshots: [],
reviewQueueDepth: 0,
});
}
@ -179,6 +182,12 @@ export function stopHealthTick(): void {
}
}
/**
 * Record the review queue depth on a project's tracker (used by reviewer agents).
 * Does nothing when no tracker exists for the given project.
 *
 * @param projectId - Project whose tracker is updated.
 * @param depth - Number of tasks to store as the tracker's `reviewQueueDepth`.
 */
export function setReviewQueueDepth(projectId: ProjectIdType, depth: number): void {
  const tracker = trackers.get(projectId);
  if (!tracker) return;
  tracker.reviewQueueDepth = depth;
}
/** Clear all tracked projects */
export function clearHealthTracking(): void {
trackers = new Map();
@ -255,6 +264,7 @@ function computeHealth(tracker: ProjectTracker, now: number): ProjectHealth {
contextPressure,
fileConflictCount,
externalConflictCount,
reviewQueueDepth: tracker.reviewQueueDepth,
});
return {

View file

@ -194,6 +194,29 @@ ${roleDesc}
parts.push(BTMSG_DOCS);
if (role === 'manager' || role === 'architect') {
parts.push(BTTASK_DOCS);
} else if (role === 'reviewer') {
// Reviewer gets full read + status update + comment access
parts.push(`
## Tool: bttask Task Board (review access)
You have full read access plus the ability to update task status and add comments.
You CANNOT create, assign, or delete tasks (Manager only).
\`\`\`bash
bttask board # Kanban board view
bttask show <task-id> # Full task details + comments
bttask list # List all tasks
bttask status <task-id> done # Approve mark as done
bttask status <task-id> progress # Request changes send back
bttask status <task-id> blocked # Block explain in comment!
bttask comment <task-id> "verdict" # Add review verdict/feedback
\`\`\`
### Review workflow with bttask
- Tasks in the **review** column are waiting for YOUR review
- After reviewing, either move to **done** (approved) or **progress** (needs changes)
- ALWAYS add a comment with your verdict before changing status
- When a task moves to review, a notification is auto-posted to \`#review-queue\``);
} else {
// Other agents get read-only bttask info
parts.push(`
@ -329,6 +352,29 @@ If the Operator sends a message, it's your TOP PRIORITY.`;
6. **Verify fixes:** Re-test when developers say a bug is fixed`;
}
// Reviewer role: return the "## Your Workflow" text for reviewers — an
// 8-step loop (inbox, review-queue channel, board, analysis, verdict comment,
// status update, review-log post, manager report) followed by the review
// standards checklist.
if (role === 'reviewer') {
return `## Your Workflow
1. **Check inbox:** \`btmsg inbox\` — read review requests and messages
2. **Check review queue:** \`btmsg channel history review-queue\` — see newly submitted reviews
3. **Review tasks:** \`bttask board\` — find tasks in the **review** column
4. **Analyze:** For each review task:
a. Read the task description and comments (\`bttask show <id>\`)
b. Read the relevant code changes
c. Check for security issues, bugs, style violations, and test coverage
5. **Verdict:** Add your review as a comment (\`bttask comment <id> "APPROVED: ..."\` or \`"CHANGES REQUESTED: ..."\`)
6. **Update status:** Move task to **done** (approved) or **progress** (needs changes)
7. **Log verdict:** Post summary to \`btmsg channel send review-log "Task <id>: APPROVED/REJECTED — reason"\`
8. **Report:** Notify the Manager of review outcomes if significant
**Review standards:**
- Code quality: readability, naming, structure
- Security: input validation, auth checks, injection risks
- Error handling: all errors caught and handled visibly
- Tests: adequate coverage for new/changed code
- Performance: no N+1 queries, unbounded fetches, or memory leaks`;
}
return `## Your Workflow
1. **Check inbox:** \`btmsg inbox\` — read all unread messages

View file

@ -136,4 +136,66 @@ describe('scoreAttention', () => {
}));
expect(result.reason).toContain('Unknown');
});
// --- Review queue depth scoring ---
// Each case feeds a running session into scoreAttention and checks how the
// number of tasks awaiting review affects the resulting score and reason.

// Three queued tasks score 3 * 10 and are reported in the plural.
it('scores review queue depth at 10 per task', () => {
  const { score, reason } = scoreAttention(
    makeInput({ activityState: 'running', reviewQueueDepth: 3 }),
  );
  expect(score).toBe(30);
  expect(reason).toContain('3 tasks awaiting review');
});

// Eight queued tasks would be 80 uncapped; the score stops at 50.
it('caps review queue score at 50', () => {
  const { score, reason } = scoreAttention(
    makeInput({ activityState: 'running', reviewQueueDepth: 8 }),
  );
  expect(score).toBe(50);
  expect(reason).toContain('8 tasks');
});

// A single queued task must read "1 task", not "1 tasks".
it('uses singular grammar for 1 review task', () => {
  const { score, reason } = scoreAttention(
    makeInput({ activityState: 'running', reviewQueueDepth: 1 }),
  );
  expect(score).toBe(10);
  expect(reason).toBe('1 task awaiting review');
});

// File conflicts are evaluated before the review queue.
it('review queue has lower priority than file conflicts', () => {
  const { score } = scoreAttention(
    makeInput({ activityState: 'running', fileConflictCount: 2, reviewQueueDepth: 5 }),
  );
  expect(score).toBe(70); // file conflicts win
});

// The review queue is evaluated before the context-high check, so its
// (lower) score of 20 is what comes back instead of 40.
it('review queue has higher priority than context high', () => {
  const { score } = scoreAttention(
    makeInput({ activityState: 'running', contextPressure: 0.80, reviewQueueDepth: 2 }),
  );
  expect(score).toBe(20); // review queue wins over context high (40)
});

// An empty queue contributes nothing.
it('ignores review queue when depth is 0', () => {
  const { score } = scoreAttention(
    makeInput({ activityState: 'running', reviewQueueDepth: 0 }),
  );
  expect(score).toBe(0);
});

// The field is optional; leaving it out behaves like an empty queue.
it('ignores review queue when undefined', () => {
  const { score } = scoreAttention(makeInput({ activityState: 'running' }));
  expect(score).toBe(0);
});
});

View file

@ -10,6 +10,10 @@ const SCORE_CONTEXT_CRITICAL = 80; // >90% context
const SCORE_FILE_CONFLICT = 70;
const SCORE_CONTEXT_HIGH = 40; // >75% context
// Review queue scoring: 10 pts per task awaiting review, capped at 50.
// (Counts every task in 'review' status; task age is not considered.)
const SCORE_REVIEW_PER_TASK = 10;
const SCORE_REVIEW_CAP = 50;
export interface AttentionInput {
sessionStatus: string | undefined;
sessionError: string | undefined;
@ -18,6 +22,8 @@ export interface AttentionInput {
contextPressure: number | null;
fileConflictCount: number;
externalConflictCount: number;
/** Number of tasks in 'review' status (for reviewer agents) */
reviewQueueDepth?: number;
}
export interface AttentionResult {
@ -57,6 +63,14 @@ export function scoreAttention(input: AttentionInput): AttentionResult {
};
}
// Reviewer backlog: the score grows by SCORE_REVIEW_PER_TASK for each task
// awaiting review, up to SCORE_REVIEW_CAP. This check sits ahead of the
// context-high check below, so a short queue is returned in preference to it
// even when its score is lower.
if (typeof input.reviewQueueDepth === 'number' && input.reviewQueueDepth > 0) {
  const pendingReviews = input.reviewQueueDepth;
  const pluralSuffix = pendingReviews > 1 ? 's' : '';
  return {
    score: Math.min(pendingReviews * SCORE_REVIEW_PER_TASK, SCORE_REVIEW_CAP),
    reason: `${pendingReviews} task${pluralSuffix} awaiting review`,
  };
}
if (input.contextPressure !== null && input.contextPressure > 0.75) {
return {
score: SCORE_CONTEXT_HIGH,