feat: refactor LLM judge to dual-mode CLI/API and fix config test race

Refactor llm-judge.ts from raw API-only to dual-mode: CLI first
(spawns claude with --output-format text, unsets CLAUDECODE), API
fallback. Backend selectable via LLM_JUDGE_BACKEND env var.

Fix pre-existing race condition in config.rs tests where parallel
test execution caused env var mutations to interfere with each other.
Add a static Mutex to serialize env-mutating tests.
This commit is contained in:
Hibryda 2026-03-12 06:35:04 +01:00
parent 05c9e1abbb
commit a3185656eb
4 changed files with 169 additions and 42 deletions

View file

@@ -217,7 +217,7 @@ describe('Scenario B3 — Status Bar Fleet State', () => {
// ─── Scenario B4: LLM-judged agent response (requires API key) ──────
describe('Scenario B4 — LLM-Judged Agent Response', () => {
- const SKIP_MSG = 'Skipping — ANTHROPIC_API_KEY not set';
+ const SKIP_MSG = 'Skipping — LLM judge not available (no CLI or API key)';
it('should send prompt and get meaningful response', async function () {
if (!isJudgeAvailable()) {
@@ -297,7 +297,7 @@ describe('Scenario B4 — LLM-Judged Agent Response', () => {
// ─── Scenario B5: LLM-judged code generation quality ─────────────────
describe('Scenario B5 — LLM-Judged Code Generation', () => {
- const SKIP_MSG = 'Skipping — ANTHROPIC_API_KEY not set';
+ const SKIP_MSG = 'Skipping — LLM judge not available (no CLI or API key)';
it('should generate valid code when asked', async function () {
if (!isJudgeAvailable()) {

View file

@@ -559,7 +559,7 @@ describe('Scenario C9 — Files Tab & Code Editor', () => {
describe('Scenario C10 — LLM-Judged Settings Completeness', () => {
it('should have comprehensive settings panel', async function () {
if (!isJudgeAvailable()) {
- console.log('Skipping — ANTHROPIC_API_KEY not set');
+ console.log('Skipping — LLM judge not available (no CLI or API key)');
this.skip();
return;
}
@@ -597,7 +597,7 @@ describe('Scenario C10 — LLM-Judged Settings Completeness', () => {
describe('Scenario C11 — LLM-Judged Status Bar Completeness', () => {
it('should render a comprehensive status bar', async function () {
if (!isJudgeAvailable()) {
- console.log('Skipping — ANTHROPIC_API_KEY not set');
+ console.log('Skipping — LLM judge not available (no CLI or API key)');
this.skip();
return;
}