feat: Agent Orchestrator — multi-project agent dashboard
Tauri + Svelte 5 + Rust application for orchestrating multiple AI coding agents. Includes Claude, Aider, Codex, and Ollama provider support; multi-agent communication (btmsg/bttask); session anchors; a plugin sandbox; FTS5 search; and Landlock sandboxing. Ships with 507 vitest and 110 cargo tests.
commit 3672e92b7e
272 changed files with 68600 additions and 0 deletions
bterminal-core/Cargo.toml (new file, 15 lines)
@@ -0,0 +1,15 @@
[package]
name = "bterminal-core"
version = "0.1.0"
edition = "2021"
description = "Shared PTY and sidecar management for BTerminal"
license = "MIT"

[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
log = "0.4"
portable-pty = "0.8"
uuid = { version = "1", features = ["v4"] }
dirs = "5"
landlock = "0.4"
bterminal-core/src/config.rs (new file, 209 lines)
@@ -0,0 +1,209 @@
// AppConfig — centralized path resolution for all BTerminal subsystems.
// In production, paths resolve via dirs:: crate defaults.
// In test mode (BTERMINAL_TEST=1), paths resolve from env var overrides:
//   BTERMINAL_TEST_DATA_DIR   → replaces dirs::data_dir()/bterminal
//   BTERMINAL_TEST_CONFIG_DIR → replaces dirs::config_dir()/bterminal
//   BTERMINAL_TEST_CTX_DIR    → replaces ~/.claude-context

use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AppConfig {
|
||||
/// Data directory for btmsg.db, sessions.db (default: ~/.local/share/bterminal)
|
||||
pub data_dir: PathBuf,
|
||||
/// Config directory for groups.json (default: ~/.config/bterminal)
|
||||
pub config_dir: PathBuf,
|
||||
/// ctx database path (default: ~/.claude-context/context.db)
|
||||
pub ctx_db_path: PathBuf,
|
||||
/// Memora database path (default: ~/.local/share/memora/memories.db)
|
||||
pub memora_db_path: PathBuf,
|
||||
/// Whether we are in test mode
|
||||
pub test_mode: bool,
|
||||
}
|
||||
|
||||
impl AppConfig {
|
||||
/// Build config from environment. In test mode, uses BTERMINAL_TEST_*_DIR env vars.
|
||||
pub fn from_env() -> Self {
|
||||
let test_mode = std::env::var("BTERMINAL_TEST").map_or(false, |v| v == "1");
|
||||
|
||||
let data_dir = std::env::var("BTERMINAL_TEST_DATA_DIR")
|
||||
.ok()
|
||||
.filter(|_| test_mode)
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|| {
|
||||
dirs::data_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("bterminal")
|
||||
});
|
||||
|
||||
let config_dir = std::env::var("BTERMINAL_TEST_CONFIG_DIR")
|
||||
.ok()
|
||||
.filter(|_| test_mode)
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|| {
|
||||
dirs::config_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("bterminal")
|
||||
});
|
||||
|
||||
let ctx_db_path = std::env::var("BTERMINAL_TEST_CTX_DIR")
|
||||
.ok()
|
||||
.filter(|_| test_mode)
|
||||
.map(|d| PathBuf::from(d).join("context.db"))
|
||||
.unwrap_or_else(|| {
|
||||
dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".claude-context")
|
||||
.join("context.db")
|
||||
});
|
||||
|
||||
let memora_db_path = if test_mode {
|
||||
// In test mode, memora is optional — use data_dir/memora/memories.db
|
||||
data_dir.join("memora").join("memories.db")
|
||||
} else {
|
||||
dirs::data_dir()
|
||||
.unwrap_or_else(|| {
|
||||
dirs::home_dir()
|
||||
.unwrap_or_default()
|
||||
.join(".local/share")
|
||||
})
|
||||
.join("memora")
|
||||
.join("memories.db")
|
||||
};
|
||||
|
||||
Self {
|
||||
data_dir,
|
||||
config_dir,
|
||||
ctx_db_path,
|
||||
memora_db_path,
|
||||
test_mode,
|
||||
}
|
||||
}
|
||||
|
||||
/// Path to btmsg.db (shared between btmsg and bttask)
|
||||
pub fn btmsg_db_path(&self) -> PathBuf {
|
||||
self.data_dir.join("btmsg.db")
|
||||
}
|
||||
|
||||
/// Directory containing sessions.db
pub fn sessions_db_dir(&self) -> &PathBuf {
    &self.data_dir
}
|
||||
|
||||
/// Path to groups.json
|
||||
pub fn groups_json_path(&self) -> PathBuf {
|
||||
self.config_dir.join("groups.json")
|
||||
}
|
||||
|
||||
/// Path to plugins directory
|
||||
pub fn plugins_dir(&self) -> PathBuf {
|
||||
self.config_dir.join("plugins")
|
||||
}
|
||||
|
||||
/// Whether running in test mode (BTERMINAL_TEST=1)
|
||||
pub fn is_test_mode(&self) -> bool {
|
||||
self.test_mode
|
||||
}
|
||||
}
|
||||
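// Illustrative usage sketch (editor's addition, not part of this commit): a caller
// resolving paths through AppConfig before touching the filesystem. The helper name
// `ensure_data_dirs` is hypothetical; the AppConfig methods are the ones defined above.
fn ensure_data_dirs() -> std::io::Result<()> {
    let cfg = AppConfig::from_env();
    // btmsg.db and sessions.db live under data_dir; groups.json and plugins under config_dir.
    std::fs::create_dir_all(&cfg.data_dir)?;
    std::fs::create_dir_all(cfg.plugins_dir())?;
    log::info!("btmsg db at {}", cfg.btmsg_db_path().display());
    log::info!("groups.json at {}", cfg.groups_json_path().display());
    Ok(())
}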
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::Mutex;
|
||||
|
||||
// Serialize all tests that mutate env vars to prevent race conditions.
|
||||
// Rust runs tests in parallel; set_var/remove_var are process-global.
|
||||
static ENV_LOCK: Mutex<()> = Mutex::new(());
|
||||
|
||||
#[test]
|
||||
fn test_production_paths_use_dirs() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
// Without BTERMINAL_TEST=1, paths should use dirs:: defaults
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
std::env::remove_var("BTERMINAL_TEST_DATA_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CONFIG_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CTX_DIR");
|
||||
|
||||
let config = AppConfig::from_env();
|
||||
assert!(!config.is_test_mode());
|
||||
// Should end with "bterminal" for data and config
|
||||
assert!(config.data_dir.ends_with("bterminal"));
|
||||
assert!(config.config_dir.ends_with("bterminal"));
|
||||
assert!(config.ctx_db_path.ends_with("context.db"));
|
||||
assert!(config.memora_db_path.ends_with("memories.db"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_btmsg_db_path() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
let config = AppConfig::from_env();
|
||||
let path = config.btmsg_db_path();
|
||||
assert!(path.ends_with("btmsg.db"));
|
||||
assert!(path.parent().unwrap().ends_with("bterminal"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_groups_json_path() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
let config = AppConfig::from_env();
|
||||
let path = config.groups_json_path();
|
||||
assert!(path.ends_with("groups.json"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_test_mode_uses_overrides() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
std::env::set_var("BTERMINAL_TEST", "1");
|
||||
std::env::set_var("BTERMINAL_TEST_DATA_DIR", "/tmp/bt-test-data");
|
||||
std::env::set_var("BTERMINAL_TEST_CONFIG_DIR", "/tmp/bt-test-config");
|
||||
std::env::set_var("BTERMINAL_TEST_CTX_DIR", "/tmp/bt-test-ctx");
|
||||
|
||||
let config = AppConfig::from_env();
|
||||
assert!(config.is_test_mode());
|
||||
assert_eq!(config.data_dir, PathBuf::from("/tmp/bt-test-data"));
|
||||
assert_eq!(config.config_dir, PathBuf::from("/tmp/bt-test-config"));
|
||||
assert_eq!(config.ctx_db_path, PathBuf::from("/tmp/bt-test-ctx/context.db"));
|
||||
assert_eq!(config.btmsg_db_path(), PathBuf::from("/tmp/bt-test-data/btmsg.db"));
|
||||
assert_eq!(config.groups_json_path(), PathBuf::from("/tmp/bt-test-config/groups.json"));
|
||||
|
||||
// Cleanup
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
std::env::remove_var("BTERMINAL_TEST_DATA_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CONFIG_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CTX_DIR");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_test_mode_without_overrides_uses_defaults() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
std::env::set_var("BTERMINAL_TEST", "1");
|
||||
std::env::remove_var("BTERMINAL_TEST_DATA_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CONFIG_DIR");
|
||||
std::env::remove_var("BTERMINAL_TEST_CTX_DIR");
|
||||
|
||||
let config = AppConfig::from_env();
|
||||
assert!(config.is_test_mode());
|
||||
// Without override vars, falls back to dirs:: defaults
|
||||
assert!(config.data_dir.ends_with("bterminal"));
|
||||
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_test_mode_memora_in_data_dir() {
|
||||
let _lock = ENV_LOCK.lock().unwrap();
|
||||
std::env::set_var("BTERMINAL_TEST", "1");
|
||||
std::env::set_var("BTERMINAL_TEST_DATA_DIR", "/tmp/bt-test-data");
|
||||
|
||||
let config = AppConfig::from_env();
|
||||
assert_eq!(
|
||||
config.memora_db_path,
|
||||
PathBuf::from("/tmp/bt-test-data/memora/memories.db")
|
||||
);
|
||||
|
||||
std::env::remove_var("BTERMINAL_TEST");
|
||||
std::env::remove_var("BTERMINAL_TEST_DATA_DIR");
|
||||
}
|
||||
}
|
||||
bterminal-core/src/event.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
/// Trait for emitting events from PTY and sidecar managers.
/// Implemented by Tauri's AppHandle (controller) and WebSocket sender (relay).
pub trait EventSink: Send + Sync {
    fn emit(&self, event: &str, payload: serde_json::Value);
}
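A minimal EventSink implementation, added as an illustrative sketch (editor's addition, not part of the commit). It mirrors the MockSink used later in the sidecar tests: events are collected in memory instead of being forwarded to Tauri or a WebSocket.

use std::sync::Mutex;

use bterminal_core::event::EventSink;

/// Collects emitted events for inspection (e.g. in tests).
struct CollectingSink {
    events: Mutex<Vec<(String, serde_json::Value)>>,
}

impl EventSink for CollectingSink {
    fn emit(&self, event: &str, payload: serde_json::Value) {
        self.events.lock().unwrap().push((event.to_string(), payload));
    }
}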
bterminal-core/src/lib.rs (new file, 6 lines)
@@ -0,0 +1,6 @@
pub mod config;
pub mod event;
pub mod pty;
pub mod sandbox;
pub mod sidecar;
pub mod supervisor;
bterminal-core/src/pty.rs (new file, 173 lines)
@@ -0,0 +1,173 @@
use portable_pty::{native_pty_system, CommandBuilder, MasterPty, PtySize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufReader, Write};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::event::EventSink;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PtyOptions {
|
||||
pub shell: Option<String>,
|
||||
pub cwd: Option<String>,
|
||||
pub args: Option<Vec<String>>,
|
||||
pub cols: Option<u16>,
|
||||
pub rows: Option<u16>,
|
||||
}
|
||||
|
||||
struct PtyInstance {
|
||||
master: Box<dyn MasterPty + Send>,
|
||||
writer: Box<dyn Write + Send>,
|
||||
}
|
||||
|
||||
pub struct PtyManager {
|
||||
instances: Arc<Mutex<HashMap<String, PtyInstance>>>,
|
||||
sink: Arc<dyn EventSink>,
|
||||
}
|
||||
|
||||
impl PtyManager {
|
||||
pub fn new(sink: Arc<dyn EventSink>) -> Self {
|
||||
Self {
|
||||
instances: Arc::new(Mutex::new(HashMap::new())),
|
||||
sink,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn spawn(&self, options: PtyOptions) -> Result<String, String> {
|
||||
let pty_system = native_pty_system();
|
||||
let cols = options.cols.unwrap_or(80);
|
||||
let rows = options.rows.unwrap_or(24);
|
||||
|
||||
let pair = pty_system
|
||||
.openpty(PtySize {
|
||||
rows,
|
||||
cols,
|
||||
pixel_width: 0,
|
||||
pixel_height: 0,
|
||||
})
|
||||
.map_err(|e| format!("Failed to open PTY: {e}"))?;
|
||||
|
||||
let shell = options.shell.unwrap_or_else(|| {
|
||||
std::env::var("SHELL").unwrap_or_else(|_| "/bin/bash".to_string())
|
||||
});
|
||||
|
||||
let mut cmd = CommandBuilder::new(&shell);
|
||||
if let Some(args) = &options.args {
|
||||
for arg in args {
|
||||
cmd.arg(arg);
|
||||
}
|
||||
}
|
||||
if let Some(cwd) = &options.cwd {
|
||||
cmd.cwd(cwd);
|
||||
}
|
||||
|
||||
let _child = pair
|
||||
.slave
|
||||
.spawn_command(cmd)
|
||||
.map_err(|e| format!("Failed to spawn command: {e}"))?;
|
||||
|
||||
drop(pair.slave);
|
||||
|
||||
let id = Uuid::new_v4().to_string();
|
||||
let reader = pair
|
||||
.master
|
||||
.try_clone_reader()
|
||||
.map_err(|e| format!("Failed to clone PTY reader: {e}"))?;
|
||||
let writer = pair
|
||||
.master
|
||||
.take_writer()
|
||||
.map_err(|e| format!("Failed to take PTY writer: {e}"))?;
|
||||
|
||||
let event_id = id.clone();
|
||||
let sink = self.sink.clone();
|
||||
thread::spawn(move || {
|
||||
let mut buf_reader = BufReader::with_capacity(4096, reader);
|
||||
let mut buf = vec![0u8; 4096];
|
||||
loop {
|
||||
match std::io::Read::read(&mut buf_reader, &mut buf) {
|
||||
Ok(0) => {
|
||||
sink.emit(
|
||||
&format!("pty-exit-{event_id}"),
|
||||
serde_json::Value::Null,
|
||||
);
|
||||
break;
|
||||
}
|
||||
Ok(n) => {
|
||||
let data = String::from_utf8_lossy(&buf[..n]).to_string();
|
||||
sink.emit(
|
||||
&format!("pty-data-{event_id}"),
|
||||
serde_json::Value::String(data),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("PTY read error for {event_id}: {e}");
|
||||
sink.emit(
|
||||
&format!("pty-exit-{event_id}"),
|
||||
serde_json::Value::Null,
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let instance = PtyInstance {
|
||||
master: pair.master,
|
||||
writer,
|
||||
};
|
||||
self.instances.lock().unwrap().insert(id.clone(), instance);
|
||||
|
||||
log::info!("Spawned PTY {id} ({shell})");
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn write(&self, id: &str, data: &str) -> Result<(), String> {
|
||||
let mut instances = self.instances.lock().unwrap();
|
||||
let instance = instances
|
||||
.get_mut(id)
|
||||
.ok_or_else(|| format!("PTY {id} not found"))?;
|
||||
instance
|
||||
.writer
|
||||
.write_all(data.as_bytes())
|
||||
.map_err(|e| format!("PTY write error: {e}"))?;
|
||||
instance
|
||||
.writer
|
||||
.flush()
|
||||
.map_err(|e| format!("PTY flush error: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn resize(&self, id: &str, cols: u16, rows: u16) -> Result<(), String> {
|
||||
let instances = self.instances.lock().unwrap();
|
||||
let instance = instances
|
||||
.get(id)
|
||||
.ok_or_else(|| format!("PTY {id} not found"))?;
|
||||
instance
|
||||
.master
|
||||
.resize(PtySize {
|
||||
rows,
|
||||
cols,
|
||||
pixel_width: 0,
|
||||
pixel_height: 0,
|
||||
})
|
||||
.map_err(|e| format!("PTY resize error: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn kill(&self, id: &str) -> Result<(), String> {
|
||||
let mut instances = self.instances.lock().unwrap();
|
||||
if instances.remove(id).is_some() {
|
||||
log::info!("Killed PTY {id}");
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format!("PTY {id} not found"))
|
||||
}
|
||||
}
|
||||
|
||||
/// List active PTY session IDs.
|
||||
pub fn list_sessions(&self) -> Vec<String> {
|
||||
self.instances.lock().unwrap().keys().cloned().collect()
|
||||
}
|
||||
}
|
||||
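Illustrative usage sketch (editor's addition, not from the commit): spawning a shell PTY and driving it through PtyManager, assuming some EventSink implementation such as the CollectingSink shown after event.rs. The function name is hypothetical.

fn demo_pty(sink: std::sync::Arc<dyn EventSink>) -> Result<(), String> {
    let manager = PtyManager::new(sink);
    let id = manager.spawn(PtyOptions {
        shell: None,              // falls back to $SHELL, then /bin/bash
        cwd: Some("/tmp".into()),
        args: None,
        cols: Some(120),
        rows: Some(40),
    })?;
    manager.write(&id, "echo hello\n")?;   // output arrives via pty-data-{id} events
    manager.resize(&id, 100, 30)?;
    manager.kill(&id)
}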
bterminal-core/src/sandbox.rs (new file, 361 lines)
@@ -0,0 +1,361 @@
// Landlock-based filesystem sandboxing for sidecar processes.
//
// Landlock is a Linux Security Module (LSM) available since kernel 5.13.
// It restricts filesystem access for the calling process and all its children.
// Applied via pre_exec() on the sidecar child process before exec.
//
// Restrictions can only be tightened after application — never relaxed.
// The sidecar is long-lived and handles queries for multiple projects,
// so we apply the union of all project paths at sidecar start time.

use std::path::PathBuf;
|
||||
|
||||
use landlock::{
|
||||
Access, AccessFs, PathBeneath, PathFd, Ruleset, RulesetAttr, RulesetCreatedAttr,
|
||||
RulesetStatus, ABI,
|
||||
};
|
||||
|
||||
/// Target Landlock ABI version. V3 requires kernel 6.2+ (we run 6.12+).
|
||||
/// Falls back gracefully on older kernels via best-effort mode.
|
||||
const TARGET_ABI: ABI = ABI::V3;
|
||||
|
||||
/// Configuration for Landlock filesystem sandboxing.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SandboxConfig {
|
||||
/// Directories with full read+write+execute access (project CWDs, worktrees, tmp)
|
||||
pub rw_paths: Vec<PathBuf>,
|
||||
/// Directories with read-only access (system libs, runtimes, config)
|
||||
pub ro_paths: Vec<PathBuf>,
|
||||
/// Whether sandboxing is enabled
|
||||
pub enabled: bool,
|
||||
}
|
||||
|
||||
impl Default for SandboxConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
rw_paths: Vec::new(),
|
||||
ro_paths: Vec::new(),
|
||||
enabled: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxConfig {
|
||||
/// Build a sandbox config for a set of project directories.
|
||||
///
|
||||
/// `project_cwds` — directories that need read+write access (one per project).
|
||||
/// `worktree_roots` — optional worktree directories (one per project that uses worktrees).
|
||||
///
|
||||
/// System paths (runtimes, libraries, /etc) are added as read-only automatically.
|
||||
pub fn for_projects(project_cwds: &[&str], worktree_roots: &[&str]) -> Self {
|
||||
let mut rw = Vec::new();
|
||||
|
||||
for cwd in project_cwds {
|
||||
rw.push(PathBuf::from(cwd));
|
||||
}
|
||||
for wt in worktree_roots {
|
||||
rw.push(PathBuf::from(wt));
|
||||
}
|
||||
|
||||
// Temp dir for sidecar scratch files
|
||||
rw.push(std::env::temp_dir());
|
||||
|
||||
let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("/root"));
|
||||
|
||||
let ro = vec![
|
||||
PathBuf::from("/usr"), // system binaries + libraries
|
||||
PathBuf::from("/lib"), // shared libraries
|
||||
PathBuf::from("/lib64"), // 64-bit shared libraries
|
||||
PathBuf::from("/etc"), // system configuration (read only)
|
||||
PathBuf::from("/proc"), // process info (Landlock V3+ handles this)
|
||||
PathBuf::from("/dev"), // device nodes (stdin/stdout/stderr, /dev/null, urandom)
|
||||
PathBuf::from("/bin"), // essential binaries (symlink to /usr/bin on most distros)
|
||||
PathBuf::from("/sbin"), // essential system binaries
|
||||
home.join(".local"), // ~/.local/bin (claude CLI, user-installed tools)
|
||||
home.join(".deno"), // Deno runtime cache
|
||||
home.join(".nvm"), // Node.js version manager
|
||||
home.join(".config"), // XDG config (claude profiles, bterminal config)
|
||||
home.join(".claude"), // Claude CLI data (worktrees, skills, settings)
|
||||
];
|
||||
|
||||
Self {
|
||||
rw_paths: rw,
|
||||
ro_paths: ro,
|
||||
enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a restricted sandbox config for Aider agent sessions.
|
||||
/// More restrictive than `for_projects`: only project worktree + read-only system paths.
|
||||
/// Does NOT allow write access to ~/.config, ~/.claude, etc.
|
||||
pub fn for_aider_restricted(project_cwd: &str, worktree: Option<&str>) -> Self {
|
||||
let mut rw = vec![PathBuf::from(project_cwd)];
|
||||
if let Some(wt) = worktree {
|
||||
rw.push(PathBuf::from(wt));
|
||||
}
|
||||
rw.push(std::env::temp_dir());
|
||||
let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("/root"));
|
||||
rw.push(home.join(".aider"));
|
||||
|
||||
let ro = vec![
|
||||
PathBuf::from("/usr"),
|
||||
PathBuf::from("/lib"),
|
||||
PathBuf::from("/lib64"),
|
||||
PathBuf::from("/etc"),
|
||||
PathBuf::from("/proc"),
|
||||
PathBuf::from("/dev"),
|
||||
PathBuf::from("/bin"),
|
||||
PathBuf::from("/sbin"),
|
||||
home.join(".local"),
|
||||
home.join(".deno"),
|
||||
home.join(".nvm"),
|
||||
];
|
||||
|
||||
Self {
|
||||
rw_paths: rw,
|
||||
ro_paths: ro,
|
||||
enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a sandbox config for a single project directory.
|
||||
pub fn for_project(cwd: &str, worktree: Option<&str>) -> Self {
|
||||
let worktrees: Vec<&str> = worktree.into_iter().collect();
|
||||
Self::for_projects(&[cwd], &worktrees)
|
||||
}
|
||||
|
||||
/// Apply Landlock restrictions to the current process.
|
||||
///
|
||||
/// This must be called in the child process (e.g., via `pre_exec`) BEFORE exec.
|
||||
/// Once applied, restrictions are inherited by all child processes and cannot be relaxed.
|
||||
///
|
||||
/// Returns:
|
||||
/// - `Ok(true)` if Landlock was applied and enforced
|
||||
/// - `Ok(false)` if the kernel does not support Landlock (graceful degradation)
|
||||
/// - `Err(msg)` on configuration or syscall errors
|
||||
pub fn apply(&self) -> Result<bool, String> {
|
||||
if !self.enabled {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let access_all = AccessFs::from_all(TARGET_ABI);
|
||||
let access_read = AccessFs::from_read(TARGET_ABI);
|
||||
|
||||
// Create ruleset handling all filesystem access types
|
||||
let mut ruleset = Ruleset::default()
|
||||
.handle_access(access_all)
|
||||
.map_err(|e| format!("Landlock: failed to handle access: {e}"))?
|
||||
.create()
|
||||
.map_err(|e| format!("Landlock: failed to create ruleset: {e}"))?;
|
||||
|
||||
// Add read+write rules for project directories and tmp
|
||||
for path in &self.rw_paths {
|
||||
if path.exists() {
|
||||
let fd = PathFd::new(path)
|
||||
.map_err(|e| format!("Landlock: PathFd failed for {}: {e}", path.display()))?;
|
||||
ruleset = ruleset
|
||||
.add_rule(PathBeneath::new(fd, access_all))
|
||||
.map_err(|e| {
|
||||
format!("Landlock: add_rule (rw) failed for {}: {e}", path.display())
|
||||
})?;
|
||||
} else {
|
||||
log::warn!(
|
||||
"Landlock: skipping non-existent rw path: {}",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Add read-only rules for system paths
|
||||
for path in &self.ro_paths {
|
||||
if path.exists() {
|
||||
let fd = PathFd::new(path)
|
||||
.map_err(|e| format!("Landlock: PathFd failed for {}: {e}", path.display()))?;
|
||||
ruleset = ruleset
|
||||
.add_rule(PathBeneath::new(fd, access_read))
|
||||
.map_err(|e| {
|
||||
format!("Landlock: add_rule (ro) failed for {}: {e}", path.display())
|
||||
})?;
|
||||
}
|
||||
// Silently skip non-existent read-only paths (e.g., /lib64 on some systems)
|
||||
}
|
||||
|
||||
// Enforce the ruleset on this thread (and inherited by children)
|
||||
let status = ruleset
|
||||
.restrict_self()
|
||||
.map_err(|e| format!("Landlock: restrict_self failed: {e}"))?;
|
||||
|
||||
// Landlock enforcement states:
|
||||
// - Enforced: kernel 6.2+ with ABI V3 (full filesystem restriction)
|
||||
// - NotEnforced: kernel 5.13–6.1 (Landlock exists but ABI too old for V3)
|
||||
// - Error (caught above): kernel <5.13 (no Landlock LSM available)
|
||||
let enforced = status.ruleset != RulesetStatus::NotEnforced;
|
||||
if enforced {
|
||||
log::info!("Landlock sandbox applied ({} rw, {} ro paths)", self.rw_paths.len(), self.ro_paths.len());
|
||||
} else {
|
||||
log::warn!(
|
||||
"Landlock not enforced — sidecar runs without filesystem restrictions. \
|
||||
Kernel 6.2+ required for enforcement."
|
||||
);
|
||||
}
|
||||
|
||||
Ok(enforced)
|
||||
}
|
||||
}
|
||||
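// Illustrative sketch (editor's addition): applying the sandbox to a child process
// via pre_exec, mirroring how SidecarManager uses it below. `spawn_sandboxed` and
// the "ls" command are hypothetical, not APIs defined in this commit.
#[cfg(unix)]
fn spawn_sandboxed(cwd: &str) -> std::io::Result<std::process::Child> {
    use std::os::unix::process::CommandExt;

    let sandbox = SandboxConfig::for_project(cwd, None);
    let mut cmd = std::process::Command::new("ls");
    unsafe {
        // pre_exec runs in the forked child before exec; restrictions are inherited.
        cmd.pre_exec(move || {
            sandbox
                .apply()
                .map(|_enforced| ())
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
        });
    }
    cmd.spawn()
}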
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_default_is_disabled() {
|
||||
let config = SandboxConfig::default();
|
||||
assert!(!config.enabled);
|
||||
assert!(config.rw_paths.is_empty());
|
||||
assert!(config.ro_paths.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_project_single_cwd() {
|
||||
let config = SandboxConfig::for_project("/home/user/myproject", None);
|
||||
assert!(config.enabled);
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/myproject")));
|
||||
assert!(config.rw_paths.contains(&std::env::temp_dir()));
|
||||
// No worktree path added
|
||||
assert!(!config
|
||||
.rw_paths
|
||||
.iter()
|
||||
.any(|p| p.to_string_lossy().contains("worktree")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_project_with_worktree() {
|
||||
let config = SandboxConfig::for_project(
|
||||
"/home/user/myproject",
|
||||
Some("/home/user/myproject/.claude/worktrees/abc123"),
|
||||
);
|
||||
assert!(config.enabled);
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/myproject")));
|
||||
assert!(config.rw_paths.contains(&PathBuf::from(
|
||||
"/home/user/myproject/.claude/worktrees/abc123"
|
||||
)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_projects_multiple_cwds() {
|
||||
let config = SandboxConfig::for_projects(
|
||||
&["/home/user/project-a", "/home/user/project-b"],
|
||||
&["/home/user/project-a/.claude/worktrees/s1"],
|
||||
);
|
||||
assert!(config.enabled);
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/project-a")));
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/project-b")));
|
||||
assert!(config.rw_paths.contains(&PathBuf::from(
|
||||
"/home/user/project-a/.claude/worktrees/s1"
|
||||
)));
|
||||
// tmp always present
|
||||
assert!(config.rw_paths.contains(&std::env::temp_dir()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ro_paths_include_system_dirs() {
|
||||
let config = SandboxConfig::for_project("/tmp/test", None);
|
||||
let ro_strs: Vec<String> = config.ro_paths.iter().map(|p| p.display().to_string()).collect();
|
||||
|
||||
assert!(ro_strs.iter().any(|p| p == "/usr"), "missing /usr");
|
||||
assert!(ro_strs.iter().any(|p| p == "/lib"), "missing /lib");
|
||||
assert!(ro_strs.iter().any(|p| p == "/etc"), "missing /etc");
|
||||
assert!(ro_strs.iter().any(|p| p == "/proc"), "missing /proc");
|
||||
assert!(ro_strs.iter().any(|p| p == "/dev"), "missing /dev");
|
||||
assert!(ro_strs.iter().any(|p| p == "/bin"), "missing /bin");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ro_paths_include_runtime_dirs() {
|
||||
let config = SandboxConfig::for_project("/tmp/test", None);
|
||||
let home = dirs::home_dir().unwrap();
|
||||
|
||||
assert!(config.ro_paths.contains(&home.join(".local")));
|
||||
assert!(config.ro_paths.contains(&home.join(".deno")));
|
||||
assert!(config.ro_paths.contains(&home.join(".nvm")));
|
||||
assert!(config.ro_paths.contains(&home.join(".config")));
|
||||
assert!(config.ro_paths.contains(&home.join(".claude")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disabled_apply_returns_false() {
|
||||
let config = SandboxConfig::default();
|
||||
assert_eq!(config.apply().unwrap(), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rw_paths_count() {
|
||||
// Single project: cwd + tmp = 2
|
||||
let config = SandboxConfig::for_project("/tmp/test", None);
|
||||
assert_eq!(config.rw_paths.len(), 2);
|
||||
|
||||
// With worktree: cwd + worktree + tmp = 3
|
||||
let config = SandboxConfig::for_project("/tmp/test", Some("/tmp/wt"));
|
||||
assert_eq!(config.rw_paths.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_aider_restricted_single_cwd() {
|
||||
let config = SandboxConfig::for_aider_restricted("/home/user/myproject", None);
|
||||
assert!(config.enabled);
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/myproject")));
|
||||
assert!(config.rw_paths.contains(&std::env::temp_dir()));
|
||||
let home = dirs::home_dir().unwrap();
|
||||
assert!(config.rw_paths.contains(&home.join(".aider")));
|
||||
// No worktree path added
|
||||
assert!(!config
|
||||
.rw_paths
|
||||
.iter()
|
||||
.any(|p| p.to_string_lossy().contains("worktree")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_aider_restricted_with_worktree() {
|
||||
let config = SandboxConfig::for_aider_restricted(
|
||||
"/home/user/myproject",
|
||||
Some("/home/user/myproject/.claude/worktrees/abc123"),
|
||||
);
|
||||
assert!(config.enabled);
|
||||
assert!(config.rw_paths.contains(&PathBuf::from("/home/user/myproject")));
|
||||
assert!(config.rw_paths.contains(&PathBuf::from(
|
||||
"/home/user/myproject/.claude/worktrees/abc123"
|
||||
)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_aider_restricted_no_config_write() {
|
||||
let config = SandboxConfig::for_aider_restricted("/tmp/test", None);
|
||||
let home = dirs::home_dir().unwrap();
|
||||
// Aider restricted must NOT have ~/.config or ~/.claude in rw_paths
|
||||
assert!(!config.rw_paths.contains(&home.join(".config")));
|
||||
assert!(!config.rw_paths.contains(&home.join(".claude")));
|
||||
// And NOT in ro_paths either (stricter than for_projects)
|
||||
assert!(!config.ro_paths.contains(&home.join(".config")));
|
||||
assert!(!config.ro_paths.contains(&home.join(".claude")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_aider_restricted_rw_count() {
|
||||
// Without worktree: cwd + tmp + .aider = 3
|
||||
let config = SandboxConfig::for_aider_restricted("/tmp/test", None);
|
||||
assert_eq!(config.rw_paths.len(), 3);
|
||||
|
||||
// With worktree: cwd + worktree + tmp + .aider = 4
|
||||
let config = SandboxConfig::for_aider_restricted("/tmp/test", Some("/tmp/wt"));
|
||||
assert_eq!(config.rw_paths.len(), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_for_projects_empty() {
|
||||
let config = SandboxConfig::for_projects(&[], &[]);
|
||||
assert!(config.enabled);
|
||||
// Only tmp dir in rw
|
||||
assert_eq!(config.rw_paths.len(), 1);
|
||||
assert_eq!(config.rw_paths[0], std::env::temp_dir());
|
||||
}
|
||||
}
|
||||
bterminal-core/src/sidecar.rs (new file, 980 lines)
@@ -0,0 +1,980 @@
// Sidecar lifecycle management (Deno-first, Node.js fallback)
// Spawns per-provider runner scripts (e.g. claude-runner.mjs, aider-runner.mjs)
// via deno or node, communicates via stdio NDJSON.
// Each provider gets its own process, started lazily on first query.
//
// Uses a std::sync::mpsc actor pattern: the actor thread owns all mutable state
// (providers HashMap, session_providers HashMap) exclusively. External callers
// send requests via a channel, eliminating the TOCTOU race in ensure_provider().
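// Example of the NDJSON exchange (editor's sketch; field values are illustrative,
// the message shapes follow the "ready"/"query"/"stop" handling in this file):
//
//   sidecar -> host:  {"type":"ready"}
//   host -> sidecar:  {"type":"query","provider":"claude","sessionId":"s1",
//                      "prompt":"hello","cwd":"/tmp","maxTurns":5, ...}
//   host -> sidecar:  {"type":"stop","sessionId":"s1"}
//
// Every message is a single JSON object terminated by a newline.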
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::path::PathBuf;
|
||||
use std::process::{Child, Command, Stdio};
|
||||
use std::sync::mpsc as std_mpsc;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crate::event::EventSink;
|
||||
use crate::sandbox::SandboxConfig;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AgentQueryOptions {
|
||||
#[serde(default = "default_provider")]
|
||||
pub provider: String,
|
||||
pub session_id: String,
|
||||
pub prompt: String,
|
||||
pub cwd: Option<String>,
|
||||
pub max_turns: Option<u32>,
|
||||
pub max_budget_usd: Option<f64>,
|
||||
pub resume_session_id: Option<String>,
|
||||
pub permission_mode: Option<String>,
|
||||
pub setting_sources: Option<Vec<String>>,
|
||||
pub system_prompt: Option<String>,
|
||||
pub model: Option<String>,
|
||||
pub claude_config_dir: Option<String>,
|
||||
pub additional_directories: Option<Vec<String>>,
|
||||
/// When set, agent runs in a git worktree for isolation (passed as --worktree <name> CLI flag)
|
||||
pub worktree_name: Option<String>,
|
||||
/// Provider-specific configuration blob (passed through to sidecar as-is)
|
||||
#[serde(default)]
|
||||
pub provider_config: serde_json::Value,
|
||||
/// Extra environment variables injected into the agent process (e.g. BTMSG_AGENT_ID)
|
||||
#[serde(default)]
|
||||
pub extra_env: std::collections::HashMap<String, String>,
|
||||
}
|
||||
|
||||
fn default_provider() -> String {
|
||||
"claude".to_string()
|
||||
}
|
||||
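// Illustrative sketch (editor's addition): the serde defaults above mean a minimal
// query only needs session_id and prompt; provider falls back to "claude", and
// provider_config/extra_env fall back to empty values. The module name is hypothetical.
#[cfg(test)]
mod query_options_defaults_example {
    use super::*;

    #[test]
    fn minimal_query_deserializes_with_defaults() {
        let opts: AgentQueryOptions =
            serde_json::from_str(r#"{"session_id":"s1","prompt":"hi"}"#).unwrap();
        assert_eq!(opts.provider, "claude");
        assert!(opts.extra_env.is_empty());
        assert!(opts.cwd.is_none());
    }
}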
|
||||
/// Directories to search for sidecar scripts.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SidecarConfig {
|
||||
pub search_paths: Vec<PathBuf>,
|
||||
/// Extra env vars forwarded to sidecar processes (e.g. BTERMINAL_TEST=1 for test isolation)
|
||||
pub env_overrides: std::collections::HashMap<String, String>,
|
||||
/// Landlock filesystem sandbox configuration (Linux 5.13+, applied via pre_exec)
|
||||
pub sandbox: SandboxConfig,
|
||||
}
|
||||
|
||||
struct SidecarCommand {
|
||||
program: String,
|
||||
args: Vec<String>,
|
||||
}
|
||||
|
||||
/// Per-provider sidecar process state.
|
||||
struct ProviderProcess {
|
||||
child: Child,
|
||||
stdin_writer: Box<dyn Write + Send>,
|
||||
ready: bool,
|
||||
/// Atomic flag set by the stdout reader thread when "ready" message arrives.
|
||||
/// The actor polls this to detect readiness without needing a separate channel.
|
||||
ready_flag: Arc<std::sync::atomic::AtomicBool>,
|
||||
}
|
||||
|
||||
/// Requests sent from public API methods to the actor thread.
|
||||
enum ProviderRequest {
|
||||
Start {
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
EnsureAndQuery {
|
||||
options: AgentQueryOptions,
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
StopSession {
|
||||
session_id: String,
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
SendMessage {
|
||||
msg: serde_json::Value,
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
Restart {
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
Shutdown {
|
||||
reply: std_mpsc::Sender<Result<(), String>>,
|
||||
},
|
||||
IsReady {
|
||||
reply: std_mpsc::Sender<bool>,
|
||||
},
|
||||
SetSandbox {
|
||||
sandbox: SandboxConfig,
|
||||
reply: std_mpsc::Sender<()>,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct SidecarManager {
|
||||
tx: std_mpsc::Sender<ProviderRequest>,
|
||||
// Keep a handle so the thread lives as long as the manager.
|
||||
// Not joined on drop — we send Shutdown instead.
|
||||
_actor_thread: Option<thread::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
/// Actor function that owns all mutable state exclusively.
|
||||
/// Receives requests via `req_rx`. Ready signaling from stdout reader threads
|
||||
/// uses per-provider AtomicBool flags (polled during ensure_provider_impl).
|
||||
fn run_actor(
|
||||
req_rx: std_mpsc::Receiver<ProviderRequest>,
|
||||
sink: Arc<dyn EventSink>,
|
||||
initial_config: SidecarConfig,
|
||||
) {
|
||||
let mut providers: HashMap<String, ProviderProcess> = HashMap::new();
|
||||
let mut session_providers: HashMap<String, String> = HashMap::new();
|
||||
let mut config = initial_config;
|
||||
|
||||
loop {
|
||||
// Block waiting for next request (with timeout so actor stays responsive)
|
||||
match req_rx.recv_timeout(std::time::Duration::from_millis(50)) {
|
||||
Ok(req) => {
|
||||
match req {
|
||||
ProviderRequest::Start { reply } => {
|
||||
let result = start_provider_impl(
|
||||
&mut providers,
|
||||
&config,
|
||||
&sink,
|
||||
"claude",
|
||||
);
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
ProviderRequest::EnsureAndQuery { options, reply } => {
|
||||
let provider = options.provider.clone();
|
||||
|
||||
// Ensure provider is ready — atomic, no TOCTOU
|
||||
if let Err(e) = ensure_provider_impl(
|
||||
&mut providers,
|
||||
&config,
|
||||
&sink,
|
||||
&provider,
|
||||
) {
|
||||
let _ = reply.send(Err(e));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Track session -> provider mapping
|
||||
session_providers.insert(options.session_id.clone(), provider.clone());
|
||||
|
||||
// Build and send query message
|
||||
let msg = build_query_msg(&options);
|
||||
let result = send_to_provider_impl(&mut providers, &provider, &msg);
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
ProviderRequest::StopSession { session_id, reply } => {
|
||||
let provider = session_providers
|
||||
.get(&session_id)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "claude".to_string());
|
||||
let msg = serde_json::json!({
|
||||
"type": "stop",
|
||||
"sessionId": session_id,
|
||||
});
|
||||
let result = send_to_provider_impl(&mut providers, &provider, &msg);
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
ProviderRequest::SendMessage { msg, reply } => {
|
||||
let result = send_to_provider_impl(&mut providers, "claude", &msg);
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
ProviderRequest::Restart { reply } => {
|
||||
log::info!("Restarting all sidecars");
|
||||
shutdown_all(&mut providers, &mut session_providers);
|
||||
let result = start_provider_impl(
|
||||
&mut providers,
|
||||
&config,
|
||||
&sink,
|
||||
"claude",
|
||||
);
|
||||
let _ = reply.send(result);
|
||||
}
|
||||
ProviderRequest::Shutdown { reply } => {
|
||||
shutdown_all(&mut providers, &mut session_providers);
|
||||
let _ = reply.send(Ok(()));
|
||||
}
|
||||
ProviderRequest::IsReady { reply } => {
|
||||
// Sync ready state from atomic flags
|
||||
sync_ready_flags(&mut providers);
|
||||
let ready = providers
|
||||
.get("claude")
|
||||
.map(|p| p.ready)
|
||||
.unwrap_or(false);
|
||||
let _ = reply.send(ready);
|
||||
}
|
||||
ProviderRequest::SetSandbox { sandbox, reply } => {
|
||||
config.sandbox = sandbox;
|
||||
let _ = reply.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(std_mpsc::RecvTimeoutError::Timeout) => {
|
||||
// Loop back -- keeps actor responsive to shutdown
|
||||
continue;
|
||||
}
|
||||
Err(std_mpsc::RecvTimeoutError::Disconnected) => {
|
||||
// All senders dropped — shut down
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — clean up remaining providers
|
||||
shutdown_all(&mut providers, &mut session_providers);
|
||||
}
|
||||
|
||||
/// Sync ready state from AtomicBool flags set by stdout reader threads.
|
||||
fn sync_ready_flags(providers: &mut HashMap<String, ProviderProcess>) {
|
||||
for p in providers.values_mut() {
|
||||
if !p.ready && p.ready_flag.load(std::sync::atomic::Ordering::Acquire) {
|
||||
p.ready = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shut down all provider processes and clear session mappings.
|
||||
fn shutdown_all(
|
||||
providers: &mut HashMap<String, ProviderProcess>,
|
||||
session_providers: &mut HashMap<String, String>,
|
||||
) {
|
||||
for (name, mut proc) in providers.drain() {
|
||||
log::info!("Shutting down {} sidecar", name);
|
||||
let _ = proc.child.kill();
|
||||
let _ = proc.child.wait();
|
||||
}
|
||||
session_providers.clear();
|
||||
}
|
||||
|
||||
/// Start a specific provider's sidecar process. Called from the actor thread
|
||||
/// which owns the providers HashMap exclusively — no lock contention possible.
|
||||
fn start_provider_impl(
|
||||
providers: &mut HashMap<String, ProviderProcess>,
|
||||
config: &SidecarConfig,
|
||||
sink: &Arc<dyn EventSink>,
|
||||
provider: &str,
|
||||
) -> Result<(), String> {
|
||||
if providers.contains_key(provider) {
|
||||
return Err(format!("Sidecar for '{}' already running", provider));
|
||||
}
|
||||
|
||||
let cmd = SidecarManager::resolve_sidecar_for_provider_with_config(config, provider)?;
|
||||
|
||||
log::info!(
|
||||
"Starting {} sidecar: {} {}",
|
||||
provider,
|
||||
cmd.program,
|
||||
cmd.args.join(" ")
|
||||
);
|
||||
|
||||
// Build a clean environment stripping provider-specific vars to prevent
|
||||
// SDKs from detecting nesting when BTerminal is launched from a provider terminal.
|
||||
let clean_env: Vec<(String, String)> = std::env::vars()
|
||||
.filter(|(k, _)| strip_provider_env_var(k))
|
||||
.collect();
|
||||
|
||||
let mut command = Command::new(&cmd.program);
|
||||
command
|
||||
.args(&cmd.args)
|
||||
.env_clear()
|
||||
.envs(clean_env)
|
||||
.envs(
|
||||
config
|
||||
.env_overrides
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str())),
|
||||
)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
// Apply Landlock sandbox in child process before exec (Linux only).
|
||||
#[cfg(unix)]
|
||||
if config.sandbox.enabled {
|
||||
let sandbox = config.sandbox.clone();
|
||||
unsafe {
|
||||
command.pre_exec(move || {
|
||||
sandbox
|
||||
.apply()
|
||||
.map(|enforced| {
|
||||
if !enforced {
|
||||
log::warn!("Landlock sandbox not enforced in sidecar child");
|
||||
}
|
||||
})
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut child = command
|
||||
.spawn()
|
||||
.map_err(|e| format!("Failed to start {} sidecar: {e}", provider))?;
|
||||
|
||||
let child_stdin = child
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or("Failed to capture sidecar stdin")?;
|
||||
let child_stdout = child
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or("Failed to capture sidecar stdout")?;
|
||||
let child_stderr = child
|
||||
.stderr
|
||||
.take()
|
||||
.ok_or("Failed to capture sidecar stderr")?;
|
||||
|
||||
// Per-provider AtomicBool for ready signaling from stdout reader thread to actor.
|
||||
let ready_flag = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let ready_flag_writer = ready_flag.clone();
|
||||
|
||||
// Stdout reader thread — forwards NDJSON to event sink
|
||||
let sink_clone = sink.clone();
|
||||
let provider_name = provider.to_string();
|
||||
thread::spawn(move || {
|
||||
let reader = BufReader::new(child_stdout);
|
||||
for line in reader.lines() {
|
||||
match line {
|
||||
Ok(line) => {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
match serde_json::from_str::<serde_json::Value>(&line) {
|
||||
Ok(msg) => {
|
||||
if msg.get("type").and_then(|t| t.as_str()) == Some("ready") {
|
||||
ready_flag_writer
|
||||
.store(true, std::sync::atomic::Ordering::Release);
|
||||
log::info!("{} sidecar ready", provider_name);
|
||||
}
|
||||
sink_clone.emit("sidecar-message", msg);
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!(
|
||||
"Invalid JSON from {} sidecar: {e}: {line}",
|
||||
provider_name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("{} sidecar stdout read error: {e}", provider_name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
log::info!("{} sidecar stdout reader exited", provider_name);
|
||||
sink_clone.emit(
|
||||
"sidecar-exited",
|
||||
serde_json::json!({ "provider": provider_name }),
|
||||
);
|
||||
});
|
||||
|
||||
// Stderr reader thread — logs only
|
||||
let provider_name2 = provider.to_string();
|
||||
thread::spawn(move || {
|
||||
let reader = BufReader::new(child_stderr);
|
||||
for line in reader.lines() {
|
||||
match line {
|
||||
Ok(line) => log::info!("[{} sidecar stderr] {line}", provider_name2),
|
||||
Err(e) => {
|
||||
log::error!("{} sidecar stderr read error: {e}", provider_name2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
providers.insert(
|
||||
provider.to_string(),
|
||||
ProviderProcess {
|
||||
child,
|
||||
stdin_writer: Box::new(child_stdin),
|
||||
ready: false,
|
||||
ready_flag,
|
||||
},
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure a provider's sidecar is running and ready, starting it lazily if needed.
|
||||
/// Called exclusively from the actor thread — no lock contention, no TOCTOU race.
|
||||
fn ensure_provider_impl(
|
||||
providers: &mut HashMap<String, ProviderProcess>,
|
||||
config: &SidecarConfig,
|
||||
sink: &Arc<dyn EventSink>,
|
||||
provider: &str,
|
||||
) -> Result<(), String> {
|
||||
// Sync ready state from atomic flag (set by stdout reader thread)
|
||||
if let Some(p) = providers.get_mut(provider) {
|
||||
if !p.ready && p.ready_flag.load(std::sync::atomic::Ordering::Acquire) {
|
||||
p.ready = true;
|
||||
}
|
||||
if p.ready {
|
||||
return Ok(());
|
||||
}
|
||||
// Started but not ready yet -- fall through to wait loop
|
||||
} else {
|
||||
// Not started -- start it now. No TOCTOU: we own the HashMap exclusively.
|
||||
start_provider_impl(providers, config, sink, provider)?;
|
||||
}
|
||||
|
||||
// Wait for ready (up to 10 seconds)
|
||||
for _ in 0..100 {
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
|
||||
if let Some(p) = providers.get_mut(provider) {
|
||||
if !p.ready && p.ready_flag.load(std::sync::atomic::Ordering::Acquire) {
|
||||
p.ready = true;
|
||||
}
|
||||
if p.ready {
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
return Err(format!("{} sidecar process exited before ready", provider));
|
||||
}
|
||||
}
|
||||
Err(format!(
|
||||
"{} sidecar did not become ready within timeout",
|
||||
provider
|
||||
))
|
||||
}
|
||||
|
||||
/// Send a JSON message to a provider's stdin.
|
||||
fn send_to_provider_impl(
|
||||
providers: &mut HashMap<String, ProviderProcess>,
|
||||
provider: &str,
|
||||
msg: &serde_json::Value,
|
||||
) -> Result<(), String> {
|
||||
let proc = providers
|
||||
.get_mut(provider)
|
||||
.ok_or_else(|| format!("{} sidecar not running", provider))?;
|
||||
|
||||
let line =
|
||||
serde_json::to_string(msg).map_err(|e| format!("JSON serialize error: {e}"))?;
|
||||
|
||||
proc.stdin_writer
|
||||
.write_all(line.as_bytes())
|
||||
.map_err(|e| format!("Sidecar write error: {e}"))?;
|
||||
proc.stdin_writer
|
||||
.write_all(b"\n")
|
||||
.map_err(|e| format!("Sidecar write error: {e}"))?;
|
||||
proc.stdin_writer
|
||||
.flush()
|
||||
.map_err(|e| format!("Sidecar flush error: {e}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build the NDJSON query message from AgentQueryOptions.
|
||||
fn build_query_msg(options: &AgentQueryOptions) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"type": "query",
|
||||
"provider": options.provider,
|
||||
"sessionId": options.session_id,
|
||||
"prompt": options.prompt,
|
||||
"cwd": options.cwd,
|
||||
"maxTurns": options.max_turns,
|
||||
"maxBudgetUsd": options.max_budget_usd,
|
||||
"resumeSessionId": options.resume_session_id,
|
||||
"permissionMode": options.permission_mode,
|
||||
"settingSources": options.setting_sources,
|
||||
"systemPrompt": options.system_prompt,
|
||||
"model": options.model,
|
||||
"claudeConfigDir": options.claude_config_dir,
|
||||
"additionalDirectories": options.additional_directories,
|
||||
"worktreeName": options.worktree_name,
|
||||
"providerConfig": options.provider_config,
|
||||
"extraEnv": options.extra_env,
|
||||
})
|
||||
}
|
||||
|
||||
impl SidecarManager {
|
||||
pub fn new(sink: Arc<dyn EventSink>, config: SidecarConfig) -> Self {
|
||||
let (req_tx, req_rx) = std_mpsc::channel();
|
||||
|
||||
let handle = thread::spawn(move || {
|
||||
run_actor(req_rx, sink, config);
|
||||
});
|
||||
|
||||
Self {
|
||||
tx: req_tx,
|
||||
_actor_thread: Some(handle),
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the sandbox configuration. Takes effect on next sidecar (re)start.
|
||||
pub fn set_sandbox(&self, sandbox: SandboxConfig) {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
if self
|
||||
.tx
|
||||
.send(ProviderRequest::SetSandbox {
|
||||
sandbox,
|
||||
reply: reply_tx,
|
||||
})
|
||||
.is_ok()
|
||||
{
|
||||
let _ = reply_rx.recv();
|
||||
}
|
||||
}
|
||||
|
||||
/// Start the default (claude) provider sidecar. Called on app startup.
|
||||
pub fn start(&self) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
self.tx
|
||||
.send(ProviderRequest::Start { reply: reply_tx })
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?;
|
||||
reply_rx
|
||||
.recv()
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?
|
||||
}
|
||||
|
||||
pub fn query(&self, options: &AgentQueryOptions) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
self.tx
|
||||
.send(ProviderRequest::EnsureAndQuery {
|
||||
options: options.clone(),
|
||||
reply: reply_tx,
|
||||
})
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?;
|
||||
reply_rx
|
||||
.recv()
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?
|
||||
}
|
||||
|
||||
pub fn stop_session(&self, session_id: &str) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
self.tx
|
||||
.send(ProviderRequest::StopSession {
|
||||
session_id: session_id.to_string(),
|
||||
reply: reply_tx,
|
||||
})
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?;
|
||||
reply_rx
|
||||
.recv()
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?
|
||||
}
|
||||
|
||||
pub fn restart(&self) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
self.tx
|
||||
.send(ProviderRequest::Restart { reply: reply_tx })
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?;
|
||||
reply_rx
|
||||
.recv()
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?
|
||||
}
|
||||
|
||||
pub fn shutdown(&self) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
if self
|
||||
.tx
|
||||
.send(ProviderRequest::Shutdown { reply: reply_tx })
|
||||
.is_ok()
|
||||
{
|
||||
let _ = reply_rx.recv();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns true if the default (claude) provider sidecar is ready.
|
||||
pub fn is_ready(&self) -> bool {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
if self
|
||||
.tx
|
||||
.send(ProviderRequest::IsReady { reply: reply_tx })
|
||||
.is_ok()
|
||||
{
|
||||
reply_rx.recv().unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Legacy send_message — routes to the default (claude) provider.
|
||||
pub fn send_message(&self, msg: &serde_json::Value) -> Result<(), String> {
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
self.tx
|
||||
.send(ProviderRequest::SendMessage {
|
||||
msg: msg.clone(),
|
||||
reply: reply_tx,
|
||||
})
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?;
|
||||
reply_rx
|
||||
.recv()
|
||||
.map_err(|_| "Sidecar actor stopped".to_string())?
|
||||
}
|
||||
|
||||
/// Resolve a sidecar command for a specific provider's runner file.
|
||||
fn resolve_sidecar_for_provider_with_config(
|
||||
config: &SidecarConfig,
|
||||
provider: &str,
|
||||
) -> Result<SidecarCommand, String> {
|
||||
let runner_name = format!("{}-runner.mjs", provider);
|
||||
|
||||
// Try Deno first (faster startup, better perf), fall back to Node.js.
|
||||
let has_deno = Command::new("deno")
|
||||
.arg("--version")
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.status()
|
||||
.is_ok();
|
||||
let has_node = Command::new("node")
|
||||
.arg("--version")
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.status()
|
||||
.is_ok();
|
||||
|
||||
let mut checked = Vec::new();
|
||||
|
||||
for base in &config.search_paths {
|
||||
let mjs_path = base.join("dist").join(&runner_name);
|
||||
if mjs_path.exists() {
|
||||
if has_deno {
|
||||
return Ok(SidecarCommand {
|
||||
program: "deno".to_string(),
|
||||
args: vec![
|
||||
"run".to_string(),
|
||||
"--allow-run".to_string(),
|
||||
"--allow-env".to_string(),
|
||||
"--allow-read".to_string(),
|
||||
"--allow-write".to_string(),
|
||||
"--allow-net".to_string(),
|
||||
mjs_path.to_string_lossy().to_string(),
|
||||
],
|
||||
});
|
||||
}
|
||||
if has_node {
|
||||
return Ok(SidecarCommand {
|
||||
program: "node".to_string(),
|
||||
args: vec![mjs_path.to_string_lossy().to_string()],
|
||||
});
|
||||
}
|
||||
}
|
||||
checked.push(mjs_path);
|
||||
}
|
||||
|
||||
let paths: Vec<_> = checked.iter().map(|p| p.display().to_string()).collect();
|
||||
let runtime_note = if !has_deno && !has_node {
|
||||
". Neither deno nor node found in PATH"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
Err(format!(
|
||||
"Sidecar not found for provider '{}'. Checked: {}{}",
|
||||
provider,
|
||||
paths.join(", "),
|
||||
runtime_note,
|
||||
))
|
||||
}
|
||||
}
|
||||
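// Illustrative usage sketch (editor's addition, not part of the commit): wiring a
// SidecarManager with a sandbox and issuing one query. The search path, project
// directory, and sink are assumptions; in the app the sink is Tauri's AppHandle.
fn demo_sidecar(sink: Arc<dyn EventSink>) -> Result<(), String> {
    let manager = SidecarManager::new(
        sink,
        SidecarConfig {
            search_paths: vec![PathBuf::from("/opt/bterminal/sidecar")],
            env_overrides: Default::default(),
            sandbox: SandboxConfig::for_project("/home/user/myproject", None),
        },
    );
    // ensure_provider runs inside the actor, so concurrent callers cannot race.
    manager.query(&AgentQueryOptions {
        provider: "claude".to_string(),
        session_id: "s1".to_string(),
        prompt: "summarize the repo".to_string(),
        cwd: Some("/home/user/myproject".to_string()),
        max_turns: Some(5),
        max_budget_usd: None,
        resume_session_id: None,
        permission_mode: None,
        setting_sources: None,
        system_prompt: None,
        model: None,
        claude_config_dir: None,
        additional_directories: None,
        worktree_name: None,
        provider_config: serde_json::Value::Null,
        extra_env: Default::default(),
    })
}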
|
||||
/// Returns true if the env var should be KEPT (not stripped).
|
||||
/// First line of defense: strips provider-specific prefixes to prevent nesting detection
|
||||
/// and credential leakage. JS runners apply a second layer of provider-specific stripping.
|
||||
///
|
||||
/// Stripped prefixes: CLAUDE*, CODEX*, OLLAMA*, AIDER*, ANTHROPIC_*
|
||||
/// Whitelisted: CLAUDE_CODE_EXPERIMENTAL_* (feature flags like agent teams)
|
||||
///
|
||||
/// Note: OPENAI_* and OPENROUTER_* are NOT stripped here because runners need
|
||||
/// these keys from the environment or extraEnv injection.
|
||||
fn strip_provider_env_var(key: &str) -> bool {
|
||||
if key.starts_with("CLAUDE_CODE_EXPERIMENTAL_") {
|
||||
return true;
|
||||
}
|
||||
if key.starts_with("CLAUDE")
|
||||
|| key.starts_with("CODEX")
|
||||
|| key.starts_with("OLLAMA")
|
||||
|| key.starts_with("AIDER")
|
||||
|| key.starts_with("ANTHROPIC_")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
impl Drop for SidecarManager {
|
||||
fn drop(&mut self) {
|
||||
// Send shutdown request to the actor. If the channel is already closed
|
||||
// (actor thread exited), this is a no-op.
|
||||
let (reply_tx, reply_rx) = std_mpsc::channel();
|
||||
if self
|
||||
.tx
|
||||
.send(ProviderRequest::Shutdown { reply: reply_tx })
|
||||
.is_ok()
|
||||
{
|
||||
// Wait briefly for the actor to clean up (with timeout to avoid hanging)
|
||||
let _ = reply_rx.recv_timeout(std::time::Duration::from_secs(5));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::Mutex;
|
||||
|
||||
// ---- strip_provider_env_var unit tests ----
|
||||
|
||||
#[test]
|
||||
fn test_keeps_normal_env_vars() {
|
||||
assert!(strip_provider_env_var("HOME"));
|
||||
assert!(strip_provider_env_var("PATH"));
|
||||
assert!(strip_provider_env_var("USER"));
|
||||
assert!(strip_provider_env_var("SHELL"));
|
||||
assert!(strip_provider_env_var("TERM"));
|
||||
assert!(strip_provider_env_var("XDG_DATA_HOME"));
|
||||
assert!(strip_provider_env_var("RUST_LOG"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strips_claude_vars() {
|
||||
assert!(!strip_provider_env_var("CLAUDE_CONFIG_DIR"));
|
||||
assert!(!strip_provider_env_var("CLAUDE_SESSION_ID"));
|
||||
assert!(!strip_provider_env_var("CLAUDECODE"));
|
||||
assert!(!strip_provider_env_var("CLAUDE_API_KEY"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_whitelists_claude_code_experimental() {
|
||||
assert!(strip_provider_env_var("CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS"));
|
||||
assert!(strip_provider_env_var("CLAUDE_CODE_EXPERIMENTAL_TOOLS"));
|
||||
assert!(strip_provider_env_var("CLAUDE_CODE_EXPERIMENTAL_SOMETHING_NEW"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strips_codex_vars() {
|
||||
assert!(!strip_provider_env_var("CODEX_API_KEY"));
|
||||
assert!(!strip_provider_env_var("CODEX_SESSION"));
|
||||
assert!(!strip_provider_env_var("CODEX_CONFIG"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strips_ollama_vars() {
|
||||
assert!(!strip_provider_env_var("OLLAMA_HOST"));
|
||||
assert!(!strip_provider_env_var("OLLAMA_MODELS"));
|
||||
assert!(!strip_provider_env_var("OLLAMA_NUM_PARALLEL"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strips_anthropic_vars() {
|
||||
// ANTHROPIC_* vars stripped at Rust layer (defense in depth)
|
||||
// Claude CLI has its own auth via credentials file
|
||||
assert!(!strip_provider_env_var("ANTHROPIC_API_KEY"));
|
||||
assert!(!strip_provider_env_var("ANTHROPIC_BASE_URL"));
|
||||
assert!(!strip_provider_env_var("ANTHROPIC_LOG"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_keeps_openai_vars() {
|
||||
// OPENAI_* vars are NOT stripped by the Rust layer
|
||||
// (they're stripped in the JS codex-runner layer instead)
|
||||
assert!(strip_provider_env_var("OPENAI_API_KEY"));
|
||||
assert!(strip_provider_env_var("OPENAI_BASE_URL"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_env_filtering_integration() {
|
||||
let test_env = vec![
|
||||
("HOME", "/home/user"),
|
||||
("PATH", "/usr/bin"),
|
||||
("CLAUDE_CONFIG_DIR", "/tmp/claude"),
|
||||
("CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS", "1"),
|
||||
("CODEX_API_KEY", "sk-test"),
|
||||
("OLLAMA_HOST", "localhost"),
|
||||
("ANTHROPIC_API_KEY", "sk-ant-xxx"),
|
||||
("OPENAI_API_KEY", "sk-openai-xxx"),
|
||||
("RUST_LOG", "debug"),
|
||||
("BTMSG_AGENT_ID", "a1"),
|
||||
];
|
||||
|
||||
let kept: Vec<&str> = test_env
|
||||
.iter()
|
||||
.filter(|(k, _)| strip_provider_env_var(k))
|
||||
.map(|(k, _)| *k)
|
||||
.collect();
|
||||
|
||||
assert!(kept.contains(&"HOME"));
|
||||
assert!(kept.contains(&"PATH"));
|
||||
assert!(kept.contains(&"CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS"));
|
||||
assert!(kept.contains(&"RUST_LOG"));
|
||||
assert!(kept.contains(&"BTMSG_AGENT_ID"));
|
||||
// OPENAI_* passes through Rust layer (Codex runner needs it)
|
||||
assert!(kept.contains(&"OPENAI_API_KEY"));
|
||||
// These are stripped:
|
||||
assert!(!kept.contains(&"CLAUDE_CONFIG_DIR"));
|
||||
assert!(!kept.contains(&"CODEX_API_KEY"));
|
||||
assert!(!kept.contains(&"OLLAMA_HOST"));
|
||||
assert!(!kept.contains(&"ANTHROPIC_API_KEY"));
|
||||
}
|
||||
|
||||
// ---- Actor pattern tests ----
|
||||
|
||||
/// Mock EventSink that records emitted events.
|
||||
struct MockSink {
|
||||
events: Mutex<Vec<(String, serde_json::Value)>>,
|
||||
}
|
||||
|
||||
impl MockSink {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
events: Mutex::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EventSink for MockSink {
|
||||
fn emit(&self, event: &str, payload: serde_json::Value) {
|
||||
self.events
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push((event.to_string(), payload));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_actor_new_and_drop() {
|
||||
// SidecarManager should create and drop cleanly without panicking
|
||||
let sink: Arc<dyn EventSink> = Arc::new(MockSink::new());
|
||||
let config = SidecarConfig {
|
||||
search_paths: vec![],
|
||||
env_overrides: Default::default(),
|
||||
sandbox: Default::default(),
|
||||
};
|
||||
let manager = SidecarManager::new(sink, config);
|
||||
// is_ready should return false since no provider started
|
||||
assert!(!manager.is_ready());
|
||||
// Drop should send shutdown cleanly
|
||||
drop(manager);
|
||||
}

    #[test]
    fn test_actor_shutdown_idempotent() {
        let sink: Arc<dyn EventSink> = Arc::new(MockSink::new());
        let config = SidecarConfig {
            search_paths: vec![],
            env_overrides: Default::default(),
            sandbox: Default::default(),
        };
        let manager = SidecarManager::new(sink, config);
        // Multiple shutdowns should not panic
        assert!(manager.shutdown().is_ok());
        assert!(manager.shutdown().is_ok());
    }

    #[test]
    fn test_actor_set_sandbox() {
        let sink: Arc<dyn EventSink> = Arc::new(MockSink::new());
        let config = SidecarConfig {
            search_paths: vec![],
            env_overrides: Default::default(),
            sandbox: Default::default(),
        };
        let manager = SidecarManager::new(sink, config);
        // set_sandbox should complete without error
        manager.set_sandbox(SandboxConfig {
            rw_paths: vec![PathBuf::from("/tmp")],
            ro_paths: vec![],
            enabled: true,
        });
    }

    #[test]
    fn test_build_query_msg_fields() {
        let options = AgentQueryOptions {
            provider: "claude".to_string(),
            session_id: "s1".to_string(),
            prompt: "hello".to_string(),
            cwd: Some("/tmp".to_string()),
            max_turns: Some(5),
            max_budget_usd: None,
            resume_session_id: None,
            permission_mode: Some("bypassPermissions".to_string()),
            setting_sources: None,
            system_prompt: None,
            model: Some("claude-4-opus".to_string()),
            claude_config_dir: None,
            additional_directories: None,
            worktree_name: None,
            provider_config: serde_json::Value::Null,
            extra_env: Default::default(),
        };
        let msg = build_query_msg(&options);
        assert_eq!(msg["type"], "query");
        assert_eq!(msg["provider"], "claude");
        assert_eq!(msg["sessionId"], "s1");
        assert_eq!(msg["prompt"], "hello");
        assert_eq!(msg["cwd"], "/tmp");
        assert_eq!(msg["maxTurns"], 5);
        assert_eq!(msg["model"], "claude-4-opus");
    }

    #[test]
    fn test_concurrent_queries_no_race() {
        // This test verifies that concurrent query() calls from multiple threads
        // are serialized by the actor and don't cause a TOCTOU race on ensure_provider.
        // Since we can't actually start a sidecar in tests (no runner scripts),
        // we verify that the actor handles multiple concurrent requests gracefully
        // (all get errors, none panic or deadlock).

        let sink: Arc<dyn EventSink> = Arc::new(MockSink::new());
        let config = SidecarConfig {
            search_paths: vec![], // No search paths → start_provider will fail
            env_overrides: Default::default(),
            sandbox: Default::default(),
        };
        let manager = Arc::new(SidecarManager::new(sink, config));

        let mut handles = vec![];
        let errors = Arc::new(Mutex::new(Vec::new()));

        // Spawn 10 concurrent query() calls
        for i in 0..10 {
            let mgr = manager.clone();
            let errs = errors.clone();
            handles.push(thread::spawn(move || {
                let options = AgentQueryOptions {
                    provider: "test-provider".to_string(),
                    session_id: format!("session-{}", i),
                    prompt: "hello".to_string(),
                    cwd: None,
                    max_turns: None,
                    max_budget_usd: None,
                    resume_session_id: None,
                    permission_mode: None,
                    setting_sources: None,
                    system_prompt: None,
                    model: None,
                    claude_config_dir: None,
                    additional_directories: None,
                    worktree_name: None,
                    provider_config: serde_json::Value::Null,
                    extra_env: Default::default(),
                };
                let result = mgr.query(&options);
                if let Err(e) = result {
                    errs.lock().unwrap().push(e);
                }
            }));
        }

        for h in handles {
            h.join().expect("Thread should not panic");
        }

        // All 10 should have failed (no sidecar scripts available), but none panicked
        let errs = errors.lock().unwrap();
        assert_eq!(errs.len(), 10, "All 10 concurrent queries should get errors");

        // The key invariant: no "Sidecar for 'X' already running" error.
        // Because the actor serializes requests, the second caller sees the first's
        // start_provider result (either success or failure), not a conflicting start.
        // With no search paths, all errors should be "Sidecar not found" style.
        for err in errs.iter() {
            assert!(
                !err.contains("already running"),
                "Should not get 'already running' error from serialized actor. Got: {err}"
            );
        }
    }
}
684
bterminal-core/src/supervisor.rs
Normal file

@@ -0,0 +1,684 @@
// Sidecar crash recovery and supervision.
// Wraps a SidecarManager with automatic restart, exponential backoff,
// and health status tracking. Emits `sidecar-health-changed` events.

use serde::{Deserialize, Serialize};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use crate::event::EventSink;
use crate::sidecar::{AgentQueryOptions, SidecarConfig, SidecarManager};

/// Health status of the supervised sidecar process.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum SidecarHealth {
    Healthy,
    Degraded {
        restart_count: u32,
    },
    Failed {
        #[serde(default)]
        last_error: String,
    },
}
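
// Wire shapes emitted with `sidecar-health-changed` (illustrative, matching the
// serde attributes above and the serialization tests at the bottom of this file):
//   {"status":"healthy"}
//   {"status":"degraded","restart_count":3}
//   {"status":"failed","last_error":"process killed"}
// Variant names are camelCased by `rename_all`; field names keep their
// snake_case spelling.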

/// Configuration for supervisor restart behavior.
#[derive(Debug, Clone)]
pub struct SupervisorConfig {
    /// Maximum restart attempts before entering Failed state (default: 5)
    pub max_retries: u32,
    /// Base backoff in milliseconds, doubled each retry (default: 1000, cap: 30000)
    pub backoff_base_ms: u64,
    /// Maximum backoff in milliseconds (default: 30000)
    pub backoff_cap_ms: u64,
    /// Stable operation duration before restart_count resets (default: 5 minutes)
    pub stability_window: Duration,
}

impl Default for SupervisorConfig {
    fn default() -> Self {
        Self {
            max_retries: 5,
            backoff_base_ms: 1000,
            backoff_cap_ms: 30_000,
            stability_window: Duration::from_secs(300),
        }
    }
}
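
// With these defaults the restart delays work out to 1s, 2s, 4s, 8s, 16s for
// attempts 1 through 5; a sixth crash (without a 5-minute stable run in
// between) transitions the supervisor to `Failed` instead of restarting.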

/// Internal state shared between the supervisor and its event interceptor.
struct SupervisorState {
    health: SidecarHealth,
    restart_count: u32,
    last_crash_time: Option<Instant>,
    last_start_time: Option<Instant>,
}

impl SupervisorState {
    fn new() -> Self {
        Self {
            health: SidecarHealth::Healthy,
            restart_count: 0,
            last_crash_time: None,
            last_start_time: None,
        }
    }
}

/// Compute exponential backoff: base_ms * 2^attempt, capped at cap_ms.
fn compute_backoff(base_ms: u64, attempt: u32, cap_ms: u64) -> Duration {
    let backoff = base_ms.saturating_mul(1u64.checked_shl(attempt).unwrap_or(u64::MAX));
    Duration::from_millis(backoff.min(cap_ms))
}
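
// The shift goes through `checked_shl` and the multiply through `saturating_mul`,
// so very large attempt numbers (>= 64) saturate to the cap instead of
// overflowing or panicking; `test_backoff_overflow_safe` below pins this down.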

/// EventSink wrapper that intercepts `sidecar-exited` events and triggers
/// supervisor restart logic, while forwarding all other events unchanged.
struct SupervisorSink {
    outer_sink: Arc<dyn EventSink>,
    state: Arc<Mutex<SupervisorState>>,
    config: SupervisorConfig,
    sidecar_config: SidecarConfig,
}

impl EventSink for SupervisorSink {
    fn emit(&self, event: &str, payload: serde_json::Value) {
        if event == "sidecar-exited" {
            self.handle_exit();
        } else {
            self.outer_sink.emit(event, payload);
        }
    }
}
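
// Interception semantics: a `sidecar-exited` event is swallowed here and turned
// into restart handling plus a `sidecar-health-changed` emission; it is only
// forwarded to the outer sink when the supervisor gives up (retry budget
// exhausted, or a restart attempt itself fails — see `handle_exit` below).
// Every other event passes through unchanged.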

impl SupervisorSink {
    fn handle_exit(&self) {
        let (should_restart, backoff, restart_count) = {
            let mut state = self.state.lock().unwrap();

            // If the sidecar ran stably since its last start, reset the counter.
            if let Some(start_time) = state.last_start_time {
                if start_time.elapsed() >= self.config.stability_window {
                    log::info!(
                        "Sidecar ran stable for {:?}, resetting restart count",
                        start_time.elapsed()
                    );
                    state.restart_count = 0;
                }
            }

            state.restart_count += 1;
            state.last_crash_time = Some(Instant::now());
            let count = state.restart_count;

            if count > self.config.max_retries {
                let error = format!("Exceeded max retries ({})", self.config.max_retries);
                log::error!("Sidecar supervisor: {}", error);
                state.health = SidecarHealth::Failed {
                    last_error: error.clone(),
                };
                self.emit_health(&state.health);
                // Forward the original exited event so the frontend knows.
                self.outer_sink
                    .emit("sidecar-exited", serde_json::Value::Null);
                return;
            }

            state.health = SidecarHealth::Degraded {
                restart_count: count,
            };
            self.emit_health(&state.health);

            let backoff = compute_backoff(
                self.config.backoff_base_ms,
                count - 1,
                self.config.backoff_cap_ms,
            );

            (true, backoff, count)
        };

        if !should_restart {
            return;
        }

        log::warn!(
            "Sidecar crashed (attempt {}/{}), restarting in {:?}",
            restart_count,
            self.config.max_retries,
            backoff
        );

        // Restart on a background thread to avoid blocking the stdout reader.
        let outer_sink = self.outer_sink.clone();
        let state = self.state.clone();
        let sidecar_config = self.sidecar_config.clone();
        let supervisor_state = self.state.clone();
        let stability_window = self.config.stability_window;
        let max_retries = self.config.max_retries;
        let backoff_base_ms = self.config.backoff_base_ms;
        let backoff_cap_ms = self.config.backoff_cap_ms;

        std::thread::spawn(move || {
            std::thread::sleep(backoff);

            // Create a new SidecarManager that shares our supervisor sink.
            // We need a new interceptor sink to capture the next exit event.
            let new_state = state.clone();
            let new_outer = outer_sink.clone();
            let new_sidecar_config = sidecar_config.clone();

            let interceptor: Arc<dyn EventSink> = Arc::new(SupervisorSink {
                outer_sink: new_outer.clone(),
                state: new_state.clone(),
                config: SupervisorConfig {
                    max_retries,
                    backoff_base_ms,
                    backoff_cap_ms,
                    stability_window,
                },
                sidecar_config: new_sidecar_config.clone(),
            });

            let new_manager = SidecarManager::new(interceptor, new_sidecar_config);
            match new_manager.start() {
                Ok(()) => {
                    let mut s = supervisor_state.lock().unwrap();
                    s.last_start_time = Some(Instant::now());
                    log::info!("Sidecar restarted successfully (attempt {})", restart_count);
                    // Note: we cannot replace the manager reference held by the outer
                    // SidecarSupervisor from here, so that reference becomes stale after
                    // a restart; the new manager handles its own lifecycle. This is
                    // acceptable because:
                    // 1. The new manager's stdout reader will emit through our sink chain.
                    // 2. The old manager's child process is already dead.
                    // A more sophisticated approach would swap the new manager into the
                    // supervisor's `Arc<Mutex<SidecarManager>>` (see SidecarSupervisor below).
                }
                Err(e) => {
                    log::error!("Sidecar restart failed: {}", e);
                    let mut s = supervisor_state.lock().unwrap();
                    s.health = SidecarHealth::Failed {
                        last_error: e.clone(),
                    };
                    // Emit health change + forward exited
                    drop(s);
                    let health = SidecarHealth::Failed { last_error: e };
                    emit_health_event(&new_outer, &health);
                    new_outer.emit("sidecar-exited", serde_json::Value::Null);
                }
            }
        });
    }

    fn emit_health(&self, health: &SidecarHealth) {
        emit_health_event(&self.outer_sink, health);
    }
}
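
// Restart lifecycle in summary: a crash increments the counter and emits
// Degraded(n); after the computed backoff a fresh SidecarManager is started on
// a background thread. On success, `last_start_time` is updated so a stable run
// longer than the stability window resets the counter; on failure the
// supervisor emits Failed and forwards `sidecar-exited` to the outer sink.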

fn emit_health_event(sink: &Arc<dyn EventSink>, health: &SidecarHealth) {
    let payload = serde_json::to_value(health).unwrap_or(serde_json::Value::Null);
    sink.emit("sidecar-health-changed", payload);
}

/// Supervised sidecar process with automatic crash recovery.
///
/// Wraps a `SidecarManager` and intercepts exit events to perform automatic
/// restarts with exponential backoff. Tracks health status and emits
/// `sidecar-health-changed` events.
pub struct SidecarSupervisor {
    manager: Arc<Mutex<SidecarManager>>,
    state: Arc<Mutex<SupervisorState>>,
    outer_sink: Arc<dyn EventSink>,
    #[allow(dead_code)]
    supervisor_config: SupervisorConfig,
    #[allow(dead_code)]
    sidecar_config: SidecarConfig,
}

impl SidecarSupervisor {
    pub fn new(
        sink: Arc<dyn EventSink>,
        sidecar_config: SidecarConfig,
        supervisor_config: SupervisorConfig,
    ) -> Self {
        let state = Arc::new(Mutex::new(SupervisorState::new()));

        let interceptor: Arc<dyn EventSink> = Arc::new(SupervisorSink {
            outer_sink: sink.clone(),
            state: state.clone(),
            config: supervisor_config.clone(),
            sidecar_config: sidecar_config.clone(),
        });

        let manager = SidecarManager::new(interceptor, sidecar_config.clone());

        Self {
            manager: Arc::new(Mutex::new(manager)),
            state,
            outer_sink: sink,
            supervisor_config,
            sidecar_config,
        }
    }

    /// Start the supervised sidecar process.
    pub fn start(&self) -> Result<(), String> {
        let manager = self.manager.lock().unwrap();
        let result = manager.start();
        if result.is_ok() {
            let mut state = self.state.lock().unwrap();
            state.last_start_time = Some(Instant::now());
            state.health = SidecarHealth::Healthy;
        }
        result
    }

    /// Send a raw JSON message to the sidecar.
    pub fn send_message(&self, msg: &serde_json::Value) -> Result<(), String> {
        self.manager.lock().unwrap().send_message(msg)
    }

    /// Send an agent query to the sidecar.
    pub fn query(&self, options: &AgentQueryOptions) -> Result<(), String> {
        self.manager.lock().unwrap().query(options)
    }

    /// Stop a specific agent session.
    pub fn stop_session(&self, session_id: &str) -> Result<(), String> {
        self.manager.lock().unwrap().stop_session(session_id)
    }

    /// Check if the sidecar is ready to accept queries.
    pub fn is_ready(&self) -> bool {
        self.manager.lock().unwrap().is_ready()
    }

    /// Shut down the sidecar process.
    pub fn shutdown(&self) -> Result<(), String> {
        let mut state = self.state.lock().unwrap();
        state.health = SidecarHealth::Healthy;
        state.restart_count = 0;
        drop(state);
        self.manager.lock().unwrap().shutdown()
    }

    /// Get the current health status.
    pub fn health(&self) -> SidecarHealth {
        self.state.lock().unwrap().health.clone()
    }

    /// Get the current restart count.
    pub fn restart_count(&self) -> u32 {
        self.state.lock().unwrap().restart_count
    }

    /// Manually reset the supervisor state (e.g., after user intervention).
    pub fn reset(&self) {
        let mut state = self.state.lock().unwrap();
        state.health = SidecarHealth::Healthy;
        state.restart_count = 0;
        state.last_crash_time = None;
        emit_health_event(&self.outer_sink, &state.health);
    }
}

impl Drop for SidecarSupervisor {
    fn drop(&mut self) {
        let _ = self.shutdown();
    }
}
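
// Typical wiring, as an illustrative sketch only — `AppSink` is a hypothetical
// `EventSink` implementation standing in for whatever the host application
// provides, and the search path shown is made up; neither is part of this crate:
//
//     let sink: Arc<dyn EventSink> = Arc::new(AppSink::new());
//     let supervisor = SidecarSupervisor::new(
//         sink,
//         SidecarConfig {
//             search_paths: vec![PathBuf::from("/opt/bterminal/sidecar")],
//             env_overrides: Default::default(),
//             sandbox: Default::default(),
//         },
//         SupervisorConfig::default(),
//     );
//     supervisor.start()?;
//     // Crashes are now restarted automatically; poll `supervisor.health()`
//     // or listen for `sidecar-health-changed` events.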

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU32, Ordering};

    // ---- compute_backoff tests ----

    #[test]
    fn test_backoff_base_case() {
        let d = compute_backoff(1000, 0, 30_000);
        assert_eq!(d, Duration::from_millis(1000));
    }

    #[test]
    fn test_backoff_exponential() {
        assert_eq!(compute_backoff(1000, 1, 30_000), Duration::from_millis(2000));
        assert_eq!(compute_backoff(1000, 2, 30_000), Duration::from_millis(4000));
        assert_eq!(compute_backoff(1000, 3, 30_000), Duration::from_millis(8000));
        assert_eq!(compute_backoff(1000, 4, 30_000), Duration::from_millis(16000));
    }

    #[test]
    fn test_backoff_capped() {
        assert_eq!(compute_backoff(1000, 5, 30_000), Duration::from_millis(30_000));
        assert_eq!(compute_backoff(1000, 10, 30_000), Duration::from_millis(30_000));
    }

    #[test]
    fn test_backoff_overflow_safe() {
        // Very large attempt should not panic, just cap
        assert_eq!(compute_backoff(1000, 63, 30_000), Duration::from_millis(30_000));
        assert_eq!(compute_backoff(1000, 100, 30_000), Duration::from_millis(30_000));
    }

    #[test]
    fn test_backoff_custom_base() {
        assert_eq!(compute_backoff(500, 0, 10_000), Duration::from_millis(500));
        assert_eq!(compute_backoff(500, 1, 10_000), Duration::from_millis(1000));
        assert_eq!(compute_backoff(500, 5, 10_000), Duration::from_millis(10_000));
    }

    // ---- SidecarHealth serialization tests ----

    #[test]
    fn test_health_serialize_healthy() {
        let h = SidecarHealth::Healthy;
        let json = serde_json::to_value(&h).unwrap();
        assert_eq!(json["status"], "healthy");
    }

    #[test]
    fn test_health_serialize_degraded() {
        let h = SidecarHealth::Degraded { restart_count: 3 };
        let json = serde_json::to_value(&h).unwrap();
        assert_eq!(json["status"], "degraded");
        assert_eq!(json["restart_count"], 3);
    }

    #[test]
    fn test_health_serialize_failed() {
        let h = SidecarHealth::Failed {
            last_error: "process killed".to_string(),
        };
        let json = serde_json::to_value(&h).unwrap();
        assert_eq!(json["status"], "failed");
        assert_eq!(json["last_error"], "process killed");
    }

    #[test]
    fn test_health_deserialize_roundtrip() {
        let cases = vec![
            SidecarHealth::Healthy,
            SidecarHealth::Degraded { restart_count: 2 },
            SidecarHealth::Failed {
                last_error: "OOM".to_string(),
            },
        ];
        for h in cases {
            let json = serde_json::to_string(&h).unwrap();
            let back: SidecarHealth = serde_json::from_str(&json).unwrap();
            assert_eq!(h, back);
        }
    }

    // ---- SupervisorConfig defaults ----

    #[test]
    fn test_supervisor_config_defaults() {
        let cfg = SupervisorConfig::default();
        assert_eq!(cfg.max_retries, 5);
        assert_eq!(cfg.backoff_base_ms, 1000);
        assert_eq!(cfg.backoff_cap_ms, 30_000);
        assert_eq!(cfg.stability_window, Duration::from_secs(300));
    }

    // ---- SupervisorState tests ----

    #[test]
    fn test_initial_state() {
        let state = SupervisorState::new();
        assert_eq!(state.health, SidecarHealth::Healthy);
        assert_eq!(state.restart_count, 0);
        assert!(state.last_crash_time.is_none());
        assert!(state.last_start_time.is_none());
    }

    // ---- Event interception tests (using mock sink) ----

    /// Mock EventSink that records emitted events.
    struct MockSink {
        events: Mutex<Vec<(String, serde_json::Value)>>,
        exit_count: AtomicU32,
    }

    impl MockSink {
        fn new() -> Self {
            Self {
                events: Mutex::new(Vec::new()),
                exit_count: AtomicU32::new(0),
            }
        }

        fn events(&self) -> Vec<(String, serde_json::Value)> {
            self.events.lock().unwrap().clone()
        }

        fn health_events(&self) -> Vec<SidecarHealth> {
            self.events
                .lock()
                .unwrap()
                .iter()
                .filter(|(name, _)| name == "sidecar-health-changed")
                .filter_map(|(_, payload)| serde_json::from_value(payload.clone()).ok())
                .collect()
        }
    }

    impl EventSink for MockSink {
        fn emit(&self, event: &str, payload: serde_json::Value) {
            if event == "sidecar-exited" {
                self.exit_count.fetch_add(1, Ordering::SeqCst);
            }
            self.events
                .lock()
                .unwrap()
                .push((event.to_string(), payload));
        }
    }

    #[test]
    fn test_non_exit_events_forwarded() {
        let outer = Arc::new(MockSink::new());
        let state = Arc::new(Mutex::new(SupervisorState::new()));
        let sink = SupervisorSink {
            outer_sink: outer.clone(),
            state,
            config: SupervisorConfig::default(),
            sidecar_config: SidecarConfig {
                search_paths: vec![],
                env_overrides: Default::default(),
                sandbox: Default::default(),
            },
        };

        let payload = serde_json::json!({"type": "ready"});
        sink.emit("sidecar-message", payload.clone());

        let events = outer.events();
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].0, "sidecar-message");
        assert_eq!(events[0].1, payload);
    }

    #[test]
    fn test_exit_triggers_degraded_health() {
        let outer = Arc::new(MockSink::new());
        let state = Arc::new(Mutex::new(SupervisorState::new()));
        let sink = SupervisorSink {
            outer_sink: outer.clone(),
            state: state.clone(),
            config: SupervisorConfig {
                max_retries: 5,
                backoff_base_ms: 100,
                backoff_cap_ms: 1000,
                stability_window: Duration::from_secs(300),
            },
            sidecar_config: SidecarConfig {
                search_paths: vec![],
                env_overrides: Default::default(),
                sandbox: Default::default(),
            },
        };

        // Simulate exit
        sink.emit("sidecar-exited", serde_json::Value::Null);

        let s = state.lock().unwrap();
        assert_eq!(s.restart_count, 1);
        assert!(s.last_crash_time.is_some());
        match &s.health {
            SidecarHealth::Degraded { restart_count } => assert_eq!(*restart_count, 1),
            other => panic!("Expected Degraded, got {:?}", other),
        }

        // Should have emitted health-changed event
        let health_events = outer.health_events();
        assert_eq!(health_events.len(), 1);
        assert_eq!(
            health_events[0],
            SidecarHealth::Degraded { restart_count: 1 }
        );
    }

    #[test]
    fn test_exit_exceeding_max_retries_fails() {
        let outer = Arc::new(MockSink::new());
        let state = Arc::new(Mutex::new(SupervisorState {
            health: SidecarHealth::Degraded { restart_count: 5 },
            restart_count: 5,
            last_crash_time: Some(Instant::now()),
            last_start_time: Some(Instant::now()),
        }));

        let sink = SupervisorSink {
            outer_sink: outer.clone(),
            state: state.clone(),
            config: SupervisorConfig {
                max_retries: 5,
                ..SupervisorConfig::default()
            },
            sidecar_config: SidecarConfig {
                search_paths: vec![],
                env_overrides: Default::default(),
                sandbox: Default::default(),
            },
        };

        // This is attempt 6, which exceeds max_retries=5
        sink.emit("sidecar-exited", serde_json::Value::Null);

        let s = state.lock().unwrap();
        assert_eq!(s.restart_count, 6);
        match &s.health {
            SidecarHealth::Failed { last_error } => {
                assert!(last_error.contains("Exceeded max retries"));
            }
            other => panic!("Expected Failed, got {:?}", other),
        }

        // Should have emitted health-changed with Failed + forwarded sidecar-exited
        let events = outer.events();
        let health_changed = events
            .iter()
            .filter(|(name, _)| name == "sidecar-health-changed")
            .count();
        let exited = events
            .iter()
            .filter(|(name, _)| name == "sidecar-exited")
            .count();
        assert_eq!(health_changed, 1);
        assert_eq!(exited, 1); // Forwarded after max retries
    }

    #[test]
    fn test_stability_window_resets_count() {
        let outer = Arc::new(MockSink::new());
        // Simulate: started 6 minutes ago, ran stable
        let state = Arc::new(Mutex::new(SupervisorState {
            health: SidecarHealth::Degraded { restart_count: 3 },
            restart_count: 3,
            last_crash_time: Some(Instant::now() - Duration::from_secs(400)),
            last_start_time: Some(Instant::now() - Duration::from_secs(360)),
        }));

        let sink = SupervisorSink {
            outer_sink: outer.clone(),
            state: state.clone(),
            config: SupervisorConfig {
                max_retries: 5,
                stability_window: Duration::from_secs(300), // 5 min
                backoff_base_ms: 100,
                backoff_cap_ms: 1000,
            },
            sidecar_config: SidecarConfig {
                search_paths: vec![],
                env_overrides: Default::default(),
                sandbox: Default::default(),
            },
        };

        sink.emit("sidecar-exited", serde_json::Value::Null);

        let s = state.lock().unwrap();
        // Count was reset to 0 then incremented to 1
        assert_eq!(s.restart_count, 1);
        match &s.health {
            SidecarHealth::Degraded { restart_count } => assert_eq!(*restart_count, 1),
            other => panic!("Expected Degraded(1), got {:?}", other),
        }
    }

    #[test]
    fn test_multiple_crashes_increment_count() {
        let outer = Arc::new(MockSink::new());
        let state = Arc::new(Mutex::new(SupervisorState::new()));

        let sink = SupervisorSink {
            outer_sink: outer.clone(),
            state: state.clone(),
            config: SupervisorConfig {
                max_retries: 10,
                backoff_base_ms: 100,
                backoff_cap_ms: 1000,
                stability_window: Duration::from_secs(300),
            },
            sidecar_config: SidecarConfig {
                search_paths: vec![],
                env_overrides: Default::default(),
                sandbox: Default::default(),
            },
        };

        for i in 1..=3 {
            sink.emit("sidecar-exited", serde_json::Value::Null);
            let s = state.lock().unwrap();
            assert_eq!(s.restart_count, i);
        }

        let health_events = outer.health_events();
        assert_eq!(health_events.len(), 3);
        assert_eq!(
            health_events[2],
            SidecarHealth::Degraded { restart_count: 3 }
        );
    }

    #[test]
    fn test_health_equality() {
        assert_eq!(SidecarHealth::Healthy, SidecarHealth::Healthy);
        assert_eq!(
            SidecarHealth::Degraded { restart_count: 2 },
            SidecarHealth::Degraded { restart_count: 2 }
        );
        assert_ne!(
            SidecarHealth::Degraded { restart_count: 1 },
            SidecarHealth::Degraded { restart_count: 2 }
        );
        assert_ne!(
            SidecarHealth::Healthy,
            SidecarHealth::Failed {
                last_error: String::new(),
            }
        );
    }
}