feat(electrobun): multi-machine relay + OTEL telemetry
Multi-machine relay: - relay-client.ts: WebSocket client for agor-relay with token auth, exponential backoff (1s-30s), TCP probe, heartbeat (15s ping) - machines-store.svelte.ts: remote machine state tracking - RemoteMachinesSettings.svelte: machine list, add/connect/disconnect UI - 7 RPC types (remote.connect/disconnect/list/send/status + events) Telemetry: - telemetry.ts: OTEL spans + OTLP/HTTP export to Tempo, controlled by AGOR_OTLP_ENDPOINT env var - telemetry-bridge.ts: tel.info/warn/error frontend convenience API - telemetry.log RPC for frontend→Bun tracing
This commit is contained in:
parent
ec30c69c3e
commit
88206205fe
11 changed files with 1458 additions and 15 deletions
343
ui-electrobun/src/bun/relay-client.ts
Normal file
343
ui-electrobun/src/bun/relay-client.ts
Normal file
|
|
@ -0,0 +1,343 @@
|
|||
/**
|
||||
* WebSocket client for connecting to agor-relay instances.
|
||||
*
|
||||
* Features:
|
||||
* - Token-based auth handshake (Bearer header)
|
||||
* - Exponential backoff reconnection (1s–30s cap)
|
||||
* - TCP probe before full WS upgrade on reconnect
|
||||
* - Per-connection command routing
|
||||
* - Event forwarding to webview via callback
|
||||
*/
|
||||
|
||||
import { randomUUID } from "crypto";
|
||||
import { Socket } from "net";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
export type ConnectionStatus = "connecting" | "connected" | "disconnected" | "error";
|
||||
|
||||
export interface RelayCommand {
|
||||
id: string;
|
||||
type: string;
|
||||
payload: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface RelayEvent {
|
||||
type: string;
|
||||
sessionId?: string;
|
||||
machineId?: string;
|
||||
payload?: unknown;
|
||||
}
|
||||
|
||||
export type EventCallback = (machineId: string, event: RelayEvent) => void;
|
||||
export type StatusCallback = (machineId: string, status: ConnectionStatus, error?: string) => void;
|
||||
|
||||
interface MachineConnection {
|
||||
machineId: string;
|
||||
label: string;
|
||||
url: string;
|
||||
token: string;
|
||||
status: ConnectionStatus;
|
||||
latencyMs: number | null;
|
||||
ws: WebSocket | null;
|
||||
heartbeatTimer: ReturnType<typeof setInterval> | null;
|
||||
reconnectTimer: ReturnType<typeof setTimeout> | null;
|
||||
cancelled: boolean;
|
||||
lastPingSent: number;
|
||||
}
|
||||
|
||||
// ── Relay Client ───────────────────────────────────────────────────────────
|
||||
|
||||
export class RelayClient {
|
||||
private machines = new Map<string, MachineConnection>();
|
||||
private eventListeners: EventCallback[] = [];
|
||||
private statusListeners: StatusCallback[] = [];
|
||||
|
||||
/** Register an event listener for relay events from any machine. */
|
||||
onEvent(cb: EventCallback): void {
|
||||
this.eventListeners.push(cb);
|
||||
}
|
||||
|
||||
/** Register a listener for connection status changes. */
|
||||
onStatus(cb: StatusCallback): void {
|
||||
this.statusListeners.push(cb);
|
||||
}
|
||||
|
||||
/** Connect to an agor-relay instance. Returns a machine ID. */
|
||||
async connect(url: string, token: string, label?: string): Promise<string> {
|
||||
const machineId = randomUUID();
|
||||
const machine: MachineConnection = {
|
||||
machineId,
|
||||
label: label ?? url,
|
||||
url,
|
||||
token,
|
||||
status: "connecting",
|
||||
latencyMs: null,
|
||||
ws: null,
|
||||
heartbeatTimer: null,
|
||||
reconnectTimer: null,
|
||||
cancelled: false,
|
||||
lastPingSent: 0,
|
||||
};
|
||||
this.machines.set(machineId, machine);
|
||||
this.emitStatus(machineId, "connecting");
|
||||
|
||||
try {
|
||||
await this.openWebSocket(machine);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
machine.status = "error";
|
||||
this.emitStatus(machineId, "error", msg);
|
||||
this.scheduleReconnect(machine);
|
||||
}
|
||||
|
||||
return machineId;
|
||||
}
|
||||
|
||||
/** Disconnect from a relay and stop reconnection attempts. */
|
||||
disconnect(machineId: string): void {
|
||||
const machine = this.machines.get(machineId);
|
||||
if (!machine) return;
|
||||
|
||||
machine.cancelled = true;
|
||||
this.cleanupConnection(machine);
|
||||
machine.status = "disconnected";
|
||||
this.emitStatus(machineId, "disconnected");
|
||||
}
|
||||
|
||||
/** Remove a machine entirely from tracking. */
|
||||
removeMachine(machineId: string): void {
|
||||
this.disconnect(machineId);
|
||||
this.machines.delete(machineId);
|
||||
}
|
||||
|
||||
/** Send a command to a connected relay. */
|
||||
sendCommand(machineId: string, type: string, payload: Record<string, unknown>): void {
|
||||
const machine = this.machines.get(machineId);
|
||||
if (!machine?.ws || machine.status !== "connected") {
|
||||
throw new Error(`Machine ${machineId} not connected`);
|
||||
}
|
||||
|
||||
const cmd: RelayCommand = {
|
||||
id: randomUUID(),
|
||||
type,
|
||||
payload,
|
||||
};
|
||||
machine.ws.send(JSON.stringify(cmd));
|
||||
}
|
||||
|
||||
/** Get the status of a specific machine. */
|
||||
getStatus(machineId: string): { status: ConnectionStatus; latencyMs: number | null } | null {
|
||||
const machine = this.machines.get(machineId);
|
||||
if (!machine) return null;
|
||||
return { status: machine.status, latencyMs: machine.latencyMs };
|
||||
}
|
||||
|
||||
/** List all tracked machines. */
|
||||
listMachines(): Array<{
|
||||
machineId: string;
|
||||
label: string;
|
||||
url: string;
|
||||
status: ConnectionStatus;
|
||||
latencyMs: number | null;
|
||||
}> {
|
||||
return Array.from(this.machines.values()).map((m) => ({
|
||||
machineId: m.machineId,
|
||||
label: m.label,
|
||||
url: m.url,
|
||||
status: m.status,
|
||||
latencyMs: m.latencyMs,
|
||||
}));
|
||||
}
|
||||
|
||||
// ── Internal ─────────────────────────────────────────────────────────────
|
||||
|
||||
private async openWebSocket(machine: MachineConnection): Promise<void> {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const ws = new WebSocket(machine.url, {
|
||||
headers: {
|
||||
Authorization: `Bearer ${machine.token}`,
|
||||
},
|
||||
} as unknown as string[]);
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
ws.close();
|
||||
reject(new Error("Connection timeout (10s)"));
|
||||
}, 10_000);
|
||||
|
||||
ws.addEventListener("open", () => {
|
||||
clearTimeout(timeout);
|
||||
machine.ws = ws;
|
||||
machine.status = "connected";
|
||||
machine.cancelled = false;
|
||||
this.emitStatus(machine.machineId, "connected");
|
||||
this.startHeartbeat(machine);
|
||||
resolve();
|
||||
});
|
||||
|
||||
ws.addEventListener("message", (ev) => {
|
||||
this.handleMessage(machine, String(ev.data));
|
||||
});
|
||||
|
||||
ws.addEventListener("close", () => {
|
||||
clearTimeout(timeout);
|
||||
if (machine.status === "connected") {
|
||||
this.cleanupConnection(machine);
|
||||
machine.status = "disconnected";
|
||||
this.emitStatus(machine.machineId, "disconnected");
|
||||
if (!machine.cancelled) {
|
||||
this.scheduleReconnect(machine);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ws.addEventListener("error", (ev) => {
|
||||
clearTimeout(timeout);
|
||||
const errMsg = "WebSocket error";
|
||||
if (machine.status !== "connected") {
|
||||
reject(new Error(errMsg));
|
||||
} else {
|
||||
this.cleanupConnection(machine);
|
||||
machine.status = "error";
|
||||
this.emitStatus(machine.machineId, "error", errMsg);
|
||||
if (!machine.cancelled) {
|
||||
this.scheduleReconnect(machine);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
private handleMessage(machine: MachineConnection, data: string): void {
|
||||
let event: RelayEvent;
|
||||
try {
|
||||
event = JSON.parse(data) as RelayEvent;
|
||||
} catch {
|
||||
console.error(`[relay] Invalid JSON from ${machine.machineId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle pong for latency measurement
|
||||
if (event.type === "pong") {
|
||||
if (machine.lastPingSent > 0) {
|
||||
machine.latencyMs = Date.now() - machine.lastPingSent;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Forward all other events
|
||||
event.machineId = machine.machineId;
|
||||
for (const cb of this.eventListeners) {
|
||||
try {
|
||||
cb(machine.machineId, event);
|
||||
} catch (err) {
|
||||
console.error("[relay] Event listener error:", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private startHeartbeat(machine: MachineConnection): void {
|
||||
this.stopHeartbeat(machine);
|
||||
machine.heartbeatTimer = setInterval(() => {
|
||||
if (machine.ws?.readyState === WebSocket.OPEN) {
|
||||
machine.lastPingSent = Date.now();
|
||||
machine.ws.send(JSON.stringify({ id: "", type: "ping", payload: {} }));
|
||||
}
|
||||
}, 15_000);
|
||||
}
|
||||
|
||||
private stopHeartbeat(machine: MachineConnection): void {
|
||||
if (machine.heartbeatTimer) {
|
||||
clearInterval(machine.heartbeatTimer);
|
||||
machine.heartbeatTimer = null;
|
||||
}
|
||||
}
|
||||
|
||||
private cleanupConnection(machine: MachineConnection): void {
|
||||
this.stopHeartbeat(machine);
|
||||
if (machine.reconnectTimer) {
|
||||
clearTimeout(machine.reconnectTimer);
|
||||
machine.reconnectTimer = null;
|
||||
}
|
||||
if (machine.ws) {
|
||||
try { machine.ws.close(); } catch { /* ignore */ }
|
||||
machine.ws = null;
|
||||
}
|
||||
}
|
||||
|
||||
private scheduleReconnect(machine: MachineConnection): void {
|
||||
let delay = 1_000;
|
||||
const maxDelay = 30_000;
|
||||
|
||||
const attempt = async () => {
|
||||
if (machine.cancelled || !this.machines.has(machine.machineId)) return;
|
||||
|
||||
machine.status = "connecting";
|
||||
this.emitStatus(machine.machineId, "connecting");
|
||||
|
||||
// TCP probe first — avoids full WS overhead if host unreachable
|
||||
const probeOk = await this.tcpProbe(machine.url);
|
||||
if (!probeOk) {
|
||||
delay = Math.min(delay * 2, maxDelay);
|
||||
if (!machine.cancelled) {
|
||||
machine.reconnectTimer = setTimeout(attempt, delay);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.openWebSocket(machine);
|
||||
// Success — reset
|
||||
} catch {
|
||||
delay = Math.min(delay * 2, maxDelay);
|
||||
if (!machine.cancelled) {
|
||||
machine.reconnectTimer = setTimeout(attempt, delay);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
machine.reconnectTimer = setTimeout(attempt, delay);
|
||||
}
|
||||
|
||||
/** TCP-only probe to check if the relay host is reachable. */
|
||||
private tcpProbe(url: string): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
const host = this.extractHost(url);
|
||||
if (!host) { resolve(false); return; }
|
||||
|
||||
const [hostname, portStr] = host.includes(":")
|
||||
? [host.split(":")[0], host.split(":")[1]]
|
||||
: [host, "9750"];
|
||||
const port = parseInt(portStr, 10);
|
||||
|
||||
const socket = new Socket();
|
||||
const timer = setTimeout(() => { socket.destroy(); resolve(false); }, 5_000);
|
||||
|
||||
socket.connect(port, hostname, () => {
|
||||
clearTimeout(timer);
|
||||
socket.destroy();
|
||||
resolve(true);
|
||||
});
|
||||
|
||||
socket.on("error", () => {
|
||||
clearTimeout(timer);
|
||||
socket.destroy();
|
||||
resolve(false);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
private extractHost(url: string): string | null {
|
||||
return url.replace("wss://", "").replace("ws://", "").split("/")[0] ?? null;
|
||||
}
|
||||
|
||||
private emitStatus(machineId: string, status: ConnectionStatus, error?: string): void {
|
||||
for (const cb of this.statusListeners) {
|
||||
try {
|
||||
cb(machineId, status, error);
|
||||
} catch (err) {
|
||||
console.error("[relay] Status listener error:", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue