feat(electrobun): multi-machine relay + OTEL telemetry

Multi-machine relay:
- relay-client.ts: WebSocket client for agor-relay with token auth,
  exponential backoff (1s-30s), TCP probe, heartbeat (15s ping)
- machines-store.svelte.ts: remote machine state tracking
- RemoteMachinesSettings.svelte: machine list, add/connect/disconnect UI
- 7 RPC types (remote.connect/disconnect/list/send/status + events)

Telemetry:
- telemetry.ts: OTEL spans + OTLP/HTTP export to Tempo,
  controlled by AGOR_OTLP_ENDPOINT env var
- telemetry-bridge.ts: tel.info/warn/error frontend convenience API
- telemetry.log RPC for frontend→Bun tracing
This commit is contained in:
Hibryda 2026-03-22 01:46:03 +01:00
parent ec30c69c3e
commit 88206205fe
11 changed files with 1458 additions and 15 deletions

View file

@ -0,0 +1,193 @@
/**
* OpenTelemetry integration for the Bun process.
*
* Controlled by AGOR_OTLP_ENDPOINT env var:
* - Set (e.g. "http://localhost:4318") -> OTLP/HTTP trace export + console
* - Absent -> console-only (no network calls)
*
* Provides structured span creation for agent sessions, PTY operations, and
* RPC calls. Frontend events are forwarded via the telemetry.log RPC.
*/
// ── Types ──────────────────────────────────────────────────────────────────
export type LogLevel = "info" | "warn" | "error";
export interface SpanAttributes {
[key: string]: string | number | boolean;
}
interface ActiveSpan {
name: string;
attributes: SpanAttributes;
startTime: number;
}
// ── Telemetry Manager ──────────────────────────────────────────────────────
class TelemetryManager {
private enabled = false;
private endpoint = "";
private activeSpans = new Map<string, ActiveSpan>();
private spanCounter = 0;
private serviceName = "agent-orchestrator-electrobun";
private serviceVersion = "3.0.0-dev";
/** Initialize telemetry. Call once at startup. */
init(): void {
const endpoint = process.env.AGOR_OTLP_ENDPOINT ?? "";
const isTest = process.env.AGOR_TEST === "1";
if (endpoint && !isTest) {
this.enabled = true;
this.endpoint = endpoint.endsWith("/")
? endpoint + "v1/traces"
: endpoint + "/v1/traces";
console.log(`[telemetry] OTLP export enabled -> ${this.endpoint}`);
} else {
console.log("[telemetry] Console-only (AGOR_OTLP_ENDPOINT not set)");
}
}
/** Start a named span. Returns a spanId to pass to endSpan(). */
span(name: string, attributes: SpanAttributes = {}): string {
const spanId = `span_${++this.spanCounter}_${Date.now()}`;
this.activeSpans.set(spanId, {
name,
attributes,
startTime: Date.now(),
});
this.consoleLog("info", `[span:start] ${name}`, attributes);
return spanId;
}
/** End a span and optionally export it via OTLP. */
endSpan(spanId: string, extraAttributes: SpanAttributes = {}): void {
const active = this.activeSpans.get(spanId);
if (!active) return;
this.activeSpans.delete(spanId);
const durationMs = Date.now() - active.startTime;
const allAttributes = { ...active.attributes, ...extraAttributes, durationMs };
this.consoleLog("info", `[span:end] ${active.name} (${durationMs}ms)`, allAttributes);
if (this.enabled) {
this.exportSpan(active.name, active.startTime, durationMs, allAttributes);
}
}
/** Log a structured message. Used for frontend-forwarded events. */
log(level: LogLevel, message: string, attributes: SpanAttributes = {}): void {
this.consoleLog(level, message, attributes);
if (this.enabled) {
this.exportLog(level, message, attributes);
}
}
/** Shutdown — flush any pending exports. */
shutdown(): void {
this.activeSpans.clear();
if (this.enabled) {
console.log("[telemetry] Shutdown");
}
}
// ── Internal ─────────────────────────────────────────────────────────────
private consoleLog(level: LogLevel, message: string, attrs: SpanAttributes): void {
const attrStr = Object.keys(attrs).length > 0
? ` ${JSON.stringify(attrs)}`
: "";
switch (level) {
case "error": console.error(`[tel] ${message}${attrStr}`); break;
case "warn": console.warn(`[tel] ${message}${attrStr}`); break;
default: console.log(`[tel] ${message}${attrStr}`); break;
}
}
private async exportSpan(
name: string,
startTimeMs: number,
durationMs: number,
attributes: SpanAttributes,
): Promise<void> {
const traceId = this.randomHex(32);
const spanId = this.randomHex(16);
const startNs = BigInt(startTimeMs) * 1_000_000n;
const endNs = BigInt(startTimeMs + durationMs) * 1_000_000n;
const otlpPayload = {
resourceSpans: [{
resource: {
attributes: [
{ key: "service.name", value: { stringValue: this.serviceName } },
{ key: "service.version", value: { stringValue: this.serviceVersion } },
],
},
scopeSpans: [{
scope: { name: this.serviceName },
spans: [{
traceId,
spanId,
name,
kind: 1, // INTERNAL
startTimeUnixNano: startNs.toString(),
endTimeUnixNano: endNs.toString(),
attributes: Object.entries(attributes).map(([key, value]) => ({
key,
value: typeof value === "number"
? { intValue: value }
: typeof value === "boolean"
? { boolValue: value }
: { stringValue: String(value) },
})),
status: { code: 1 }, // OK
}],
}],
}],
};
try {
await fetch(this.endpoint, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(otlpPayload),
signal: AbortSignal.timeout(5_000),
});
} catch (err) {
console.warn("[telemetry] OTLP export failed:", err instanceof Error ? err.message : err);
}
}
private async exportLog(
level: LogLevel,
message: string,
attributes: SpanAttributes,
): Promise<void> {
// Wrap log as a zero-duration span for Tempo compatibility
await this.exportSpan(
`log.${level}`,
Date.now(),
0,
{ ...attributes, "log.message": message, "log.level": level },
);
}
private randomHex(length: number): string {
const bytes = new Uint8Array(length / 2);
crypto.getRandomValues(bytes);
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
}
}
// ── Singleton ──────────────────────────────────────────────────────────────
export const telemetry = new TelemetryManager();
/** Initialize telemetry. Call once at app startup. */
export function initTelemetry(): void {
telemetry.init();
}