feat(electrobun): multi-machine relay + OTEL telemetry
Multi-machine relay:
- relay-client.ts: WebSocket client for agor-relay with token auth, exponential backoff (1s-30s), TCP probe, heartbeat (15s ping)
- machines-store.svelte.ts: remote machine state tracking
- RemoteMachinesSettings.svelte: machine list, add/connect/disconnect UI
- 7 RPC types (remote.connect/disconnect/list/send/status + events)

Telemetry:
- telemetry.ts: OTEL spans + OTLP/HTTP export to Tempo, controlled by AGOR_OTLP_ENDPOINT env var
- telemetry-bridge.ts: tel.info/warn/error frontend convenience API
- telemetry.log RPC for frontend→Bun tracing
This commit is contained in:
parent
ec30c69c3e
commit
88206205fe
11 changed files with 1458 additions and 15 deletions
193
ui-electrobun/src/bun/telemetry.ts
Normal file
193
ui-electrobun/src/bun/telemetry.ts
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
/**
|
||||
* OpenTelemetry integration for the Bun process.
|
||||
*
|
||||
* Controlled by AGOR_OTLP_ENDPOINT env var:
|
||||
* - Set (e.g. "http://localhost:4318") -> OTLP/HTTP trace export + console
|
||||
* - Absent, or AGOR_TEST=1 -> console-only (no network calls)
|
||||
*
|
||||
* Provides structured span creation for agent sessions, PTY operations, and
|
||||
* RPC calls. Frontend events are forwarded via the telemetry.log RPC.
|
||||
*/
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Severity of a structured log message; selects the console method used when the message is printed. */
export type LogLevel = "info" | "warn" | "error";
|
||||
|
||||
/**
 * Flat bag of key/value attributes attached to a span or log event.
 * Values are restricted to primitives (no nesting, no arrays).
 */
export interface SpanAttributes {
  /** Attribute value, keyed by attribute name. */
  [key: string]: string | number | boolean;
}
|
||||
|
||||
/** Bookkeeping record for a span that has been started but not yet ended. */
interface ActiveSpan {
  /** Span name as given when the span was started. */
  name: string;
  /** Attributes supplied when the span was started. */
  attributes: SpanAttributes;
  /** Start timestamp in epoch milliseconds (taken from Date.now()). */
  startTime: number;
}
|
||||
|
||||
// ── Telemetry Manager ──────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Console + optional OTLP/HTTP telemetry sink.
 *
 * Every span and log message is echoed to the console. When export is enabled
 * (see init()), each ended span and each log message is additionally POSTed as
 * a single-span OTLP/JSON payload to `<endpoint>/v1/traces`.
 */
class TelemetryManager {
  private enabled = false;
  private endpoint = "";
  private readonly activeSpans = new Map<string, ActiveSpan>();
  private spanCounter = 0;
  private readonly serviceName = "agent-orchestrator-electrobun";
  private readonly serviceVersion = "3.0.0-dev";

  /**
   * Initialize telemetry from the environment. Call once at startup.
   *
   * Export is enabled only when AGOR_OTLP_ENDPOINT is non-blank and
   * AGOR_TEST is not "1"; otherwise the manager stays console-only.
   */
  init(): void {
    // Trim so a whitespace-only value is treated as "not set" instead of
    // producing an unusable export URL.
    const endpoint = (process.env.AGOR_OTLP_ENDPOINT ?? "").trim();
    const isTest = process.env.AGOR_TEST === "1";

    if (endpoint && !isTest) {
      this.enabled = true;
      // Strip trailing slashes before appending the OTLP traces path.
      this.endpoint = `${endpoint.replace(/\/+$/, "")}/v1/traces`;
      console.log(`[telemetry] OTLP export enabled -> ${this.endpoint}`);
      return;
    }

    this.enabled = false;
    this.endpoint = "";
    // Report the actual reason export is off.
    console.log(
      endpoint
        ? "[telemetry] Console-only (AGOR_TEST=1)"
        : "[telemetry] Console-only (AGOR_OTLP_ENDPOINT not set)",
    );
  }

  /**
   * Start a named span.
   *
   * @param name - Span name.
   * @param attributes - Attributes recorded with the span.
   * @returns A spanId to pass to endSpan().
   */
  span(name: string, attributes: SpanAttributes = {}): string {
    const spanId = `span_${++this.spanCounter}_${Date.now()}`;
    this.activeSpans.set(spanId, {
      name,
      attributes,
      startTime: Date.now(),
    });
    this.consoleLog("info", `[span:start] ${name}`, attributes);
    return spanId;
  }

  /**
   * End a span and, when export is enabled, send it via OTLP.
   * Unknown (or already ended) span ids are ignored.
   *
   * @param spanId - Id returned by span().
   * @param extraAttributes - Attributes merged over the start attributes;
   *   `durationMs` is always set last and wins over both.
   */
  endSpan(spanId: string, extraAttributes: SpanAttributes = {}): void {
    const active = this.activeSpans.get(spanId);
    if (!active) return;
    this.activeSpans.delete(spanId);

    const durationMs = Date.now() - active.startTime;
    const allAttributes = { ...active.attributes, ...extraAttributes, durationMs };

    this.consoleLog("info", `[span:end] ${active.name} (${durationMs}ms)`, allAttributes);

    if (this.enabled) {
      // Fire-and-forget: exportSpan handles its own failures.
      void this.exportSpan(active.name, active.startTime, durationMs, allAttributes);
    }
  }

  /**
   * Log a structured message. Used for frontend-forwarded events.
   *
   * @param level - Severity; selects console.log / console.warn / console.error.
   * @param message - Message text.
   * @param attributes - Attributes printed (and exported) with the message.
   */
  log(level: LogLevel, message: string, attributes: SpanAttributes = {}): void {
    this.consoleLog(level, message, attributes);

    if (this.enabled) {
      // Fire-and-forget: exportLog handles its own failures.
      void this.exportLog(level, message, attributes);
    }
  }

  /**
   * Shutdown: forget spans that were started but never ended.
   * Exports already in flight are not awaited or cancelled here.
   */
  shutdown(): void {
    this.activeSpans.clear();
    if (this.enabled) {
      console.log("[telemetry] Shutdown");
    }
  }

  // ── Internal ─────────────────────────────────────────────────────────────

  /** Print a `[tel]`-prefixed line, appending attributes as JSON when non-empty. */
  private consoleLog(level: LogLevel, message: string, attrs: SpanAttributes): void {
    const attrStr = Object.keys(attrs).length > 0
      ? ` ${JSON.stringify(attrs)}`
      : "";

    switch (level) {
      case "error": console.error(`[tel] ${message}${attrStr}`); break;
      case "warn": console.warn(`[tel] ${message}${attrStr}`); break;
      default: console.log(`[tel] ${message}${attrStr}`); break;
    }
  }

  /**
   * Encode one attribute value as an OTLP/JSON AnyValue.
   *
   * Only safe integers use `intValue`; any other number uses `doubleValue`,
   * because a fractional or out-of-range `intValue` is not a valid int64.
   * Non-finite numbers are sent as the strings "NaN" / "Infinity" /
   * "-Infinity", since JSON.stringify would otherwise turn them into null.
   */
  private toAnyValue(
    value: string | number | boolean,
  ): Record<string, string | number | boolean> {
    switch (typeof value) {
      case "boolean":
        return { boolValue: value };
      case "number":
        if (Number.isSafeInteger(value)) return { intValue: value };
        return { doubleValue: Number.isFinite(value) ? value : String(value) };
      default:
        return { stringValue: String(value) };
    }
  }

  /**
   * POST a single finished span to the OTLP endpoint.
   * Each call uses a fresh random trace id and span id. Network errors and
   * non-2xx responses are reported with console.warn and never thrown.
   */
  private async exportSpan(
    name: string,
    startTimeMs: number,
    durationMs: number,
    attributes: SpanAttributes,
  ): Promise<void> {
    const traceId = this.randomHex(32);
    const spanId = this.randomHex(16);
    // OTLP timestamps are nanoseconds since the epoch, serialized as strings.
    const startNs = BigInt(startTimeMs) * 1_000_000n;
    const endNs = BigInt(startTimeMs + durationMs) * 1_000_000n;

    const otlpPayload = {
      resourceSpans: [{
        resource: {
          attributes: [
            { key: "service.name", value: { stringValue: this.serviceName } },
            { key: "service.version", value: { stringValue: this.serviceVersion } },
          ],
        },
        scopeSpans: [{
          scope: { name: this.serviceName },
          spans: [{
            traceId,
            spanId,
            name,
            kind: 1, // INTERNAL
            startTimeUnixNano: startNs.toString(),
            endTimeUnixNano: endNs.toString(),
            attributes: Object.entries(attributes).map(([key, value]) => ({
              key,
              value: this.toAnyValue(value),
            })),
            status: { code: 1 }, // OK
          }],
        }],
      }],
    };

    try {
      const response = await fetch(this.endpoint, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(otlpPayload),
        signal: AbortSignal.timeout(5_000),
      });
      // fetch only rejects on network failure; a collector that answers
      // 4xx/5xx would otherwise look like a successful export.
      if (!response.ok) {
        console.warn(`[telemetry] OTLP export failed: HTTP ${response.status}`);
      }
    } catch (err) {
      console.warn("[telemetry] OTLP export failed:", err instanceof Error ? err.message : err);
    }
  }

  /** Export a log message as a zero-duration span named `log.<level>`. */
  private async exportLog(
    level: LogLevel,
    message: string,
    attributes: SpanAttributes,
  ): Promise<void> {
    // Wrap log as a zero-duration span for Tempo compatibility
    await this.exportSpan(
      `log.${level}`,
      Date.now(),
      0,
      { ...attributes, "log.message": message, "log.level": level },
    );
  }

  /** Return `length` lowercase hex characters built from random bytes. */
  private randomHex(length: number): string {
    const bytes = new Uint8Array(length / 2);
    crypto.getRandomValues(bytes);
    return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
  }
}
|
||||
|
||||
// ── Singleton ──────────────────────────────────────────────────────────────
|
||||
|
||||
/** Shared TelemetryManager instance. Stays console-only until init() enables OTLP export. */
export const telemetry = new TelemetryManager();
|
||||
|
||||
/**
 * Initialize the shared telemetry instance. Call once at app startup.
 * Thin wrapper that delegates to `telemetry.init()`.
 */
export function initTelemetry(): void {
  telemetry.init();
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue