Include full contents of all nested repositories

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 16:25:02 +01:00
parent 14ff8fd54c
commit 2401ed446f
7271 changed files with 1310112 additions and 6 deletions
--- a/openclaw/extensions/voice-call/src/providers/base.ts
+++ b/openclaw/extensions/voice-call/src/providers/base.ts
@@ -0,0 +1,68 @@
+import type {
+  HangupCallInput,
+  InitiateCallInput,
+  InitiateCallResult,
+  PlayTtsInput,
+  ProviderName,
+  WebhookParseOptions,
+  ProviderWebhookParseResult,
+  StartListeningInput,
+  StopListeningInput,
+  WebhookContext,
+  WebhookVerificationResult,
+} from "../types.js";
+
+/**
+ * Abstract base interface for voice call providers.
+ *
+ * Each provider (Telnyx, Twilio, etc.) implements this interface to provide
+ * a consistent API for the call manager.
+ *
+ * Responsibilities:
+ * - Webhook verification and event parsing
+ * - Outbound call initiation and hangup
+ * - Media control (TTS playback, STT listening)
+ *
+ * The two webhook methods return their result synchronously; every
+ * call-control method returns a Promise.
+ */
+export interface VoiceCallProvider {
+  /** Provider identifier */
+  readonly name: ProviderName;
+
+  /**
+   * Verify webhook signature/HMAC before processing.
+   * Must be called before parseWebhookEvent.
+   *
+   * @param ctx - The incoming webhook request.
+   * @returns The verification outcome for that request.
+   */
+  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult;
+
+  /**
+   * Parse provider-specific webhook payload into normalized events.
+   * Returns events and optional response to send back to provider.
+   *
+   * @param ctx - The incoming webhook request.
+   * @param options - Optional parse hints. For example, the Plivo provider
+   *   reads `verifiedRequestKey` from it and uses it as the event dedupe key.
+   * @returns The normalized events plus the response to send to the provider.
+   */
+  parseWebhookEvent(ctx: WebhookContext, options?: WebhookParseOptions): ProviderWebhookParseResult;
+
+  /**
+   * Initiate an outbound call.
+   *
+   * @param input - Parameters describing the call to place.
+   * @returns Provider call ID and status
+   */
+  initiateCall(input: InitiateCallInput): Promise<InitiateCallResult>;
+
+  /**
+   * Hang up an active call.
+   *
+   * @param input - Identifies the call to terminate.
+   */
+  hangupCall(input: HangupCallInput): Promise<void>;
+
+  /**
+   * Play TTS audio to the caller.
+   * The provider should handle streaming if supported.
+   *
+   * @param input - Identifies the call and carries what should be spoken.
+   */
+  playTts(input: PlayTtsInput): Promise<void>;
+
+  /**
+   * Start listening for user speech (activate STT).
+   *
+   * @param input - Identifies the call to listen on.
+   */
+  startListening(input: StartListeningInput): Promise<void>;
+
+  /**
+   * Stop listening for user speech (deactivate STT).
+   *
+   * @param input - Identifies the call to stop listening on.
+   */
+  stopListening(input: StopListeningInput): Promise<void>;
+}
--- a/openclaw/extensions/voice-call/src/providers/index.ts
+++ b/openclaw/extensions/voice-call/src/providers/index.ts
@@ -0,0 +1,10 @@
+export type { VoiceCallProvider } from "./base.js";
+export { MockProvider } from "./mock.js";
+export {
+  OpenAIRealtimeSTTProvider,
+  type RealtimeSTTConfig,
+  type RealtimeSTTSession,
+} from "./stt-openai-realtime.js";
+export { TelnyxProvider } from "./telnyx.js";
+export { TwilioProvider } from "./twilio.js";
+export { PlivoProvider } from "./plivo.js";
--- a/openclaw/extensions/voice-call/src/providers/mock.ts
+++ b/openclaw/extensions/voice-call/src/providers/mock.ts
@@ -0,0 +1,169 @@
+import crypto from "node:crypto";
+import type {
+  EndReason,
+  HangupCallInput,
+  InitiateCallInput,
+  InitiateCallResult,
+  NormalizedEvent,
+  PlayTtsInput,
+  WebhookParseOptions,
+  ProviderWebhookParseResult,
+  StartListeningInput,
+  StopListeningInput,
+  WebhookContext,
+  WebhookVerificationResult,
+} from "../types.js";
+import type { VoiceCallProvider } from "./base.js";
+
+/**
+ * In-process voice call provider for local testing; it never contacts a carrier.
+ *
+ * Events are injected through the webhook endpoint as a JSON POST body:
+ * - { events: NormalizedEvent[] } to deliver several events at once
+ * - { event: NormalizedEvent } to deliver a single event
+ */
+export class MockProvider implements VoiceCallProvider {
+  readonly name = "mock" as const;
+
+  /** Accepts every webhook unconditionally. */
+  verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
+    return { ok: true };
+  }
+
+  /**
+   * Decode the JSON body and normalize the event(s) it carries.
+   *
+   * Entries that cannot be normalized are dropped. Any exception raised while
+   * decoding or normalizing turns the whole request into an empty 400 result.
+   */
+  parseWebhookEvent(
+    ctx: WebhookContext,
+    _options?: WebhookParseOptions,
+  ): ProviderWebhookParseResult {
+    const accepted: NormalizedEvent[] = [];
+
+    try {
+      const body = JSON.parse(ctx.rawBody);
+
+      // `events` (bulk) takes precedence over `event` (single).
+      const candidates: Array<Partial<NormalizedEvent>> = Array.isArray(body.events)
+        ? body.events
+        : body.event
+          ? [body.event]
+          : [];
+
+      for (const candidate of candidates) {
+        const event = this.normalizeEvent(candidate);
+        if (event) {
+          accepted.push(event);
+        }
+      }
+    } catch {
+      return { events: [], statusCode: 400 };
+    }
+
+    return { events: accepted, statusCode: 200 };
+  }
+
+  /**
+   * Fill in defaults for a loosely-shaped event.
+   *
+   * @returns The completed event, or null when `type`/`callId` is missing or
+   *   the type is not one the mock understands.
+   */
+  private normalizeEvent(evt: Partial<NormalizedEvent>): NormalizedEvent | null {
+    const { type, callId } = evt;
+    if (!type || !callId) {
+      return null;
+    }
+
+    // Fields shared by every event variant; id and timestamp are generated
+    // when the caller did not supply them.
+    const common = {
+      id: evt.id || crypto.randomUUID(),
+      callId,
+      providerCallId: evt.providerCallId,
+      timestamp: evt.timestamp || Date.now(),
+    };
+
+    switch (type) {
+      case "call.initiated":
+      case "call.ringing":
+      case "call.answered":
+      case "call.active":
+        return { ...common, type };
+
+      case "call.speaking": {
+        const { text } = evt as Partial<NormalizedEvent & { text?: string }>;
+        return { ...common, type, text: text || "" };
+      }
+
+      case "call.speech": {
+        const { transcript, isFinal, confidence } = evt as Partial<
+          NormalizedEvent & {
+            transcript?: string;
+            isFinal?: boolean;
+            confidence?: number;
+          }
+        >;
+        return {
+          ...common,
+          type,
+          transcript: transcript || "",
+          // Speech is treated as final unless explicitly marked otherwise.
+          isFinal: isFinal ?? true,
+          confidence,
+        };
+      }
+
+      case "call.silence": {
+        const { durationMs } = evt as Partial<NormalizedEvent & { durationMs?: number }>;
+        return { ...common, type, durationMs: durationMs || 0 };
+      }
+
+      case "call.dtmf": {
+        const { digits } = evt as Partial<NormalizedEvent & { digits?: string }>;
+        return { ...common, type, digits: digits || "" };
+      }
+
+      case "call.ended": {
+        const { reason } = evt as Partial<NormalizedEvent & { reason?: EndReason }>;
+        return { ...common, type, reason: reason || "completed" };
+      }
+
+      case "call.error": {
+        const { error, retryable } = evt as Partial<
+          NormalizedEvent & { error?: string; retryable?: boolean }
+        >;
+        return { ...common, type, error: error || "unknown error", retryable };
+      }
+
+      default:
+        return null;
+    }
+  }
+
+  /** Pretends the call was placed; the provider call ID is derived from the call ID. */
+  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
+    const providerCallId = `mock-${input.callId}`;
+    return { providerCallId, status: "initiated" };
+  }
+
+  /** Nothing to hang up in the mock. */
+  async hangupCall(_input: HangupCallInput): Promise<void> {
+    // No-op for mock
+  }
+
+  /** Nothing is played in the mock. */
+  async playTts(_input: PlayTtsInput): Promise<void> {
+    // No-op for mock
+  }
+
+  /** Listening is not simulated in the mock. */
+  async startListening(_input: StartListeningInput): Promise<void> {
+    // No-op for mock
+  }
+
+  /** Listening is not simulated in the mock. */
+  async stopListening(_input: StopListeningInput): Promise<void> {
+    // No-op for mock
+  }
+}
--- a/openclaw/extensions/voice-call/src/providers/plivo.test.ts
+++ b/openclaw/extensions/voice-call/src/providers/plivo.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from "vitest";
+import { PlivoProvider } from "./plivo.js";
+
+describe("PlivoProvider", () => {
+  // Shared fixture: the answer callback for an outbound call, form-encoded.
+  const answerBody =
+    "CallUUID=call-uuid&CallStatus=in-progress&Direction=outbound&From=%2B15550000000&To=%2B15550000001&Event=StartApp";
+  const answerUrl =
+    "https://example.com/voice/webhook?provider=plivo&flow=answer&callId=internal-call-id";
+
+  const createProvider = () =>
+    new PlivoProvider({
+      authId: "MA000000000000000000",
+      authToken: "test-token",
+    });
+
+  // Builds the webhook context, optionally with extra request headers.
+  const answerContext = (extraHeaders: Record<string, string> = {}) => ({
+    headers: { host: "example.com", ...extraHeaders },
+    rawBody: answerBody,
+    url: answerUrl,
+    method: "POST" as const,
+    query: { provider: "plivo", flow: "answer", callId: "internal-call-id" },
+  });
+
+  it("parses answer callback into call.answered and returns keep-alive XML", () => {
+    const { events, providerResponseBody } = createProvider().parseWebhookEvent(answerContext());
+
+    expect(events).toHaveLength(1);
+
+    const [answered] = events;
+    expect(answered?.type).toBe("call.answered");
+    expect(answered?.callId).toBe("internal-call-id");
+    expect(answered?.providerCallId).toBe("call-uuid");
+
+    expect(providerResponseBody).toContain("<Wait");
+    expect(providerResponseBody).toContain('length="300"');
+  });
+
+  it("uses verified request key when provided", () => {
+    const { events } = createProvider().parseWebhookEvent(
+      answerContext({ "x-plivo-signature-v3-nonce": "nonce-1" }),
+      { verifiedRequestKey: "plivo:v3:verified" },
+    );
+
+    expect(events).toHaveLength(1);
+    expect(events[0]?.dedupeKey).toBe("plivo:v3:verified");
+  });
+});
--- a/openclaw/extensions/voice-call/src/providers/plivo.ts
+++ b/openclaw/extensions/voice-call/src/providers/plivo.ts
@@ -0,0 +1,556 @@
+import crypto from "node:crypto";
+import type { PlivoConfig, WebhookSecurityConfig } from "../config.js";
+import { getHeader } from "../http-headers.js";
+import type {
+  HangupCallInput,
+  InitiateCallInput,
+  InitiateCallResult,
+  NormalizedEvent,
+  PlayTtsInput,
+  ProviderWebhookParseResult,
+  StartListeningInput,
+  StopListeningInput,
+  WebhookContext,
+  WebhookParseOptions,
+  WebhookVerificationResult,
+} from "../types.js";
+import { escapeXml } from "../voice-mapping.js";
+import { reconstructWebhookUrl, verifyPlivoWebhook } from "../webhook-security.js";
+import type { VoiceCallProvider } from "./base.js";
+import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
+
+/**
+ * Optional settings accepted by the PlivoProvider constructor.
+ */
+export interface PlivoProviderOptions {
+  /** Override public URL origin for signature verification */
+  publicUrl?: string;
+  /** Skip webhook signature verification (development only) */
+  skipVerification?: boolean;
+  /** Outbound ring timeout in seconds (initiateCall falls back to 30 when unset) */
+  ringTimeoutSec?: number;
+  /** Webhook security options (forwarded headers/allowlist) */
+  webhookSecurity?: WebhookSecurityConfig;
+}
+
+// Text queued by playTts until the matching "xml-speak" webhook request collects it.
+type PendingSpeak = { text: string; locale?: string };
+// Listen settings queued by startListening until the "xml-listen" webhook request collects them.
+type PendingListen = { language?: string };
+
+/**
+ * Build a dedupe key for a Plivo webhook request.
+ *
+ * The v3 signature nonce header is preferred, then the v2 nonce header; when
+ * neither is present the key is the SHA-256 hex digest of the raw body.
+ */
+function createPlivoRequestDedupeKey(ctx: WebhookContext): string {
+  const v3Nonce = getHeader(ctx.headers, "x-plivo-signature-v3-nonce");
+  if (v3Nonce) {
+    return `plivo:v3:${v3Nonce}`;
+  }
+
+  const v2Nonce = getHeader(ctx.headers, "x-plivo-signature-v2-nonce");
+  if (v2Nonce) {
+    return `plivo:v2:${v2Nonce}`;
+  }
+
+  // No nonce available: fall back to a content hash of the request body.
+  const bodyDigest = crypto.createHash("sha256").update(ctx.rawBody).digest("hex");
+  return `plivo:fallback:${bodyDigest}`;
+}
+
+/**
+ * Plivo implementation of VoiceCallProvider.
+ *
+ * Call control works by transferring the call's A-leg back to this webhook with
+ * a `flow` query parameter ("xml-speak" / "xml-listen"); the webhook then answers
+ * with Plivo XML built from the state queued by playTts / startListening.
+ *
+ * Per-call state is kept in in-memory maps and is released when a `call.ended`
+ * event is produced, so the maps do not grow for the lifetime of the process.
+ */
+export class PlivoProvider implements VoiceCallProvider {
+  readonly name = "plivo" as const;
+
+  private readonly authId: string;
+  private readonly authToken: string;
+  private readonly baseUrl: string;
+  private readonly options: PlivoProviderOptions;
+  private readonly apiHost: string;
+
+  // Best-effort mapping between create-call request UUID and call UUID.
+  // Deliberately NOT pruned when a call ends: hangupCall may still be invoked
+  // afterwards and uses it to resolve the call UUID.
+  private requestUuidToCallUuid = new Map<string, string>();
+
+  // Used for transfer URLs and GetInput action URLs.
+  private callIdToWebhookUrl = new Map<string, string>();
+  private callUuidToWebhookUrl = new Map<string, string>();
+
+  private pendingSpeakByCallId = new Map<string, PendingSpeak>();
+  private pendingListenByCallId = new Map<string, PendingListen>();
+
+  /**
+   * @param config - Plivo credentials; both `authId` and `authToken` are required.
+   * @param options - Optional verification / timeout settings.
+   * @throws Error when `authId` or `authToken` is missing.
+   */
+  constructor(config: PlivoConfig, options: PlivoProviderOptions = {}) {
+    if (!config.authId) {
+      throw new Error("Plivo Auth ID is required");
+    }
+    if (!config.authToken) {
+      throw new Error("Plivo Auth Token is required");
+    }
+
+    this.authId = config.authId;
+    this.authToken = config.authToken;
+    this.baseUrl = `https://api.plivo.com/v1/Account/${this.authId}`;
+    this.apiHost = new URL(this.baseUrl).hostname;
+    this.options = options;
+  }
+
+  /**
+   * Send an authenticated JSON request to the Plivo REST API.
+   *
+   * Requests are restricted to the Plivo API hostname. With `allowNotFound`
+   * a 404 response resolves to undefined instead of throwing.
+   */
+  private async apiRequest<T = unknown>(params: {
+    method: "GET" | "POST" | "DELETE";
+    endpoint: string;
+    body?: Record<string, unknown>;
+    allowNotFound?: boolean;
+  }): Promise<T> {
+    const { method, endpoint, body, allowNotFound } = params;
+    return await guardedJsonApiRequest<T>({
+      url: `${this.baseUrl}${endpoint}`,
+      method,
+      headers: {
+        Authorization: `Basic ${Buffer.from(`${this.authId}:${this.authToken}`).toString("base64")}`,
+        "Content-Type": "application/json",
+      },
+      body,
+      allowNotFound,
+      allowedHostnames: [this.apiHost],
+      auditContext: "voice-call.plivo.api",
+      errorPrefix: "Plivo API error",
+    });
+  }
+
+  /**
+   * Verify the Plivo signature of an incoming webhook.
+   * A failed verification is logged as a warning and reported in the result.
+   */
+  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
+    const result = verifyPlivoWebhook(ctx, this.authToken, {
+      publicUrl: this.options.publicUrl,
+      skipVerification: this.options.skipVerification,
+      allowedHosts: this.options.webhookSecurity?.allowedHosts,
+      trustForwardingHeaders: this.options.webhookSecurity?.trustForwardingHeaders,
+      trustedProxyIPs: this.options.webhookSecurity?.trustedProxyIPs,
+      remoteIP: ctx.remoteAddress,
+    });
+
+    if (!result.ok) {
+      console.warn(`[plivo] Webhook verification failed: ${result.reason}`);
+    }
+
+    return {
+      ok: result.ok,
+      reason: result.reason,
+      isReplay: result.isReplay,
+      verifiedRequestKey: result.verifiedRequestKey,
+    };
+  }
+
+  /**
+   * Handle a Plivo webhook request.
+   *
+   * - `flow=xml-speak` / `flow=xml-listen`: returns Plivo XML only (no events),
+   *   consuming the state queued by playTts / startListening.
+   * - Anything else: normalizes the form body into at most one event and
+   *   answers with keep-alive XML (answer / getinput flows) or an empty response.
+   *
+   * When the normalized event is `call.ended`, the per-call state held for
+   * that call is released.
+   */
+  parseWebhookEvent(
+    ctx: WebhookContext,
+    options?: WebhookParseOptions,
+  ): ProviderWebhookParseResult {
+    const flow = typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
+
+    const parsed = this.parseBody(ctx.rawBody);
+    if (!parsed) {
+      return { events: [], statusCode: 400 };
+    }
+
+    // Keep providerCallId mapping for later call control.
+    const callUuid = parsed.get("CallUUID") || undefined;
+    if (callUuid) {
+      const webhookBase = this.baseWebhookUrlFromCtx(ctx);
+      if (webhookBase) {
+        this.callUuidToWebhookUrl.set(callUuid, webhookBase);
+      }
+    }
+
+    // Special flows that exist only to return Plivo XML (no events).
+    if (flow === "xml-speak") {
+      const callId = this.getCallIdFromQuery(ctx);
+      const pending = callId ? this.pendingSpeakByCallId.get(callId) : undefined;
+      if (callId) {
+        // One-shot: the queued text is spoken at most once.
+        this.pendingSpeakByCallId.delete(callId);
+      }
+
+      const xml = pending
+        ? PlivoProvider.xmlSpeak(pending.text, pending.locale)
+        : PlivoProvider.xmlKeepAlive();
+      return {
+        events: [],
+        providerResponseBody: xml,
+        providerResponseHeaders: { "Content-Type": "text/xml" },
+        statusCode: 200,
+      };
+    }
+
+    if (flow === "xml-listen") {
+      const callId = this.getCallIdFromQuery(ctx);
+      const pending = callId ? this.pendingListenByCallId.get(callId) : undefined;
+      if (callId) {
+        this.pendingListenByCallId.delete(callId);
+      }
+
+      const actionUrl = this.buildActionUrl(ctx, {
+        flow: "getinput",
+        callId,
+      });
+
+      // Without an action URL or call ID the speech result could not be routed
+      // back, so just keep the call alive instead.
+      const xml =
+        actionUrl && callId
+          ? PlivoProvider.xmlGetInputSpeech({
+              actionUrl,
+              language: pending?.language,
+            })
+          : PlivoProvider.xmlKeepAlive();
+
+      return {
+        events: [],
+        providerResponseBody: xml,
+        providerResponseHeaders: { "Content-Type": "text/xml" },
+        statusCode: 200,
+      };
+    }
+
+    // Normal events.
+    const callIdFromQuery = this.getCallIdFromQuery(ctx);
+    const dedupeKey = options?.verifiedRequestKey ?? createPlivoRequestDedupeKey(ctx);
+    const event = this.normalizeEvent(parsed, callIdFromQuery, dedupeKey);
+
+    // The call is over: release its bookkeeping so the maps stay bounded.
+    if (event?.type === "call.ended") {
+      this.releaseCallState(event.callId, callUuid);
+    }
+
+    return {
+      events: event ? [event] : [],
+      providerResponseBody:
+        flow === "answer" || flow === "getinput"
+          ? PlivoProvider.xmlKeepAlive()
+          : PlivoProvider.xmlEmpty(),
+      providerResponseHeaders: { "Content-Type": "text/xml" },
+      statusCode: 200,
+    };
+  }
+
+  /**
+   * Forget the per-call state held for a finished call.
+   *
+   * The request-UUID to call-UUID mapping is intentionally kept (see the field
+   * comment) so a hangupCall issued after the hangup webhook still resolves.
+   */
+  private releaseCallState(callId: string, callUuid?: string): void {
+    if (callId) {
+      this.callIdToWebhookUrl.delete(callId);
+      this.pendingSpeakByCallId.delete(callId);
+      this.pendingListenByCallId.delete(callId);
+    }
+    if (callUuid) {
+      this.callUuidToWebhookUrl.delete(callUuid);
+    }
+  }
+
+  /**
+   * Turn Plivo form parameters into a single normalized event.
+   *
+   * Precedence: DTMF digits, then a speech transcript, then the call lifecycle
+   * derived from `CallStatus`, then the `Event=StartApp` fallback.
+   *
+   * @param params - Parsed form body of the webhook.
+   * @param callIdOverride - Internal call ID taken from the webhook query, if any.
+   * @param dedupeKey - Key attached to the event for duplicate detection.
+   * @returns The event, or null when the payload maps to nothing.
+   */
+  private normalizeEvent(
+    params: URLSearchParams,
+    callIdOverride?: string,
+    dedupeKey?: string,
+  ): NormalizedEvent | null {
+    const callUuid = params.get("CallUUID") || "";
+    const requestUuid = params.get("RequestUUID") || "";
+
+    if (requestUuid && callUuid) {
+      this.requestUuidToCallUuid.set(requestUuid, callUuid);
+    }
+
+    const direction = params.get("Direction");
+    const from = params.get("From") || undefined;
+    const to = params.get("To") || undefined;
+    const callStatus = params.get("CallStatus");
+
+    const baseEvent = {
+      id: crypto.randomUUID(),
+      dedupeKey,
+      callId: callIdOverride || callUuid || requestUuid,
+      providerCallId: callUuid || requestUuid || undefined,
+      timestamp: Date.now(),
+      direction:
+        direction === "inbound"
+          ? ("inbound" as const)
+          : direction === "outbound"
+            ? ("outbound" as const)
+            : undefined,
+      from,
+      to,
+    };
+
+    const digits = params.get("Digits");
+    if (digits) {
+      return { ...baseEvent, type: "call.dtmf", digits };
+    }
+
+    const transcript = PlivoProvider.extractTranscript(params);
+    if (transcript) {
+      return {
+        ...baseEvent,
+        type: "call.speech",
+        transcript,
+        isFinal: true,
+      };
+    }
+
+    // Call lifecycle.
+    if (callStatus === "ringing") {
+      return { ...baseEvent, type: "call.ringing" };
+    }
+
+    if (callStatus === "in-progress") {
+      return { ...baseEvent, type: "call.answered" };
+    }
+
+    if (
+      callStatus === "completed" ||
+      callStatus === "busy" ||
+      callStatus === "no-answer" ||
+      callStatus === "failed"
+    ) {
+      return {
+        ...baseEvent,
+        type: "call.ended",
+        reason:
+          callStatus === "completed"
+            ? "completed"
+            : callStatus === "busy"
+              ? "busy"
+              : callStatus === "no-answer"
+                ? "no-answer"
+                : "failed",
+      };
+    }
+
+    // Plivo will call our answer_url when the call is answered; if we don't have
+    // a CallStatus for some reason, treat it as answered so the call can proceed.
+    if (params.get("Event") === "StartApp" && callUuid) {
+      return { ...baseEvent, type: "call.answered" };
+    }
+
+    return null;
+  }
+
+  /**
+   * Place an outbound call through the Plivo REST API.
+   *
+   * The webhook URL is remembered for the call before the request is sent; if
+   * call creation fails that entry is removed again so it does not linger.
+   *
+   * @returns The Plivo request UUID as `providerCallId`, with status "initiated".
+   * @throws Error when the API request fails or returns no `request_uuid`.
+   */
+  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
+    const webhookUrl = new URL(input.webhookUrl);
+    webhookUrl.searchParams.set("provider", "plivo");
+    webhookUrl.searchParams.set("callId", input.callId);
+
+    const answerUrl = new URL(webhookUrl);
+    answerUrl.searchParams.set("flow", "answer");
+
+    const hangupUrl = new URL(webhookUrl);
+    hangupUrl.searchParams.set("flow", "hangup");
+
+    this.callIdToWebhookUrl.set(input.callId, input.webhookUrl);
+
+    const ringTimeoutSec = this.options.ringTimeoutSec ?? 30;
+
+    try {
+      const result = await this.apiRequest<PlivoCreateCallResponse>({
+        method: "POST",
+        endpoint: "/Call/",
+        body: {
+          from: PlivoProvider.normalizeNumber(input.from),
+          to: PlivoProvider.normalizeNumber(input.to),
+          answer_url: answerUrl.toString(),
+          answer_method: "POST",
+          hangup_url: hangupUrl.toString(),
+          hangup_method: "POST",
+          // Plivo's API uses `hangup_on_ring` for outbound ring timeout.
+          hangup_on_ring: ringTimeoutSec,
+        },
+      });
+
+      const requestUuid = Array.isArray(result.request_uuid)
+        ? result.request_uuid[0]
+        : result.request_uuid;
+      if (!requestUuid) {
+        throw new Error("Plivo call create returned no request_uuid");
+      }
+
+      return { providerCallId: requestUuid, status: "initiated" };
+    } catch (err) {
+      // No usable call handle was obtained: drop the entry registered above.
+      this.callIdToWebhookUrl.delete(input.callId);
+      throw err;
+    }
+  }
+
+  /**
+   * Hang up a call.
+   *
+   * When the call UUID is known for the given provider call ID, only that call
+   * is deleted. Otherwise the ID is tried both as a call UUID and as a request
+   * UUID. A 404 from Plivo is tolerated in every case.
+   */
+  async hangupCall(input: HangupCallInput): Promise<void> {
+    const callUuid = this.requestUuidToCallUuid.get(input.providerCallId);
+    if (callUuid) {
+      await this.apiRequest({
+        method: "DELETE",
+        endpoint: `/Call/${callUuid}/`,
+        allowNotFound: true,
+      });
+      return;
+    }
+
+    // Best-effort: try hangup (call UUID), then cancel (request UUID).
+    await this.apiRequest({
+      method: "DELETE",
+      endpoint: `/Call/${input.providerCallId}/`,
+      allowNotFound: true,
+    });
+    await this.apiRequest({
+      method: "DELETE",
+      endpoint: `/Request/${input.providerCallId}/`,
+      allowNotFound: true,
+    });
+  }
+
+  /**
+   * Resolve the call UUID and webhook base URL needed for a call-control operation.
+   *
+   * @throws Error when no webhook URL is known for the call, or when the
+   *   resolved call UUID is empty.
+   */
+  private resolveCallContext(params: {
+    providerCallId: string;
+    callId: string;
+    operation: string;
+  }): {
+    callUuid: string;
+    webhookBase: string;
+  } {
+    // Fall back to the provider call ID itself when no mapping was recorded.
+    const callUuid = this.requestUuidToCallUuid.get(params.providerCallId) ?? params.providerCallId;
+    const webhookBase =
+      this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(params.callId);
+    if (!webhookBase) {
+      throw new Error("Missing webhook URL for this call (provider state missing)");
+    }
+    if (!callUuid) {
+      throw new Error(`Missing Plivo CallUUID for ${params.operation}`);
+    }
+    return { callUuid, webhookBase };
+  }
+
+  /**
+   * Transfer the call's A-leg to this webhook with the given flow, so the next
+   * webhook request for the call returns the XML for that flow.
+   */
+  private async transferCallLeg(params: {
+    callUuid: string;
+    webhookBase: string;
+    callId: string;
+    flow: "xml-speak" | "xml-listen";
+  }): Promise<void> {
+    const transferUrl = new URL(params.webhookBase);
+    transferUrl.searchParams.set("provider", "plivo");
+    transferUrl.searchParams.set("flow", params.flow);
+    transferUrl.searchParams.set("callId", params.callId);
+
+    await this.apiRequest({
+      method: "POST",
+      endpoint: `/Call/${params.callUuid}/`,
+      body: {
+        legs: "aleg",
+        aleg_url: transferUrl.toString(),
+        aleg_method: "POST",
+      },
+    });
+  }
+
+  /**
+   * Queue text for the call and transfer the call to the "xml-speak" flow,
+   * whose webhook response speaks the queued text.
+   */
+  async playTts(input: PlayTtsInput): Promise<void> {
+    const { callUuid, webhookBase } = this.resolveCallContext({
+      providerCallId: input.providerCallId,
+      callId: input.callId,
+      operation: "playTts",
+    });
+
+    this.pendingSpeakByCallId.set(input.callId, {
+      text: input.text,
+      locale: input.locale,
+    });
+
+    await this.transferCallLeg({
+      callUuid,
+      webhookBase,
+      callId: input.callId,
+      flow: "xml-speak",
+    });
+  }
+
+  /**
+   * Queue listen settings for the call and transfer the call to the
+   * "xml-listen" flow, whose webhook response starts speech input.
+   */
+  async startListening(input: StartListeningInput): Promise<void> {
+    const { callUuid, webhookBase } = this.resolveCallContext({
+      providerCallId: input.providerCallId,
+      callId: input.callId,
+      operation: "startListening",
+    });
+
+    this.pendingListenByCallId.set(input.callId, {
+      language: input.language,
+    });
+
+    await this.transferCallLeg({
+      callUuid,
+      webhookBase,
+      callId: input.callId,
+      flow: "xml-listen",
+    });
+  }
+
+  /** Intentionally does nothing for Plivo. */
+  async stopListening(_input: StopListeningInput): Promise<void> {
+    // GetInput ends automatically when speech ends.
+  }
+
+  /**
+   * Trim the value; SIP URIs are returned as-is, anything else is reduced to
+   * digits and "+" characters.
+   */
+  private static normalizeNumber(numberOrSip: string): string {
+    const trimmed = numberOrSip.trim();
+    if (trimmed.toLowerCase().startsWith("sip:")) {
+      return trimmed;
+    }
+    return trimmed.replace(/[^\d+]/g, "");
+  }
+
+  /** Plivo XML response with no instructions. */
+  private static xmlEmpty(): string {
+    return `<?xml version="1.0" encoding="UTF-8"?><Response></Response>`;
+  }
+
+  /** Plivo XML response containing a single 300-length Wait element. */
+  private static xmlKeepAlive(): string {
+    return `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Wait length="300" />
+</Response>`;
+  }
+
+  /**
+   * Plivo XML that speaks `text` and then waits.
+   * The language defaults to "en-US"; both values are XML-escaped.
+   */
+  private static xmlSpeak(text: string, locale?: string): string {
+    const language = locale || "en-US";
+    return `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Speak language="${escapeXml(language)}">${escapeXml(text)}</Speak>
+  <Wait length="300" />
+</Response>`;
+  }
+
+  /**
+   * Plivo XML that collects speech input and posts it to `actionUrl`, then waits.
+   * The language defaults to "en-US"; both values are XML-escaped.
+   */
+  private static xmlGetInputSpeech(params: { actionUrl: string; language?: string }): string {
+    const language = params.language || "en-US";
+    return `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <GetInput inputType="speech" method="POST" action="${escapeXml(params.actionUrl)}" language="${escapeXml(language)}" executionTimeout="30" speechEndTimeout="1" redirect="false">
+  </GetInput>
+  <Wait length="300" />
+</Response>`;
+  }
+
+  /**
+   * Read the internal call ID from the webhook query string.
+   *
+   * @returns The trimmed `callId`, or undefined when it is absent, not a
+   *   string, or blank.
+   */
+  private getCallIdFromQuery(ctx: WebhookContext): string | undefined {
+    const raw = ctx.query?.callId;
+    if (typeof raw !== "string") {
+      return undefined;
+    }
+    const trimmed = raw.trim();
+    return trimmed || undefined;
+  }
+
+  /**
+   * Build a webhook URL for a follow-up flow from the current request.
+   *
+   * @returns The URL, or null when the request URL cannot be reconstructed.
+   */
+  private buildActionUrl(
+    ctx: WebhookContext,
+    opts: { flow: string; callId?: string },
+  ): string | null {
+    const base = this.baseWebhookUrlFromCtx(ctx);
+    if (!base) {
+      return null;
+    }
+
+    const u = new URL(base);
+    u.searchParams.set("provider", "plivo");
+    u.searchParams.set("flow", opts.flow);
+    if (opts.callId) {
+      u.searchParams.set("callId", opts.callId);
+    }
+    return u.toString();
+  }
+
+  /**
+   * Reconstruct the webhook URL of the current request and strip it down to
+   * origin + path (query string and fragment removed).
+   *
+   * @returns The base URL, or null when reconstruction or parsing throws.
+   */
+  private baseWebhookUrlFromCtx(ctx: WebhookContext): string | null {
+    try {
+      const u = new URL(
+        reconstructWebhookUrl(ctx, {
+          allowedHosts: this.options.webhookSecurity?.allowedHosts,
+          trustForwardingHeaders: this.options.webhookSecurity?.trustForwardingHeaders,
+          trustedProxyIPs: this.options.webhookSecurity?.trustedProxyIPs,
+          remoteIP: ctx.remoteAddress,
+        }),
+      );
+      return `${u.origin}${u.pathname}`;
+    } catch {
+      return null;
+    }
+  }
+
+  /** Parse the form-encoded webhook body; returns null if parsing throws. */
+  private parseBody(rawBody: string): URLSearchParams | null {
+    try {
+      return new URLSearchParams(rawBody);
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Return the first non-blank value among the form fields that may carry a
+   * speech transcript, trimmed; null when none is present.
+   */
+  private static extractTranscript(params: URLSearchParams): string | null {
+    const candidates = [
+      "Speech",
+      "Transcription",
+      "TranscriptionText",
+      "SpeechResult",
+      "RecognizedSpeech",
+      "Text",
+    ] as const;
+
+    for (const key of candidates) {
+      const value = params.get(key);
+      if (value && value.trim()) {
+        return value.trim();
+      }
+    }
+    return null;
+  }
+}
+
+// Fields of the Plivo create-call response declared for this provider.
+// initiateCall reads only `request_uuid`, which may be a single string or an
+// array; when it is an array the first element is used.
+type PlivoCreateCallResponse = {
+  api_id?: string;
+  message?: string;
+  request_uuid?: string | string[];
+};
--- a/openclaw/extensions/voice-call/src/providers/shared/guarded-json-api.ts
+++ b/openclaw/extensions/voice-call/src/providers/shared/guarded-json-api.ts
@@ -0,0 +1,42 @@
+import { fetchWithSsrFGuard } from "openclaw/plugin-sdk";
+
+/** Input accepted by guardedJsonApiRequest. */
+type GuardedJsonApiRequestParams = {
+  /** Full request URL handed to the SSRF-guarded fetch. */
+  url: string;
+  /** HTTP method of the request. */
+  method: "GET" | "POST" | "DELETE" | "PUT" | "PATCH";
+  /** Request headers, sent as given. */
+  headers: Record<string, string>;
+  /** Request payload; serialized with JSON.stringify when present, otherwise no body is sent. */
+  body?: Record<string, unknown>;
+  /** When true, a 404 response resolves to undefined instead of throwing. */
+  allowNotFound?: boolean;
+  /** Forwarded to the guard as `policy.allowedHostnames`. */
+  allowedHostnames: string[];
+  /** Forwarded to the guard as `auditContext`. */
+  auditContext: string;
+  /** Prefix of the Error message thrown for a non-OK response. */
+  errorPrefix: string;
+};
+
+/**
+ * Perform a JSON API request through the SSRF-guarded fetch helper.
+ *
+ * @param params - Request description, host allowlist and error-message prefix.
+ * @returns The parsed JSON body, or undefined when the body is empty or when a
+ *   404 is received with `allowNotFound` set.
+ * @throws Error with `errorPrefix`, the status code and the response text for
+ *   any other non-OK response.
+ */
+export async function guardedJsonApiRequest<T = unknown>(
+  params: GuardedJsonApiRequestParams,
+): Promise<T> {
+  const serializedBody = params.body ? JSON.stringify(params.body) : undefined;
+
+  const guarded = await fetchWithSsrFGuard({
+    url: params.url,
+    init: {
+      method: params.method,
+      headers: params.headers,
+      body: serializedBody,
+    },
+    policy: { allowedHostnames: params.allowedHostnames },
+    auditContext: params.auditContext,
+  });
+  const { response, release } = guarded;
+
+  try {
+    if (response.ok) {
+      const payload = await response.text();
+      if (!payload) {
+        return undefined as T;
+      }
+      return JSON.parse(payload) as T;
+    }
+
+    const toleratedNotFound = params.allowNotFound && response.status === 404;
+    if (toleratedNotFound) {
+      return undefined as T;
+    }
+
+    const errorText = await response.text();
+    throw new Error(`${params.errorPrefix}: ${response.status} ${errorText}`);
+  } finally {
+    // Always hand the guarded response back, whether we returned or threw.
+    await release();
+  }
+}
--- a/openclaw/extensions/voice-call/src/providers/stt-openai-realtime.ts
+++ b/openclaw/extensions/voice-call/src/providers/stt-openai-realtime.ts
@@ -0,0 +1,311 @@
+/**
+ * OpenAI Realtime STT Provider
+ *
+ * Uses the OpenAI Realtime API for streaming transcription with:
+ * - Direct mu-law audio support (no conversion needed)
+ * - Built-in server-side VAD for turn detection
+ * - Low-latency streaming transcription
+ * - Partial transcript callbacks for real-time UI updates
+ */
+
+import WebSocket from "ws";
+
+/**
+ * Configuration for OpenAI Realtime STT.
+ *
+ * Consumed by `OpenAIRealtimeSTTProvider`; the VAD-related values are sent to
+ * the service in the session's `turn_detection` settings.
+ */
+export interface RealtimeSTTConfig {
+  /** OpenAI API key (required; the provider constructor throws when it is empty) */
+  apiKey: string;
+  /** Model to use (default: gpt-4o-transcribe) */
+  model?: string;
+  /** Silence duration in ms before considering speech ended (default: 800) */
+  silenceDurationMs?: number;
+  /** VAD threshold 0-1 (default: 0.5) */
+  vadThreshold?: number;
+}
+
+/**
+ * Session for streaming audio and receiving transcripts.
+ *
+ * Each `on*` method stores a single callback; registering again replaces the
+ * previously registered one in the implementation in this file.
+ */
+export interface RealtimeSTTSession {
+  /** Connect to the transcription service */
+  connect(): Promise<void>;
+  /** Send mu-law audio data (8kHz mono) */
+  sendAudio(audio: Buffer): void;
+  /**
+   * Wait for next complete transcript (after VAD detects end of speech).
+   * The implementation in this file rejects with "Transcript timeout" once
+   * `timeoutMs` elapses (default 30000 ms).
+   */
+  waitForTranscript(timeoutMs?: number): Promise<string>;
+  /** Set callback for partial transcripts (streaming) */
+  onPartial(callback: (partial: string) => void): void;
+  /** Set callback for final transcripts */
+  onTranscript(callback: (transcript: string) => void): void;
+  /** Set callback when speech starts (VAD) */
+  onSpeechStart(callback: () => void): void;
+  /** Close the session */
+  close(): void;
+  /** Check if session is connected */
+  isConnected(): boolean;
+}
+
+/**
+ * Provider factory for OpenAI Realtime STT sessions.
+ *
+ * Holds the resolved configuration and hands out independent sessions via
+ * `createSession()`.
+ */
+export class OpenAIRealtimeSTTProvider {
+  readonly name = "openai-realtime";
+  private apiKey: string;
+  private model: string;
+  private silenceDurationMs: number;
+  private vadThreshold: number;
+
+  /**
+   * @param config API key plus optional model and VAD tuning.
+   * @throws Error when `config.apiKey` is empty.
+   */
+  constructor(config: RealtimeSTTConfig) {
+    if (!config.apiKey) {
+      throw new Error("OpenAI API key required for Realtime STT");
+    }
+    this.apiKey = config.apiKey;
+    // An empty model string is treated as "not configured".
+    this.model = config.model || "gpt-4o-transcribe";
+    // Use ?? (not ||) so an explicit 0 is honored instead of being replaced
+    // by the default; 0 is inside the documented 0-1 threshold range.
+    this.silenceDurationMs = config.silenceDurationMs ?? 800;
+    this.vadThreshold = config.vadThreshold ?? 0.5;
+  }
+
+  /**
+   * Create a new realtime transcription session.
+   * The session is not connected until its `connect()` is called.
+   */
+  createSession(): RealtimeSTTSession {
+    return new OpenAIRealtimeSTTSession(
+      this.apiKey,
+      this.model,
+      this.silenceDurationMs,
+      this.vadThreshold,
+    );
+  }
+}
+
+/**
+ * WebSocket-based session for real-time speech-to-text.
+ *
+ * Opens a socket to the OpenAI Realtime transcription endpoint, configures
+ * mu-law input with server-side VAD, and forwards transcript events to the
+ * registered callbacks. When the socket closes without `close()` having been
+ * called, reconnection is attempted with exponential backoff
+ * (RECONNECT_DELAY_MS * 2^(attempt-1)) up to MAX_RECONNECT_ATTEMPTS times.
+ */
+class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
+  private static readonly MAX_RECONNECT_ATTEMPTS = 5;
+  private static readonly RECONNECT_DELAY_MS = 1000;
+  /** How long a single connection attempt may take before it is abandoned. */
+  private static readonly CONNECT_TIMEOUT_MS = 10000;
+
+  private ws: WebSocket | null = null;
+  private connected = false;
+  /** True once close() was called; suppresses automatic reconnection. */
+  private closed = false;
+  private reconnectAttempts = 0;
+  /** Accumulated delta text for the utterance currently being transcribed. */
+  private pendingTranscript = "";
+  private onTranscriptCallback: ((transcript: string) => void) | null = null;
+  private onPartialCallback: ((partial: string) => void) | null = null;
+  private onSpeechStartCallback: (() => void) | null = null;
+
+  constructor(
+    private readonly apiKey: string,
+    private readonly model: string,
+    private readonly silenceDurationMs: number,
+    private readonly vadThreshold: number,
+  ) {}
+
+  /**
+   * Open the connection, resetting the closed flag and the reconnect counter.
+   * Rejects when the first attempt errors, closes early, or times out.
+   */
+  async connect(): Promise<void> {
+    this.closed = false;
+    this.reconnectAttempts = 0;
+    return this.doConnect();
+  }
+
+  /**
+   * Perform one connection attempt.
+   *
+   * The returned promise settles exactly once: it resolves on "open" and
+   * rejects on the first of error, premature close, or timeout.
+   */
+  private async doConnect(): Promise<void> {
+    return new Promise((resolve, reject) => {
+      const url = "wss://api.openai.com/v1/realtime?intent=transcription";
+
+      const socket = new WebSocket(url, {
+        headers: {
+          Authorization: `Bearer ${this.apiKey}`,
+          "OpenAI-Beta": "realtime=v1",
+        },
+      });
+      this.ws = socket;
+
+      // Tracks whether this attempt's promise has already been settled.
+      let settled = false;
+
+      const connectTimer = setTimeout(() => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        reject(new Error("Realtime STT connection timeout"));
+        // Abandon the stalled handshake so it cannot open later behind the
+        // caller's back; the resulting "close" event drives reconnection.
+        socket.terminate();
+      }, OpenAIRealtimeSTTSession.CONNECT_TIMEOUT_MS);
+
+      // Reject this attempt (at most once) and stop the timeout timer.
+      const failAttempt = (error: Error): void => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        clearTimeout(connectTimer);
+        reject(error);
+      };
+
+      socket.on("open", () => {
+        console.log("[RealtimeSTT] WebSocket connected");
+        settled = true;
+        clearTimeout(connectTimer);
+        this.connected = true;
+        this.reconnectAttempts = 0;
+
+        // Configure the transcription session
+        this.sendEvent({
+          type: "transcription_session.update",
+          session: {
+            input_audio_format: "g711_ulaw",
+            input_audio_transcription: {
+              model: this.model,
+            },
+            turn_detection: {
+              type: "server_vad",
+              threshold: this.vadThreshold,
+              prefix_padding_ms: 300,
+              silence_duration_ms: this.silenceDurationMs,
+            },
+          },
+        });
+
+        resolve();
+      });
+
+      socket.on("message", (data: Buffer) => {
+        try {
+          const event = JSON.parse(data.toString());
+          this.handleEvent(event);
+        } catch (e) {
+          console.error("[RealtimeSTT] Failed to parse event:", e);
+        }
+      });
+
+      socket.on("error", (error) => {
+        console.error("[RealtimeSTT] WebSocket error:", error);
+        // Only fails the attempt while it is still pending; errors after a
+        // successful open are logged and followed by the "close" handling.
+        failAttempt(error);
+      });
+
+      socket.on("close", (code, reason) => {
+        console.log(
+          `[RealtimeSTT] WebSocket closed (code: ${code}, reason: ${reason?.toString() || "none"})`,
+        );
+        // A close before "open" must not leave the attempt pending forever.
+        failAttempt(new Error("Realtime STT connection closed before it was established"));
+
+        // Ignore sockets that are no longer current (closed via close() or
+        // superseded by a newer attempt) so they cannot clobber state or
+        // start a duplicate reconnect loop.
+        if (this.ws !== socket) {
+          return;
+        }
+        this.connected = false;
+
+        // Attempt reconnection if not intentionally closed
+        if (!this.closed) {
+          void this.attemptReconnect();
+        }
+      });
+    });
+  }
+
+  /**
+   * Schedule one reconnection attempt with exponential backoff.
+   * Gives up (logging an error) after MAX_RECONNECT_ATTEMPTS, and does
+   * nothing once the session was intentionally closed.
+   */
+  private async attemptReconnect(): Promise<void> {
+    if (this.closed) {
+      return;
+    }
+
+    if (this.reconnectAttempts >= OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS) {
+      console.error(
+        `[RealtimeSTT] Max reconnect attempts (${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS}) reached`,
+      );
+      return;
+    }
+
+    this.reconnectAttempts++;
+    const delay = OpenAIRealtimeSTTSession.RECONNECT_DELAY_MS * 2 ** (this.reconnectAttempts - 1);
+    console.log(
+      `[RealtimeSTT] Reconnecting ${this.reconnectAttempts}/${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS} in ${delay}ms...`,
+    );
+
+    await new Promise((resolve) => setTimeout(resolve, delay));
+
+    // close() may have been called while waiting out the backoff delay.
+    if (this.closed) {
+      return;
+    }
+
+    try {
+      await this.doConnect();
+      console.log("[RealtimeSTT] Reconnected successfully");
+    } catch (error) {
+      // The failed socket's "close" event schedules the next attempt.
+      console.error("[RealtimeSTT] Reconnect failed:", error);
+    }
+  }
+
+  /**
+   * Dispatch a decoded server event to the matching callback.
+   * Unrecognized event types are ignored.
+   */
+  private handleEvent(event: {
+    type: string;
+    delta?: string;
+    transcript?: string;
+    error?: unknown;
+  }): void {
+    switch (event.type) {
+      case "transcription_session.created":
+      case "transcription_session.updated":
+      case "input_audio_buffer.speech_stopped":
+      case "input_audio_buffer.committed":
+        console.log(`[RealtimeSTT] ${event.type}`);
+        break;
+
+      case "conversation.item.input_audio_transcription.delta":
+        if (event.delta) {
+          // Partial callbacks receive the accumulated text, not just the delta.
+          this.pendingTranscript += event.delta;
+          this.onPartialCallback?.(this.pendingTranscript);
+        }
+        break;
+
+      case "conversation.item.input_audio_transcription.completed":
+        if (event.transcript) {
+          console.log(`[RealtimeSTT] Transcript: ${event.transcript}`);
+          this.onTranscriptCallback?.(event.transcript);
+        }
+        this.pendingTranscript = "";
+        break;
+
+      case "input_audio_buffer.speech_started":
+        console.log("[RealtimeSTT] Speech started");
+        this.pendingTranscript = "";
+        this.onSpeechStartCallback?.();
+        break;
+
+      case "error":
+        console.error("[RealtimeSTT] Error:", event.error);
+        break;
+    }
+  }
+
+  /** Serialize and send an event; silently dropped unless the socket is open. */
+  private sendEvent(event: unknown): void {
+    if (this.ws?.readyState === WebSocket.OPEN) {
+      this.ws.send(JSON.stringify(event));
+    }
+  }
+
+  /** Append mu-law audio to the input buffer; dropped while disconnected. */
+  sendAudio(muLawData: Buffer): void {
+    if (!this.connected) {
+      return;
+    }
+    this.sendEvent({
+      type: "input_audio_buffer.append",
+      audio: muLawData.toString("base64"),
+    });
+  }
+
+  onPartial(callback: (partial: string) => void): void {
+    this.onPartialCallback = callback;
+  }
+
+  onTranscript(callback: (transcript: string) => void): void {
+    this.onTranscriptCallback = callback;
+  }
+
+  onSpeechStart(callback: () => void): void {
+    this.onSpeechStartCallback = callback;
+  }
+
+  /**
+   * Resolve with the next completed transcript, or reject with
+   * "Transcript timeout" after `timeoutMs`.
+   *
+   * NOTE: this shares the single transcript-callback slot with
+   * `onTranscript()`: any callback registered there is replaced for the
+   * duration of the wait and is cleared (not restored) afterwards.
+   */
+  async waitForTranscript(timeoutMs = 30000): Promise<string> {
+    return new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        this.onTranscriptCallback = null;
+        reject(new Error("Transcript timeout"));
+      }, timeoutMs);
+
+      this.onTranscriptCallback = (transcript) => {
+        clearTimeout(timeout);
+        this.onTranscriptCallback = null;
+        resolve(transcript);
+      };
+    });
+  }
+
+  /** Close the socket and disable automatic reconnection. */
+  close(): void {
+    this.closed = true;
+    if (this.ws) {
+      this.ws.close();
+      this.ws = null;
+    }
+    this.connected = false;
+  }
+
+  isConnected(): boolean {
+    return this.connected;
+  }
+}
--- a/openclaw/extensions/voice-call/src/providers/telnyx.test.ts
+++ b/openclaw/extensions/voice-call/src/providers/telnyx.test.ts
@@ -0,0 +1,166 @@
+import crypto from "node:crypto";
+import { describe, expect, it } from "vitest";
+import type { WebhookContext } from "../types.js";
+import { TelnyxProvider } from "./telnyx.js";
+
+/**
+ * Build a webhook context for tests: a localhost POST with an empty JSON body,
+ * with any supplied fields overriding the defaults.
+ */
+function createCtx(params?: Partial<WebhookContext>): WebhookContext {
+  const defaults: WebhookContext = {
+    headers: {},
+    rawBody: "{}",
+    url: "http://localhost/voice/webhook",
+    method: "POST",
+    query: {},
+    remoteAddress: "127.0.0.1",
+  };
+  return { ...defaults, ...params };
+}
+
+/**
+ * Decode a Base64URL string (as used for JWK fields) into raw bytes by mapping
+ * it to the standard alphabet and restoring the stripped `=` padding.
+ */
+function decodeBase64Url(input: string): Buffer {
+  const standardAlphabet = input.replace(/[-_]/g, (ch) => (ch === "-" ? "+" : "/"));
+  const remainder = standardAlphabet.length % 4;
+  const padding = remainder === 0 ? "" : "=".repeat(4 - remainder);
+  return Buffer.from(`${standardAlphabet}${padding}`, "base64");
+}
+
+/**
+ * Sign a sample `call.initiated` payload with `privateKey` (over
+ * `<timestamp>|<body>`) and assert that a provider configured with
+ * `publicKey` accepts the resulting webhook.
+ */
+function expectWebhookVerificationSucceeds(params: {
+  publicKey: string;
+  privateKey: crypto.KeyObject;
+}) {
+  const { publicKey, privateKey } = params;
+
+  const provider = new TelnyxProvider(
+    { apiKey: "KEY123", connectionId: "CONN456", publicKey },
+    { skipVerification: false },
+  );
+
+  const rawBody = JSON.stringify({
+    event_type: "call.initiated",
+    payload: { call_control_id: "x" },
+  });
+  const timestamp = String(Math.floor(Date.now() / 1000));
+
+  // Sign "<timestamp>|<body>" with the Ed25519 private key.
+  const toSign = Buffer.from([timestamp, rawBody].join("|"));
+  const signature = crypto.sign(null, toSign, privateKey).toString("base64");
+
+  const headers = {
+    "telnyx-signature-ed25519": signature,
+    "telnyx-timestamp": timestamp,
+  };
+  const verification = provider.verifyWebhook(createCtx({ rawBody, headers }));
+  expect(verification.ok).toBe(true);
+}
+
+// Signature-verification behavior: fail-closed defaults, the development
+// bypass, accepted public-key encodings, and replay detection.
+describe("TelnyxProvider.verifyWebhook", () => {
+  // Build a provider that differs only in public key and verification mode.
+  const buildProvider = (publicKey: string | undefined, skipVerification: boolean) =>
+    new TelnyxProvider(
+      { apiKey: "KEY123", connectionId: "CONN456", publicKey },
+      { skipVerification },
+    );
+
+  it("fails closed when public key is missing and skipVerification is false", () => {
+    const outcome = buildProvider(undefined, false).verifyWebhook(createCtx());
+    expect(outcome.ok).toBe(false);
+  });
+
+  it("allows requests when skipVerification is true (development only)", () => {
+    const outcome = buildProvider(undefined, true).verifyWebhook(createCtx());
+    expect(outcome.ok).toBe(true);
+  });
+
+  it("fails when signature headers are missing (with public key configured)", () => {
+    const outcome = buildProvider("public-key", false).verifyWebhook(createCtx({ headers: {} }));
+    expect(outcome.ok).toBe(false);
+  });
+
+  it("verifies a valid signature with a raw Ed25519 public key (Base64)", () => {
+    const keyPair = crypto.generateKeyPairSync("ed25519");
+
+    // The JWK "x" member carries the raw 32-byte public key in Base64URL.
+    const jwk = keyPair.publicKey.export({ format: "jwk" }) as JsonWebKey;
+    expect(jwk.kty).toBe("OKP");
+    expect(jwk.crv).toBe("Ed25519");
+    expect(typeof jwk.x).toBe("string");
+
+    const rawKeyBase64 = decodeBase64Url(jwk.x as string).toString("base64");
+    expectWebhookVerificationSucceeds({
+      publicKey: rawKeyBase64,
+      privateKey: keyPair.privateKey,
+    });
+  });
+
+  it("verifies a valid signature with a DER SPKI public key (Base64)", () => {
+    const keyPair = crypto.generateKeyPairSync("ed25519");
+    const spki = keyPair.publicKey.export({ format: "der", type: "spki" }) as Buffer;
+    expectWebhookVerificationSucceeds({
+      publicKey: spki.toString("base64"),
+      privateKey: keyPair.privateKey,
+    });
+  });
+
+  it("returns replay status when the same signed request is seen twice", () => {
+    const keyPair = crypto.generateKeyPairSync("ed25519");
+    const spki = keyPair.publicKey.export({ format: "der", type: "spki" }) as Buffer;
+    const provider = buildProvider(spki.toString("base64"), false);
+
+    // A random nonce keeps this request distinct from any other test's request.
+    const rawBody = JSON.stringify({
+      event_type: "call.initiated",
+      payload: { call_control_id: "call-replay-test" },
+      nonce: crypto.randomUUID(),
+    });
+    const timestamp = String(Math.floor(Date.now() / 1000));
+    const signature = crypto
+      .sign(null, Buffer.from(`${timestamp}|${rawBody}`), keyPair.privateKey)
+      .toString("base64");
+    const ctx = createCtx({
+      rawBody,
+      headers: {
+        "telnyx-signature-ed25519": signature,
+        "telnyx-timestamp": timestamp,
+      },
+    });
+
+    const [first, second] = [provider.verifyWebhook(ctx), provider.verifyWebhook(ctx)];
+
+    expect(first.ok).toBe(true);
+    expect(first.isReplay).toBeFalsy();
+    expect(first.verifiedRequestKey).toBeTruthy();
+    expect(second.ok).toBe(true);
+    expect(second.isReplay).toBe(true);
+    expect(second.verifiedRequestKey).toBe(first.verifiedRequestKey);
+  });
+});
+
+// Event parsing: the verified request key must be propagated as the dedupe key.
+describe("TelnyxProvider.parseWebhookEvent", () => {
+  it("uses verified request key for manager dedupe", () => {
+    const verifiedRequestKey = "telnyx:req:abc";
+    const webhookBody = {
+      data: {
+        id: "evt-123",
+        event_type: "call.initiated",
+        payload: { call_control_id: "call-1" },
+      },
+    };
+
+    const provider = new TelnyxProvider({
+      apiKey: "KEY123",
+      connectionId: "CONN456",
+      publicKey: undefined,
+    });
+    const ctx = createCtx({ rawBody: JSON.stringify(webhookBody) });
+    const parsed = provider.parseWebhookEvent(ctx, { verifiedRequestKey });
+
+    expect(parsed.events).toHaveLength(1);
+    expect(parsed.events[0]?.dedupeKey).toBe("telnyx:req:abc");
+  });
+});
--- a/openclaw/extensions/voice-call/src/providers/telnyx.ts
+++ b/openclaw/extensions/voice-call/src/providers/telnyx.ts
@@ -0,0 +1,324 @@
+import crypto from "node:crypto";
+import type { TelnyxConfig } from "../config.js";
+import type {
+  EndReason,
+  HangupCallInput,
+  InitiateCallInput,
+  InitiateCallResult,
+  NormalizedEvent,
+  PlayTtsInput,
+  ProviderWebhookParseResult,
+  StartListeningInput,
+  StopListeningInput,
+  WebhookContext,
+  WebhookParseOptions,
+  WebhookVerificationResult,
+} from "../types.js";
+import { verifyTelnyxWebhook } from "../webhook-security.js";
+import type { VoiceCallProvider } from "./base.js";
+import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
+
+/**
+ * Options for the Telnyx Voice API provider implementation (`TelnyxProvider`).
+ *
+ * The provider uses Telnyx Call Control API v2 for managing calls.
+ * @see https://developers.telnyx.com/docs/api/v2/call-control
+ */
+export interface TelnyxProviderOptions {
+  /** Skip webhook signature verification (development only, NOT for production) */
+  skipVerification?: boolean;
+}
+
+/**
+ * Voice call provider backed by the Telnyx Call Control API v2.
+ *
+ * Verifies and normalizes incoming Telnyx webhooks and issues call-control
+ * commands (dial, hangup, speak, transcription start/stop). All API requests
+ * are POSTs restricted to the Telnyx API host.
+ */
+export class TelnyxProvider implements VoiceCallProvider {
+  readonly name = "telnyx" as const;
+
+  private readonly apiKey: string;
+  private readonly connectionId: string;
+  private readonly publicKey: string | undefined;
+  private readonly options: TelnyxProviderOptions;
+  private readonly baseUrl = "https://api.telnyx.com/v2";
+  private readonly apiHost = "api.telnyx.com";
+
+  /**
+   * @param config Telnyx credentials; `apiKey` and `connectionId` are required.
+   * @param options Provider behavior switches (e.g. skipping verification).
+   * @throws Error when the API key or connection ID is missing.
+   */
+  constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) {
+    if (!config.apiKey) {
+      throw new Error("Telnyx API key is required");
+    }
+    if (!config.connectionId) {
+      throw new Error("Telnyx connection ID is required");
+    }
+
+    this.apiKey = config.apiKey;
+    this.connectionId = config.connectionId;
+    this.publicKey = config.publicKey;
+    this.options = options;
+  }
+
+  /**
+   * Make an authenticated request to the Telnyx API.
+   *
+   * @param endpoint Path appended to the API base URL (starts with "/").
+   * @param body JSON payload to POST.
+   * @param options `allowNotFound` makes a 404 resolve to `undefined`.
+   */
+  private async apiRequest<T = unknown>(
+    endpoint: string,
+    body: Record<string, unknown>,
+    options?: { allowNotFound?: boolean },
+  ): Promise<T> {
+    return await guardedJsonApiRequest<T>({
+      url: `${this.baseUrl}${endpoint}`,
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body,
+      allowNotFound: options?.allowNotFound,
+      allowedHostnames: [this.apiHost],
+      auditContext: "voice-call.telnyx.api",
+      errorPrefix: "Telnyx API error",
+    });
+  }
+
+  /**
+   * Verify Telnyx webhook signature using Ed25519.
+   */
+  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
+    const result = verifyTelnyxWebhook(ctx, this.publicKey, {
+      skipVerification: this.options.skipVerification,
+    });
+
+    return {
+      ok: result.ok,
+      reason: result.reason,
+      isReplay: result.isReplay,
+      verifiedRequestKey: result.verifiedRequestKey,
+    };
+  }
+
+  /**
+   * Parse Telnyx webhook event into normalized format.
+   *
+   * Bodies without `data.event_type` and unrecognized event types yield no
+   * events with status 200; a body that cannot be parsed yields status 400.
+   */
+  parseWebhookEvent(
+    ctx: WebhookContext,
+    options?: WebhookParseOptions,
+  ): ProviderWebhookParseResult {
+    try {
+      const payload = JSON.parse(ctx.rawBody);
+      const data = payload.data;
+
+      if (!data || !data.event_type) {
+        return { events: [], statusCode: 200 };
+      }
+
+      const event = this.normalizeEvent(data, options?.verifiedRequestKey);
+      return {
+        events: event ? [event] : [],
+        statusCode: 200,
+      };
+    } catch {
+      return { events: [], statusCode: 400 };
+    }
+  }
+
+  /**
+   * Decode the Base64 `client_state` that `initiateCall` attaches to a call.
+   *
+   * `Buffer.from(value, "base64")` never throws on malformed input (invalid
+   * characters are silently skipped), so validity is checked by re-encoding
+   * the decoded bytes and comparing, ignoring padding and the URL-safe
+   * alphabet. Values that do not round-trip are returned unchanged.
+   */
+  private decodeClientState(clientState: string): string {
+    const canonical = (value: string): string =>
+      value.replace(/-/g, "+").replace(/_/g, "/").replace(/=+$/, "");
+
+    const decoded = Buffer.from(clientState, "base64");
+    if (canonical(decoded.toString("base64")) !== canonical(clientState)) {
+      // Fallback if not valid Base64
+      return clientState;
+    }
+    return decoded.toString("utf8");
+  }
+
+  /**
+   * Convert Telnyx event to normalized event format.
+   *
+   * @returns The normalized event, or null for event types that are not mapped.
+   */
+  private normalizeEvent(data: TelnyxEvent, dedupeKey?: string): NormalizedEvent | null {
+    // Decode client_state from Base64 (we encode it in initiateCall)
+    let callId = "";
+    if (data.payload?.client_state) {
+      callId = this.decodeClientState(data.payload.client_state);
+    }
+    if (!callId) {
+      // No usable client_state: fall back to the provider's own ID.
+      callId = data.payload?.call_control_id || "";
+    }
+
+    const baseEvent = {
+      id: data.id || crypto.randomUUID(),
+      dedupeKey,
+      callId,
+      providerCallId: data.payload?.call_control_id,
+      timestamp: Date.now(),
+    };
+
+    switch (data.event_type) {
+      case "call.initiated":
+        return { ...baseEvent, type: "call.initiated" };
+
+      case "call.ringing":
+        return { ...baseEvent, type: "call.ringing" };
+
+      case "call.answered":
+        return { ...baseEvent, type: "call.answered" };
+
+      case "call.bridged":
+        return { ...baseEvent, type: "call.active" };
+
+      case "call.speak.started":
+        return {
+          ...baseEvent,
+          type: "call.speaking",
+          text: data.payload?.text || "",
+        };
+
+      case "call.transcription":
+        return {
+          ...baseEvent,
+          type: "call.speech",
+          transcript: data.payload?.transcription || "",
+          // Treated as final unless the payload explicitly says otherwise.
+          isFinal: data.payload?.is_final ?? true,
+          confidence: data.payload?.confidence,
+        };
+
+      case "call.hangup":
+        return {
+          ...baseEvent,
+          type: "call.ended",
+          reason: this.mapHangupCause(data.payload?.hangup_cause),
+        };
+
+      case "call.dtmf.received":
+        return {
+          ...baseEvent,
+          type: "call.dtmf",
+          digits: data.payload?.digit || "",
+        };
+
+      default:
+        return null;
+    }
+  }
+
+  /**
+   * Map Telnyx hangup cause to normalized end reason.
+   * @see https://developers.telnyx.com/docs/api/v2/call-control/Call-Commands#hangup-causes
+   */
+  private mapHangupCause(cause?: string): EndReason {
+    switch (cause) {
+      case "normal_clearing":
+      case "normal_unspecified":
+        return "completed";
+      case "originator_cancel":
+        return "hangup-bot";
+      case "call_rejected":
+      case "user_busy":
+        return "busy";
+      case "no_answer":
+      case "no_user_response":
+        return "no-answer";
+      case "destination_out_of_order":
+      case "network_out_of_order":
+      case "service_unavailable":
+      case "recovery_on_timer_expire":
+        return "failed";
+      case "machine_detected":
+      case "fax_detected":
+        return "voicemail";
+      case "user_hangup":
+      case "subscriber_absent":
+        return "hangup-user";
+      default:
+        // Unknown cause - log it for debugging and return completed
+        if (cause) {
+          console.warn(`[telnyx] Unknown hangup cause: ${cause}`);
+        }
+        return "completed";
+    }
+  }
+
+  /**
+   * Initiate an outbound call via Telnyx API.
+   *
+   * The internal call ID is sent as Base64 `client_state` so that later
+   * webhooks can be mapped back to it.
+   *
+   * @throws Error when the API call fails or the response carries no
+   *   `call_control_id`.
+   */
+  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
+    const result = await this.apiRequest<TelnyxCallResponse>("/calls", {
+      connection_id: this.connectionId,
+      to: input.to,
+      from: input.from,
+      webhook_url: input.webhookUrl,
+      webhook_url_method: "POST",
+      client_state: Buffer.from(input.callId).toString("base64"),
+      timeout_secs: 30,
+    });
+
+    // The shared request helper resolves to undefined for an empty body, so
+    // guard explicitly instead of failing with an opaque TypeError.
+    const providerCallId = result?.data?.call_control_id;
+    if (!providerCallId) {
+      throw new Error("Telnyx API error: response did not include a call_control_id");
+    }
+
+    return {
+      providerCallId,
+      status: "initiated",
+    };
+  }
+
+  /**
+   * Hang up a call via Telnyx API.
+   * A 404 (call already gone) is tolerated.
+   */
+  async hangupCall(input: HangupCallInput): Promise<void> {
+    await this.apiRequest(
+      `/calls/${input.providerCallId}/actions/hangup`,
+      { command_id: crypto.randomUUID() },
+      { allowNotFound: true },
+    );
+  }
+
+  /**
+   * Play TTS audio via Telnyx speak action.
+   * Defaults to voice "female" and language "en-US" when not specified.
+   */
+  async playTts(input: PlayTtsInput): Promise<void> {
+    await this.apiRequest(`/calls/${input.providerCallId}/actions/speak`, {
+      command_id: crypto.randomUUID(),
+      payload: input.text,
+      voice: input.voice || "female",
+      language: input.locale || "en-US",
+    });
+  }
+
+  /**
+   * Start transcription (STT) via Telnyx.
+   * Defaults to language "en" when not specified.
+   */
+  async startListening(input: StartListeningInput): Promise<void> {
+    await this.apiRequest(`/calls/${input.providerCallId}/actions/transcription_start`, {
+      command_id: crypto.randomUUID(),
+      language: input.language || "en",
+    });
+  }
+
+  /**
+   * Stop transcription via Telnyx.
+   * A 404 (call already gone) is tolerated.
+   */
+  async stopListening(input: StopListeningInput): Promise<void> {
+    await this.apiRequest(
+      `/calls/${input.providerCallId}/actions/transcription_stop`,
+      { command_id: crypto.randomUUID() },
+      { allowNotFound: true },
+    );
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Telnyx-specific types
+// -----------------------------------------------------------------------------
+
+/**
+ * The `data` object of a Telnyx webhook body, limited to the fields this
+ * provider reads when normalizing events. Unknown payload fields are allowed
+ * through the index signature.
+ */
+interface TelnyxEvent {
+  /** Event ID; a random UUID is substituted when it is absent. */
+  id?: string;
+  /** Telnyx event name, e.g. "call.initiated" or "call.hangup". */
+  event_type: string;
+  payload?: {
+    /** Telnyx identifier of the call; used as the provider call ID. */
+    call_control_id?: string;
+    /** Base64 state set when the call was initiated (the internal call ID). */
+    client_state?: string;
+    /** Text reported with `call.speak.started`. */
+    text?: string;
+    /** Transcript text reported with `call.transcription`. */
+    transcription?: string;
+    /** Whether the transcript is final; treated as true when absent. */
+    is_final?: boolean;
+    confidence?: number;
+    /** Cause reported with `call.hangup`; mapped to a normalized end reason. */
+    hangup_cause?: string;
+    /** Digit reported with `call.dtmf.received`. */
+    digit?: string;
+    [key: string]: unknown;
+  };
+}
+
+/**
+ * Response body of the Telnyx "create call" request as typed by this provider.
+ * Only `data.call_control_id` is read by the code in this file.
+ * NOTE(review): the remaining fields are not used here — confirm their
+ * meaning against the Telnyx API reference before relying on them.
+ */
+interface TelnyxCallResponse {
+  data: {
+    call_control_id: string;
+    call_leg_id: string;
+    call_session_id: string;
+    is_alive: boolean;
+    record_type: string;
+  };
+}
--- a/openclaw/extensions/voice-call/src/providers/tts-openai.ts
+++ b/openclaw/extensions/voice-call/src/providers/tts-openai.ts
@@ -0,0 +1,259 @@
+/**
+ * OpenAI TTS Provider
+ *
+ * Generates speech audio using OpenAI's text-to-speech API.
+ * Handles audio format conversion for telephony (mu-law 8kHz).
+ *
+ * Best practices from OpenAI docs:
+ * - Use gpt-4o-mini-tts for intelligent realtime applications (supports instructions)
+ * - Use tts-1 for lower latency, tts-1-hd for higher quality
+ * - Use marin or cedar voices for best quality
+ * - Use pcm or wav format for fastest response times
+ *
+ * @see https://platform.openai.com/docs/guides/text-to-speech
+ */
+
+/**
+ * OpenAI TTS configuration.
+ * Every field is optional; the provider constructor fills in the defaults
+ * noted on each field.
+ */
+export interface OpenAITTSConfig {
+  /** OpenAI API key (uses OPENAI_API_KEY env if not set) */
+  apiKey?: string;
+  /**
+   * TTS model (default: gpt-4o-mini-tts):
+   * - gpt-4o-mini-tts: newest, supports instructions for tone/style control (recommended)
+   * - tts-1: lower latency
+   * - tts-1-hd: higher quality
+   */
+  model?: string;
+  /**
+   * Voice to use (default: coral). For best quality, use marin or cedar.
+   * All 13 voices: alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer, verse, marin, cedar
+   * Note: tts-1/tts-1-hd only support: alloy, ash, coral, echo, fable, onyx, nova, sage, shimmer
+   */
+  voice?: string;
+  /** Speed multiplier (0.25 to 4.0; default: 1.0) */
+  speed?: number;
+  /**
+   * Instructions for speech style (only works with gpt-4o-mini-tts model;
+   * they are omitted from the request for other models).
+   * Examples: "Speak in a cheerful tone", "Talk like a sympathetic customer service agent"
+   */
+  instructions?: string;
+}
+
+/**
+ * Supported OpenAI TTS voices (all 13 built-in voices).
+ * For best quality, use marin or cedar.
+ * Note: tts-1 and tts-1-hd support a smaller set.
+ *
+ * Declared `as const` so the voice-name union type can be derived from it.
+ */
+export const OPENAI_TTS_VOICES = [
+  "alloy",
+  "ash",
+  "ballad",
+  "coral",
+  "echo",
+  "fable",
+  "nova",
+  "onyx",
+  "sage",
+  "shimmer",
+  "verse",
+  "marin",
+  "cedar",
+] as const;
+
+/** Union of the voice names listed in `OPENAI_TTS_VOICES`. */
+export type OpenAITTSVoice = (typeof OPENAI_TTS_VOICES)[number];
+
+/**
+ * Speech synthesis backed by OpenAI's text-to-speech endpoint.
+ *
+ * Produces raw PCM audio and, for telephony, an 8kHz mu-law rendition.
+ */
+export class OpenAITTSProvider {
+  private apiKey: string;
+  private model: string;
+  private voice: OpenAITTSVoice;
+  private speed: number;
+  private instructions?: string;
+
+  /**
+   * @param config Optional overrides; the API key falls back to the
+   *   OPENAI_API_KEY environment variable.
+   * @throws Error when no API key can be resolved.
+   */
+  constructor(config: OpenAITTSConfig = {}) {
+    const { apiKey, model, voice, speed, instructions } = config;
+
+    const resolvedKey = apiKey || process.env.OPENAI_API_KEY || "";
+    if (!resolvedKey) {
+      throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
+    }
+
+    this.apiKey = resolvedKey;
+    // gpt-4o-mini-tts is the default because it suits intelligent realtime use.
+    this.model = model || "gpt-4o-mini-tts";
+    // coral is the default voice: a good balance of quality and natural tone.
+    this.voice = (voice as OpenAITTSVoice) || "coral";
+    this.speed = speed || 1.0;
+    this.instructions = instructions;
+  }
+
+  /**
+   * Synthesize `text` into raw PCM audio (24kHz, mono, 16-bit signed LE).
+   *
+   * @param text Text to speak.
+   * @param instructions Per-call style instructions; falls back to the
+   *   configured ones and is only sent for gpt-4o-mini-tts models.
+   * @throws Error `OpenAI TTS failed: <status> - <body>` on a non-OK response.
+   */
+  async synthesize(text: string, instructions?: string): Promise<Buffer> {
+    const styleHint = instructions || this.instructions;
+    const modelTakesInstructions = this.model.includes("gpt-4o-mini-tts");
+
+    const payload: Record<string, unknown> = {
+      model: this.model,
+      input: text,
+      voice: this.voice,
+      response_format: "pcm", // Raw PCM audio (24kHz, mono, 16-bit signed LE)
+      speed: this.speed,
+      ...(styleHint && modelTakesInstructions ? { instructions: styleHint } : {}),
+    };
+
+    const response = await fetch("https://api.openai.com/v1/audio/speech", {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(payload),
+    });
+
+    if (response.ok) {
+      return Buffer.from(await response.arrayBuffer());
+    }
+
+    const error = await response.text();
+    throw new Error(`OpenAI TTS failed: ${response.status} - ${error}`);
+  }
+
+  /**
+   * Synthesize `text` and convert it for Twilio Media Streams, which expect
+   * 8kHz mono mu-law audio: 24kHz PCM -> 8kHz PCM -> mu-law.
+   */
+  async synthesizeForTwilio(text: string): Promise<Buffer> {
+    const pcm24k = await this.synthesize(text);
+    return pcmToMulaw(resample24kTo8k(pcm24k));
+  }
+}
+
+/**
+ * Downsample 24kHz PCM to 8kHz (3:1) by averaging each group of three
+ * consecutive input samples.
+ *
+ * Averaging acts as a simple low-pass filter ahead of decimation, which
+ * reduces the aliasing that results from keeping only every third sample.
+ * Trailing samples that do not fill a complete group of three (and a trailing
+ * odd byte) are dropped.
+ *
+ * Input/output: 16-bit signed little-endian mono.
+ *
+ * @param input 24kHz PCM buffer.
+ * @returns 8kHz PCM buffer holding floor(inputSamples / 3) samples.
+ */
+function resample24kTo8k(input: Buffer): Buffer {
+  const DECIMATION = 3;
+  const BYTES_PER_SAMPLE = 2;
+
+  const inputSamples = Math.floor(input.length / BYTES_PER_SAMPLE);
+  const outputSamples = Math.floor(inputSamples / DECIMATION);
+  const output = Buffer.alloc(outputSamples * BYTES_PER_SAMPLE);
+
+  for (let i = 0; i < outputSamples; i++) {
+    // Byte offset of the first sample of this 3-sample group.
+    const groupStart = i * DECIMATION * BYTES_PER_SAMPLE;
+
+    let sum = 0;
+    for (let tap = 0; tap < DECIMATION; tap++) {
+      sum += input.readInt16LE(groupStart + tap * BYTES_PER_SAMPLE);
+    }
+
+    const averaged = Math.round(sum / DECIMATION);
+    output.writeInt16LE(clamp16(averaged), i * BYTES_PER_SAMPLE);
+  }
+
+  return output;
+}
+
+/**
+ * Clamp value to 16-bit signed integer range.
+ */
+function clamp16(value: number): number {
+  return Math.max(-32768, Math.min(32767, value));
+}
+
+/**
+ * Encode a buffer of 16-bit signed little-endian PCM samples as G.711 mu-law,
+ * producing one output byte per input sample.
+ */
+function pcmToMulaw(pcm: Buffer): Buffer {
+  const sampleCount = pcm.length / 2;
+  const encoded = Buffer.alloc(sampleCount);
+
+  let byteOffset = 0;
+  for (let index = 0; index < sampleCount; index++) {
+    encoded[index] = linearToMulaw(pcm.readInt16LE(byteOffset));
+    byteOffset += 2;
+  }
+
+  return encoded;
+}
+
+/**
+ * Encode one 16-bit linear PCM sample as an 8-bit mu-law byte
+ * (ITU-T G.711 mu-law encoding).
+ *
+ * The byte layout before inversion is: sign (1 bit), exponent/segment
+ * (3 bits), mantissa (4 bits).
+ */
+function linearToMulaw(sample: number): number {
+  const BIAS = 132;
+  const CLIP = 32635;
+
+  const signBit = sample < 0 ? 0x80 : 0;
+
+  // Work on the magnitude, limited so that adding the bias cannot overflow.
+  const magnitude = sample < 0 ? -sample : sample;
+  const biased = (magnitude > CLIP ? CLIP : magnitude) + BIAS;
+
+  // Locate the segment: the position of the highest set bit, starting at bit 14.
+  let exponent = 7;
+  let mask = 0x4000;
+  while (exponent > 0 && (biased & mask) === 0) {
+    exponent -= 1;
+    mask >>= 1;
+  }
+
+  // The four bits following the leading bit form the mantissa.
+  const mantissa = (biased >> (exponent + 3)) & 0x0f;
+
+  // mu-law bytes are transmitted bit-inverted.
+  const packed = signBit | (exponent << 4) | mantissa;
+  return ~packed & 0xff;
+}
+
+/**
+ * Decode one 8-bit mu-law byte into a 16-bit linear PCM sample.
+ * Intended for decoding incoming telephony audio.
+ */
+export function mulawToLinear(mulaw: number): number {
+  const BIAS = 132;
+
+  // Undo the bit inversion applied for transmission.
+  const bits = ~mulaw & 0xff;
+
+  const isNegative = (bits & 0x80) !== 0;
+  const exponent = (bits >> 4) & 0x07;
+  const mantissa = bits & 0x0f;
+
+  // Rebuild the biased magnitude, then remove the encoder's bias.
+  const magnitude = (((mantissa << 3) + BIAS) << exponent) - BIAS;
+
+  return isNegative ? -magnitude : magnitude;
+}
+
+/**
+ * Lazily split an audio buffer into fixed-size frames for streaming.
+ * The default of 160 bytes is 20ms of 8kHz mono mu-law (160 samples).
+ * The final frame may be shorter; frames are views onto `audio`, not copies.
+ */
+export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
+  function* frames(): Generator<Buffer, void, unknown> {
+    let offset = 0;
+    while (offset < audio.length) {
+      const end = Math.min(offset + chunkSize, audio.length);
+      yield audio.subarray(offset, end);
+      offset += chunkSize;
+    }
+  }
+  return frames();
+}
--- a/openclaw/extensions/voice-call/src/providers/twilio.test.ts
+++ b/openclaw/extensions/voice-call/src/providers/twilio.test.ts
@@ -0,0 +1,117 @@
+import { describe, expect, it } from "vitest";
+import type { WebhookContext } from "../types.js";
+import { TwilioProvider } from "./twilio.js";
+
+// Expected media stream WebSocket URL: the wss:// form of the provider's
+// publicUrl origin plus the streamPath passed in createProvider().
+const STREAM_URL = "wss://example.ngrok.app/voice/stream";
+
+/** Build a TwilioProvider with fixed fake credentials, public URL, and stream path. */
+function createProvider(): TwilioProvider {
+  const credentials = { accountSid: "AC123", authToken: "secret" };
+  const providerOptions = { publicUrl: "https://example.ngrok.app", streamPath: "/voice/stream" };
+  return new TwilioProvider(credentials, providerOptions);
+}
+
+/**
+ * Build a POST webhook context for the fixed Twilio webhook URL.
+ *
+ * @param rawBody - Form-encoded request body as Twilio would send it.
+ * @param query - Optional parsed query parameters (e.g. callId, type, turnToken).
+ */
+function createContext(rawBody: string, query?: WebhookContext["query"]): WebhookContext {
+  const context: WebhookContext = {
+    headers: {},
+    rawBody,
+    url: "https://example.ngrok.app/voice/twilio",
+    method: "POST",
+    query,
+  };
+  return context;
+}
+
+// Unit tests for TwilioProvider.parseWebhookEvent: which TwiML is returned for
+// a given webhook, and how dedupe keys and turn tokens end up on parsed events.
+describe("TwilioProvider", () => {
+  it("returns streaming TwiML for outbound conversation calls before in-progress", () => {
+    const provider = createProvider();
+    // Outbound TwiML request carrying a callId, with the call not yet in-progress.
+    const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", {
+      callId: "call-1",
+    });
+
+    const result = provider.parseWebhookEvent(ctx);
+
+    // The stream token must be delivered as a <Parameter>, not in the URL.
+    expect(result.providerResponseBody).toContain(STREAM_URL);
+    expect(result.providerResponseBody).toContain('<Parameter name="token" value="');
+    expect(result.providerResponseBody).toContain("<Connect>");
+  });
+
+  it("returns empty TwiML for status callbacks", () => {
+    const provider = createProvider();
+    // `type: "status"` marks the request as a status callback.
+    const ctx = createContext("CallStatus=ringing&Direction=outbound-api", {
+      callId: "call-1",
+      type: "status",
+    });
+
+    const result = provider.parseWebhookEvent(ctx);
+
+    expect(result.providerResponseBody).toBe(
+      '<?xml version="1.0" encoding="UTF-8"?><Response></Response>',
+    );
+  });
+
+  it("returns streaming TwiML for inbound calls", () => {
+    const provider = createProvider();
+    // Inbound webhook with no query parameters at all.
+    const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456");
+
+    const result = provider.parseWebhookEvent(ctx);
+
+    expect(result.providerResponseBody).toContain(STREAM_URL);
+    expect(result.providerResponseBody).toContain('<Parameter name="token" value="');
+    expect(result.providerResponseBody).toContain("<Connect>");
+  });
+
+  it("uses a stable fallback dedupeKey for identical request payloads", () => {
+    const provider = createProvider();
+    const rawBody = "CallSid=CA789&Direction=inbound&SpeechResult=hello";
+    // Two byte-identical requests, parsed without a verified request key.
+    const ctxA = {
+      ...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
+      headers: { "i-twilio-idempotency-token": "idem-123" },
+    };
+    const ctxB = {
+      ...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
+      headers: { "i-twilio-idempotency-token": "idem-123" },
+    };
+
+    const eventA = provider.parseWebhookEvent(ctxA).events[0];
+    const eventB = provider.parseWebhookEvent(ctxB).events[0];
+
+    expect(eventA).toBeDefined();
+    expect(eventB).toBeDefined();
+    // Event ids are unique per parse; only the dedupe key is stable.
+    expect(eventA?.id).not.toBe(eventB?.id);
+    expect(eventA?.dedupeKey).toContain("twilio:fallback:");
+    expect(eventA?.dedupeKey).toBe(eventB?.dedupeKey);
+  });
+
+  it("uses verified request key for dedupe and ignores idempotency header changes", () => {
+    const provider = createProvider();
+    const rawBody = "CallSid=CA790&Direction=inbound&SpeechResult=hello";
+    // Same payload, but the idempotency header differs between the two requests.
+    const ctxA = {
+      ...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
+      headers: { "i-twilio-idempotency-token": "idem-a" },
+    };
+    const ctxB = {
+      ...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
+      headers: { "i-twilio-idempotency-token": "idem-b" },
+    };
+
+    const eventA = provider.parseWebhookEvent(ctxA, { verifiedRequestKey: "twilio:req:abc" })
+      .events[0];
+    const eventB = provider.parseWebhookEvent(ctxB, { verifiedRequestKey: "twilio:req:abc" })
+      .events[0];
+
+    // The verified key is used verbatim as the dedupe key.
+    expect(eventA?.dedupeKey).toBe("twilio:req:abc");
+    expect(eventB?.dedupeKey).toBe("twilio:req:abc");
+  });
+
+  it("keeps turnToken from query on speech events", () => {
+    const provider = createProvider();
+    const ctx = createContext("CallSid=CA222&Direction=inbound&SpeechResult=hello", {
+      callId: "call-2",
+      turnToken: "turn-xyz",
+    });
+
+    const event = provider.parseWebhookEvent(ctx).events[0];
+    expect(event?.type).toBe("call.speech");
+    expect(event?.turnToken).toBe("turn-xyz");
+  });
+});
--- a/openclaw/extensions/voice-call/src/providers/twilio.ts
+++ b/openclaw/extensions/voice-call/src/providers/twilio.ts
@@ -0,0 +1,687 @@
+import crypto from "node:crypto";
+import type { TwilioConfig, WebhookSecurityConfig } from "../config.js";
+import { getHeader } from "../http-headers.js";
+import type { MediaStreamHandler } from "../media-stream.js";
+import { chunkAudio } from "../telephony-audio.js";
+import type { TelephonyTtsProvider } from "../telephony-tts.js";
+import type {
+  HangupCallInput,
+  InitiateCallInput,
+  InitiateCallResult,
+  NormalizedEvent,
+  PlayTtsInput,
+  ProviderWebhookParseResult,
+  StartListeningInput,
+  StopListeningInput,
+  WebhookContext,
+  WebhookParseOptions,
+  WebhookVerificationResult,
+} from "../types.js";
+import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
+import type { VoiceCallProvider } from "./base.js";
+import { twilioApiRequest } from "./twilio/api.js";
+import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
+
+/**
+ * Build the dedupe key attached to events parsed from a Twilio webhook request.
+ *
+ * A non-empty `verifiedRequestKey` is returned unchanged. Otherwise a fallback
+ * key of the form `twilio:fallback:<sha256 hex>` is derived from the signature
+ * header, selected body and query fields, and the full raw body, so identical
+ * requests produce identical keys.
+ *
+ * @param ctx - Webhook request context (headers, raw body, parsed query).
+ * @param verifiedRequestKey - Key supplied by the caller, if any.
+ */
+function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?: string): string {
+  if (verifiedRequestKey) {
+    return verifiedRequestKey;
+  }
+
+  // Query values only count when they are plain strings; anything else hashes as "".
+  const trimmedQueryValue = (value: unknown): string =>
+    typeof value === "string" ? value.trim() : "";
+
+  const signatureHeader = getHeader(ctx.headers, "x-twilio-signature") ?? "";
+  const form = new URLSearchParams(ctx.rawBody);
+
+  // Newline-separated fields, in a fixed order, ending with the untouched body.
+  const fingerprint = [
+    `${signatureHeader}`,
+    form.get("CallSid") ?? "",
+    form.get("CallStatus") ?? "",
+    form.get("Direction") ?? "",
+    trimmedQueryValue(ctx.query?.callId),
+    trimmedQueryValue(ctx.query?.flow),
+    trimmedQueryValue(ctx.query?.turnToken),
+    `${ctx.rawBody}`,
+  ].join("\n");
+
+  const digest = crypto.createHash("sha256").update(fingerprint).digest("hex");
+  return `twilio:fallback:${digest}`;
+}
+
+/**
+ * Options for `TwilioProvider`, the Twilio Voice API provider implementation.
+ *
+ * The provider uses the Twilio Programmable Voice API with Media Streams for
+ * real-time bidirectional audio streaming. These options configure webhook
+ * verification and where the media stream WebSocket is exposed.
+ *
+ * @see https://www.twilio.com/docs/voice
+ * @see https://www.twilio.com/docs/voice/media-streams
+ */
+export interface TwilioProviderOptions {
+  /** Allow ngrok free tier compatibility mode (loopback only, less secure) */
+  allowNgrokFreeTierLoopbackBypass?: boolean;
+  /** Override public URL for signature verification */
+  publicUrl?: string;
+  /** Path for media stream WebSocket (e.g., /voice/stream) */
+  streamPath?: string;
+  /** Skip webhook signature verification (development only) */
+  skipVerification?: boolean;
+  /** Webhook security options (forwarded headers/allowlist) */
+  webhookSecurity?: WebhookSecurityConfig;
+}
+
+/**
+ * Twilio implementation of `VoiceCallProvider`.
+ *
+ * Normalizes Twilio webhooks into events, answers them with TwiML, drives
+ * calls through the Twilio REST API and, when a media stream handler and a
+ * telephony TTS provider are attached, streams synthesized audio over Twilio
+ * Media Streams.
+ *
+ * All per-call state (webhook URLs, stream SIDs, stream auth tokens, stored
+ * TwiML) lives in in-memory maps on this instance.
+ */
+export class TwilioProvider implements VoiceCallProvider {
+  readonly name = "twilio" as const;
+
+  private readonly accountSid: string;
+  private readonly authToken: string;
+  private readonly baseUrl: string;
+  /** Map of provider call SID to the TwiML webhook URL used when the call was created */
+  private readonly callWebhookUrls = new Map<string, string>();
+  private readonly options: TwilioProviderOptions;
+
+  /** Current public webhook URL (set when tunnel starts or from config) */
+  private currentPublicUrl: string | null = null;
+
+  /** Optional telephony TTS provider for streaming TTS */
+  private ttsProvider: TelephonyTtsProvider | null = null;
+
+  /** Optional media stream handler for sending audio */
+  private mediaStreamHandler: MediaStreamHandler | null = null;
+
+  /** Map of call SID to stream SID for media streams */
+  private callStreamMap = new Map<string, string>();
+  /** Per-call tokens for media stream authentication */
+  private streamAuthTokens = new Map<string, string>();
+
+  /** Storage for TwiML content (for notify mode with URL-based TwiML) */
+  private readonly twimlStorage = new Map<string, string>();
+  /** Track notify-mode calls to avoid streaming on follow-up callbacks */
+  private readonly notifyCalls = new Set<string>();
+
+  /**
+   * Delete stored TwiML for a given `callId`.
+   *
+   * We keep TwiML in-memory only long enough to satisfy the initial Twilio
+   * webhook request (notify mode). Subsequent webhooks should not reuse it.
+   * The notify-mode marker for the call is removed as well.
+   */
+  private deleteStoredTwiml(callId: string): void {
+    this.twimlStorage.delete(callId);
+    this.notifyCalls.delete(callId);
+  }
+
+  /**
+   * Delete stored TwiML for a call, addressed by Twilio's provider call SID.
+   *
+   * This is used when we only have `providerCallId` (e.g. hangup). The
+   * internal `callId` is recovered from the webhook URL recorded for the call;
+   * if no URL or no `callId` is found, nothing is deleted.
+   */
+  private deleteStoredTwimlForProviderCall(providerCallId: string): void {
+    const webhookUrl = this.callWebhookUrls.get(providerCallId);
+    if (!webhookUrl) {
+      return;
+    }
+
+    // Read callId through the URL API so that any percent-encoding applied by
+    // `searchParams.set` in initiateCall is decoded back to the storage key.
+    let callId: string | null;
+    try {
+      callId = new URL(webhookUrl).searchParams.get("callId");
+    } catch {
+      return;
+    }
+    if (!callId) {
+      return;
+    }
+
+    this.deleteStoredTwiml(callId);
+    this.streamAuthTokens.delete(providerCallId);
+  }
+
+  /**
+   * @param config - Twilio credentials; `accountSid` and `authToken` are required.
+   * @param options - Optional provider options; `publicUrl` seeds the current public URL.
+   * @throws Error if the account SID or auth token is missing.
+   */
+  constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
+    if (!config.accountSid) {
+      throw new Error("Twilio Account SID is required");
+    }
+    if (!config.authToken) {
+      throw new Error("Twilio Auth Token is required");
+    }
+
+    this.accountSid = config.accountSid;
+    this.authToken = config.authToken;
+    this.baseUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.accountSid}`;
+    this.options = options;
+
+    if (options.publicUrl) {
+      this.currentPublicUrl = options.publicUrl;
+    }
+  }
+
+  /** Replace the public URL used for webhook verification and stream URLs. */
+  setPublicUrl(url: string): void {
+    this.currentPublicUrl = url;
+  }
+
+  /** Current public URL, or null when none has been configured. */
+  getPublicUrl(): string | null {
+    return this.currentPublicUrl;
+  }
+
+  /** Attach the telephony TTS provider used for stream-based playback. */
+  setTTSProvider(provider: TelephonyTtsProvider): void {
+    this.ttsProvider = provider;
+  }
+
+  /** Attach the media stream handler used to send audio and marks. */
+  setMediaStreamHandler(handler: MediaStreamHandler): void {
+    this.mediaStreamHandler = handler;
+  }
+
+  /** Record which media stream SID belongs to a call SID. */
+  registerCallStream(callSid: string, streamSid: string): void {
+    this.callStreamMap.set(callSid, streamSid);
+  }
+
+  /** Forget the media stream SID recorded for a call SID. */
+  unregisterCallStream(callSid: string): void {
+    this.callStreamMap.delete(callSid);
+  }
+
+  /**
+   * Check a media stream token against the one issued for `callSid`.
+   *
+   * @returns true only when a token was issued for the call and `token`
+   *   matches it exactly; false when either is missing or they differ.
+   */
+  isValidStreamToken(callSid: string, token?: string): boolean {
+    const expected = this.streamAuthTokens.get(callSid);
+    if (!expected || !token) {
+      return false;
+    }
+
+    // timingSafeEqual throws when byte lengths differ, and a string's length
+    // is not its UTF-8 byte length. Compare the encoded buffers so a token
+    // containing multi-byte characters is rejected instead of throwing.
+    const expectedBytes = Buffer.from(expected);
+    const tokenBytes = Buffer.from(token);
+    if (expectedBytes.length !== tokenBytes.length) {
+      // Still perform one constant-time comparison on the mismatch path.
+      crypto.timingSafeEqual(expectedBytes, expectedBytes);
+      return false;
+    }
+    return crypto.timingSafeEqual(expectedBytes, tokenBytes);
+  }
+
+  /**
+   * Clear TTS queue for a call (barge-in).
+   * Used when user starts speaking to interrupt current TTS playback.
+   * Does nothing when the call has no registered stream or no handler is set.
+   */
+  clearTtsQueue(callSid: string): void {
+    const streamSid = this.callStreamMap.get(callSid);
+    if (streamSid && this.mediaStreamHandler) {
+      this.mediaStreamHandler.clearTtsQueue(streamSid);
+    }
+  }
+
+  /**
+   * Make an authenticated request to the Twilio API.
+   *
+   * @param endpoint - Path appended to the account base URL (e.g. `/Calls.json`).
+   * @param params - Form parameters; array values are sent as repeated keys.
+   * @param options - `allowNotFound` is forwarded to `twilioApiRequest`.
+   */
+  private async apiRequest<T = unknown>(
+    endpoint: string,
+    params: Record<string, string | string[]>,
+    options?: { allowNotFound?: boolean },
+  ): Promise<T> {
+    return await twilioApiRequest<T>({
+      baseUrl: this.baseUrl,
+      accountSid: this.accountSid,
+      authToken: this.authToken,
+      endpoint,
+      body: params,
+      allowNotFound: options?.allowNotFound,
+    });
+  }
+
+  /**
+   * Verify Twilio webhook signature using HMAC-SHA1.
+   *
+   * Handles reverse proxy scenarios (Tailscale, nginx, ngrok) by reconstructing
+   * the public URL from forwarding headers.
+   *
+   * @see https://www.twilio.com/docs/usage/webhooks/webhooks-security
+   */
+  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
+    return verifyTwilioProviderWebhook({
+      ctx,
+      authToken: this.authToken,
+      currentPublicUrl: this.currentPublicUrl,
+      options: this.options,
+    });
+  }
+
+  /**
+   * Parse Twilio webhook event into normalized format.
+   *
+   * @param ctx - Webhook request; `rawBody` is parsed as form-encoded params and
+   *   `query.callId` / `query.turnToken` are picked up when they are non-blank strings.
+   * @param options - `verifiedRequestKey`, when set, becomes the event's dedupe key.
+   * @returns At most one normalized event plus the TwiML to send back (status 200),
+   *   or no events and status 400 if anything throws while parsing.
+   */
+  parseWebhookEvent(
+    ctx: WebhookContext,
+    options?: WebhookParseOptions,
+  ): ProviderWebhookParseResult {
+    try {
+      const params = new URLSearchParams(ctx.rawBody);
+      const callIdFromQuery =
+        typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
+          ? ctx.query.callId.trim()
+          : undefined;
+      const turnTokenFromQuery =
+        typeof ctx.query?.turnToken === "string" && ctx.query.turnToken.trim()
+          ? ctx.query.turnToken.trim()
+          : undefined;
+      const dedupeKey = createTwilioRequestDedupeKey(ctx, options?.verifiedRequestKey);
+      const event = this.normalizeEvent(params, {
+        callIdOverride: callIdFromQuery,
+        dedupeKey,
+        turnToken: turnTokenFromQuery,
+      });
+
+      // For Twilio, we must return TwiML. Most actions are driven by Calls API updates,
+      // so the webhook response is typically a pause to keep the call alive.
+      const twiml = this.generateTwimlResponse(ctx);
+
+      return {
+        events: event ? [event] : [],
+        providerResponseBody: twiml,
+        providerResponseHeaders: { "Content-Type": "application/xml" },
+        statusCode: 200,
+      };
+    } catch {
+      // Any failure above is reported to Twilio as a bad request.
+      return { events: [], statusCode: 400 };
+    }
+  }
+
+  /**
+   * Parse Twilio direction to normalized format.
+   * Unknown or missing values map to `undefined`.
+   */
+  private static parseDirection(direction: string | null): "inbound" | "outbound" | undefined {
+    if (direction === "inbound") {
+      return "inbound";
+    }
+    if (direction === "outbound-api" || direction === "outbound-dial") {
+      return "outbound";
+    }
+    return undefined;
+  }
+
+  /**
+   * Convert Twilio webhook params to normalized event format.
+   *
+   * Precedence: a speech result wins over DTMF digits, which win over a call
+   * status change. Terminal statuses also drop the call's stream auth token
+   * and, when a `callIdOverride` is known, its stored TwiML.
+   *
+   * @returns The normalized event, or null when the params describe nothing
+   *   this provider reports.
+   */
+  private normalizeEvent(
+    params: URLSearchParams,
+    options?: {
+      callIdOverride?: string;
+      dedupeKey?: string;
+      turnToken?: string;
+    },
+  ): NormalizedEvent | null {
+    const callSid = params.get("CallSid") || "";
+    const callIdOverride = options?.callIdOverride;
+
+    const baseEvent = {
+      id: crypto.randomUUID(),
+      dedupeKey: options?.dedupeKey,
+      callId: callIdOverride || callSid,
+      providerCallId: callSid,
+      timestamp: Date.now(),
+      turnToken: options?.turnToken,
+      direction: TwilioProvider.parseDirection(params.get("Direction")),
+      from: params.get("From") || undefined,
+      to: params.get("To") || undefined,
+    };
+
+    // Handle speech result (from <Gather>)
+    const speechResult = params.get("SpeechResult");
+    if (speechResult) {
+      // Fall back to 0.9 when Confidence is absent or not a finite number,
+      // so a malformed value never surfaces as NaN on the event.
+      const parsedConfidence = Number.parseFloat(params.get("Confidence") ?? "");
+      return {
+        ...baseEvent,
+        type: "call.speech",
+        transcript: speechResult,
+        isFinal: true,
+        confidence: Number.isFinite(parsedConfidence) ? parsedConfidence : 0.9,
+      };
+    }
+
+    // Handle DTMF
+    const digits = params.get("Digits");
+    if (digits) {
+      return { ...baseEvent, type: "call.dtmf", digits };
+    }
+
+    // Handle call status changes
+    const callStatus = params.get("CallStatus");
+    switch (callStatus) {
+      case "initiated":
+        return { ...baseEvent, type: "call.initiated" };
+      case "ringing":
+        return { ...baseEvent, type: "call.ringing" };
+      case "in-progress":
+        return { ...baseEvent, type: "call.answered" };
+      case "completed":
+      case "busy":
+      case "no-answer":
+      case "failed":
+        this.streamAuthTokens.delete(callSid);
+        if (callIdOverride) {
+          this.deleteStoredTwiml(callIdOverride);
+        }
+        return { ...baseEvent, type: "call.ended", reason: callStatus };
+      case "canceled":
+        this.streamAuthTokens.delete(callSid);
+        if (callIdOverride) {
+          this.deleteStoredTwiml(callIdOverride);
+        }
+        return { ...baseEvent, type: "call.ended", reason: "hangup-bot" };
+      default:
+        return null;
+    }
+  }
+
+  /** TwiML document with an empty `<Response>`. */
+  private static readonly EMPTY_TWIML =
+    '<?xml version="1.0" encoding="UTF-8"?><Response></Response>';
+
+  /** TwiML document containing a single 30 second `<Pause>`. */
+  private static readonly PAUSE_TWIML = `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Pause length="30"/>
+</Response>`;
+
+  /**
+   * Generate TwiML response for webhook.
+   * When a call is answered, connects to media stream for bidirectional audio.
+   *
+   * Wherever a stream would be connected but no stream URL can be built
+   * (missing CallSid, public URL, or stream path), the pause TwiML is
+   * returned instead.
+   */
+  private generateTwimlResponse(ctx?: WebhookContext): string {
+    if (!ctx) {
+      return TwilioProvider.EMPTY_TWIML;
+    }
+
+    const params = new URLSearchParams(ctx.rawBody);
+    const type = typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
+    const isStatusCallback = type === "status";
+    const callStatus = params.get("CallStatus");
+    const direction = params.get("Direction");
+    const isOutbound = direction?.startsWith("outbound") ?? false;
+    const callSid = params.get("CallSid") || undefined;
+    const callIdFromQuery =
+      typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
+        ? ctx.query.callId.trim()
+        : undefined;
+
+    // Avoid logging webhook params/TwiML (may contain PII).
+
+    // Handle initial TwiML request (when Twilio first initiates the call)
+    // Check if we have stored TwiML for this call (notify mode)
+    if (callIdFromQuery && !isStatusCallback) {
+      const storedTwiml = this.twimlStorage.get(callIdFromQuery);
+      if (storedTwiml) {
+        // Clean up after serving (one-time use)
+        this.deleteStoredTwiml(callIdFromQuery);
+        return storedTwiml;
+      }
+      // NOTE(review): deleteStoredTwiml also clears the notifyCalls entry, and
+      // entries are only added together with stored TwiML, so this guard looks
+      // unreachable as written - confirm whether serving should keep the marker.
+      if (this.notifyCalls.has(callIdFromQuery)) {
+        return TwilioProvider.EMPTY_TWIML;
+      }
+
+      // Conversation mode: return streaming TwiML immediately for outbound calls.
+      if (isOutbound) {
+        const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
+        return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
+      }
+    }
+
+    // Status callbacks should not receive TwiML.
+    if (isStatusCallback) {
+      return TwilioProvider.EMPTY_TWIML;
+    }
+
+    // Handle subsequent webhook requests (status callbacks, etc.)
+    // For inbound calls, answer immediately with stream
+    if (direction === "inbound") {
+      const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
+      return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
+    }
+
+    // For outbound calls, only connect to stream when call is in-progress
+    if (callStatus !== "in-progress") {
+      return TwilioProvider.EMPTY_TWIML;
+    }
+
+    const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
+    return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
+  }
+
+  /**
+   * Get the WebSocket URL for media streaming.
+   * Derives from the public URL origin + stream path.
+   *
+   * @returns The ws(s):// URL, or null when no public URL or stream path is set.
+   * @throws TypeError from `new URL` if the public URL cannot be parsed.
+   */
+  private getStreamUrl(): string | null {
+    if (!this.currentPublicUrl || !this.options.streamPath) {
+      return null;
+    }
+
+    // Extract just the origin (host) from the public URL, ignoring any path
+    const url = new URL(this.currentPublicUrl);
+    const origin = url.origin;
+
+    // Convert https:// to wss:// for WebSocket
+    const wsOrigin = origin.replace(/^https:\/\//, "wss://").replace(/^http:\/\//, "ws://");
+
+    // Append the stream path
+    const path = this.options.streamPath.startsWith("/")
+      ? this.options.streamPath
+      : `/${this.options.streamPath}`;
+
+    return `${wsOrigin}${path}`;
+  }
+
+  /**
+   * Return the stream auth token for a call, creating and remembering a new
+   * random one (16 bytes, base64url) on first use.
+   */
+  private getStreamAuthToken(callSid: string): string {
+    const existing = this.streamAuthTokens.get(callSid);
+    if (existing) {
+      return existing;
+    }
+    const token = crypto.randomBytes(16).toString("base64url");
+    this.streamAuthTokens.set(callSid, token);
+    return token;
+  }
+
+  /**
+   * Build the media stream URL for a call with its auth token attached as a
+   * `token` query parameter, or null when no stream URL is available.
+   */
+  private getStreamUrlForCall(callSid: string): string | null {
+    const baseUrl = this.getStreamUrl();
+    if (!baseUrl) {
+      return null;
+    }
+    const token = this.getStreamAuthToken(callSid);
+    const url = new URL(baseUrl);
+    url.searchParams.set("token", token);
+    return url.toString();
+  }
+
+  /**
+   * Generate TwiML to connect a call to a WebSocket media stream.
+   * This enables bidirectional audio streaming for real-time STT/TTS.
+   *
+   * @param streamUrl - WebSocket URL (wss://...) for the media stream
+   */
+  getStreamConnectXml(streamUrl: string): string {
+    // Extract token from URL and pass via <Parameter> instead of query string.
+    // Twilio strips query params from WebSocket URLs, but delivers <Parameter>
+    // values in the "start" message's customParameters field.
+    const parsed = new URL(streamUrl);
+    const token = parsed.searchParams.get("token");
+    parsed.searchParams.delete("token");
+    const cleanUrl = parsed.toString();
+
+    const paramXml = token ? `\n      <Parameter name="token" value="${escapeXml(token)}" />` : "";
+
+    return `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Connect>
+    <Stream url="${escapeXml(cleanUrl)}">${paramXml}
+    </Stream>
+  </Connect>
+</Response>`;
+  }
+
+  /**
+   * Initiate an outbound call via Twilio API.
+   * If inlineTwiml is provided, it is stored and served from the webhook
+   * endpoint on the first TwiML request (notify mode).
+   * Otherwise, the webhook URL produces dynamic TwiML.
+   *
+   * @returns Twilio's call SID and whether the call was queued or initiated.
+   * @throws Whatever the API request throws; stored TwiML is discarded first.
+   */
+  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
+    const url = new URL(input.webhookUrl);
+    url.searchParams.set("callId", input.callId);
+
+    // Create separate URL for status callbacks (required by Twilio)
+    const statusUrl = new URL(input.webhookUrl);
+    statusUrl.searchParams.set("callId", input.callId);
+    statusUrl.searchParams.set("type", "status"); // Differentiate from TwiML requests
+
+    // Store TwiML content if provided (for notify mode)
+    // We now serve it from the webhook endpoint instead of sending inline
+    if (input.inlineTwiml) {
+      this.twimlStorage.set(input.callId, input.inlineTwiml);
+      this.notifyCalls.add(input.callId);
+    }
+
+    // Build request params - always use URL-based TwiML.
+    // Twilio silently ignores `StatusCallback` when using the inline `Twiml` parameter.
+    const params: Record<string, string | string[]> = {
+      To: input.to,
+      From: input.from,
+      Url: url.toString(), // TwiML serving endpoint
+      StatusCallback: statusUrl.toString(), // Separate status callback endpoint
+      StatusCallbackEvent: ["initiated", "ringing", "answered", "completed"],
+      Timeout: "30",
+    };
+
+    let result: TwilioCallResponse;
+    try {
+      result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
+    } catch (err) {
+      // The call was never created, so no webhook will ever consume or clean
+      // up the TwiML stored above; drop it here to avoid leaking the entry.
+      if (input.inlineTwiml) {
+        this.deleteStoredTwiml(input.callId);
+      }
+      throw err;
+    }
+
+    this.callWebhookUrls.set(result.sid, url.toString());
+
+    return {
+      providerCallId: result.sid,
+      status: result.status === "queued" ? "queued" : "initiated",
+    };
+  }
+
+  /**
+   * Hang up a call via Twilio API.
+   *
+   * Local state for the call is dropped before the request is sent; a 404
+   * from Twilio is tolerated (`allowNotFound`).
+   */
+  async hangupCall(input: HangupCallInput): Promise<void> {
+    this.deleteStoredTwimlForProviderCall(input.providerCallId);
+
+    this.callWebhookUrls.delete(input.providerCallId);
+    this.streamAuthTokens.delete(input.providerCallId);
+
+    await this.apiRequest(
+      `/Calls/${input.providerCallId}.json`,
+      { Status: "completed" },
+      { allowNotFound: true },
+    );
+  }
+
+  /**
+   * Play TTS audio via Twilio.
+   *
+   * Two modes:
+   * 1. Core TTS + Media Streams: If TTS provider and media stream are available,
+   *    generates audio via core TTS and streams it through WebSocket (preferred).
+   * 2. TwiML <Say>: Falls back to Twilio's native TTS with Polly voices.
+   *    Note: This may not work on all Twilio accounts.
+   *
+   * @throws Error if the fallback is needed but no webhook URL is recorded for the call.
+   */
+  async playTts(input: PlayTtsInput): Promise<void> {
+    // Try telephony TTS via media stream first (if configured)
+    const streamSid = this.callStreamMap.get(input.providerCallId);
+    if (this.ttsProvider && this.mediaStreamHandler && streamSid) {
+      try {
+        await this.playTtsViaStream(input.text, streamSid);
+        return;
+      } catch (err) {
+        console.warn(
+          `[voice-call] Telephony TTS failed, falling back to Twilio <Say>:`,
+          err instanceof Error ? err.message : err,
+        );
+        // Fall through to TwiML <Say> fallback
+      }
+    }
+
+    // Fall back to TwiML <Say> (may not work on all accounts)
+    const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
+    if (!webhookUrl) {
+      throw new Error("Missing webhook URL for this call (provider state not initialized)");
+    }
+
+    console.warn(
+      "[voice-call] Using TwiML <Say> fallback - telephony TTS not configured or media stream not active",
+    );
+
+    // NOTE(review): pollyVoice is interpolated unescaped; confirm that
+    // mapVoiceToPolly only ever returns attribute-safe voice names.
+    const pollyVoice = mapVoiceToPolly(input.voice);
+    // Escape the locale too: it lands inside an XML attribute.
+    const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Say voice="${pollyVoice}" language="${escapeXml(input.locale || "en-US")}">${escapeXml(input.text)}</Say>
+  <Gather input="speech" speechTimeout="auto" action="${escapeXml(webhookUrl)}" method="POST">
+    <Say>.</Say>
+  </Gather>
+</Response>`;
+
+    await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
+      Twiml: twiml,
+    });
+  }
+
+  /**
+   * Play TTS via core TTS and Twilio Media Streams.
+   * Generates audio with core TTS, converts to mu-law, and streams via WebSocket.
+   * Uses a queue to serialize playback and prevent overlapping audio.
+   *
+   * @throws Error if no TTS provider or media stream handler is attached.
+   */
+  private async playTtsViaStream(text: string, streamSid: string): Promise<void> {
+    if (!this.ttsProvider || !this.mediaStreamHandler) {
+      throw new Error("TTS provider and media stream handler required");
+    }
+
+    // Stream audio in 20ms chunks (160 bytes at 8kHz mu-law)
+    const CHUNK_SIZE = 160;
+    const CHUNK_DELAY_MS = 20;
+
+    const handler = this.mediaStreamHandler;
+    const ttsProvider = this.ttsProvider;
+    await handler.queueTts(streamSid, async (signal) => {
+      // Generate audio with core TTS (returns mu-law at 8kHz)
+      const muLawAudio = await ttsProvider.synthesizeForTelephony(text);
+      for (const chunk of chunkAudio(muLawAudio, CHUNK_SIZE)) {
+        // Stop sending as soon as the queued playback is aborted.
+        if (signal.aborted) {
+          break;
+        }
+        handler.sendAudio(streamSid, chunk);
+
+        // Pace the audio to match real-time playback
+        await new Promise((resolve) => setTimeout(resolve, CHUNK_DELAY_MS));
+        if (signal.aborted) {
+          break;
+        }
+      }
+
+      if (!signal.aborted) {
+        // Send a mark to track when audio finishes
+        handler.sendMark(streamSid, `tts-${Date.now()}`);
+      }
+    });
+  }
+
+  /**
+   * Start listening for speech via Twilio <Gather>.
+   *
+   * The gather result is posted back to the call's webhook URL, with
+   * `turnToken` added to the query string when one is supplied.
+   *
+   * @throws Error if no webhook URL is recorded for the call.
+   */
+  async startListening(input: StartListeningInput): Promise<void> {
+    const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
+    if (!webhookUrl) {
+      throw new Error("Missing webhook URL for this call (provider state not initialized)");
+    }
+
+    const actionUrl = new URL(webhookUrl);
+    if (input.turnToken) {
+      actionUrl.searchParams.set("turnToken", input.turnToken);
+    }
+
+    // The language lands inside an XML attribute, so escape it like the action URL.
+    const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+<Response>
+  <Gather input="speech" speechTimeout="auto" language="${escapeXml(input.language || "en-US")}" action="${escapeXml(actionUrl.toString())}" method="POST">
+  </Gather>
+</Response>`;
+
+    await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
+      Twiml: twiml,
+    });
+  }
+
+  /**
+   * Stop listening - for Twilio this is a no-op as <Gather> auto-ends.
+   */
+  async stopListening(_input: StopListeningInput): Promise<void> {
+    // Twilio's <Gather> automatically stops on speech end
+    // No explicit action needed
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Twilio-specific types
+// -----------------------------------------------------------------------------
+
+/**
+ * Shape expected from the JSON response when creating a call via `/Calls.json`.
+ * Within this file only `sid` and `status` are read (see `initiateCall`).
+ */
+interface TwilioCallResponse {
+  /** Twilio call SID; used as the provider call ID */
+  sid: string;
+  /** Call status string; "queued" is reported as queued, anything else as initiated */
+  status: string;
+  direction: string;
+  from: string;
+  to: string;
+  uri: string;
+}
--- a/openclaw/extensions/voice-call/src/providers/twilio/api.ts
+++ b/openclaw/extensions/voice-call/src/providers/twilio/api.ts
@@ -0,0 +1,42 @@
+/**
+ * POST a form-encoded request to the Twilio REST API using HTTP Basic auth.
+ *
+ * @param params.baseUrl - Account-scoped API base URL.
+ * @param params.endpoint - Path appended directly to `baseUrl`.
+ * @param params.body - Ready-made `URLSearchParams`, or a record whose string
+ *   values are sent once and whose array values are sent as repeated keys.
+ * @param params.allowNotFound - When true, a 404 resolves to `undefined`
+ *   instead of throwing.
+ * @returns The parsed JSON body, or `undefined` when the body is empty.
+ * @throws Error `Twilio API error: <status> <body>` for any other non-OK response.
+ */
+export async function twilioApiRequest<T = unknown>(params: {
+  baseUrl: string;
+  accountSid: string;
+  authToken: string;
+  endpoint: string;
+  body: URLSearchParams | Record<string, string | string[]>;
+  allowNotFound?: boolean;
+}): Promise<T> {
+  const { baseUrl, accountSid, authToken, endpoint, body, allowNotFound } = params;
+
+  // Normalize the body into URLSearchParams, expanding arrays into repeated keys.
+  let form: URLSearchParams;
+  if (body instanceof URLSearchParams) {
+    form = body;
+  } else {
+    form = new URLSearchParams();
+    for (const [key, value] of Object.entries(body)) {
+      if (Array.isArray(value)) {
+        for (const entry of value) {
+          form.append(key, entry);
+        }
+      } else if (typeof value === "string") {
+        form.append(key, value);
+      }
+    }
+  }
+
+  const basicCredentials = Buffer.from(`${accountSid}:${authToken}`).toString("base64");
+  const response = await fetch(`${baseUrl}${endpoint}`, {
+    method: "POST",
+    headers: {
+      Authorization: `Basic ${basicCredentials}`,
+      "Content-Type": "application/x-www-form-urlencoded",
+    },
+    body: form,
+  });
+
+  if (response.ok) {
+    // An empty body is treated as "no result" rather than a parse error.
+    const payload = await response.text();
+    return payload ? (JSON.parse(payload) as T) : (undefined as T);
+  }
+
+  if (allowNotFound && response.status === 404) {
+    return undefined as T;
+  }
+
+  const errorText = await response.text();
+  throw new Error(`Twilio API error: ${response.status} ${errorText}`);
+}
--- a/openclaw/extensions/voice-call/src/providers/twilio/webhook.ts
+++ b/openclaw/extensions/voice-call/src/providers/twilio/webhook.ts
@@ -0,0 +1,34 @@
+import type { WebhookContext, WebhookVerificationResult } from "../../types.js";
+import { verifyTwilioWebhook } from "../../webhook-security.js";
+import type { TwilioProviderOptions } from "../twilio.js";
+
+/**
+ * Verify a Twilio webhook on behalf of the provider.
+ *
+ * Translates the provider's options into the arguments of
+ * `verifyTwilioWebhook`, logs a warning when verification fails, and returns
+ * only the fields that make up `WebhookVerificationResult`.
+ *
+ * @param params.ctx - Incoming webhook request context.
+ * @param params.authToken - Twilio auth token used for verification.
+ * @param params.currentPublicUrl - Public URL override; null or empty means none.
+ * @param params.options - Provider options carrying the verification settings.
+ */
+export function verifyTwilioProviderWebhook(params: {
+  ctx: WebhookContext;
+  authToken: string;
+  currentPublicUrl?: string | null;
+  options: TwilioProviderOptions;
+}): WebhookVerificationResult {
+  const { ctx, authToken, currentPublicUrl, options } = params;
+  const security = options.webhookSecurity;
+
+  const verification = verifyTwilioWebhook(ctx, authToken, {
+    publicUrl: currentPublicUrl || undefined,
+    allowNgrokFreeTierLoopbackBypass: options.allowNgrokFreeTierLoopbackBypass ?? false,
+    skipVerification: options.skipVerification,
+    allowedHosts: security?.allowedHosts,
+    trustForwardingHeaders: security?.trustForwardingHeaders,
+    trustedProxyIPs: security?.trustedProxyIPs,
+    remoteIP: ctx.remoteAddress,
+  });
+
+  if (!verification.ok) {
+    console.warn(`[twilio] Webhook verification failed: ${verification.reason}`);
+    if (verification.verificationUrl) {
+      console.warn(`[twilio] Verification URL: ${verification.verificationUrl}`);
+    }
+  }
+
+  // Return only the public result fields; verificationUrl is used for logging above.
+  const { ok, reason, isReplay, verifiedRequestKey } = verification;
+  return { ok, reason, isReplay, verifiedRequestKey };
+}