sacred-roseS
VAPI4mo ago
sacred-rose

Very slow speaking response times

Hey VAPI folks - I am trying to figure out settings that produce natural-feeling conversation times. Right now I talk and it can take 2-3 seconds before the AI responds. It's really slow. I am using claude-sonnet-4 as my production completions model, but even when I use Groq gpt-oss-120B it's not much better. Is it the STT provider I'm using (Speechmatics)? Is it Cartesia? Is there some observability where I can see, for each turn, what is taking the most time?

Here are my current settings. Any help would be appreciated — I may end up spending a lot with you, so I'm happy to hop on a call:


// Transient assistant configuration defaults.
//
// Tuned for low perceived latency: aggressive endpointing thresholds, fast
// interruption handling, and a SINGLE denoising pass — running both the
// legacy denoiser and the smart denoising plan doubles audio processing
// for no quality gain.
export const ASSISTANT_CONFIG: Partial<Vapi.CreateAssistantDto> = {
  name: 'AI Coach',

  model: {
    provider: PRODUCTION_MODEL.provider,
    model: PRODUCTION_MODEL.model,
    maxTokens: 1200,
    temperature: PRODUCTION_MODEL.temperature,
    // custom-llm providers additionally require a target URL;
    // metadataSendMode 'off' keeps the request payload minimal.
    ...(PRODUCTION_MODEL.provider === 'custom-llm'
      ? { url: PRODUCTION_MODEL.url, metadataSendMode: 'off' }
      : {}),
    // The conditional spread defeats the discriminated-union check on the
    // model variants, so a cast is needed — but cast to the actual field
    // type rather than `any` so every property is still checked.
  } as Vapi.CreateAssistantDto['model'],

  voice: {
    provider: 'cartesia',
    voiceId: 'b7d50908-b17c-442d-ad8d-810c63997ed9', // Professional female voice
  },
  // OPTIMIZED: Much more responsive interruption handling
  startSpeakingPlan: {
    waitSeconds: 0.05, // REDUCED from 0.12 - start speaking faster
    // NOTE(review): when smartEndpointingPlan is set, Vapi's smart
    // endpointing takes precedence and these transcription-based timings
    // are not the active path — confirm against the Vapi docs before
    // tuning them further.
    transcriptionEndpointingPlan: {
      onPunctuationSeconds: 0.03, // REDUCED from 0.05
      onNoPunctuationSeconds: 0.25, // REDUCED from 0.38
      onNumberSeconds: 0.2, // REDUCED from 0.3
    },
    smartEndpointingPlan: {
      provider: 'livekit',
      waitFunction: '200 + 800 * max(0, x - 0.3)', // FASTER response curve
    },
  },
  // OPTIMIZED: More sensitive interruption detection
  stopSpeakingPlan: {
    numWords: 1, // REDUCED from 2 - interrupt faster
    voiceSeconds: 0.12, // REDUCED from 0.18 - more sensitive
    backoffSeconds: 0.4, // REDUCED from 0.6 - shorter pause after interruption
  },
  backgroundSpeechDenoisingPlan: {
    smartDenoisingPlan: {
      enabled: true,
    },
    // SIMPLIFIED: Reduce processing overhead
    fourierDenoisingPlan: {
      enabled: false, // DISABLED - can add latency
    },
  },

  messagePlan: {
    idleMessages: ['Are you still there?', "I'm here whenever you're ready to continue."],
    idleTimeoutSeconds: 8,
    idleMessageMaxSpokenCount: 3,
    idleMessageResetCountOnUserSpeechEnabled: true,
  },
  transcriber: {
    provider: 'speechmatics',
    model: 'default', // Use 'enhanced' for better accuracy or 'standard' for cost efficiency
    language: 'en',
    // Automatic failover if Speechmatics has an outage mid-call.
    fallbackPlan: {
      transcribers: [
        {
          provider: 'assembly-ai',
          language: 'en',
        },
      ],
    },
  },

  silenceTimeoutSeconds: 20,
  maxDurationSeconds: 600, // 10 minutes max
  // Subscribe to important events only (exclude speech-update to reduce spam)
  serverMessages: ['end-of-call-report', 'hang'],
  backgroundSound: 'off',
  // FIXED: was `true`, but this legacy flag is superseded by
  // backgroundSpeechDenoisingPlan above; enabling both stacks a second
  // denoising pass on the audio path and adds latency per turn.
  backgroundDenoisingEnabled: false,
};
Was this page helpful?