Very slow speaking response times
Hey VAPI folks - I'm trying to figure out settings that produce natural-feeling conversation turn times. Right now I talk and it can take 2-3 seconds before the AI responds, which feels really slow. I'm using claude-sonnet-4 as my production completions model, but even when I switch to Groq's gpt-oss-120B it's not much better. Is it the STT provider I'm using (Speechmatics)? Is it Cartesia on the TTS side? Is there some observability where I can see, for each turn, what is taking the most time?
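For context, the only per-turn timing I have today is a hand-rolled stopwatch around the streaming completion call behind my custom-llm endpoint, roughly the sketch below. Nothing here is from the Vapi SDK - timeCompletion and streamCompletion are my own helper names:

// Rough time-to-first-token / total-latency timer around my streaming LLM call.
// All names are my own helpers, not Vapi APIs; the stream shape is assumed.
export async function timeCompletion(
  streamCompletion: () => AsyncIterable<string>, // my streaming wrapper (assumed shape)
): Promise<{ firstTokenMs: number; totalMs: number }> {
  const start = Date.now();
  let firstTokenMs = -1;

  for await (const chunk of streamCompletion()) {
    if (firstTokenMs < 0 && chunk.length > 0) {
      firstTokenMs = Date.now() - start; // time until the model produced anything
    }
  }

  const totalMs = Date.now() - start;
  console.log(`LLM first token: ${firstTokenMs} ms, full response: ${totalMs} ms`);
  return { firstTokenMs, totalMs };
}

That only covers the LLM leg, though, so I can't tell how much of the 2-3 seconds is transcription, endpointing, or TTS.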
Here's my current config - any help would be appreciated. I may end up spending a lot with you guys, so happy to hop on a call:
// Transient assistant configuration defaults
export const ASSISTANT_CONFIG: Partial<Vapi.CreateAssistantDto> = {
name: 'AI Coach',
model: {
provider: PRODUCTION_MODEL.provider,
model: PRODUCTION_MODEL.model,
maxTokens: 1200,
temperature: PRODUCTION_MODEL.temperature,
...(PRODUCTION_MODEL.provider === 'custom-llm'
? { url: PRODUCTION_MODEL.url, metadataSendMode: 'off' }
: {}),
} as any,
voice: {
provider: 'cartesia',
voiceId: 'b7d50908-b17c-442d-ad8d-810c63997ed9', // Professional female voice
},
// OPTIMIZED: Much more responsive interruption handling
startSpeakingPlan: {
waitSeconds: 0.05, // REDUCED from 0.12 - start speaking faster
transcriptionEndpointingPlan: {
onPunctuationSeconds: 0.03, // REDUCED from 0.05
onNoPunctuationSeconds: 0.25, // REDUCED from 0.38
onNumberSeconds: 0.2, // REDUCED from 0.3
},
smartEndpointingPlan: {
provider: 'livekit',
waitFunction: '200 + 800 * max(0, x - 0.3)', // FASTER response curve
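// For reference, this curve evaluates to: x = 0.3 -> 200 ms, x = 0.8 -> 600 ms, x = 1.0 -> 760 ms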
},
},
// OPTIMIZED: More sensitive interruption detection
stopSpeakingPlan: {
numWords: 1, // REDUCED from 2 - interrupt faster
voiceSeconds: 0.12, // REDUCED from 0.18 - more sensitive
backoffSeconds: 0.4, // REDUCED from 0.6 - shorter pause after interruption
},
backgroundSpeechDenoisingPlan: {
smartDenoisingPlan: {
enabled: true,
},
// SIMPLIFIED: Reduce processing overhead
fourierDenoisingPlan: {
enabled: false, // DISABLED - can add latency
},
},
messagePlan: {
idleMessages: ['Are you still there?', "I'm here whenever you're ready to continue."],
idleTimeoutSeconds: 8,
idleMessageMaxSpokenCount: 3,
idleMessageResetCountOnUserSpeechEnabled: true,
},
transcriber: {
provider: 'speechmatics',
model: 'default', // Use 'enhanced' for better accuracy or 'standard' for cost efficiency
language: 'en',
fallbackPlan: {
transcribers: [
{
provider: 'assembly-ai',
language: 'en',
},
],
},
},
silenceTimeoutSeconds: 20,
maxDurationSeconds: 600, // 10 minutes max
// Subscribe to important events only (exclude speech-update to reduce spam)
serverMessages: ['end-of-call-report', 'hang'],
backgroundSound: 'off',
backgroundDenoisingEnabled: true, // Enable background noise reduction
};
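And in case it's relevant, PRODUCTION_MODEL is just a small constant I flip between providers. Roughly this shape - the values below are placeholders, only the shape matches what I actually have:

// Rough shape of the PRODUCTION_MODEL constant referenced above.
// Values are placeholders; the provider union is just what I switch between.
type ModelTarget = {
  provider: 'anthropic' | 'groq' | 'custom-llm';
  model: string;
  temperature: number;
  url?: string; // only set when provider is 'custom-llm'
};

export const PRODUCTION_MODEL: ModelTarget = {
  provider: 'anthropic',
  model: 'claude-sonnet-4',
  temperature: 0.7, // placeholder
};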