```python
@app.get("/get-voice-token")
async def get_voice_token():
    """Get authentication token, agent ID, and create thread for Voice Live API.

    Returns:
        JSONResponse whose body carries ``token``, ``expires_on``,
        ``agent_id`` and ``thread_id``.

    Raises:
        HTTPException: 400 when no agent ID is configured (``AGENT_IDS`` is
            empty or its first entry is falsy); 500 when acquiring the token
            or creating the thread fails.
    """
    try:
        # NOTE(review): get_token() and threads.create() are called without
        # await inside an async handler; if they are the synchronous SDK
        # variants they will block the event loop - confirm.
        credential = DefaultAzureCredential()
        token = credential.get_token("https://cognitiveservices.azure.com/.default")

        agent_id = AGENT_IDS[0] if AGENT_IDS else None
        if not agent_id:
            raise HTTPException(status_code=400, detail="No agent ID configured")

        client = AgentsClient(credential=credential, endpoint=ENDPOINT)

        # Create thread for the agent context
        thread = client.threads.create()
        thread_id = thread.id

        logging.info("Created thread %s for agent %s", thread_id, agent_id)

        return JSONResponse(content={
            "token": token.token,
            "expires_on": token.expires_on,
            "agent_id": agent_id,
            "thread_id": thread_id
        })
    except HTTPException:
        # Deliberate HTTP errors (the 400 above) must reach the client as-is
        # instead of being rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        logging.exception("Failed to create voice session")
        raise HTTPException(status_code=500, detail=f"Voice session setup failed: {str(e)}") from e
```

```typescript
// NOTE(review): the Promise type argument was lost in the original paste;
// `void` is assumed here - confirm against the full function.
const initializeVoiceClient = async (): Promise<void> => {
  try {
    // Get authentication token, agent ID, and thread ID
    const tokenRes = await fetch(`${import.meta.env.VITE_API_BASE}/get-voice-token`);
    if (!tokenRes.ok) {
      throw new Error('Failed to get voice token');
    }

    const { token, agent_id, thread_id } = await tokenRes.json();
    // The bearer token is a credential: keep it out of the console.
    console.log('Voice session setup:', { agent_id, thread_id });

    // Construct WebSocket URL
    const wsUrl =
      import.meta.env.VITE_VOICE_WS ||
      `wss://${import.meta.env.VITE_AZURE_RESOURCE_NAME}.cognitiveservices.azure.com/voice-live/realtime`;

    // Initialize client with agent and thread information
    const client = new VoiceLiveClient(wsUrl, token, agent_id, thread_id);

    // Set up event handlers
    client.onUserText((text) => {
      console.log('User speech transcribed:', text);
      setMessages(prev => [...prev, { role: "user", message: text }]);
    });

    client.onModelText((text) => {
      console.log('Model response delta:', text);
      setMessages(prev => {
        const lastMessage = prev[prev.length - 1];
        if (lastMessage && lastMessage.role === 'model') {
          // Update existing model response
          return [...prev.slice(0, -1), { role: "model", message: lastMessage.message + text }];
        } else {
          // Create new model response
          return [...prev, { role: "model", message: text }];
        }
      });
    });
    // (excerpt ends here; the remainder of the function is not shown)
```

```typescript
  private wsUrl: string;
  private token: string;
  private agentId?: string;
  private threadId?: string;

  /**
   * @param wsUrl    WebSocket endpoint the session connects to.
   * @param token    Bearer token sent as the `authorization` query parameter.
   * @param agentId  Optional agent ID added to the session configuration.
   * @param threadId Optional thread ID; only sent when `agentId` is also set.
   */
  constructor(wsUrl: string, token: string, agentId?: string, threadId?: string) {
    this.wsUrl = wsUrl;
    this.token = token;
    this.agentId = agentId;
    this.threadId = threadId;
  }

  // Callback setters
  onUserText(callback: (text: string) => void) {
    this.onUserTextCallback = callback;
  }

  onModelText(callback: (text: string) => void) {
    this.onModelTextCallback = callback;
  }

  onAudioResponse(callback: (audioBlob: Blob) => void) {
    this.onAudioCallback = callback;
  }

  onError(callback: (error: string) => void) {
    this.onErrorCallback = callback;
  }

  onStatus(callback: (status: string) => void) {
    this.onStatusCallback = callback;
  }

  /**
   * Opens the WebSocket and sends the initial `session.update` message.
   *
   * @returns A promise that resolves once the socket is open and the session
   *   configuration has been sent, and rejects if the socket reports an error
   *   first or if setting up the connection throws.
   */
  async startSession(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      try {
        const url = new URL(this.wsUrl);
        url.searchParams.set('api-version', 'modelversion');
        url.searchParams.set('model', 'model');
        url.searchParams.set('authorization', `Bearer ${this.token}`);

        // Log origin + path only: the query string carries the bearer token.
        console.log('Connecting to WebSocket:', `${url.origin}${url.pathname}`);
        this.ws = new WebSocket(url.toString());

        this.ws.onopen = () => {
          console.log('WebSocket connected');
          this.sessionActive = true;
          this.onStatusCallback?.('connected');
          this.sendSessionUpdate();
          resolve();
        };

        this.ws.onmessage = (event) => {
          try {
            const data = JSON.parse(event.data);
            this.handleServerMessage(data);
          } catch (error) {
            console.error('Error parsing WebSocket message:', error);
          }
        };

        this.ws.onerror = (error) => {
          this.onErrorCallback?.('WebSocket connection error');
          // No-op if the promise already resolved in onopen.
          reject(error);
        };

        this.ws.onclose = () => {
          this.sessionActive = false;
          this.onStatusCallback?.('disconnected');
          this.cleanup();
        };
      } catch (error) {
        this.onErrorCallback?.('Failed to start session');
        reject(error);
      }
    });
  }

  /**
   * Sends the `session.update` message describing modalities, voice, turn
   * detection and audio settings. Does nothing unless the socket is open.
   * When an agent ID is set, `session.agent` is added, and `session.thread`
   * as well when a thread ID is also set.
   */
  private sendSessionUpdate() {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;

    const session: Record<string, unknown> = {
      modalities: ['text', 'audio'],
      instructions: this.agentId
        ? `You are an AI assistant with access to uploaded knowledge and tools. Use your knowledge base to provide accurate, helpful responses based on the available information. Agent ID: ${this.agentId}${this.threadId ? `, Thread ID: ${this.threadId}` : ''}`
        : 'You are a helpful AI assistant. Respond naturally and conversationally to user questions.',
      voice: {
        name: 'en-US-AriaNeural',
        type: 'azure-standard'
      },
      turn_detection: {
        type: 'server_vad',
        threshold: 0.5,
        prefix_padding_ms: 300,
        silence_duration_ms: 500
      },
      input_audio_format: 'pcm16',
      output_audio_format: 'pcm16',
      input_audio_transcription: {
        model: 'whisper-1',
        language: 'en'
      },
      input_audio_noise_reduction: {
        type: 'azure_deep_noise_suppression'
      },
      input_audio_echo_cancellation: {
        type: 'server_echo_cancellation'
      }
    };

    // Add agent configuration if available
    if (this.agentId) {
      session.agent = { id: this.agentId };
      if (this.threadId) {
        session.thread = { id: this.threadId };
      }
    }

    const sessionConfig = { type: 'session.update', session };

    console.log('Sending session config:', JSON.stringify(sessionConfig, null, 2));
    this.ws.send(JSON.stringify(sessionConfig));
  }
```