Coverage: src/features/voice-assistant/VoiceAssistant.tsx

Statements: 93.81% (91/97) · Branches: 90.69% (39/43) · Functions: 96% (24/25) · Lines: 94.44% (85/90)

// src/features/voice-assistant/VoiceAssistant.tsx
import React, { useState, useEffect, useCallback, useRef } from 'react';
import { startVoiceSession } from '../../services/aiApiClient';
import { MicrophoneIcon } from '../../components/icons/MicrophoneIcon';
// FIX: Corrected the import path. Types should be imported from the top-level '@google/genai' package.
import type { LiveServerMessage, Blob } from '@google/genai';
import { encode } from '../../utils/audioUtils';
import { logger } from '../../services/logger.client';
import { XMarkIcon } from '../../components/icons/XMarkIcon';
 
export interface VoiceAssistantProps {
  isOpen: boolean;
  onClose: () => void;
}
 
type VoiceStatus = 'idle' | 'connecting' | 'listening' | 'speaking' | 'error';
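
// Note: 'speaking' is reserved for a future audio-playback path; the current
// transcript-only stub never enters that state.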
 
// Define a local interface for the session object to provide type safety.
interface LiveSession {
  close: () => void;
  sendRealtimeInput: (input: { media: Blob }) => void;
}
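
// Because TypeScript typing is structural, the cast in startSession stays sound
// as long as the real session object exposes these two methods.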
 
export const VoiceAssistant: React.FC<VoiceAssistantProps> = ({ isOpen, onClose }) => {
  const [status, setStatus] = useState<VoiceStatus>('idle');
  const [userTranscript, setUserTranscript] = useState('');
  const [modelTranscript, setModelTranscript] = useState('');
  const [history, setHistory] = useState<{ speaker: 'user' | 'model'; text: string }[]>([]);
 
  // The session promise ref holds the promise returned by startVoiceSession.
  // We type it as Promise<LiveSession> to allow calling .then() with proper typing.
  const sessionPromiseRef = useRef<Promise<LiveSession> | null>(null);
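  // The remaining refs hold live media/audio handles that stopRecording must
  // tear down.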
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);
 
  const startAudioStreaming = useCallback((stream: MediaStream) => {
    // This function encapsulates the Web Audio API setup for streaming microphone data.
    audioContextRef.current = new window.AudioContext({ sampleRate: 16000 });
    const source = audioContextRef.current.createMediaStreamSource(stream);
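    // NOTE: ScriptProcessorNode is deprecated in favor of AudioWorkletNode, but it
    // is still broadly supported and, presumably the choice here, needs no separate
    // worklet module to ship.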
    const scriptProcessor = audioContextRef.current.createScriptProcessor(4096, 1, 1);
    scriptProcessorRef.current = scriptProcessor;
 
    scriptProcessor.onaudioprocess = (audioProcessingEvent) => {
      const inputData = audioProcessingEvent.inputBuffer.getChannelData(0);
      // Convert Float32 samples to 16-bit PCM, clamping to [-1, 1] first so that
      // clipped samples saturate instead of wrapping around to the opposite sign.
      const pcm = new Int16Array(inputData.length);
      for (let i = 0; i < inputData.length; i++) {
        const s = Math.max(-1, Math.min(1, inputData[i]));
        pcm[i] = s < 0 ? s * 32768 : s * 32767;
      }
      const pcmBlob: Blob = {
        data: encode(new Uint8Array(pcm.buffer)),
        mimeType: 'audio/pcm;rate=16000',
      };
      // Send the encoded audio data to the voice session.
      sessionPromiseRef.current?.then((session: LiveSession) => {
        session.sendRealtimeInput({ media: pcmBlob });
      });
    };
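    // Wire the node into the graph: a ScriptProcessorNode only fires
    // onaudioprocess while connected to a destination. The handler never writes
    // to the output buffer, so nothing audible is echoed back.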
    source.connect(scriptProcessor);
    scriptProcessor.connect(audioContextRef.current.destination);
  }, []); // No dependencies as it only uses refs and imported functions.
 
  const resetForNewSession = useCallback(() => {
    // Centralize the state reset logic for starting a new session or closing the modal.
    setHistory([]);
    setUserTranscript('');
    setModelTranscript('');
  }, []);
 
  const stopRecording = useCallback(() => {
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
    }
    if (scriptProcessorRef.current) {
      scriptProcessorRef.current.disconnect();
      scriptProcessorRef.current = null;
    }
    if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
      audioContextRef.current.close();
      audioContextRef.current = null;
    }
  }, []);
 
  const handleClose = useCallback(() => {
    // Use optional chaining to simplify closing the session.
    sessionPromiseRef.current?.then((session: LiveSession) => session.close());
    sessionPromiseRef.current = null; // Prevent multiple close attempts.
    stopRecording();
    resetForNewSession();
    setStatus('idle');
    onClose();
  }, [onClose, stopRecording, resetForNewSession]);
 
  const startSession = useCallback(async () => {
    if (status !== 'idle' && status !== 'error') return;
 
    setStatus('connecting');
    resetForNewSession();
 
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaStreamRef.current = stream;
 
      const callbacks = {
        onopen: () => {
          logger.debug('Voice session opened.');
          setStatus('listening');
          // The complex audio setup is now replaced by a single, clear function call.
          startAudioStreaming(stream);
        },
        onmessage: (message: LiveServerMessage) => {
          // NOTE: This stub doesn't play audio, just displays transcripts.
          // A full implementation would use the audioUtils to decode and play audio.
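          // A hedged sketch of that playback path (assuming audioUtils also exports
          // a `decode` counterpart to `encode`, and that model audio arrives as
          // inline base64 PCM; neither is verified here):
          //
          //   const inline = message.serverContent?.modelTurn?.parts?.[0]?.inlineData;
          //   if (inline?.data) {
          //     setStatus('speaking');
          //     const bytes = decode(inline.data); // base64 -> Uint8Array
          //     // ...convert bytes to an AudioBuffer, schedule it on an output
          //     // AudioContext, and return to 'listening' when playback ends.
          //   }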
          const serverContent = message.serverContent;
          if (!serverContent) {
            return; // Exit if there's no content to process
          }
 
          // Safely access nested properties with optional chaining.
          if (serverContent.inputTranscription?.text) {
            setUserTranscript((prev) => prev + serverContent.inputTranscription!.text);
          }
          if (serverContent.outputTranscription?.text) {
            setModelTranscript((prev) => prev + serverContent.outputTranscription!.text);
          }
          if (serverContent.turnComplete) {
            // FIX: To prevent a stale closure, we use the functional update form for all
            // related state updates. This allows us to access the latest state values
            // for userTranscript and modelTranscript at the time of the update, rather
            // than the stale values captured when the `onmessage` callback was created.
            setUserTranscript((currentUserTranscript) => {
              setModelTranscript((currentModelTranscript) => {
                setHistory((prevHistory) => [
                  ...prevHistory,
                  { speaker: 'user', text: currentUserTranscript },
                  { speaker: 'model', text: currentModelTranscript },
                ]);
                return ''; // Reset model transcript
              });
              return ''; // Reset user transcript
            });
          }
        },
        onerror: (e: ErrorEvent) => {
          logger.error('Voice session error', { error: e });
          setStatus('error');
          stopRecording();
        },
        onclose: () => {
          logger.debug('Voice session closed.');
          stopRecording();
          setStatus('idle');
        },
      };
 
      sessionPromiseRef.current = startVoiceSession(callbacks) as Promise<LiveSession>;
    } catch (e) {
      // The caught value is logged as-is inside a context object, so no
      // instanceof narrowing is needed to read its properties here.
      logger.error('Failed to start voice session', { error: e });
      setStatus('error');
    }
  }, [status, stopRecording, resetForNewSession, startAudioStreaming]);
 
  useEffect(() => {
    // Cleanup on unmount
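    // Because handleClose is memoized on its deps, this cleanup also re-runs if
    // `onClose` changes identity, tearing down any in-flight session; callers
    // should pass a stable callback.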
    return () => {
      handleClose();
    };
  }, [handleClose]);
 
  if (!isOpen) return null;
 
  const getStatusText = () => {
    switch (status) {
      case 'idle':
        return 'Click the mic to start';
      case 'connecting':
        return 'Connecting...';
      case 'listening':
        return 'Listening...';
      case 'speaking':
        return 'Thinking...';
      case 'error':
        return 'Connection error. Please try again.';
    }
  };
 
  return (
    <div
      className="fixed inset-0 bg-black bg-opacity-60 z-50 flex justify-center items-center p-4"
      onClick={handleClose}
    >
      <div
        className="bg-white dark:bg-gray-800 rounded-lg shadow-xl w-full max-w-lg relative flex flex-col h-[70vh]"
        onClick={(e) => e.stopPropagation()}
        role="dialog"
        aria-modal="true"
      >
        <div className="flex justify-between items-center p-4 border-b border-gray-200 dark:border-gray-700">
          <h2 className="text-xl font-bold text-gray-800 dark:text-white">Voice Assistant</h2>
          <button
            onClick={handleClose}
            className="text-gray-400 hover:text-gray-600 dark:hover:text-gray-200"
            aria-label="Close"
          >
            <XMarkIcon className="w-6 h-6" />
          </button>
        </div>
 
        <div className="grow p-4 overflow-y-auto space-y-4">
          {history.map((entry, index) => (
            <div
              key={index}
              className={`p-3 rounded-lg max-w-[80%] ${entry.speaker === 'user' ? 'bg-blue-100 dark:bg-blue-900/50 ml-auto' : 'bg-gray-100 dark:bg-gray-700/50'}`}
            >
              <p className="text-sm text-gray-800 dark:text-gray-200">{entry.text}</p>
            </div>
          ))}
          {userTranscript && (
            <div className="p-3 rounded-lg max-w-[80%] bg-blue-100 dark:bg-blue-900/50 ml-auto opacity-70">
              <p className="text-sm text-gray-800 dark:text-gray-200">{userTranscript}</p>
            </div>
          )}
          {modelTranscript && (
            <div className="p-3 rounded-lg max-w-[80%] bg-gray-100 dark:bg-gray-700/50 opacity-70">
              <p className="text-sm text-gray-800 dark:text-gray-200">{modelTranscript}</p>
            </div>
          )}
        </div>
 
        <div className="p-4 border-t border-gray-200 dark:border-gray-700 flex flex-col items-center">
          <button
            onClick={status === 'idle' || status === 'error' ? startSession : handleClose}
            className={`w-16 h-16 rounded-full flex items-center justify-center transition-colors ${status === 'listening' ? 'bg-red-500 hover:bg-red-600' : 'bg-brand-primary hover:bg-brand-secondary'}`}
            aria-label={
              status === 'idle' || status === 'error' ? 'Start voice session' : 'Stop voice session'
            }
          >
            <MicrophoneIcon className="w-8 h-8 text-white" />
          </button>
          <p className="text-sm text-gray-500 dark:text-gray-400 mt-2">{getStatusText()}</p>
        </div>
      </div>
    </div>
  );
};
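
// Example usage in a hypothetical parent component:
//
//   const [isAssistantOpen, setIsAssistantOpen] = useState(false);
//   ...
//   <VoiceAssistant isOpen={isAssistantOpen} onClose={() => setIsAssistantOpen(false)} />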