From e96bc27683bad8e0605b056a9316f0e6df973da5 Mon Sep 17 00:00:00 2001
From: Ovidijus Parsiunas
Date: Tue, 14 Jan 2025 21:08:36 +0900
Subject: [PATCH] OpenAI Realtime response frequency animation functionality

---
 .../src/services/openAI/openAIRealtimeIO.ts | 91 +++++++++++++++----
 1 file changed, 72 insertions(+), 19 deletions(-)

diff --git a/component/src/services/openAI/openAIRealtimeIO.ts b/component/src/services/openAI/openAIRealtimeIO.ts
index 35999792..54430c22 100644
--- a/component/src/services/openAI/openAIRealtimeIO.ts
+++ b/component/src/services/openAI/openAIRealtimeIO.ts
@@ -6,8 +6,6 @@ import {OpenAIUtils} from './utils/openAIUtils';
 import {APIKey} from '../../types/APIKey';
 import {DeepChat} from '../../deepChat';
 
-// https://platform.openai.com/docs/guides/realtime-webrtc
-// https://platform.openai.com/docs/api-reference/realtime-server-events/conversation
 export class OpenAIRealtimeIO extends DirectServiceIO {
   override insertKeyPlaceholderText = 'OpenAI API Key';
   override keyHelpUrl = 'https://platform.openai.com/account/api-keys';
@@ -16,6 +14,9 @@ export class OpenAIRealtimeIO extends DirectServiceIO {
   _functionHandler?: ChatFunctionHandler;
   asyncCallInProgress = false; // used when streaming tools
   private readonly _avatarConfig: OpenAIRealTime['avatar'];
+  private readonly _avatarMaxScale: number = 2.5;
+  private readonly _avatarEl: HTMLImageElement;
+  private readonly _containerEl: HTMLDivElement;
 
   constructor(deepChat: DeepChat) {
     const directConnectionCopy = JSON.parse(JSON.stringify(deepChat.directConnection)) as DirectConnection;
@@ -26,41 +27,42 @@ export class OpenAIRealtimeIO extends DirectServiceIO {
       if (config.avatar) this._avatarConfig = config.avatar;
     }
     this.rawBody.model ??= 'gpt-4o';
+    this._avatarEl = OpenAIRealtimeIO.createAvatarEl(this._avatarConfig);
+    this._containerEl = OpenAIRealtimeIO.createContainerEl(this._avatarEl, this._avatarConfig);
+    if (this._avatarConfig?.maxScale && this._avatarConfig.maxScale > 1) {
+      this._avatarMaxScale = this._avatarConfig.maxScale;
+    }
     this.init();
   }
 
-  public setUpView(containerElement: HTMLElement, parentElement: HTMLElement) {
-    containerElement.style.display = 'none';
-    parentElement.appendChild(this.createContainer());
+  public setUpView(oldContainerElement: HTMLElement, parentElement: HTMLElement) {
+    oldContainerElement.style.display = 'none';
+    parentElement.appendChild(this._containerEl);
   }
 
-  private createContainer() {
+  private static createContainerEl(avatarEl: HTMLImageElement, config?: OpenAIRealTime['avatar']) {
     const container = document.createElement('div');
     container.id = 'deep-chat-openai-realtime-container';
-    container.appendChild(this.avatarContainer());
+    container.appendChild(this.createAvatarContainerEl(avatarEl, config));
     return container;
   }
 
-  private avatarContainer() {
+  private static createAvatarContainerEl(avatarEl: HTMLImageElement, config?: OpenAIRealTime['avatar']) {
     const avatarContainer = document.createElement('div');
     avatarContainer.id = 'deep-chat-openai-realtime-avatar-container';
-    Object.assign(avatarContainer.style, this._avatarConfig?.styles?.container);
-    avatarContainer.appendChild(this.createAvatar());
+    Object.assign(avatarContainer.style, config?.styles?.container);
+    avatarContainer.appendChild(avatarEl);
     return avatarContainer;
   }
 
-  private createAvatar() {
+  private static createAvatarEl(config?: OpenAIRealTime['avatar']) {
     const avatar = document.createElement('img');
     avatar.id = 'deep-chat-openai-realtime-avatar';
-    Object.assign(avatar.style, this._avatarConfig?.styles?.avatar);
-    avatar.src = this._avatarConfig?.src || avatarUrl;
+    Object.assign(avatar.style, config?.styles?.avatar);
+    avatar.src = config?.src || avatarUrl;
     return avatar;
   }
 
-  override isCustomView() {
-    return true;
-  }
-
   private async init() {
     // Get an ephemeral key from your server - see server code below
     // const tokenResponse = await fetch('/session');
@@ -73,7 +75,28 @@
     // Set up to play remote audio from the model
     const audioEl = document.createElement('audio');
     audioEl.autoplay = true;
-    pc.ontrack = (e) => (audioEl.srcObject = e.streams[0]);
+    const audioContext = new AudioContext();
+
+    const analyser = audioContext.createAnalyser();
+    analyser.fftSize = 256; // Determines frequency resolution
+    const frequencyData = new Uint8Array(analyser.frequencyBinCount);
+
+    // Monitor when tracks are added to the peer connection
+    pc.ontrack = async (e) => {
+      if (e.streams[0]) {
+        audioEl.srcObject = e.streams[0];
+
+        const source = audioContext.createMediaStreamSource(e.streams[0]);
+        if (audioContext.state === 'suspended') {
+          await audioContext.resume();
+          console.log('AudioContext resumed');
+        }
+        source.connect(analyser);
+        this.monitorFrequencies(analyser, frequencyData);
+      } else {
+        console.error('No streams found in the ontrack event.');
+      }
+    };
 
     // Add local audio track for microphone input in the browser
     const ms = await navigator.mediaDevices.getUserMedia({
@@ -86,8 +109,9 @@
     dc.addEventListener('message', (e) => {
       // Realtime server events appear here!
       const response = JSON.parse(e.data);
+      // console.log(response);
      if (response.type === 'response.audio_transcript.delta') {
-        console.log(response.delta);
+        // console.log(response.delta);
      }
    });
 
@@ -112,4 +136,33 @@
     };
     await pc.setRemoteDescription(answer);
   }
+
+  // there is a bug where sometimes upon refreshing the browser too many times the frequencyData is all 0s
+  // in such instance please wait and refresh at a later time
+  private monitorFrequencies(analyser: AnalyserNode, frequencyData: Uint8Array) {
+    const updateFrequencyData = () => {
+      analyser.getByteFrequencyData(frequencyData);
+
+      // Calculate loudness (sum of all frequency amplitudes)
+      const totalLoudness = frequencyData.reduce((sum, value) => sum + value, 0);
+      const maxLoudness = frequencyData.length * 255; // Maximum possible loudness
+      const normalizedLoudness = (totalLoudness / maxLoudness) * 100; // Scale to 100%
+
+      // const hasAudio = frequencyData.some((value) => value > 0);
+      // if (hasAudio) console.log('Non-zero frequency data detected');
+
+      // Update the avatar scale
+      const minScale = 1;
+      const scale = minScale + (normalizedLoudness / 100) * (this._avatarMaxScale - minScale);
+      this._avatarEl.style.transform = `scale(${scale})`;
+
+      requestAnimationFrame(updateFrequencyData);
+    };
+
+    updateFrequencyData();
+  }
+
+  override isCustomView() {
+    return true;
+  }
 }
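
Note: the following is a minimal standalone sketch, not part of the patch, of the loudness-to-scale mapping that monitorFrequencies applies on each animation frame. The helper name loudnessToScale and its default arguments are illustrative assumptions only; the component itself performs this calculation inline.

// Illustrative sketch (assumed helper, not in the component): computes the avatar
// scale for one frame of data produced by AnalyserNode.getByteFrequencyData.
function loudnessToScale(frequencyData: Uint8Array, maxScale = 2.5, minScale = 1): number {
  // Sum all frequency-bin amplitudes to approximate loudness
  const totalLoudness = frequencyData.reduce((sum, value) => sum + value, 0);
  const maxLoudness = frequencyData.length * 255; // each byte-valued bin peaks at 255
  const normalizedLoudness = maxLoudness === 0 ? 0 : (totalLoudness / maxLoudness) * 100;
  // Map the 0-100 loudness range onto minScale..maxScale for the CSS transform
  return minScale + (normalizedLoudness / 100) * (maxScale - minScale);
}

// e.g. silence maps to scale(1); full-amplitude audio maps to scale(2.5), the default _avatarMaxScale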