From c8962ceb74a440988f37c7ddb76f147b4027ef27 Mon Sep 17 00:00:00 2001
From: Ovidijus Parsiunas
Date: Sun, 12 Jan 2025 19:47:46 +0900
Subject: [PATCH] initial code for OpenAI Realtime API

---
Review notes (text between the "---" line and the diffstat is ignored by git am):
* OpenAIRealtimeIO: the init() promise started in the constructor now has a
  rejection handler, so a failed setup is reported instead of surfacing as an
  unhandled promise rejection.
* OpenAIRealtimeIO: a failed setup stops the microphone tracks and closes the
  peer connection; a non-2xx SDP response, a missing microphone track and a
  malformed data channel event are handled explicitly.
* The diffstat and the new-file hunk length were updated to match the new
  content; the blob id on the new file's "index" line still refers to the
  original 75-line version.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch - TODO confirm the context lines against the tree before
  applying.

 component/src/services/openAI/openAIChatIO.ts |   3 +-
 .../src/services/openAI/openAIRealtimeIO.ts   | 103 ++++++++++++++++++
 component/src/services/serviceIOFactory.ts    |   4 +
 component/src/types/openAI.ts                 |   1 +
 4 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 component/src/services/openAI/openAIRealtimeIO.ts

diff --git a/component/src/services/openAI/openAIChatIO.ts b/component/src/services/openAI/openAIChatIO.ts
index dce140d6f..6ce242498 100644
--- a/component/src/services/openAI/openAIChatIO.ts
+++ b/component/src/services/openAI/openAIChatIO.ts
@@ -180,5 +180,6 @@ export class OpenAIChatIO extends DirectServiceIO {
         throw e;
       }
     }
-    throw Error(OpenAIUtils.FUNCTION_TOOL_RESP_ERROR);}
+    throw Error(OpenAIUtils.FUNCTION_TOOL_RESP_ERROR);
+  }
 }
diff --git a/component/src/services/openAI/openAIRealtimeIO.ts b/component/src/services/openAI/openAIRealtimeIO.ts
new file mode 100644
index 000000000..edcca50e3
--- /dev/null
+++ b/component/src/services/openAI/openAIRealtimeIO.ts
@@ -0,0 +1,103 @@
+import {DirectConnection} from '../../types/directConnection';
+import {DirectServiceIO} from '../utils/directServiceIO';
+import {ChatFunctionHandler} from '../../types/openAI';
+import {OpenAIUtils} from './utils/openAIUtils';
+import {DeepChat} from '../../deepChat';
+
+// Service IO that opens an OpenAI Realtime API session from the browser over WebRTC
+export class OpenAIRealtimeIO extends DirectServiceIO {
+  override insertKeyPlaceholderText = 'OpenAI API Key';
+  override keyHelpUrl = 'https://platform.openai.com/account/api-keys';
+  url = 'https://api.openai.com/v1/chat/completions';
+  permittedErrorPrefixes = ['Incorrect'];
+  _functionHandler?: ChatFunctionHandler;
+  asyncCallInProgress = false; // used when streaming tools
+
+  constructor(deepChat: DeepChat) {
+    const directConnectionCopy = JSON.parse(JSON.stringify(deepChat.directConnection)) as DirectConnection;
+    const apiKey = directConnectionCopy.openAI;
+    super(deepChat, OpenAIUtils.buildKeyVerificationDetails(), OpenAIUtils.buildHeaders, apiKey);
+    this.maxMessages ??= -1;
+    this.rawBody.model ??= 'gpt-4o';
+    // init is fire-and-forget (a constructor cannot await), so report a failure here
+    // instead of leaving an unhandled promise rejection
+    this.init().catch((error: unknown) => {
+      console.error('Failed to start the OpenAI Realtime session:', error);
+    });
+  }
+
+  // Sets up the microphone, the peer connection and the events data channel, then performs the SDP
+  // offer/answer exchange with the Realtime endpoint. Rejects when any step fails, after releasing
+  // the microphone and closing the peer connection.
+  private async init(): Promise<void> {
+    // TODO - placeholder: fetch an ephemeral key from your own server (e.g. a /session endpoint)
+    // const tokenResponse = await fetch('/session');
+    // const data = await tokenResponse.json();
+    const EPHEMERAL_KEY = 'key';
+
+    // Create a peer connection
+    const pc = new RTCPeerConnection();
+    let ms: MediaStream | undefined;
+    try {
+      // Set up to play remote audio from the model
+      const audioEl = document.createElement('audio');
+      audioEl.autoplay = true;
+      pc.ontrack = (e) => (audioEl.srcObject = e.streams[0]);
+
+      // Add local audio track for microphone input in the browser
+      ms = await navigator.mediaDevices.getUserMedia({
+        audio: true,
+      });
+      const micTrack = ms.getTracks()[0];
+      if (!micTrack) throw new Error('No microphone track is available');
+      pc.addTrack(micTrack);
+
+      // Set up data channel for sending and receiving events
+      const dc = pc.createDataChannel('oai-events');
+      dc.addEventListener('message', (e) => {
+        // Realtime server events appear here!
+        let response: {type?: string; delta?: unknown} | null;
+        try {
+          response = JSON.parse(e.data);
+        } catch (error) {
+          // a malformed event must not throw inside the listener - report it and carry on
+          console.error('Unable to parse Realtime server event:', error);
+          return;
+        }
+        if (response?.type === 'response.audio_transcript.delta') {
+          console.log(response.delta);
+        }
+      });
+
+      // Start the session using the Session Description Protocol (SDP)
+      const offer = await pc.createOffer();
+      await pc.setLocalDescription(offer);
+
+      const baseUrl = 'https://api.openai.com/v1/realtime';
+      const model = 'gpt-4o-realtime-preview-2024-12-17';
+      const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
+        method: 'POST',
+        body: offer.sdp,
+        headers: {
+          Authorization: `Bearer ${EPHEMERAL_KEY}`,
+          'Content-Type': 'application/sdp',
+        },
+      });
+      // an error response body is not SDP - fail here rather than in setRemoteDescription
+      if (!sdpResponse.ok) {
+        throw new Error(`Realtime SDP exchange failed with status ${sdpResponse.status}`);
+      }
+
+      const answer: RTCSessionDescriptionInit = {
+        type: 'answer',
+        sdp: await sdpResponse.text(),
+      };
+      await pc.setRemoteDescription(answer);
+    } catch (error) {
+      // release the microphone and the connection so a failed setup does not leak them
+      ms?.getTracks().forEach((track) => track.stop());
+      pc.close();
+      throw error;
+    }
+  }
+}
diff --git a/component/src/services/serviceIOFactory.ts b/component/src/services/serviceIOFactory.ts
index f447104e8..052451abd 100644
--- a/component/src/services/serviceIOFactory.ts
+++ b/component/src/services/serviceIOFactory.ts
@@ -23,6 +23,7 @@ import {AzureTextToSpeechIO} from './azure/azureTextToSpeechIO';
 import {AzureSpeechToTextIO} from './azure/azureSpeechToTextIO';
 import {AzureTranslationIO} from './azure/azureTranslationIO';
 import {AzureOpenAIChatIO} from './azure/azureOpenAIChatIO';
+import {OpenAIRealtimeIO} from './openAI/openAIRealtimeIO';
 import {OpenAIImagesIO} from './openAI/openAIImagesIO';
 import {BaseServiceIO} from './utils/baseServiceIO';
 import {OpenAIChatIO} from './openAI/openAIChatIO';
@@ -54,6 +55,9 @@ export class ServiceIOFactory {
       if (directConnection.openAI.assistant) {
         return new OpenAIAssistantIO(deepChat);
       }
+      if (directConnection.openAI.realtime) {
+        return new OpenAIRealtimeIO(deepChat);
+      }
       return new OpenAIChatIO(deepChat);
     }
     if (directConnection.assemblyAI) {
diff --git a/component/src/types/openAI.ts b/component/src/types/openAI.ts
index 0c5824588..ae1e44bb3 100644
--- a/component/src/types/openAI.ts
+++ b/component/src/types/openAI.ts
@@ -104,6 +104,7 @@ export type OpenAIChat = {
 export interface OpenAI {
   chat?: true | OpenAIChat;
   assistant?: true | OpenAIAssistant;
+  realtime?: true;
   images?: true | OpenAIImagesDalle2 | OpenAIImagesDalle3;
   textToSpeech?: true | OpenAITextToSpeech;
   speechToText?: true | OpenAISpeechToText;