Skip to content

Commit

Permalink
Callautomation live transcript changed to single port
Browse files Browse the repository at this point in the history
  • Loading branch information
BalajiUmmadisetti committed Dec 10, 2024
1 parent 5c07bcb commit 5ff97cc
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 84 deletions.
3 changes: 2 additions & 1 deletion callautomation-az-openai-voice/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"@azure/openai": "^2.0.0-beta.2",
"@types/express": "^4.17.17",
"@types/node": "^20.2.1",
"callautomation_az_openai_voice": "file:",
"dotenv": "^16.3.1",
"express": "^4.18.2",
"openai": "^4.72.0",
Expand All @@ -29,4 +30,4 @@
"ts-node": "^10.9.1",
"typescript": "^5.0.4"
}
}
}
37 changes: 12 additions & 25 deletions callautomation-live-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,21 @@ It accepts an incoming call from a phone number, performs DTMF recognition, and
3. cd into the `callautomation-live-transcription` folder.
4. From the root of the above folder, and with node installed, run `npm install`

### Setup and host ngrok

You can run multiple tunnels on ngrok by changing ngrok.yml file as follows:

1. Open the ngrok.yml file from a powershell using the command ngrok config edit
2. Update the ngrok.yml file as follows:
authtoken: xxxxxxxxxxxxxxxxxxxxxxxxxx
version: "2"
region: us
tunnels:
first:
addr: 8080
proto: http
host_header: localhost:8080
second:
proto: http
addr: 5001
host_header: localhost:5001
NOTE: Make sure the "addr:" field has only the port number, not the localhost url.
3. Start all ngrok tunnels configured using the following command on a powershell - ngrok start --all
4. Once you have set up the websocket server, note down the ngrok url on your server's port as the websocket url in this application for the incoming call scenario. Just replace the https:// with wss:// and update it in the .env file.

### Add a Managed Identity to the ACS Resource that connects to the Cognitive Services Resource

Follow the instructions in the [documentation](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/azure-communication-services-azure-cognitive-services-integration).

### Setup and host your Azure DevTunnel

[Azure DevTunnels](https://learn.microsoft.com/en-us/azure/developer/dev-tunnels/get-started?tabs=windows) is an Azure service that enables you to share local web services hosted on the internet. Use the commands below to connect your local development environment to the public internet. This creates a tunnel with a persistent endpoint URL which allows anonymous access. We will then use this endpoint to notify your application of calling events from the ACS Call Automation service.

```bash
devtunnel create --allow-anonymous
devtunnel port create -p 8080
devtunnel host
```

### Configuring application

Open the `.env` file to configure the following settings
Expand All @@ -65,10 +54,8 @@ Open the `.env` file to configure the following settings

### Run app locally

1. Open a new Powershell window, cd into the `callautomation-live-transcription` folder and run `npm run dev:app`
1. Open a new Powershell window, cd into the `callautomation-az-openai-voice` folder and run `npm run dev`
2. Browser should pop up with the below page. If not navigate it to `http://localhost:8080/`
3. Open a new Powershell window, cd into the `callautomation-live-transcription` folder and run `npm run dev:websocket`
4. Configure websocket url as transport url ex. `wss://localhost:5001`
5. Register an EventGrid Webhook for the IncomingCall Event that points to your DevTunnel URI endpoint ex `{CALLBACK_HOST_URI}/api/incomingCall` and register the Recording File Status Updated event to your recordingstatus api endpoint ex. `{CALLBACK_HOST_URI}/api/recordingFileStatus`. Instructions [here](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/incoming-call-notification).
3. Register an EventGrid Webhook for the IncomingCall Event that points to your DevTunnel URI endpoint ex `{CALLBACK_HOST_URI}/api/incomingCall` and register the Recording File Status Updated event to your recordingstatus api endpoint ex. `{CALLBACK_HOST_URI}/api/recordingFileStatus`. Instructions [here](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/incoming-call-notification).

Once that's completed you should have a running application. The best way to test this is to place a call to your ACS phone number and talk to your intelligent agent.
4 changes: 2 additions & 2 deletions callautomation-live-transcription/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
"main": "index.js",
"scripts": {
"build": "tsc",
"dev:app": "nodemon ./src/app.ts",
"dev:websocket": "nodemon ./src/websocket.ts"
"dev": "nodemon ./src/app.ts"
},
"keywords": [],
"author": "",
Expand All @@ -18,6 +17,7 @@
"@azure/openai": "^1.0.0-beta.7",
"@types/express": "^4.17.17",
"@types/node": "^20.2.1",
"callautomation_live_transcription": "file:",
"dotenv": "^16.3.1",
"express": "^4.18.2",
"uuid": "^9.0.1",
Expand Down
77 changes: 72 additions & 5 deletions callautomation-live-transcription/src/app.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
import { config } from 'dotenv';
import express, { Application } from 'express';
import http from 'http';
import { PhoneNumberIdentifier, createIdentifierFromRawId } from "@azure/communication-common";
import {
CallAutomationClient, CallConnection, AnswerCallOptions, CallMedia,
TextSource, AnswerCallResult,
CallIntelligenceOptions, PlayOptions,
CallMediaRecognizeDtmfOptions,
TranscriptionOptions,
CallLocator, StartRecordingOptions, CallInvite,
CallLocator, StartRecordingOptions, CallInvite,streamingData
}
from "@azure/communication-call-automation";
import { v4 as uuidv4 } from 'uuid';
import WebSocket from 'ws';
config();

const PORT = process.env.PORT;
const app: Application = express();
app.use(express.json());

// Create common server for app and websocket
const server = http.createServer(app);

let callConnectionId: string;
let callConnection: CallConnection;
let acsClient: CallAutomationClient;
Expand Down Expand Up @@ -54,7 +59,6 @@ let recordingId: string;
let recordingLocation: string;
const agentPhonenumber = process.env.AGENT_PHONE_NUMBER;
const acsPhoneNumber = process.env.ACS_PHONE_NUMBER;
const transportUrl = process.env.TRANSPORT_URL;
const transportType = process.env.TRANSPORT_TYPE;
const locale = process.env.LOCALE;

Expand All @@ -81,10 +85,23 @@ app.post("/api/incomingCall", async (req: any, res: any) => {
const uuid = uuidv4();
const callbackUri = `${process.env.CALLBACK_HOST_URI}/api/callbacks/${uuid}?callerId=${callerId}`;
const incomingCallContext = eventData.incomingCallContext;
const websocketUrl = process.env.CALLBACK_HOST_URI.replace(/^https:\/\//, 'wss://');

console.log(`Cognitive service endpoint: ${process.env.COGNITIVE_SERVICE_ENDPOINT.trim()}`);
const callIntelligenceOptions: CallIntelligenceOptions = { cognitiveServicesEndpoint: process.env.COGNITIVE_SERVICE_ENDPOINT.trim() };
const transcriptionOptions: TranscriptionOptions = { transportUrl: transportUrl, transportType: transportType, locale: locale, startTranscription: false }
const answerCallOptions: AnswerCallOptions = { callIntelligenceOptions: callIntelligenceOptions, transcriptionOptions: transcriptionOptions };
const transcriptionOptions: TranscriptionOptions =
{
transportUrl: websocketUrl,
transportType: transportType,
locale: locale,
startTranscription: false
}
const answerCallOptions: AnswerCallOptions =
{
callIntelligenceOptions: callIntelligenceOptions,
transcriptionOptions: transcriptionOptions

};
console.log(`TranscriptionOption:" ${JSON.stringify(transcriptionOptions)}`);
answerCallResult = await acsClient.answerCall(incomingCallContext, callbackUri, answerCallOptions);
callConnection = answerCallResult.callConnection;
Expand Down Expand Up @@ -318,7 +335,57 @@ async function delayWithSetTimeout(): Promise<void> {
}

// Start the server
app.listen(PORT, async () => {
server.listen(PORT, async () => {
console.log(`Server is listening on port ${PORT}`);
await createAcsClient();
});


// Attach a WebSocket server to the shared HTTP `server` created above, so the
// Express app and the transcription websocket listen on the same PORT
// (the "single port" change this commit introduces).
const wss = new WebSocket.Server({ server});

wss.on('connection', (ws: WebSocket) => {
  console.log('Client connected');

  // Each incoming packet carries an ACS live-transcription payload.
  // It is decoded to text purely for logging, then parsed with the SDK's
  // `streamingData` helper (imported from @azure/communication-call-automation).
  ws.on('message', (packetData: ArrayBuffer) => {
    const decoder = new TextDecoder();
    const stringJson = decoder.decode(packetData);
    console.log("STRING JSON=>--" + stringJson)
    var response = streamingData(packetData);

    // A 'locale' property distinguishes a transcription-metadata packet;
    // log its identifying fields.
    if ('locale' in response) {
      console.log("--------------------------------------------")
      console.log("Transcription Metadata")
      console.log("CALL CONNECTION ID:-->" + response.callConnectionId);
      console.log("CORRELATION ID:-->" + response.correlationId);
      console.log("LOCALE:-->" + response.locale);
      console.log("SUBSCRIPTION ID:-->" + response.subscriptionId);
      console.log("--------------------------------------------")
    }

    // A 'text' property distinguishes a transcription-data packet;
    // log the recognized text plus per-word timing details.
    if ('text' in response) {
      console.log("--------------------------------------------")
      console.log("Transcription Data")
      console.log("TEXT:-->" + response.text);
      console.log("FORMAT:-->" + response.format);
      console.log("CONFIDENCE:-->" + response.confidence);
      console.log("OFFSET IN TICKS:-->" + response.offsetInTicks);
      console.log("DURATION IN TICKS:-->" + response.durationInTicks);
      console.log("RESULT STATE:-->" + response.resultState);
      // The participant is either a phone-number or a communication-user
      // identifier; log whichever shape is present.
      if ('phoneNumber' in response.participant) {
        console.log("PARTICIPANT:-->" + response.participant.phoneNumber);
      }
      if ('communicationUserId' in response.participant) {
        console.log("PARTICIPANT:-->" + response.participant.communicationUserId);
      }
      // Per-word breakdown of the recognized utterance.
      response.words.forEach(element => {
        console.log("TEXT:-->" + element.text)
        console.log("DURATION IN TICKS:-->" + element.durationInTicks)
        console.log("OFFSET IN TICKS:-->" + element.offsetInTicks)
      });
      console.log("--------------------------------------------")
    }
  });

  ws.on('close', () => {
    console.log('Client disconnected');
  });
});

// Logged at module load; the websocket shares PORT with the HTTP server above.
console.log(`WebSocket server running on port ${PORT}`);
51 changes: 0 additions & 51 deletions callautomation-live-transcription/src/websocket.ts

This file was deleted.

0 comments on commit 5ff97cc

Please sign in to comment.