From 1f56b88f106a9041d1f11500c9b32716bc22fd99 Mon Sep 17 00:00:00 2001
From: Dongri Jin <dongrify@gmail.com>
Date: Sun, 24 Dec 2023 15:30:49 +0800
Subject: [PATCH] Add text-to-speech

---
 examples/text_to_speech.rs | 21 ++++++++++++++++++++
 src/v1/api.rs              | 40 ++++++++++++++++++++++++++++++++++++--
 src/v1/audio.rs            | 36 ++++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 examples/text_to_speech.rs
diff --git a/examples/text_to_speech.rs b/examples/text_to_speech.rs
new file mode 100644
index 0000000..43b5c4c
--- /dev/null
+++ b/examples/text_to_speech.rs
@@ -0,0 +1,21 @@
+use openai_api_rs::v1::api::Client;
+use openai_api_rs::v1::audio::{self, AudioSpeechRequest, TTS_1};
+use std::env;
+
+/// Converts one sentence to speech (`tts-1`, voice `alloy`), saved as `problem.mp3`.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // A missing or non-Unicode key is returned as an error rather than panicking.
+    let client = Client::new(env::var("OPENAI_API_KEY")?);
+
+    let req = AudioSpeechRequest::new(
+        TTS_1.to_string(),
+        String::from("Money is not problem, Problem is no money"),
+        audio::VOICE_ALLOY.to_string(),
+        String::from("problem.mp3"),
+    );
+    let result = client.audio_speech(req)?;
+    println!("{:?}", result);
+    Ok(())
+}
+
+// OPENAI_API_KEY=xxxx cargo run --package openai-api-rs --example text_to_speech
diff --git a/src/v1/api.rs b/src/v1/api.rs
index b450601..5a67332 100644
--- a/src/v1/api.rs
+++ b/src/v1/api.rs
@@ -3,8 +3,8 @@ use crate::v1::assistant::{
     ListAssistant, ListAssistantFile,
 };
 use crate::v1::audio::{
-    AudioTranscriptionRequest, AudioTranscriptionResponse, AudioTranslationRequest,
-    AudioTranslationResponse,
+    AudioSpeechRequest, AudioSpeechResponse, AudioTranscriptionRequest, AudioTranscriptionResponse,
+    AudioTranslationRequest, AudioTranslationResponse,
 };
 use crate::v1::chat_completion::{ChatCompletionRequest, ChatCompletionResponse};
 use crate::v1::completion::{CompletionRequest, CompletionResponse};
@@ -38,6 +38,9 @@ use crate::v1::run::{
 use crate::v1::thread::{CreateThreadRequest, ModifyThreadRequest, ThreadObject};
 
 use minreq::Response;
+use std::fs::{create_dir_all, File};
+use std::io::Write;
+use std::path::Path;
 
 const API_URL_V1: &str = "https://api.openai.com/v1";
 
@@ -300,6 +303,39 @@ impl Client {
         }
     }
 
+    /// Requests speech audio by posting `req` to `/audio/speech` and saves it.
+    ///
+    /// The bytes of the response are written to the file at `req.output`,
+    /// creating any missing parent directories first. On success the
+    /// returned [`AudioSpeechResponse`] always has `result` set to `true`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from the underlying `post` call. A failure to
+    /// create the directories, create the file, or write to it is returned
+    /// as an [`APIError`] whose `message` is the I/O error's text.
+    pub fn audio_speech(&self, req: AudioSpeechRequest) -> Result<AudioSpeechResponse, APIError> {
+        // All filesystem failures map to an `APIError` the same way, so the
+        // conversion lives in one place instead of three `match` arms.
+        let io_err = |e: std::io::Error| APIError {
+            message: e.to_string(),
+        };
+
+        let res = self.post("/audio/speech", &req)?;
+        let bytes = res.as_bytes();
+
+        let path = Path::new(req.output.as_str());
+        // `parent()` is `None` for an empty or root path, leaving nothing to create.
+        if let Some(parent) = path.parent() {
+            create_dir_all(parent).map_err(io_err)?;
+        }
+
+        let mut file = File::create(path).map_err(io_err)?;
+        file.write_all(bytes).map_err(io_err)?;
+
+        Ok(AudioSpeechResponse { result: true })
+    }
+
     pub fn create_fine_tune(
         &self,
         req: CreateFineTuneRequest,
diff --git a/src/v1/audio.rs b/src/v1/audio.rs
index 9f92bf8..e78ca59 100644
--- a/src/v1/audio.rs
+++ b/src/v1/audio.rs
@@ -79,3 +79,58 @@ impl_builder_methods!(
 pub struct AudioTranslationResponse {
     pub text: String,
 }
+
+// Model identifiers to pass as `AudioSpeechRequest::model`.
+pub const TTS_1: &str = "tts-1";
+pub const TTS_1_HD: &str = "tts-1-hd";
+
+// Voice names to pass as `AudioSpeechRequest::voice`.
+pub const VOICE_ALLOY: &str = "alloy";
+pub const VOICE_ECHO: &str = "echo";
+pub const VOICE_FABLE: &str = "fable";
+pub const VOICE_ONYX: &str = "onyx";
+pub const VOICE_NOVA: &str = "nova";
+pub const VOICE_SHIMMER: &str = "shimmer";
+
+/// Parameters for a text-to-speech call made through `Client::audio_speech`.
+///
+/// `model`, `input` and `voice` are serialized into the request body, while
+/// `output` stays local and tells the client where to save the audio.
+#[derive(Debug, Serialize, Clone)]
+pub struct AudioSpeechRequest {
+    /// Model identifier, e.g. [`TTS_1`].
+    pub model: String,
+    /// Text to convert to speech.
+    pub input: String,
+    /// Voice name, e.g. [`VOICE_ALLOY`].
+    pub voice: String,
+    /// Path of the file the client writes the audio to.
+    ///
+    /// Skipped when serializing: this is a local path, not a parameter of the
+    /// speech endpoint, so it should not be sent along with the request.
+    #[serde(skip_serializing)]
+    pub output: String,
+}
+
+impl AudioSpeechRequest {
+    /// Creates a request from its four fields, stored exactly as given.
+    pub fn new(model: String, input: String, voice: String, output: String) -> Self {
+        Self {
+            model,
+            input,
+            voice,
+            output,
+        }
+    }
+}
+
+// NOTE(review): invoked with an empty field list; presumably this generates no
+// builder methods for now. Confirm against the macro definition.
+impl_builder_methods!(AudioSpeechRequest,);
+
+/// Result returned by `Client::audio_speech`.
+#[derive(Debug, Deserialize)]
+pub struct AudioSpeechResponse {
+    /// Set to `true` by the client after the audio file has been written.
+    pub result: bool,
+}