From 33f5ddd801fce17acf61eb299dffe647438b9ea6 Mon Sep 17 00:00:00 2001 From: tegnike Date: Wed, 11 Dec 2024 21:38:42 +0100 Subject: [PATCH 01/14] =?UTF-8?q?=E7=89=B9=E5=AE=9AAI=E3=82=B5=E3=83=BC?= =?UTF-8?q?=E3=83=93=E3=82=B9=E3=81=AE=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?= =?UTF-8?q?=E3=83=88=E3=81=AE=E3=83=A2=E3=83=87=E3=83=AB=E5=90=8D=E3=81=8C?= =?UTF-8?q?=E5=AD=98=E5=9C=A8=E3=81=97=E3=81=AA=E3=81=84=E3=82=82=E3=81=AE?= =?UTF-8?q?=E3=82=92=E6=8C=87=E5=AE=9A=E3=81=97=E3=81=A6=E3=81=84=E3=81=9F?= =?UTF-8?q?=E3=81=AE=E3=81=A7=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/components/settings/modelProvider.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx index d457a581..c6ef5d79 100644 --- a/src/components/settings/modelProvider.tsx +++ b/src/components/settings/modelProvider.tsx @@ -51,9 +51,9 @@ const ModelProvider = () => { // オブジェクトを定義して、各AIサービスのデフォルトモデルを保存する // ローカルLLMが選択された場合、AIモデルを空文字に設定 const defaultModels = { - openai: 'gpt-4o', + openai: 'gpt-4o-2024-11-20', anthropic: 'claude-3-5-sonnet-20241022', - google: 'gemini-1.5-pro', + google: 'gemini-1.5-flash-latest', azure: '', groq: 'gemma-7b-it', cohere: 'command-r-plus', From 0981ca79b8236b858463d1d8c01d07e30fbc0307 Mon Sep 17 00:00:00 2001 From: tegnike Date: Wed, 11 Dec 2024 21:56:09 +0100 Subject: [PATCH 02/14] =?UTF-8?q?TTS=E3=81=AB=E3=83=86=E3=82=AD=E3=82=B9?= =?UTF-8?q?=E3=83=88=E3=82=92=E6=B8=A1=E3=81=99=E5=89=8D=E3=81=AB=E7=B5=B5?= =?UTF-8?q?=E6=96=87=E5=AD=97=E5=89=8A=E9=99=A4=E3=81=AA=E3=81=A9=E3=81=AE?= =?UTF-8?q?=E5=8A=A0=E5=B7=A5=E5=87=A6=E7=90=86=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/features/messages/speakCharacter.ts | 31 +++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git 
a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts index d931df0b..9a7fc42d 100644 --- a/src/features/messages/speakCharacter.ts +++ b/src/features/messages/speakCharacter.ts @@ -25,6 +25,28 @@ const typedEnglishToJapanese = englishToJapanese as EnglishToJapanese const speakQueue = new SpeakQueue() +function preprocessMessage( + message: string, + settings: ReturnType +): string | null { + // 前後の空白を削除 + let processed = message.trim() + + // 英語から日本語への変換 + if (settings.changeEnglishToJapanese && settings.selectLanguage === 'ja') { + processed = convertEnglishToJapaneseReading(processed) + } + + // 絵文字を削除 + processed = processed.replace( + /[\u{1F300}-\u{1F9FF}]|[\u{1F600}-\u{1F64F}]|[\u{1F680}-\u{1F6FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1F1E0}-\u{1F1FF}]/gu, + '' + ) + + // 空文字列の場合はnullを返す + return processed || null +} + const createSpeakCharacter = () => { let lastTime = 0 let prevFetchPromise: Promise = Promise.resolve() @@ -33,8 +55,13 @@ const createSpeakCharacter = () => { const ss = settingsStore.getState() onStart?.() - if (ss.changeEnglishToJapanese && ss.selectLanguage === 'ja') { - talk.message = convertEnglishToJapaneseReading(talk.message) + const processedMessage = preprocessMessage(talk.message, ss) + if (!processedMessage && !talk.buffer) { + return + } + + if (processedMessage) { + talk.message = processedMessage } let isNeedDecode = true From 98113fa6c08ade397a2c4765789388b1fb3060e7 Mon Sep 17 00:00:00 2001 From: tegnike Date: Wed, 11 Dec 2024 22:09:30 +0100 Subject: [PATCH 03/14] =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E3=82=92=E4=B8=A6?= =?UTF-8?q?=E5=88=97=E5=87=A6=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/features/messages/speakCharacter.ts | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts index 
9a7fc42d..a57881e1 100644 --- a/src/features/messages/speakCharacter.ts +++ b/src/features/messages/speakCharacter.ts @@ -49,7 +49,6 @@ function preprocessMessage( const createSpeakCharacter = () => { let lastTime = 0 - let prevFetchPromise: Promise = Promise.resolve() return (talk: Talk, onStart?: () => void, onComplete?: () => void) => { const ss = settingsStore.getState() @@ -66,12 +65,8 @@ const createSpeakCharacter = () => { let isNeedDecode = true - const fetchPromise = prevFetchPromise.then(async () => { - const now = Date.now() - if (now - lastTime < 1000) { - await wait(1000 - (now - lastTime)) - } - + // API呼び出しを即時実行 + const fetchPromise = (async () => { let buffer try { if (talk.message == '' && talk.buffer) { @@ -161,13 +156,10 @@ const createSpeakCharacter = () => { handleTTSError(error, ss.selectVoice) return null } - lastTime = Date.now() return buffer - }) - - prevFetchPromise = fetchPromise + })() - // キューを使用した処理に変更 + // 音声バッファの取得後、再生キューに追加 fetchPromise.then((audioBuffer) => { if (!audioBuffer) return From a775750f5d8ca9ee941814a8ac3932ca764f384c Mon Sep 17 00:00:00 2001 From: tegnike Date: Wed, 11 Dec 2024 23:04:36 +0100 Subject: [PATCH 04/14] =?UTF-8?q?README=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 91 ++++++++------------------------------------- docs/README_en.md | 94 ++++++++--------------------------------------- docs/README_ko.md | 15 ++++++++ docs/README_zh.md | 15 ++++++++ 4 files changed, 61 insertions(+), 154 deletions(-) diff --git a/README.md b/README.md index 63d28e70..01694359 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,20 @@ [![今日からあなたもAITuberデベロッパー|ニケちゃん](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f) +## ⚠️ セキュリティに関する重要な注意事項 + +このリポジトリは、個人利用やローカル環境での開発はもちろん、適切なセキュリティ対策を施した上での商用利用も想定しています。ただし、Web環境にデプロイする際は以下の点にご注意ください: + +- **APIキーの取り扱い**: 
バックエンドサーバーを経由してAIサービス(OpenAI, Anthropic等)やTTSサービスのAPIを呼び出す仕様となっているため、APIキーの適切な管理が必要です。 + +### 本番環境での利用について + +本番環境で利用する場合は、以下のいずれかの対応を推奨します: + +1. **バックエンドサーバーの実装**: APIキーの管理をサーバーサイドで行い、クライアントからの直接的なAPIアクセスを避ける +2. **利用者への適切な説明**: 各利用者が自身のAPIキーを使用する場合は、セキュリティ上の注意点について説明する +3. **アクセス制限の実装**: 必要に応じて、適切な認証・認可の仕組みを実装する + ## 開発環境 このプロジェクトは以下の環境で開発されています: @@ -84,7 +98,7 @@ npm run dev ### 使用方法 -1. 設定画面で各種LLMのAPIキーを入力します。 +1. 設定画面で選択したLLMのAPIキーを入力します。 - OpenAI - Anthropic - Google Gemini @@ -108,6 +122,7 @@ npm run dev - ElevenLabs: 様々な言語の選択が可能です。APIキーの入力が必要です。 - OpenAI: APIキーの入力が必要です。 - Azure OpenAI: APIキーの入力が必要です。 + - にじボイス: APIキーの入力が必要です。 5. 入力フォームからキャラクターと会話を開始します。マイク入力も可能です。 ## AITuber配信 @@ -302,77 +317,3 @@ npm run dev - [ロゴの利用規約](./docs/logo_licence.md) - [VRMモデルの利用規約](./docs/vrm_licence.md) - -## コントリビューター用TIPS - -### 新しい言語の追加方法 - -新しい言語をプロジェクトに追加するには、以下の手順に従ってください。 - -1. **言語ファイルの追加**: - - - `locales` ディレクトリに新しい言語のディレクトリを作成し、その中に `translation.json` ファイルを作成します。 - - 例: `locales/fr/translation.json` (フランス語の場合) - -2. **翻訳の追加**: - - - `translation.json` ファイルに、既存の言語ファイルを参考にして翻訳を追加します。 - -3. **言語設定の更新**: - - - `src/lib/i18n.js` ファイルを開き、`resources` オブジェクトに新しい言語を追加します。 - - ```javascript:src/lib/i18n.js - resources: { - ..., - fr: { // 新しい言語コード - translation: require("../../locales/fr/translation.json"), - }, - }, - ``` - -4. **言語選択オプションの追加**: - - - ユーザーが言語を選択できるように、UIの適切な部分(例えば設定画面の言語選択ドロップダウン)に新しい言語オプションを追加します。 - - ```typescript:src/components/settings.tsx - - ``` - -5. 
**テスト**: - - 新しい言語でアプリケーションが正しく表示されるかテストします。 - -これで新しい言語のサポートがプロジェクトに追加されます。 - -#### 音声言語コードの追加 - -- 音声言語コードの対応も追加する必要があります。 -- `Introduction` コンポーネント内の `getVoiceLanguageCode` 関数に新しい言語コードを追加します。 - -```typescript:nike-ChatVRM/src/components/introduction.tsx -const getVoiceLanguageCode = (selectLanguage: string) => { - switch (selectLanguage) { - case 'JP': - return 'ja-JP'; - case 'EN': - return 'en-US'; - case 'ZH': - return 'zh-TW'; - case 'zh-TW': - return 'zh-TW'; - case 'KO': - return 'ko-KR'; - case 'FR': - return 'fr-FR'; - default: - return 'ja-JP'; - } -} -``` - -#### READMEの追加 - -- 新しい言語のREADME (`README_fr.md`), ロゴ利用規約 (`logo_licence_fr.md`), VRMモデル利用規約 (`vrm_licence_fr.md`) を `docs` ディレクトリに追加してください。 diff --git a/docs/README_en.md b/docs/README_en.md index 39b8c7bf..a6059789 100644 --- a/docs/README_en.md +++ b/docs/README_en.md @@ -40,6 +40,20 @@ I've written a detailed usage guide in the article below: [![You are AITuber Developer from Today | Nike-chan](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f) +## ⚠️ Important Security Notice + +This repository is designed for personal use, local development, and commercial use with appropriate security measures. However, please note the following points when deploying to a web environment: + +- **API Key Handling**: Since the specification requires API calls to AI services (OpenAI, Anthropic, etc.) and TTS services through a backend server, proper management of API keys is necessary. + +### Regarding Production Environment Usage + +When using in a production environment, we recommend one of the following approaches: + +1. **Backend Server Implementation**: Manage API keys on the server side and avoid direct API access from clients +2. **Proper User Instructions**: When users use their own API keys, explain the security considerations +3. 
**Access Control Implementation**: Implement appropriate authentication and authorization mechanisms as needed + ## Development Environment This project is developed in the following environment: @@ -109,6 +123,7 @@ npm run dev - ElevenLabs: Various language selection is possible. Please enter the API key. - OpenAI: API key is required. - Azure OpenAI: API key is required. + - Nijivoice: API key is required. 5. Start conversing with the character from the input form. Microphone input is also possible. ## AITuber Streaming @@ -303,82 +318,3 @@ From version v2.0.0, this project adopts a **custom license**. - [Logo Usage Agreement](./logo_licence_en.md) - [VRM Model Usage Agreement](./vrm_licence_en.md) - -## Tips for Contributors - -### How to Add a New Language - -To add a new language to the project, follow these steps: - -1. **Add Language File**: - - - Create a new language directory in the `locales` directory and create a `translation.json` file inside it. - - Example: `locales/fr/translation.json` (for French) - -2. **Add Translations**: - - - Add translations to the `translation.json` file, referring to existing language files. - -3. **Update Language Settings**: - - - Open the `src/lib/i18n.js` file and add the new language to the `resources` object. - - ```javascript:src/lib/i18n.js - resources: { - ..., - fr: { // New language code - translation: require("../../locales/fr/translation.json"), - }, - }, - ``` - -4. **Add Language Selection Option**: - - - Add a new language option to the appropriate part of the UI (e.g., language selection dropdown in the settings screen) so users can select the language. - - ```typescript:src/components/settings.tsx - - ``` - -5. **Test**: - - Test if the application displays correctly in the new language. - -This will add support for the new language to the project. - -#### Adding Voice Language Code - -- You also need to add support for the voice language code. 
-- Add the new language code to the `getVoiceLanguageCode` function in the `Introduction` component. - -```typescript:nike-ChatVRM/src/components/introduction.tsx -const getVoiceLanguageCode = (selectLanguage: string) => { - switch (selectLanguage) { - case 'JP': - return 'ja-JP'; - case 'EN': - return 'en-US'; - case 'ZH': - return 'zh-TW'; - case 'zh-TW': - return 'zh-TW'; - case 'KO': - return 'ko-KR'; - case 'FR': - return 'fr-FR'; - default: - return 'ja-JP'; - } -} -``` - -#### Adding README - -- Add a new language README (`README_fr.md`), logo usage terms (`logo_licence_fr.md`), and VRM model usage terms (`vrm_licence_fr.md`) to the `docs` directory. - -``` - - -``` diff --git a/docs/README_ko.md b/docs/README_ko.md index d1a5741b..c4d596dc 100644 --- a/docs/README_ko.md +++ b/docs/README_ko.md @@ -40,6 +40,20 @@ [![오늘부터 당신도 AITuber 개발자|니케짱](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f) +## ⚠️ 보안에 관한 중요 주의사항 + +이 저장소는 개인 사용과 로컬 환경에서의 개발은 물론, 적절한 보안 대책을 마련한 상태에서의 상업적 사용도 고려하고 있습니다. 단, 웹 환경에 배포할 때는 다음 사항에 주의해 주시기 바랍니다: + +- **API 키 취급**: 백엔드 서버를 통해 AI 서비스(OpenAI, Anthropic 등)와 TTS 서비스의 API를 호출하는 사양이므로, API 키의 적절한 관리가 필요합니다. + +### 프로덕션 환경에서의 사용에 대하여 + +프로덕션 환경에서 사용할 경우, 다음 중 하나의 대응을 권장합니다: + +1. **백엔드 서버 구현**: API 키 관리를 서버 사이드에서 수행하여 클라이언트로부터의 직접적인 API 접근을 피하기 +2. **사용자에 대한 적절한 설명**: 각 사용자가 자신의 API 키를 사용하는 경우, 보안상의 주의사항에 대해 설명하기 +3. **접근 제한 구현**: 필요에 따라 적절한 인증・인가 메커니즘을 구현하기 + ## 개발 환경 이 프로젝트는 다음과 같은 환경에서 개발되었습니다: @@ -109,6 +123,7 @@ npm run dev - ElevenLabs: 다양한 언어 선택이 가능합니다. API 키를 입력하세요. - OpenAI: API 키가 필요합니다. - Azure OpenAI: API 키가 필요합니다. + - Nijivoice: API 키가 필요합니다. 5. 입력 양식에서 캐릭터와 대화를 시작합니다. 마이크 입력도 가능합니다. 
## AITuber 스트리밍 diff --git a/docs/README_zh.md b/docs/README_zh.md index 0347c5b0..7d8fadb2 100644 --- a/docs/README_zh.md +++ b/docs/README_zh.md @@ -39,6 +39,20 @@ [![今天成為AITuber開發者 | Nike-chan](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f) +## ⚠️ 安全性相關重要注意事項 + +本存儲庫不僅考慮個人使用和本地環境開發,還考慮在採取適當安全措施的情況下進行商業使用。但是,在部署到Web環境時,請注意以下幾點: + +- **API密鑰的處理**: 由於系統設計需要通過後端服務器調用AI服務(如OpenAI、Anthropic等)和TTS服務的API,因此需要適當管理API密鑰。 + +### 關於生產環境的使用 + +在生產環境中使用時,建議採取以下其中一種方案: + +1. **實現後端服務器**: 在服務器端管理API密鑰,避免客戶端直接訪問API +2. **向用戶提供適當說明**: 當用戶使用自己的API密鑰時,需要說明安全注意事項 +3. **實現訪問限制**: 根據需要實現適當的身份驗證和授權機制 + ## 開發環境 此項目在以下環境中開發: @@ -108,6 +122,7 @@ npm run dev - ElevenLabs:支持多種語言選擇。需要輸入API密鑰。 - OpenAI:需要API密鑰。 - Azure OpenAI:需要API密鑰。 + - Nijivoice:需要API密鑰。 5. 從輸入表單開始與角色對話。也可以使用麥克風輸入。 ## AITuber直播 From 1500e7499cde511351560a67c3a51b6b570e8660 Mon Sep 17 00:00:00 2001 From: tegnike Date: Thu, 12 Dec 2024 00:09:13 +0100 Subject: [PATCH 05/14] =?UTF-8?q?Issue=E7=94=A8=E3=82=A2=E3=83=8A=E3=83=A9?= =?UTF-8?q?=E3=82=A4=E3=82=B6=E3=82=92=E4=B8=80=E6=97=A6=E9=9D=9E=E6=B4=BB?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/issue-analyzer.yml | 78 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/issue-analyzer.yml b/.github/workflows/issue-analyzer.yml index 1712c0cd..a41371d1 100644 --- a/.github/workflows/issue-analyzer.yml +++ b/.github/workflows/issue-analyzer.yml @@ -1,44 +1,44 @@ -name: Issue Analyzer +# name: Issue Analyzer -on: - issues: - types: [opened] - workflow_dispatch: - inputs: - issue_number: - description: 'Issue Number' - required: true - issue_title: - description: 'Issue Title' - required: true - issue_body: - description: 'Issue Body' - required: true +# on: +# issues: +# types: [opened] +# workflow_dispatch: +# inputs: +# issue_number: +# 
description: 'Issue Number' +# required: true +# issue_title: +# description: 'Issue Title' +# required: true +# issue_body: +# description: 'Issue Body' +# required: true -jobs: - analyze-issue: - runs-on: ubuntu-latest - permissions: - issues: write - steps: - - name: Checkout repository - uses: actions/checkout@v2 +# jobs: +# analyze-issue: +# runs-on: ubuntu-latest +# permissions: +# issues: write +# steps: +# - name: Checkout repository +# uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' +# - name: Set up Python +# uses: actions/setup-python@v2 +# with: +# python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install requests anthropic +# - name: Install dependencies +# run: | +# python -m pip install --upgrade pip +# pip install requests anthropic - - name: Analyze issue - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - ISSUE_NUMBER: ${{ github.event.inputs.issue_number || github.event.issue.number }} - ISSUE_TITLE: ${{ github.event.inputs.issue_title || github.event.issue.title }} - ISSUE_BODY: ${{ github.event.inputs.issue_body || github.event.issue.body }} - run: python scripts/analyze_issue.py +# - name: Analyze issue +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} +# ISSUE_NUMBER: ${{ github.event.inputs.issue_number || github.event.issue.number }} +# ISSUE_TITLE: ${{ github.event.inputs.issue_title || github.event.issue.title }} +# ISSUE_BODY: ${{ github.event.inputs.issue_body || github.event.issue.body }} +# run: python scripts/analyze_issue.py From 5ffe9e611c9a26e989dae8b62188337a3d5037fa Mon Sep 17 00:00:00 2001 From: tegnike Date: Thu, 12 Dec 2024 00:20:19 +0100 Subject: [PATCH 06/14] =?UTF-8?q?gemini=E3=81=AE=E6=96=B0=E3=81=97?= =?UTF-8?q?=E3=81=84=E3=83=A2=E3=83=87=E3=83=AB=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/components/settings/modelProvider.tsx | 13 ++++++++----- src/components/settings/slideConvert.tsx | 13 ++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx index c6ef5d79..cdddb099 100644 --- a/src/components/settings/modelProvider.tsx +++ b/src/components/settings/modelProvider.tsx @@ -408,6 +408,12 @@ const ModelProvider = () => { }) } > + + @@ -417,11 +423,8 @@ const ModelProvider = () => { - - diff --git a/src/components/settings/slideConvert.tsx b/src/components/settings/slideConvert.tsx index b218ae43..727bc5a3 100644 --- a/src/components/settings/slideConvert.tsx +++ b/src/components/settings/slideConvert.tsx @@ -157,6 +157,12 @@ const SlideConvert: React.FC = ({ onFolderUpdate }) => { )} {aiService === 'google' && ( <> + + @@ -166,11 +172,8 @@ const SlideConvert: React.FC = ({ onFolderUpdate }) => { - - )} From 9360b87bb1fd9679b667cd3bbe4055a57b42467a Mon Sep 17 00:00:00 2001 From: tegnike Date: Thu, 12 Dec 2024 00:22:29 +0100 Subject: [PATCH 07/14] lint fix --- src/components/settings/slideConvert.tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/components/settings/slideConvert.tsx b/src/components/settings/slideConvert.tsx index 727bc5a3..239e87bb 100644 --- a/src/components/settings/slideConvert.tsx +++ b/src/components/settings/slideConvert.tsx @@ -172,9 +172,7 @@ const SlideConvert: React.FC = ({ onFolderUpdate }) => { - + )} From 72f3c5dae78ec05d435f105e1713730ff234f573 Mon Sep 17 00:00:00 2001 From: tegnike Date: Thu, 12 Dec 2024 18:45:04 +0100 Subject: [PATCH 08/14] =?UTF-8?q?TTS=E3=81=AE=E9=A0=86=E7=95=AA=E3=81=8C?= =?UTF-8?q?=E6=8B=85=E4=BF=9D=E3=81=95=E3=82=8C=E3=81=A6=E3=81=84=E3=81=AA?= =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=9F=E3=81=AE=E3=81=A7=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- src/features/messages/speakCharacter.ts | 191 ++++++++++++------------ src/features/messages/speakQueue.ts | 22 ++- 2 files changed, 108 insertions(+), 105 deletions(-) diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts index a57881e1..78c3d8ae 100644 --- a/src/features/messages/speakCharacter.ts +++ b/src/features/messages/speakCharacter.ts @@ -1,7 +1,6 @@ import homeStore from '@/features/stores/home' import settingsStore from '@/features/stores/settings' import englishToJapanese from '@/utils/englishToJapanese.json' -import { wait } from '@/utils/wait' import { Talk } from './messages' import { synthesizeStyleBertVITS2Api } from './synthesizeStyleBertVITS2' import { synthesizeVoiceKoeiromapApi } from './synthesizeVoiceKoeiromap' @@ -48,8 +47,6 @@ function preprocessMessage( } const createSpeakCharacter = () => { - let lastTime = 0 - return (talk: Talk, onStart?: () => void, onComplete?: () => void) => { const ss = settingsStore.getState() onStart?.() @@ -65,110 +62,110 @@ const createSpeakCharacter = () => { let isNeedDecode = true - // API呼び出しを即時実行 - const fetchPromise = (async () => { - let buffer + // audioBufferのフェッチを開始 + const audioBufferPromise: Promise = (async () => { try { - if (talk.message == '' && talk.buffer) { - buffer = talk.buffer + if (talk.message === '' && talk.buffer) { isNeedDecode = false + return talk.buffer } else if (ss.audioMode) { - buffer = null - } else if (ss.selectVoice == 'koeiromap') { - buffer = await synthesizeVoiceKoeiromapApi( - talk, - ss.koeiromapKey, - ss.koeiroParam - ) - } else if (ss.selectVoice == 'voicevox') { - buffer = await synthesizeVoiceVoicevoxApi( - talk, - ss.voicevoxSpeaker, - ss.voicevoxSpeed, - ss.voicevoxPitch, - ss.voicevoxIntonation - ) - } else if (ss.selectVoice == 'google') { - buffer = await synthesizeVoiceGoogleApi( - talk, - ss.googleTtsType, - ss.selectLanguage - ) - } else if (ss.selectVoice == 'stylebertvits2') { - 
buffer = await synthesizeStyleBertVITS2Api( - talk, - ss.stylebertvits2ServerUrl, - ss.stylebertvits2ApiKey, - ss.stylebertvits2ModelId, - ss.stylebertvits2Style, - ss.stylebertvits2SdpRatio, - ss.stylebertvits2Length, - ss.selectLanguage - ) - } else if (ss.selectVoice == 'aivis_speech') { - buffer = await synthesizeVoiceAivisSpeechApi( - talk, - ss.aivisSpeechSpeaker, - ss.aivisSpeechSpeed, - ss.aivisSpeechPitch, - ss.aivisSpeechIntonation - ) - } else if (ss.selectVoice == 'gsvitts') { - buffer = await synthesizeVoiceGSVIApi( - talk, - ss.gsviTtsServerUrl, - ss.gsviTtsModelId, - ss.gsviTtsBatchSize, - ss.gsviTtsSpeechRate - ) - } else if (ss.selectVoice == 'elevenlabs') { - buffer = await synthesizeVoiceElevenlabsApi( - talk, - ss.elevenlabsApiKey, - ss.elevenlabsVoiceId, - ss.selectLanguage - ) - } else if (ss.selectVoice == 'openai') { - buffer = await synthesizeVoiceOpenAIApi( - talk, - ss.openaiTTSKey || ss.openaiKey, - ss.openaiTTSVoice, - ss.openaiTTSModel, - ss.openaiTTSSpeed - ) - } else if (ss.selectVoice == 'azure') { - buffer = await synthesizeVoiceAzureOpenAIApi( - talk, - ss.azureTTSKey || ss.azureKey, - ss.azureTTSEndpoint || ss.azureEndpoint, - ss.openaiTTSVoice, - ss.openaiTTSSpeed - ) - } else if (ss.selectVoice == 'nijivoice') { - buffer = await synthesizeVoiceNijivoiceApi( - talk, - ss.nijivoiceApiKey, - ss.nijivoiceActorId, - ss.nijivoiceSpeed - ) + return null + } + + // 選択されたボイスに応じたTTS APIを呼び出す + switch (ss.selectVoice) { + case 'koeiromap': + return await synthesizeVoiceKoeiromapApi( + talk, + ss.koeiromapKey, + ss.koeiroParam + ) + case 'voicevox': + return await synthesizeVoiceVoicevoxApi( + talk, + ss.voicevoxSpeaker, + ss.voicevoxSpeed, + ss.voicevoxPitch, + ss.voicevoxIntonation + ) + case 'google': + return await synthesizeVoiceGoogleApi( + talk, + ss.googleTtsType, + ss.selectLanguage + ) + case 'stylebertvits2': + return await synthesizeStyleBertVITS2Api( + talk, + ss.stylebertvits2ServerUrl, + ss.stylebertvits2ApiKey, + 
ss.stylebertvits2ModelId, + ss.stylebertvits2Style, + ss.stylebertvits2SdpRatio, + ss.stylebertvits2Length, + ss.selectLanguage + ) + case 'aivis_speech': + return await synthesizeVoiceAivisSpeechApi( + talk, + ss.aivisSpeechSpeaker, + ss.aivisSpeechSpeed, + ss.aivisSpeechPitch, + ss.aivisSpeechIntonation + ) + case 'gsvitts': + return await synthesizeVoiceGSVIApi( + talk, + ss.gsviTtsServerUrl, + ss.gsviTtsModelId, + ss.gsviTtsBatchSize, + ss.gsviTtsSpeechRate + ) + case 'elevenlabs': + return await synthesizeVoiceElevenlabsApi( + talk, + ss.elevenlabsApiKey, + ss.elevenlabsVoiceId, + ss.selectLanguage + ) + case 'openai': + return await synthesizeVoiceOpenAIApi( + talk, + ss.openaiTTSKey || ss.openaiKey, + ss.openaiTTSVoice, + ss.openaiTTSModel, + ss.openaiTTSSpeed + ) + case 'azure': + return await synthesizeVoiceAzureOpenAIApi( + talk, + ss.azureTTSKey || ss.azureKey, + ss.azureTTSEndpoint || ss.azureEndpoint, + ss.openaiTTSVoice, + ss.openaiTTSSpeed + ) + case 'nijivoice': + return await synthesizeVoiceNijivoiceApi( + talk, + ss.nijivoiceApiKey, + ss.nijivoiceActorId, + ss.nijivoiceSpeed + ) + default: + throw new Error('Unsupported voice type') } } catch (error) { handleTTSError(error, ss.selectVoice) return null } - return buffer })() - // 音声バッファの取得後、再生キューに追加 - fetchPromise.then((audioBuffer) => { - if (!audioBuffer) return - - speakQueue.addTask({ - audioBuffer, - talk, - isNeedDecode, - onComplete, - }) + // タスクをSpeakQueueに追加 + speakQueue.addTask({ + audioBufferPromise, + talk, + isNeedDecode, + onComplete, }) } } diff --git a/src/features/messages/speakQueue.ts b/src/features/messages/speakQueue.ts index fbc18b93..a7aa26b0 100644 --- a/src/features/messages/speakQueue.ts +++ b/src/features/messages/speakQueue.ts @@ -1,8 +1,8 @@ import { Talk } from './messages' import homeStore from '@/features/stores/home' -type SpeakTask = { - audioBuffer: ArrayBuffer +type SpeakTaskWithPromise = { + audioBufferPromise: Promise talk: Talk isNeedDecode: boolean 
onComplete?: () => void @@ -10,12 +10,12 @@ type SpeakTask = { export class SpeakQueue { private static readonly QUEUE_CHECK_DELAY = 1500 - private queue: SpeakTask[] = [] + private queue: SpeakTaskWithPromise[] = [] private isProcessing = false - async addTask(task: SpeakTask) { + addTask(task: SpeakTaskWithPromise) { this.queue.push(task) - await this.processQueue() + this.processQueue() } private async processQueue() { @@ -27,9 +27,15 @@ export class SpeakQueue { const task = this.queue.shift() if (task) { try { - const { audioBuffer, talk, isNeedDecode, onComplete } = task - await hs.viewer.model?.speak(audioBuffer, talk, isNeedDecode) - onComplete?.() + const audioBuffer = await task.audioBufferPromise + if (audioBuffer) { + await hs.viewer.model?.speak( + audioBuffer, + task.talk, + task.isNeedDecode + ) + task.onComplete?.() + } } catch (error) { console.error( 'An error occurred while processing the speech synthesis task:', From fdd9b6e48ced4bc0b96a87bef0fa566796faca75 Mon Sep 17 00:00:00 2001 From: tegnike Date: Thu, 12 Dec 2024 20:29:52 +0100 Subject: [PATCH 09/14] =?UTF-8?q?=E3=83=AA=E3=82=A2=E3=83=AB=E3=82=BF?= =?UTF-8?q?=E3=82=A4=E3=83=A0API=E3=83=A2=E3=83=BC=E3=83=89=E6=99=82?= =?UTF-8?q?=E3=81=AB=E9=9F=B3=E5=A3=B0=E8=A8=AD=E5=AE=9A=E3=82=92=E9=9D=9E?= =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- locales/en/translation.json | 3 ++- locales/ja/translation.json | 3 ++- locales/ko/translation.json | 3 ++- locales/zh/translation.json | 3 ++- src/components/settings/modelProvider.tsx | 2 +- src/components/settings/voice.tsx | 12 ++++++++++++ 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/locales/en/translation.json b/locales/en/translation.json index 6e118c60..54c65a20 100644 --- a/locales/en/translation.json +++ b/locales/en/translation.json @@ -197,5 +197,6 @@ "useCurrentSystemPrompt": "Use AITuberKit system prompt", "userInputTitle": "Send user 
input", "userInputDescription": "The message sent is processed the same as when input from the AITuberKit input form. If multiple messages are sent, they are processed in order. The AI model and voice model are the ones selected in the AITuberKit settings. The system prompt and conversation history are the values set in AITuberKit." - } + }, + "CannotUseVoice": "Realtime API mode or Audio mode is enabled, so voice settings are not required." } diff --git a/locales/ja/translation.json b/locales/ja/translation.json index 958c9f11..71e61ef1 100644 --- a/locales/ja/translation.json +++ b/locales/ja/translation.json @@ -198,5 +198,6 @@ "useCurrentSystemPrompt": "AITuberKitのシステムプロンプトを利用する", "userInputTitle": "ユーザー入力を送信する", "userInputDescription": "送信したメッセージはAITuberKitの入力フォームから入力された場合と同じ処理がされます。複数送信した場合は順番に処理されます。\nAIモデルおよび音声モデルはAITuberKitの設定で選択したものが使用されます。\nシステムプロンプトや会話履歴はAITuberKitの値が使用されます。" - } + }, + "CannotUseVoice": "リアルタイムAPIモード または オーディオモードが有効の場合、\n音声設定は不要です。" } diff --git a/locales/ko/translation.json b/locales/ko/translation.json index 039a1acd..0235900f 100644 --- a/locales/ko/translation.json +++ b/locales/ko/translation.json @@ -197,5 +197,6 @@ "useCurrentSystemPrompt": "AITuberKit의 시스템 프롬프트를 사용하기", "userInputTitle": "사용자 입력 보내기", "userInputDescription": "보낸 메시지는 AITuberKit의 입력 폼에서 입력한 경우와 동일한 처리가 됩니다. 여러 메시지를 보낸 경우 순서대로 처리됩니다.\nAI 모델과 음성 모델은 AITuberKit의 설정에서 선택한 것이 사용됩니다.\n시스템 프롬프트와 대화 기록은 AITuberKit의 값이 사용됩니다." - } + }, + "CannotUseVoice": "리얼타임 API 모드 또는 오디오 모드가 활성화된 경우, 음성 설정은 필요하지 않습니다." 
} diff --git a/locales/zh/translation.json b/locales/zh/translation.json index 8de358f9..a2e5ad06 100644 --- a/locales/zh/translation.json +++ b/locales/zh/translation.json @@ -197,5 +197,6 @@ "useCurrentSystemPrompt": "使用AITuberKit的系統提示", "userInputTitle": "用戶輸入發送", "userInputDescription": "發送的訊息將與AITuberKit的輸入框中輸入的訊息相同。如果發送多個訊息,將按順序處理。\nAI模型和語音模型將使用AITuberKit的設定中選擇的模型。\n系統提示和會話記錄將使用AITuberKit的值。" - } + }, + "CannotUseVoice": "實時API模式或音訊模式已啟用,因此不需要語音設定。" } diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx index cdddb099..67d82adf 100644 --- a/src/components/settings/modelProvider.tsx +++ b/src/components/settings/modelProvider.tsx @@ -269,7 +269,7 @@ const ModelProvider = () => {
- {t('realtimeAPIModeVoice')} + {t('RealtimeAPIModeVoice')}