From 103c09c4cadee1f281ed6572bb1eb8ed8c53bb19 Mon Sep 17 00:00:00 2001 From: Zetaphor Date: Mon, 22 Jan 2024 13:13:08 -0600 Subject: [PATCH] Implement punctuation removal and speechEnded event --- .gitignore | 3 ++- README.md | 11 +++++++++-- static/index.js | 32 ++++++++++++++++++++++++++++++++ templates/index.html | 16 ++++++++++++++-- 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 24be143..8db2259 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ build/ dist/ -server.spec \ No newline at end of file +server.spec +.aider* diff --git a/README.md b/README.md index 3826ad2..fa01b49 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,9 @@ The application uses **commands** and **events** to set and monitor the micropho The following features are available: - **Language**: The language to use for speech recognition. See the list of supported languages below. -- **Debug Mode**: When enabled, debug mode will send a debugConfidence event in addition to each recognition message -- **Confidence Threshold**: When enabled, the confidence threshold will be used to filter out predictions that are below the threshold. The value is a number between 0.0 and 1.0 +- **Debug Mode**: When enabled, debug mode will send a debugConfidence event in addition to each recognition message. +- **Punctuation Removal**: When enabled, punctuation will be removed from the transcript. Only ASCII letters (a-z, A-Z), digits, and space characters will remain, so accented and non-Latin characters are removed as well. Removal runs before word replacement, so replacement entries that contain punctuation (such as the default censored words) will not match while it is enabled. +- **Confidence Threshold**: When enabled, the confidence threshold will be used to filter out predictions that are below the threshold. The value is a number between 0.0 and 1.0. - **Word Replacement**: When enabled, the user will be able to replace words in the transcript. This can be used to uncensor the Google transcript, or to fix recognition errors. By default this list is populated with a list of common profanities. 
Each of these features can be toggled and configured using the GUI or via the commands listed below. @@ -58,6 +59,9 @@ Note that the microphone controls for this application are separate from Resonit - **clear**: Forcibly clears the transcript (This can be finicky due to the way Google changes predictions once it's more confident) - **replacementEnable**: Enables the word replacement feature - **replacementDisable**: Disables the word replacement feature +- **removePunctuationToggle**: Toggles punctuation removal on and off +- **removePunctuationEnable**: Enables punctuation removal +- **removePunctuationDisable**: Disables punctuation removal #### Confidence Threshold @@ -109,6 +113,9 @@ The server will send the following event messages when the the microphone state - **[cleared]**: The transcript has been manually cleared - **[replacementEnabled]**: The word replacement feature has been enabled - **[replacementDisabled]**: The word replacement feature has been disabled +- **[removePunctuationEnabled]**: Punctuation removal has been enabled +- **[removePunctuationDisabled]**: Punctuation removal has been disabled +- **[speechEnded]**: Sent whenever Google Chrome has determined that the user has stopped speaking #### Confidence Threshold diff --git a/static/index.js b/static/index.js index fec888d..f0caa97 100644 --- a/static/index.js +++ b/static/index.js @@ -30,6 +30,8 @@ const wordReplacementCheckbox = document.getElementById('wordReplacementCheckbox const wordReplacementContainer = document.getElementById('wordReplacementContainer'); const addWordPairBtn = document.getElementById('addWordPairBtn'); +let removePunctuation = false; + const defaultWordDictionary = { 'f***': 'fuck', 'f****': 'fucks', @@ -76,6 +78,13 @@ function init() { saveSettings(); }); + removePunctuationCheckbox.addEventListener('change', () => { + removePunctuation = removePunctuationCheckbox.checked; + if (removePunctuation) websocket.send('[removePunctuationEnabled]'); + else 
websocket.send('[removePunctuationDisabled]'); + saveSettings(); + }) + confidenceThresholdCheckbox.addEventListener('change', () => { useConfidenceThreshold = confidenceThresholdCheckbox.checked; setUseConfidenceThreshold(); @@ -116,6 +125,9 @@ function loadSetings() { selectedLanguage = localStorage.getItem('selectedLanguage') || 'en-US'; langSelect.value = selectedLanguage; + + removePunctuation = localStorage.getItem('removePunctuation') === 'true'; + removePunctuationCheckbox.checked = removePunctuation; } function saveSettings() { @@ -125,6 +137,7 @@ function saveSettings() { localStorage.setItem('confidenceThreshold', confidenceThreshold); localStorage.setItem('wordDictionary', JSON.stringify(wordDictionary)); localStorage.setItem('selectedLanguage', selectedLanguage); + localStorage.setItem('removePunctuation', removePunctuation); } function initializeRecognition() { @@ -186,6 +199,10 @@ function replaceWords(text) { return text.split(' ').map(word => wordDictionary[word] || word).join(' '); } +function stripPunctuation(text) { + return text.replace(/[^a-zA-Z0-9 ]/g, ''); +} + function renderWordPairs() { const wordPairList = document.getElementById('wordPairList'); wordPairList.innerHTML = ''; @@ -238,6 +255,10 @@ function onSpeechRecognized(e) { if (useConfidenceThreshold && recognized.confidence < confidenceThreshold) return; let processedTranscript = recognized.transcript; + if (removePunctuation) { + processedTranscript = stripPunctuation(processedTranscript); + } + if (wordReplacementEnabled) { processedTranscript = replaceWords(processedTranscript); } @@ -259,6 +280,7 @@ function onSpeechEnded() { transcript = ''; clearedSection = ''; manuallyCleared = false; + websocket.send('[speechEnded]'); } function onOpen(event) { @@ -312,6 +334,16 @@ function onMessage(event) { } else if (event.data === 'replacementToggle') { wordReplacementEnabled = !wordReplacementEnabled; setWordReplacement(); + } else if (event.data === 'removePunctuationEnable') { + 
removePunctuation = true; + websocket.send('[removePunctuationEnabled]'); + } else if (event.data === 'removePunctuationDisable') { + removePunctuation = false; + websocket.send('[removePunctuationDisabled]'); + } else if (event.data === 'removePunctuationToggle') { + removePunctuation = !removePunctuation; + if (removePunctuation) websocket.send('[removePunctuationEnabled]'); + else websocket.send('[removePunctuationDisabled]'); } } diff --git a/templates/index.html b/templates/index.html index e22224d..222d3f1 100644 --- a/templates/index.html +++ b/templates/index.html @@ -49,6 +49,11 @@

Settings

Enable Debug Mode + +