Implement punctuation removal and speechEnded event

theneolanders · Jan 22, 2024 · 103c09c · 103c09c
1 parent 361185a
commit 103c09c
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 5 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 build/
 dist/
-server.spec
+server.spec
+.aider*
diff --git a/README.md b/README.md
@@ -36,8 +36,9 @@ The application uses **commands** and **events** to set and monitor the micropho
 The following features are available:
 
 - **Language**: The language to use for speech recognition. See the list of supported languages below.
-- **Debug Mode**: When enabled, debug mode will send a debugConfidence event in addition to each recognition message
-- **Confidence Threshold**: When enabled, the confidence threshold will be used to filter out predictions that are below the threshold. The value is a number between 0.0 and 1.0
+- **Debug Mode**: When enabled, debug mode will send a debugConfidence event in addition to each recognition message.
+- **Punctuation Removal**: When enabled, punctuation will be removed from the transcript. This means only letters, numbers, and space characters will remain.
+- **Confidence Threshold**: When enabled, the confidence threshold will be used to filter out predictions that are below the threshold. The value is a number between 0.0 and 1.0.
 - **Word Replacement**: When enabled, the user will be able to replace words in the transcript. This can be used to uncensor the Google transcript, or to fix recognition errors. By default this list is populated with a list of common profanities.
 
 Each of these features can be toggled and configured using the GUI or via the commands listed below.
@@ -58,6 +59,9 @@ Note that the microphone controls for this application are separate from Resonit
 - **clear**: Forcibly clears the transcript (This can be finicky due to the way Google changes predictions once it's more confident)
 - **replacementEnable**: Enables the word replacement feature
 - **replacementDisable**: Disables the word replacement feature
+- **removePunctuationToggle**: Toggles punctuation removal on and off
+- **removePunctuationEnable**: Enables punctuation removal
+- **removePunctuationDisable**: Disables punctuation removal
 
 #### Confidence Threshold
 
@@ -109,6 +113,9 @@ The server will send the following event messages when the microphone state
 - **[cleared]**: The transcript has been manually cleared
 - **[replacementEnabled]**: The word replacement feature has been enabled
 - **[replacementDisabled]**: The word replacement feature has been disabled
+- **[removePunctuationEnabled]**: Punctuation removal has been enabled
+- **[removePunctuationDisabled]**: Punctuation removal has been disabled
+- **[speechEnded]**: Sent whenever Google Chrome has determined that the user has stopped speaking
 
 #### Confidence Threshold
 

diff --git a/static/index.js b/static/index.js
@@ -30,6 +30,8 @@ const wordReplacementCheckbox = document.getElementById('wordReplacementCheckbox
 const wordReplacementContainer = document.getElementById('wordReplacementContainer');
 const addWordPairBtn = document.getElementById('addWordPairBtn');
 
+let removePunctuation = false;
+
 const defaultWordDictionary = {
   'f***': 'fuck',
   'f****': 'fucks',
@@ -76,6 +78,13 @@ function init() {
     saveSettings();
   });
 
+  removePunctuationCheckbox.addEventListener('change', () => {
+    removePunctuation = removePunctuationCheckbox.checked;
+    if (removePunctuation) websocket.send('[removePunctuationEnabled]');
+    else websocket.send('[removePunctuationDisabled]');
+    saveSettings();
+  })
+
   confidenceThresholdCheckbox.addEventListener('change', () => {
     useConfidenceThreshold = confidenceThresholdCheckbox.checked;
     setUseConfidenceThreshold();
@@ -116,6 +125,9 @@ function loadSetings() {
 
   selectedLanguage = localStorage.getItem('selectedLanguage') || 'en-US';
   langSelect.value = selectedLanguage;
+
+  removePunctuation = localStorage.getItem('removePunctuation') === 'true';
+  removePunctuationCheckbox.checked = removePunctuation;
 }
 
 function saveSettings() {
@@ -125,6 +137,7 @@ function saveSettings() {
   localStorage.setItem('confidenceThreshold', confidenceThreshold);
   localStorage.setItem('wordDictionary', JSON.stringify(wordDictionary));
   localStorage.setItem('selectedLanguage', selectedLanguage);
+  localStorage.setItem('removePunctuation', removePunctuation);
 }
 
 function initializeRecognition() {
@@ -186,6 +199,10 @@ function replaceWords(text) {
   return text.split(' ').map(word => wordDictionary[word] || word).join(' ');
 }
 
+/**
+ * Removes every character that is not a letter, a number, or a space.
+ *
+ * Unicode property escapes are used instead of an ASCII-only class so that
+ * letters, digits and combining marks from any script are kept; an ASCII
+ * class would strip accented letters and erase non-Latin transcripts.
+ * Only the literal space character is kept; other whitespace is removed.
+ *
+ * Note: '*' is removed as well, so censored words such as 'f***' no longer
+ * match their word-replacement dictionary keys once they pass through here.
+ *
+ * @param {string} text - Transcript text to clean.
+ * @returns {string} The text with punctuation and symbols removed.
+ */
+function stripPunctuation(text) {
+  return text.replace(/[^\p{L}\p{M}\p{N} ]/gu, '');
+}
+
 function renderWordPairs() {
   const wordPairList = document.getElementById('wordPairList');
   wordPairList.innerHTML = '';
@@ -238,6 +255,10 @@ function onSpeechRecognized(e) {
   if (useConfidenceThreshold && recognized.confidence < confidenceThreshold) return;
 
   let processedTranscript = recognized.transcript;
+  if (removePunctuation) {
+    processedTranscript = stripPunctuation(processedTranscript);
+  }
+
   if (wordReplacementEnabled) {
     processedTranscript = replaceWords(processedTranscript);
   }
@@ -259,6 +280,7 @@ function onSpeechEnded() {
   transcript = '';
   clearedSection = '';
   manuallyCleared = false;
+  websocket.send('[speechEnded]');
 }
 
 function onOpen(event) {
@@ -312,6 +334,16 @@ function onMessage(event) {
   } else if (event.data === 'replacementToggle') {
     wordReplacementEnabled = !wordReplacementEnabled;
     setWordReplacement();
+  } else if (event.data === 'removePunctuationEnable') {
+    removePunctuation = true;
+    websocket.send('[removePunctuationEnabled]');
+  } else if (event.data === 'removePunctuationDisable') {
+    removePunctuation = false;
+    websocket.send('[removePunctuationDisabled]');
+  } else if (event.data === 'removePunctuationToggle') {
+    removePunctuation = !removePunctuation;
+    if (removePunctuation) websocket.send('[removePunctuationEnabled]');
+    else websocket.send('[removePunctuationDisabled]');
   }
 }
 

diff --git a/templates/index.html b/templates/index.html
@@ -49,6 +49,11 @@ <h3 style="text-decoration: none; margin: 5px;">Settings</h3>
         Enable Debug Mode
     </label>
 
+    <label for="removePunctuationCheckbox" class="checkbox-label">
+        <input type="checkbox" id="removePunctuationCheckbox" class="checkbox-input">
+        Remove Punctuation
+    </label>
+
     <label for="confidenceThresholdCheckbox" class="checkbox-label">
         <input type="checkbox" id="confidenceThresholdCheckbox" class="checkbox-input">
         Enable Confidence Threshold
@@ -102,9 +107,10 @@ <h3>Options</h3>
         <li><strong>Language</strong>: The language to use for speech recognition. See the list of supported languages
             below.</li>
         <li><strong>Debug Mode</strong>: When enabled, debug mode will send a debugConfidence event in addition to each
-            recognition message</li>
+            recognition message.</li>
+        <li><strong>Remove Punctuation</strong>: When enabled, punctuation will be removed from the transcript. This means only letters, numbers, and space characters will remain.</li>
         <li><strong>Confidence Threshold</strong>: When enabled, the confidence threshold will be used to filter out
-            predictions that are below the threshold. The value is a number between 0.0 and 1.0</li>
+            predictions that are below the threshold. The value is a number between 0.0 and 1.0.</li>
         <li><strong>Word Replacement</strong>: When enabled, the user will be able to replace words in the transcript.
             This can be used to uncensor the Google transcript, or to fix recognition errors. By default this list is
             populated with a list of common profanities.</li>
@@ -127,6 +133,9 @@ <h4>Microphone/Recognition</h4>
         <li><strong>replacementToggle</strong>: Toggles the word replacement feature on and off</li>
         <li><strong>replacementEnable</strong>: Enables the word replacement feature</li>
         <li><strong>replacementDisable</strong>: Disables the word replacement feature</li>
+        <li><strong>removePunctuationToggle</strong>: Toggles punctuation removal on and off</li>
+        <li><strong>removePunctuationEnable</strong>: Enables punctuation removal</li>
+        <li><strong>removePunctuationDisable</strong>: Disables punctuation removal</li>
     </ul>
     <h4>Confidence Threshold</h4>
     <p>When enabled, the confidence threshold will be used to filter out predictions that are below the threshold. The
@@ -182,6 +191,9 @@ <h4>Microphone/Recognition</h4>
         <li><strong>[cleared]</strong>: The transcript has been manually cleared</li>
         <li><strong>[replacementEnabled]</strong>: The word replacement feature has been enabled</li>
         <li><strong>[replacementDisabled]</strong>: The word replacement feature has been disabled</li>
+        <li><strong>[removePunctuationEnabled]</strong>: The punctuation removal feature has been enabled</li>
+        <li><strong>[removePunctuationDisabled]</strong>: The punctuation removal feature has been disabled</li>
+        <li><strong>[speechEnded]</strong>: Sent whenever Google Chrome has determined that the user has stopped speaking</li>
     </ul>
     <h4>Confidence Threshold</h4>
     <p>When enabled the confidence threshold will be used to filter out predictions that are below the threshold. The