fix: speak each CR-terminated line immediately, queue without cancel (#570)

mattgodbolt-molty · web-flow · commit 6c2bd37b5321 · 2026-02-28T13:00:08.000-06:00
Fixes the original bug where speechSynthesis.cancel() was called immediately before every speak() on CR, so each new line cut off the previous one before it could be heard in full.

The real Votrax TNT had no cancel mechanism — it simply spoke each CR-terminated buffer and queued the rest. speechSynthesis.speak() does exactly this.

Also removes BS handling (no evidence in the TNT manual) and the inactivity-timer auto-flush, and documents ESC's source (daisy-chain unit-select, TNT Operator's Manual 1981).

A future improvement could heuristically combine lines arriving within ~20ms into one utterance for better modern TTS behaviour, but the simple per-line queue works well and has no surprising pauses or dropped output.

🤖 Generated by LLM (Claude, via OpenClaw)
diff --git a/src/speech-output.js b/src/speech-output.js
@@ -1,42 +1,35 @@
 "use strict";
 
 /**
- * RS-423 handler that routes transmitted bytes to the Web Speech API,
- * following the Votrax Type 'N Talk protocol (TNT Operator's Manual, 1981).
+ * RS-423 handler that routes transmitted bytes to the Web Speech API.
  *
- * Protocol summary (from the manual):
- *  - Printable ASCII 0x20–0x7E: accumulated in the input buffer.
- *    (On real hardware only A–Z, a–z, 0–9, and "." produce audible speech;
- *    other printable chars produce silence.  We pass the full buffer to the
- *    browser TTS engine, which handles spaces and punctuation well.)
- *  - CR (0x0D) = TALK-CLR: speak the buffer contents, then clear it.
- *  - BS (0x08): delete the last character from the buffer.
- *  - ESC (0x1B): mode/unit-select prefix — the following byte is consumed
- *    as a control code and not treated as text.
- *  - All other bytes (< 0x20 or > 0x7E, except the above): null data — ignored.
- *  - Buffer-full: auto-speak when the buffer reaches MAX_BUFFER bytes.
- *    (The manual mentions this condition but gives no explicit count.  128 bytes
- *    is a conservative estimate given the TNT's 2 KB of onboard RAM.)
- *  - Timer: after TIMER_MS of inactivity the buffer is spoken automatically,
- *    emulating the TNT's optional TIMER mode ("about 3–4 seconds").
+ * BBC programs use *FX3,1 (or *FX3,3) to send OSWRCH output to the RS-423
+ * serial port, which on real hardware fed a Votrax Type 'N Talk synthesiser.
+ * We intercept at the ACIA hardware boundary and route to speechSynthesis.
  *
- * Note: LF (0x0A) is NOT a flush trigger on the real TNT — it is null data.
- * Only CR (0x0D) flushes the buffer.
+ * Byte handling is based on the Votrax Type 'N Talk Operator's Manual (1981):
+ *  - Printable ASCII 0x20–0x7E: accumulated into the text buffer.
+ *  - CR (0x0D): "TALK-CLR" — speaks the buffer and clears it.  Multiple CR-
+ *    terminated lines queue naturally via speechSynthesis.speak().  A future
+ *    improvement could heuristically combine lines that arrive within a single
+ *    frame (~20 ms) into one utterance, which would give modern TTS engines a
+ *    better sentence to work with — but the simple per-line queue works well
+ *    enough and has no surprising pauses or dropped output.
+ *  - LF (0x0A): explicitly listed as null data in the manual; ignored.
+ *  - ESC (0x1B): unit-select prefix for daisy-chained TNT units.  ESC plus
+ *    the following byte are consumed silently (not passed to speechSynthesis).
+ *  - All other bytes: null data per the manual; ignored.
  */
+
 // From the TNT Operator's Manual: "The input buffer can hold more than 750
-// characters".  The output queue (phonemes waiting for the SC-01) is 128
-// entries, which is a different thing entirely.
+// characters".  Reaching this length speaks the buffer without waiting for CR.
 export const MAX_BUFFER = 750;
 
-// The manual says "approximately 4 seconds" for the inactivity timer.
-const TIMER_MS = 4000;
-
 export class SpeechOutput {
     constructor() {
         this._buffer = "";
         this._escapeNext = false;
         this._enabled = false;
-        this._timer = null;
     }
 
     get enabled() {
@@ -46,45 +39,33 @@ export class SpeechOutput {
     set enabled(value) {
         this._enabled = !!value;
         if (!this._enabled) {
-            this._cancelTimer();
             this._buffer = "";
-            this._cancelSpeech();
+            if (typeof speechSynthesis !== "undefined") speechSynthesis.cancel();
         }
     }
 
     /** RS-423 handler interface: called for each byte the BBC transmits. */
     onTransmit(byte) {
         if (!this._enabled) return;
 
-        // ESC prefix: consume the following byte as a mode/unit-select code.
         if (this._escapeNext) {
             this._escapeNext = false;
             return;
         }
 
         switch (byte) {
-            case 0x1b: // ESC — next byte is a mode control, not text.
+            case 0x1b: // ESC — next byte is a unit-select code, not text.
                 this._escapeNext = true;
                 return;
 
-            case 0x0d: // CR = TALK-CLR: speak and clear.
+            case 0x0d: // CR — TALK-CLR: speak current buffer and clear it.
                 this._flush();
                 return;
 
-            case 0x08: // BS: delete last character from buffer.
-                this._buffer = this._buffer.slice(0, -1);
-                this._resetTimer();
-                return;
-
             default:
                 if (byte >= 0x20 && byte <= 0x7e) {
-                    // Printable ASCII — accumulate.
                     this._buffer += String.fromCharCode(byte);
-                    if (this._buffer.length >= MAX_BUFFER) {
-                        this._flush(); // buffer-full condition
-                    } else {
-                        this._resetTimer();
-                    }
+                    if (this._buffer.length >= MAX_BUFFER) this._flush();
                 }
             // Everything else is null data — silently ignored.
         }
@@ -95,39 +76,10 @@ export class SpeechOutput {
         return -1;
     }
 
-    // ------------------------------------------------------------------
-
     _flush() {
-        this._cancelTimer();
         const text = this._buffer.trim();
         this._buffer = "";
-        if (!text) return;
-        this._speak(text);
-    }
-
-    _resetTimer() {
-        this._cancelTimer();
-        this._timer = setTimeout(() => {
-            this._timer = null;
-            this._flush();
-        }, TIMER_MS);
-    }
-
-    _cancelTimer() {
-        if (this._timer !== null) {
-            clearTimeout(this._timer);
-            this._timer = null;
-        }
-    }
-
-    _speak(text) {
-        if (typeof speechSynthesis === "undefined") return;
-        speechSynthesis.cancel();
-        const utterance = new SpeechSynthesisUtterance(text);
-        speechSynthesis.speak(utterance);
-    }
-
-    _cancelSpeech() {
-        if (typeof speechSynthesis !== "undefined") speechSynthesis.cancel();
+        if (!text || typeof speechSynthesis === "undefined") return;
+        speechSynthesis.speak(new SpeechSynthesisUtterance(text));
     }
 }
diff --git a/tests/unit/test-speech-output.js b/tests/unit/test-speech-output.js
@@ -11,6 +11,10 @@ global.SpeechSynthesisUtterance = class {
     }
 };
 
+function transmit(speech, str) {
+    for (const ch of str) speech.onTransmit(ch.charCodeAt(0));
+}
+
 describe("SpeechOutput", () => {
     let speech;
 
@@ -23,92 +27,82 @@ describe("SpeechOutput", () => {
 
     it("tryReceive always returns -1", () => {
         expect(speech.tryReceive()).toBe(-1);
-        expect(speech.tryReceive(true)).toBe(-1);
     });
 
     it("speaks buffered text on CR", () => {
-        for (const ch of "HELLO") speech.onTransmit(ch.charCodeAt(0));
+        transmit(speech, "HELLO");
         expect(mockSpeak).not.toHaveBeenCalled();
-        speech.onTransmit(13); // CR
+        speech.onTransmit(0x0d);
         expect(mockSpeak).toHaveBeenCalledOnce();
         expect(mockSpeak.mock.calls[0][0].text).toBe("HELLO");
     });
 
-    it("does NOT flush on LF (LF is null data per Votrax spec)", () => {
-        for (const ch of "WORLD") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(10); // LF — null data, must not trigger speech
+    it("multiple CR-terminated lines queue without cancelling each other", () => {
+        transmit(speech, "Welcome to the castle.");
+        speech.onTransmit(0x0d);
+        transmit(speech, "There is a sword here.");
+        speech.onTransmit(0x0d);
+        transmit(speech, "What now?");
+        speech.onTransmit(0x0d);
+
+        expect(mockSpeak).toHaveBeenCalledTimes(3);
+        expect(mockCancel).not.toHaveBeenCalled();
+        expect(mockSpeak.mock.calls[0][0].text).toBe("Welcome to the castle.");
+        expect(mockSpeak.mock.calls[1][0].text).toBe("There is a sword here.");
+        expect(mockSpeak.mock.calls[2][0].text).toBe("What now?");
+    });
+
+    it("LF is null data — ignored", () => {
+        transmit(speech, "WORLD");
+        speech.onTransmit(0x0a); // LF — ignored
         expect(mockSpeak).not.toHaveBeenCalled();
-        speech.onTransmit(13); // CR — the real flush trigger
-        expect(mockSpeak).toHaveBeenCalledOnce();
+        speech.onTransmit(0x0d);
         expect(mockSpeak.mock.calls[0][0].text).toBe("WORLD");
     });
 
     it("does nothing when disabled", () => {
         speech.enabled = false;
-        for (const ch of "TEST") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(13);
+        transmit(speech, "TEST");
+        speech.onTransmit(0x0d);
         expect(mockSpeak).not.toHaveBeenCalled();
     });
 
-    it("cancels speech and clears buffer when disabled mid-buffer", () => {
-        for (const ch of "PARTIAL") speech.onTransmit(ch.charCodeAt(0));
+    it("cancels speech and clears buffer when disabled", () => {
+        transmit(speech, "PARTIAL");
         speech.enabled = false;
         expect(mockCancel).toHaveBeenCalled();
         speech.enabled = true;
-        speech.onTransmit(13);
-        expect(mockSpeak).not.toHaveBeenCalled(); // buffer was cleared
+        speech.onTransmit(0x0d);
+        expect(mockSpeak).not.toHaveBeenCalled();
     });
 
-    it("ignores non-printable bytes (< 0x20) other than CR, BS, ESC", () => {
-        // Per Votrax manual: non-printable bytes that aren't specified commands
-        // are null data and are ignored.  This means BBC VDU codes, BEL,
-        // LF, etc. are all silently dropped.
-        speech.onTransmit(7); // BEL
-        speech.onTransmit(22); // VDU 22 (MODE)
-        speech.onTransmit(7); // would-be VDU param byte — treated as null data, not VDU
-        for (const ch of "DING") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(13);
+    it("ignores non-printable bytes other than CR and ESC", () => {
+        speech.onTransmit(7); // BEL — null data
+        speech.onTransmit(22); // VDU 22 — null data
+        transmit(speech, "DING");
+        speech.onTransmit(0x0d);
         expect(mockSpeak.mock.calls[0][0].text).toBe("DING");
     });
 
-    it("handles BS (0x08) — deletes last character from buffer", () => {
-        for (const ch of "HI!") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(0x08); // delete "!"
+    it("BS (0x08) is null data — ignored", () => {
+        // The TNT manual lists only CR, LF, and ESC as defined commands.
+        transmit(speech, "HI!");
+        speech.onTransmit(0x08);
         speech.onTransmit(0x0d);
-        expect(mockSpeak.mock.calls[0][0].text).toBe("HI");
+        expect(mockSpeak.mock.calls[0][0].text).toBe("HI!");
     });
 
-    it("handles ESC (0x1B) — next byte is a mode control, not text", () => {
-        for (const ch of "TEST") speech.onTransmit(ch.charCodeAt(0));
+    it("ESC consumes the following byte silently (unit-select, TNT manual)", () => {
+        transmit(speech, "TEST");
         speech.onTransmit(0x1b); // ESC
-        speech.onTransmit(0x11); // DC1 = PSEND ON — consumed as mode code
+        speech.onTransmit(0x41); // unit-select byte — consumed
         speech.onTransmit(0x0d);
         expect(mockSpeak.mock.calls[0][0].text).toBe("TEST");
     });
 
-    it("ignores DEL (127) and high bytes", () => {
-        speech.onTransmit(127);
-        speech.onTransmit(200);
-        for (const ch of "HI") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(13);
-        expect(mockSpeak.mock.calls[0][0].text).toBe("HI");
-    });
-
-    it("cancels in-progress speech before starting new utterance", () => {
-        for (const ch of "ONE") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(13);
-        for (const ch of "TWO") speech.onTransmit(ch.charCodeAt(0));
-        speech.onTransmit(13);
-        expect(mockCancel).toHaveBeenCalledTimes(2);
-        expect(mockSpeak).toHaveBeenCalledTimes(2);
-    });
-
-    it("auto-speaks when input buffer reaches MAX_BUFFER bytes (buffer-full condition)", () => {
-        // The Votrax manual says "input buffer full" is a TALK-CLR trigger.
-        // Our MAX_BUFFER is 128 bytes.
-        const longText = "A".repeat(MAX_BUFFER);
-        for (const ch of longText) speech.onTransmit(ch.charCodeAt(0));
+    it("auto-flushes when buffer reaches MAX_BUFFER bytes", () => {
+        transmit(speech, "A".repeat(MAX_BUFFER));
         expect(mockSpeak).toHaveBeenCalledOnce();
-        expect(mockSpeak.mock.calls[0][0].text).toBe(longText);
+        expect(mockSpeak.mock.calls[0][0].text).toBe("A".repeat(MAX_BUFFER));
     });
 });