Skip to content

Commit 6c2bd37

Browse files
fix: speak each CR-terminated line immediately, queue without cancel (#570)
Fixes the original bug where speechSynthesis.cancel() was called on every CR, killing each line before it could be heard. The real Votrax TNT had no cancel mechanism — it simply spoke each CR-terminated buffer and queued the rest. speechSynthesis.speak() does exactly this. Also removes BS handling (no evidence in the TNT manual) and documents ESC's source (daisy-chain unit-select, TNT Operator's Manual 1981). A future improvement could heuristically combine lines arriving within ~20ms into one utterance for better modern TTS behaviour, but the simple per-line queue works well and has no surprising pauses or dropped output. 🤖 Generated by LLM (Claude, via OpenClaw)
1 parent 2f01b04 commit 6c2bd37

2 files changed

Lines changed: 71 additions & 125 deletions

File tree

src/speech-output.js

Lines changed: 24 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -1,42 +1,35 @@
11
"use strict";
22

33
/**
4-
* RS-423 handler that routes transmitted bytes to the Web Speech API,
5-
* following the Votrax Type 'N Talk protocol (TNT Operator's Manual, 1981).
4+
* RS-423 handler that routes transmitted bytes to the Web Speech API.
65
*
7-
* Protocol summary (from the manual):
8-
* - Printable ASCII 0x20–0x7E: accumulated in the input buffer.
9-
* (On real hardware only A–Z, a–z, 0–9, and "." produce audible speech;
10-
* other printable chars produce silence. We pass the full buffer to the
11-
* browser TTS engine, which handles spaces and punctuation well.)
12-
* - CR (0x0D) = TALK-CLR: speak the buffer contents, then clear it.
13-
* - BS (0x08): delete the last character from the buffer.
14-
* - ESC (0x1B): mode/unit-select prefix — the following byte is consumed
15-
* as a control code and not treated as text.
16-
* - All other bytes (< 0x20 or > 0x7E, except the above): null data — ignored.
17-
* - Buffer-full: auto-speak when the buffer reaches MAX_BUFFER bytes.
18-
* (The manual mentions this condition but gives no explicit count. 128 bytes
19-
* is a conservative estimate given the TNT's 2 KB of onboard RAM.)
20-
* - Timer: after TIMER_MS of inactivity the buffer is spoken automatically,
21-
* emulating the TNT's optional TIMER mode ("about 3–4 seconds").
6+
* BBC programs use *FX3,1 (or *FX3,3) to send OSWRCH output to the RS-423
7+
* serial port, which on real hardware fed a Votrax Type 'N Talk synthesiser.
8+
* We intercept at the ACIA hardware boundary and route to speechSynthesis.
229
*
23-
* Note: LF (0x0A) is NOT a flush trigger on the real TNT — it is null data.
24-
* Only CR (0x0D) flushes the buffer.
10+
* Byte handling is based on the Votrax Type 'N Talk Operator's Manual (1981):
11+
* - Printable ASCII 0x20–0x7E: accumulated into the text buffer.
12+
* - CR (0x0D): "TALK-CLR" — speaks the buffer and clears it. Multiple CR-
13+
* terminated lines queue naturally via speechSynthesis.speak(). A future
14+
* improvement could heuristically combine lines that arrive within a single
15+
* frame (~20 ms) into one utterance, which would give modern TTS engines a
16+
* better sentence to work with — but the simple per-line queue works well
17+
* enough and has no surprising pauses or dropped output.
18+
* - LF (0x0A): explicitly listed as null data in the manual; ignored.
19+
* - ESC (0x1B): unit-select prefix for daisy-chained TNT units. ESC plus
20+
* the following byte are consumed silently (not passed to speechSynthesis).
21+
* - All other bytes: null data per the manual; ignored.
2522
*/
23+
2624
// From the TNT Operator's Manual: "The input buffer can hold more than 750
27-
// characters". The output queue (phonemes waiting for the SC-01) is 128
28-
// entries, which is a different thing entirely.
25+
// characters".
2926
export const MAX_BUFFER = 750;
3027

31-
// The manual says "approximately 4 seconds" for the inactivity timer.
32-
const TIMER_MS = 4000;
33-
3428
export class SpeechOutput {
3529
constructor() {
3630
this._buffer = "";
3731
this._escapeNext = false;
3832
this._enabled = false;
39-
this._timer = null;
4033
}
4134

4235
get enabled() {
@@ -46,45 +39,33 @@ export class SpeechOutput {
4639
set enabled(value) {
4740
this._enabled = !!value;
4841
if (!this._enabled) {
49-
this._cancelTimer();
5042
this._buffer = "";
51-
this._cancelSpeech();
43+
if (typeof speechSynthesis !== "undefined") speechSynthesis.cancel();
5244
}
5345
}
5446

5547
/** RS-423 handler interface: called for each byte the BBC transmits. */
5648
onTransmit(byte) {
5749
if (!this._enabled) return;
5850

59-
// ESC prefix: consume the following byte as a mode/unit-select code.
6051
if (this._escapeNext) {
6152
this._escapeNext = false;
6253
return;
6354
}
6455

6556
switch (byte) {
66-
case 0x1b: // ESC — next byte is a mode control, not text.
57+
case 0x1b: // ESC — next byte is a unit-select code, not text.
6758
this._escapeNext = true;
6859
return;
6960

70-
case 0x0d: // CR = TALK-CLR: speak and clear.
61+
case 0x0d: // CR TALK-CLR: speak current buffer and clear it.
7162
this._flush();
7263
return;
7364

74-
case 0x08: // BS: delete last character from buffer.
75-
this._buffer = this._buffer.slice(0, -1);
76-
this._resetTimer();
77-
return;
78-
7965
default:
8066
if (byte >= 0x20 && byte <= 0x7e) {
81-
// Printable ASCII — accumulate.
8267
this._buffer += String.fromCharCode(byte);
83-
if (this._buffer.length >= MAX_BUFFER) {
84-
this._flush(); // buffer-full condition
85-
} else {
86-
this._resetTimer();
87-
}
68+
if (this._buffer.length >= MAX_BUFFER) this._flush();
8869
}
8970
// Everything else is null data — silently ignored.
9071
}
@@ -95,39 +76,10 @@ export class SpeechOutput {
9576
return -1;
9677
}
9778

98-
// ------------------------------------------------------------------
99-
10079
_flush() {
101-
this._cancelTimer();
10280
const text = this._buffer.trim();
10381
this._buffer = "";
104-
if (!text) return;
105-
this._speak(text);
106-
}
107-
108-
_resetTimer() {
109-
this._cancelTimer();
110-
this._timer = setTimeout(() => {
111-
this._timer = null;
112-
this._flush();
113-
}, TIMER_MS);
114-
}
115-
116-
_cancelTimer() {
117-
if (this._timer !== null) {
118-
clearTimeout(this._timer);
119-
this._timer = null;
120-
}
121-
}
122-
123-
_speak(text) {
124-
if (typeof speechSynthesis === "undefined") return;
125-
speechSynthesis.cancel();
126-
const utterance = new SpeechSynthesisUtterance(text);
127-
speechSynthesis.speak(utterance);
128-
}
129-
130-
_cancelSpeech() {
131-
if (typeof speechSynthesis !== "undefined") speechSynthesis.cancel();
82+
if (!text || typeof speechSynthesis === "undefined") return;
83+
speechSynthesis.speak(new SpeechSynthesisUtterance(text));
13284
}
13385
}

tests/unit/test-speech-output.js

Lines changed: 47 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,10 @@ global.SpeechSynthesisUtterance = class {
1111
}
1212
};
1313

14+
function transmit(speech, str) {
15+
for (const ch of str) speech.onTransmit(ch.charCodeAt(0));
16+
}
17+
1418
describe("SpeechOutput", () => {
1519
let speech;
1620

@@ -23,92 +27,82 @@ describe("SpeechOutput", () => {
2327

2428
it("tryReceive always returns -1", () => {
2529
expect(speech.tryReceive()).toBe(-1);
26-
expect(speech.tryReceive(true)).toBe(-1);
2730
});
2831

2932
it("speaks buffered text on CR", () => {
30-
for (const ch of "HELLO") speech.onTransmit(ch.charCodeAt(0));
33+
transmit(speech, "HELLO");
3134
expect(mockSpeak).not.toHaveBeenCalled();
32-
speech.onTransmit(13); // CR
35+
speech.onTransmit(0x0d);
3336
expect(mockSpeak).toHaveBeenCalledOnce();
3437
expect(mockSpeak.mock.calls[0][0].text).toBe("HELLO");
3538
});
3639

37-
it("does NOT flush on LF (LF is null data per Votrax spec)", () => {
38-
for (const ch of "WORLD") speech.onTransmit(ch.charCodeAt(0));
39-
speech.onTransmit(10); // LF — null data, must not trigger speech
40+
it("multiple CR-terminated lines queue without cancelling each other", () => {
41+
transmit(speech, "Welcome to the castle.");
42+
speech.onTransmit(0x0d);
43+
transmit(speech, "There is a sword here.");
44+
speech.onTransmit(0x0d);
45+
transmit(speech, "What now?");
46+
speech.onTransmit(0x0d);
47+
48+
expect(mockSpeak).toHaveBeenCalledTimes(3);
49+
expect(mockCancel).not.toHaveBeenCalled();
50+
expect(mockSpeak.mock.calls[0][0].text).toBe("Welcome to the castle.");
51+
expect(mockSpeak.mock.calls[1][0].text).toBe("There is a sword here.");
52+
expect(mockSpeak.mock.calls[2][0].text).toBe("What now?");
53+
});
54+
55+
it("LF is null data — ignored", () => {
56+
transmit(speech, "WORLD");
57+
speech.onTransmit(0x0a); // LF — ignored
4058
expect(mockSpeak).not.toHaveBeenCalled();
41-
speech.onTransmit(13); // CR — the real flush trigger
42-
expect(mockSpeak).toHaveBeenCalledOnce();
59+
speech.onTransmit(0x0d);
4360
expect(mockSpeak.mock.calls[0][0].text).toBe("WORLD");
4461
});
4562

4663
it("does nothing when disabled", () => {
4764
speech.enabled = false;
48-
for (const ch of "TEST") speech.onTransmit(ch.charCodeAt(0));
49-
speech.onTransmit(13);
65+
transmit(speech, "TEST");
66+
speech.onTransmit(0x0d);
5067
expect(mockSpeak).not.toHaveBeenCalled();
5168
});
5269

53-
it("cancels speech and clears buffer when disabled mid-buffer", () => {
54-
for (const ch of "PARTIAL") speech.onTransmit(ch.charCodeAt(0));
70+
it("cancels speech and clears buffer when disabled", () => {
71+
transmit(speech, "PARTIAL");
5572
speech.enabled = false;
5673
expect(mockCancel).toHaveBeenCalled();
5774
speech.enabled = true;
58-
speech.onTransmit(13);
59-
expect(mockSpeak).not.toHaveBeenCalled(); // buffer was cleared
75+
speech.onTransmit(0x0d);
76+
expect(mockSpeak).not.toHaveBeenCalled();
6077
});
6178

62-
it("ignores non-printable bytes (< 0x20) other than CR, BS, ESC", () => {
63-
// Per Votrax manual: non-printable bytes that aren't specified commands
64-
// are null data and are ignored. This means BBC VDU codes, BEL,
65-
// LF, etc. are all silently dropped.
66-
speech.onTransmit(7); // BEL
67-
speech.onTransmit(22); // VDU 22 (MODE)
68-
speech.onTransmit(7); // would-be VDU param byte — treated as null data, not VDU
69-
for (const ch of "DING") speech.onTransmit(ch.charCodeAt(0));
70-
speech.onTransmit(13);
79+
it("ignores non-printable bytes other than CR and ESC", () => {
80+
speech.onTransmit(7); // BEL — null data
81+
speech.onTransmit(22); // VDU 22 — null data
82+
transmit(speech, "DING");
83+
speech.onTransmit(0x0d);
7184
expect(mockSpeak.mock.calls[0][0].text).toBe("DING");
7285
});
7386

74-
it("handles BS (0x08) — deletes last character from buffer", () => {
75-
for (const ch of "HI!") speech.onTransmit(ch.charCodeAt(0));
76-
speech.onTransmit(0x08); // delete "!"
87+
it("BS (0x08) is null data — ignored", () => {
88+
// The TNT manual lists only CR, LF, and ESC as defined commands.
89+
transmit(speech, "HI!");
90+
speech.onTransmit(0x08);
7791
speech.onTransmit(0x0d);
78-
expect(mockSpeak.mock.calls[0][0].text).toBe("HI");
92+
expect(mockSpeak.mock.calls[0][0].text).toBe("HI!");
7993
});
8094

81-
it("handles ESC (0x1B) — next byte is a mode control, not text", () => {
82-
for (const ch of "TEST") speech.onTransmit(ch.charCodeAt(0));
95+
it("ESC consumes the following byte silently (unit-select, TNT manual)", () => {
96+
transmit(speech, "TEST");
8397
speech.onTransmit(0x1b); // ESC
84-
speech.onTransmit(0x11); // DC1 = PSEND ON — consumed as mode code
98+
speech.onTransmit(0x41); // unit-select byte — consumed
8599
speech.onTransmit(0x0d);
86100
expect(mockSpeak.mock.calls[0][0].text).toBe("TEST");
87101
});
88102

89-
it("ignores DEL (127) and high bytes", () => {
90-
speech.onTransmit(127);
91-
speech.onTransmit(200);
92-
for (const ch of "HI") speech.onTransmit(ch.charCodeAt(0));
93-
speech.onTransmit(13);
94-
expect(mockSpeak.mock.calls[0][0].text).toBe("HI");
95-
});
96-
97-
it("cancels in-progress speech before starting new utterance", () => {
98-
for (const ch of "ONE") speech.onTransmit(ch.charCodeAt(0));
99-
speech.onTransmit(13);
100-
for (const ch of "TWO") speech.onTransmit(ch.charCodeAt(0));
101-
speech.onTransmit(13);
102-
expect(mockCancel).toHaveBeenCalledTimes(2);
103-
expect(mockSpeak).toHaveBeenCalledTimes(2);
104-
});
105-
106-
it("auto-speaks when input buffer reaches MAX_BUFFER bytes (buffer-full condition)", () => {
107-
// The Votrax manual says "input buffer full" is a TALK-CLR trigger.
108-
// Our MAX_BUFFER is 128 bytes.
109-
const longText = "A".repeat(MAX_BUFFER);
110-
for (const ch of longText) speech.onTransmit(ch.charCodeAt(0));
103+
it("auto-flushes when buffer reaches MAX_BUFFER bytes", () => {
104+
transmit(speech, "A".repeat(MAX_BUFFER));
111105
expect(mockSpeak).toHaveBeenCalledOnce();
112-
expect(mockSpeak.mock.calls[0][0].text).toBe(longText);
106+
expect(mockSpeak.mock.calls[0][0].text).toBe("A".repeat(MAX_BUFFER));
113107
});
114108
});

0 commit comments

Comments
 (0)