Skip to content

TTS auto-speak and STT auto-submit #3696

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
8 changes: 8 additions & 0 deletions frontend/src/App.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ const BrandingSettings = lazy(
() => import("@/pages/GeneralSettings/Settings/Branding")
);

const ChatSettings = lazy(
() => import("@/pages/GeneralSettings/Settings/Chat")
);

const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
const GeneralLLMPreference = lazy(
() => import("@/pages/GeneralSettings/LLMPreference")
Expand Down Expand Up @@ -197,6 +201,10 @@ export default function App() {
path="/settings/branding"
element={<ManagerRoute Component={BrandingSettings} />}
/>
<Route
path="/settings/chat"
element={<ManagerRoute Component={ChatSettings} />}
/>
<Route
path="/settings/beta-features"
element={<AdminRoute Component={ExperimentalFeatures} />}
Expand Down
6 changes: 6 additions & 0 deletions frontend/src/components/SettingsSidebar/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,12 @@ const SidebarOptions = ({ user = null, t }) => (
flex: true,
roles: ["admin", "manager"],
},
{
btnText: t("settings.chat"),
href: paths.settings.chat(),
flex: true,
roles: ["admin", "manager"],
},
]}
/>
<Option
Expand Down
107 changes: 104 additions & 3 deletions frontend/src/components/UserMenu/AccountModal/index.jsx
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import { useLanguageOptions } from "@/hooks/useLanguageOptions";
import usePfp from "@/hooks/usePfp";
import System from "@/models/system";
import Appearance from "@/models/appearance";
import { AUTH_USER } from "@/utils/constants";
import showToast from "@/utils/toast";
import { Plus, X } from "@phosphor-icons/react";
import ModalWrapper from "@/components/ModalWrapper";
import { useTheme } from "@/hooks/useTheme";
import { useTranslation } from "react-i18next";
import { useState, useEffect } from "react";

export default function AccountModal({ user, hideModal }) {
const { pfp, setPfp } = usePfp();
Expand Down Expand Up @@ -178,9 +180,15 @@ export default function AccountModal({ user, hideModal }) {
defaultValue={user.bio}
/>
</div>
<div className="flex flex-row gap-x-8">
<ThemePreference />
<LanguagePreference />
<div className="flex gap-x-16">
<div className="flex flex-col gap-y-6">
<ThemePreference />
<LanguagePreference />
</div>
<div className="flex flex-col gap-y-6">
<AutoSubmitPreference />
<AutoSpeakPreference />
</div>
</div>
</div>
<div className="flex justify-between items-center border-t border-theme-modal-border pt-4 p-6">
Expand Down Expand Up @@ -265,3 +273,96 @@ function ThemePreference() {
</div>
);
}

function AutoSubmitPreference() {
const [autoSubmitSttInput, setAutoSubmitSttInput] = useState(true);
const { t } = useTranslation();

useEffect(() => {
const settings = Appearance.getSettings();
setAutoSubmitSttInput(settings.autoSubmitSttInput ?? true);
}, []);

const handleChange = (e) => {
const newValue = e.target.checked;
setAutoSubmitSttInput(newValue);
Appearance.updateSettings({ autoSubmitSttInput: newValue });
};

return (
<div>
<label
htmlFor="autoSubmit"
className="block mb-2 text-sm font-medium text-white"
>
{t("customization.chat.auto_submit.title")}
</label>
<div className="flex items-center gap-x-4">
<label className="relative inline-flex cursor-pointer items-center">
<input
id="autoSubmit"
type="checkbox"
name="autoSubmit"
checked={autoSubmitSttInput}
onChange={handleChange}
className="peer sr-only"
/>
<div className="pointer-events-none peer h-6 w-11 rounded-full bg-[#CFCFD0] after:absolute after:left-[2px] after:top-[2px] after:h-5 after:w-5 after:rounded-full after:shadow-xl after:border-none after:bg-white after:box-shadow-md after:transition-all after:content-[''] peer-checked:bg-[#32D583] peer-checked:after:translate-x-full peer-checked:after:border-white peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-transparent"></div>
</label>
</div>
<p className="mt-2 text-xs text-white/60 max-w-[200px]">
{t("customization.chat.auto_submit.description")}
</p>
</div>
);
}

function AutoSpeakPreference() {
const [autoPlayAssistantTtsResponse, setAutoPlayAssistantTtsResponse] =
useState(false);
const { t } = useTranslation();

useEffect(() => {
const settings = Appearance.getSettings();
setAutoPlayAssistantTtsResponse(
settings.autoPlayAssistantTtsResponse ?? false
);
}, []);

const handleChange = (e) => {
const newValue = e.target.checked;
setAutoPlayAssistantTtsResponse(newValue);
Appearance.updateSettings({ autoPlayAssistantTtsResponse: newValue });
showToast("Auto-speak preference updated", "success");
setTimeout(() => {
window.location.reload();
}, 1500);
};

return (
<div>
<label
htmlFor="autoSpeak"
className="block mb-2 text-sm font-medium text-white"
>
{t("customization.chat.auto_speak.title")}
</label>
<div className="flex items-center gap-x-4">
<label className="relative inline-flex cursor-pointer items-center">
<input
id="autoSpeak"
type="checkbox"
name="autoSpeak"
checked={autoPlayAssistantTtsResponse}
onChange={handleChange}
className="peer sr-only"
/>
<div className="pointer-events-none peer h-6 w-11 rounded-full bg-[#CFCFD0] after:absolute after:left-[2px] after:top-[2px] after:h-5 after:w-5 after:rounded-full after:shadow-xl after:border-none after:bg-white after:box-shadow-md after:transition-all after:content-[''] peer-checked:bg-[#32D583] peer-checked:after:translate-x-full peer-checked:after:border-white peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-transparent"></div>
</label>
</div>
<p className="mt-2 text-xs text-white/60 max-w-[200px]">
{t("customization.chat.auto_speak.description")}
</p>
</div>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export default function AsyncTTSMessage({ slug, chatId }) {
<div className="mt-3 relative">
<button
onClick={speakMessage}
data-auto-play-chat-id={chatId}
data-tooltip-id="message-to-speech"
data-tooltip-content={
speaking ? "Pause TTS speech of message" : "TTS Speak message"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,39 +1,26 @@
import { useEffect, useState } from "react";
import { useTTSProvider } from "@/components/contexts/TTSProvider";
import NativeTTSMessage from "./native";
import AsyncTTSMessage from "./asyncTts";
import PiperTTSMessage from "./piperTTS";
import System from "@/models/system";

export default function TTSMessage({ slug, chatId, message }) {
const [settings, setSettings] = useState({});
const [provider, setProvider] = useState("native");
const [loading, setLoading] = useState(true);

useEffect(() => {
async function getSettings() {
const _settings = await System.keys();
setProvider(_settings?.TextToSpeechProvider ?? "native");
setSettings(_settings);
setLoading(false);
}
getSettings();
}, []);

const { settings, provider, loading } = useTTSProvider();
if (!chatId || loading) return null;

switch (provider) {
case "openai":
case "generic-openai":
case "elevenlabs":
return <AsyncTTSMessage slug={slug} chatId={chatId} />;
return <AsyncTTSMessage chatId={chatId} slug={slug} />;
case "piper_local":
return (
<PiperTTSMessage
chatId={chatId}
voiceId={settings?.TTSPiperTTSVoiceModel}
message={message}
/>
);
default:
return <NativeTTSMessage message={message} />;
return <NativeTTSMessage chatId={chatId} message={message} />;
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import React, { useEffect, useState } from "react";
import { SpeakerHigh, PauseCircle } from "@phosphor-icons/react";

export default function NativeTTSMessage({ message }) {
export default function NativeTTSMessage({ chatId, message }) {
const [speaking, setSpeaking] = useState(false);
const [supported, setSupported] = useState(false);
useEffect(() => {
Expand Down Expand Up @@ -36,6 +36,7 @@ export default function NativeTTSMessage({ message }) {
<div className="mt-3 relative">
<button
onClick={speakMessage}
data-auto-play-chat-id={chatId}
data-tooltip-id="message-to-speech"
data-tooltip-content={
speaking ? "Pause TTS speech of message" : "TTS Speak message"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { useEffect, useState, useRef } from "react";
import { SpeakerHigh, PauseCircle, CircleNotch } from "@phosphor-icons/react";
import PiperTTSClient from "@/utils/piperTTS";

export default function PiperTTS({ voiceId = null, message }) {
export default function PiperTTS({ chatId, voiceId = null, message }) {
const playerRef = useRef(null);
const [speaking, setSpeaking] = useState(false);
const [loading, setLoading] = useState(false);
Expand Down Expand Up @@ -53,6 +53,7 @@ export default function PiperTTS({ voiceId = null, message }) {
type="button"
onClick={speakMessage}
disabled={loading}
data-auto-play-chat-id={chatId}
data-tooltip-id="message-to-speech"
data-tooltip-content={
speaking ? "Pause TTS speech of message" : "TTS Speak message"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@ import SpeechRecognition, {
} from "react-speech-recognition";
import { PROMPT_INPUT_EVENT } from "../../PromptInput";
import { useTranslation } from "react-i18next";
import Appearance from "@/models/appearance";

// Module-level handle, declared without a value here.
// NOTE(review): presumably holds the id of the silence timer — confirm against the code that assigns it.
let timeout;
// How long to wait in silence before closing the speech-to-text session.
// NOTE(review): 3_200 is presumably milliseconds (~3.2s), not seconds — confirm against the timer that consumes it.
const SILENCE_INTERVAL = 3_200;

/**
* Speech-to-text input component for the chat window.
* @param {Object} props - The component props
* @param {(textToAppend: string, autoSubmit: boolean) => void} props.sendCommand - The function to send the command
* @returns {React.ReactElement} The SpeechToText component
*/
export default function SpeechToText({ sendCommand }) {
const {
transcript,
Expand Down Expand Up @@ -40,7 +48,7 @@ export default function SpeechToText({ sendCommand }) {
function endSTTSession() {
SpeechRecognition.stopListening();
if (transcript.length > 0) {
sendCommand(transcript, true);
sendCommand(transcript, Appearance.get("autoSubmitSttInput"));
}

resetTranscript();
Expand Down
8 changes: 8 additions & 0 deletions frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
.catch((e) => console.error(e));
};

/**
* Send a command to the LLM prompt input.
* @param {string} command - The command to send to the LLM
* @param {boolean} submit - Whether to submit the command immediately (default: false)
* @param {Object[]} history - The history of the chat
* @param {Object[]} attachments - The attachments to send to the LLM
* @returns {Promise<boolean>} - Resolves to whether the command was sent successfully
*/
const sendCommand = async (
command,
submit = false,
Expand Down
13 changes: 10 additions & 3 deletions frontend/src/components/WorkspaceChat/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ import paths from "@/utils/paths";
import ModalWrapper from "../ModalWrapper";
import { useParams } from "react-router-dom";
import { DnDFileUploaderProvider } from "./ChatContainer/DnDWrapper";
import {
TTSProvider,
useWatchForAutoPlayAssistantTTSResponse,
} from "../contexts/TTSProvider";

export default function WorkspaceChat({ loading, workspace }) {
useWatchForAutoPlayAssistantTTSResponse();
const { threadSlug = null } = useParams();
const [history, setHistory] = useState([]);
const [loadingHistory, setLoadingHistory] = useState(true);
Expand Down Expand Up @@ -64,9 +69,11 @@ export default function WorkspaceChat({ loading, workspace }) {

setEventDelegatorForCodeSnippets();
return (
<DnDFileUploaderProvider workspace={workspace}>
<ChatContainer workspace={workspace} knownHistory={history} />
</DnDFileUploaderProvider>
<TTSProvider>
<DnDFileUploaderProvider workspace={workspace}>
<ChatContainer workspace={workspace} knownHistory={history} />
</DnDFileUploaderProvider>
</TTSProvider>
);
}

Expand Down
Loading