Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dist/
build/
coverage/*
!coverage/coverage_gate.py
frontend/coverage/
.coverage
.coverage.*

Expand Down
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,18 @@ Current repository quality pattern:
Frontend steering highlights:

- New chat creation is handled from the sidebar Current Session action.
- Voice input uses browser microphone permission and speech recognition.
- Voice input triggers a dedicated speech service from the microphone button and streams text back live.
- Profile includes a help/forum section for feature lookup.

Local voice stack launch:

1. `npm run dev`

Manual fallback (two terminals):

1. `npm --prefix backend run dev`
2. `npm --prefix frontend run dev`

## Quality Gates For Main

The main branch is protected by required CI checks:
Expand Down
1 change: 1 addition & 0 deletions backend/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Backend runtime package for Chat Assistant AI."""
1 change: 1 addition & 0 deletions backend/app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""FastAPI application package."""
1 change: 1 addition & 0 deletions backend/app/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Cache abstractions for speech transcription sessions."""
1 change: 1 addition & 0 deletions backend/app/handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Request/connection handlers for backend entrypoints."""
56 changes: 56 additions & 0 deletions backend/app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from __future__ import annotations

from contextlib import asynccontextmanager
import os
from typing import AsyncIterator

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.concurrency import run_in_threadpool

from .services.core.chat.transcription.websocket import router as transcription_router
from .services.utils.transcription.preflight import (
print_preflight_report,
run_transcription_preflight,
)


def _parse_bool_env(name: str, default_value: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable to read.
        default_value: Value returned when the variable is unset or contains
            only whitespace.

    Returns:
        ``True`` when the trimmed, lower-cased value is one of ``1``, ``true``,
        ``yes`` or ``on``; ``False`` for any other non-empty value.
    """
    raw_value = os.getenv(name, "").strip().lower()
    if not raw_value:
        # Unset and blank are treated the same: fall back to the caller's default.
        return default_value

    return raw_value in {"1", "true", "yes", "on"}


def create_app() -> FastAPI:
    """Build and configure the speech backend FastAPI application.

    The returned app has a lifespan hook that runs the transcription preflight
    at startup, a permissive CORS policy, a ``/health`` endpoint and the
    transcription router.

    Returns:
        The configured ``FastAPI`` instance.
    """

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        preload_on_startup = _parse_bool_env("TRANSCRIPTION_PRELOAD_ON_STARTUP", True)
        # The preflight is synchronous; run it in a worker thread so the event
        # loop is not blocked during startup.
        report = await run_in_threadpool(
            run_transcription_preflight,
            preload_runtime=preload_on_startup,
        )
        print_preflight_report(report)
        yield

    app = FastAPI(
        title="Chat Assistant AI Speech Backend",
        version="0.1.0",
        lifespan=lifespan,
    )

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        # A wildcard origin must not be combined with credentials: browsers
        # reject "*" on credentialed requests, and allowing both would let any
        # site send cookies/authorization to this service. List explicit
        # origins here before turning credentials back on.
        allow_credentials=False,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    @app.get("/health")
    async def health() -> dict[str, str]:
        return {"status": "ok"}

    app.include_router(transcription_router)
    return app


# Module-level application instance, built once when this module is imported.
app = create_app()
1 change: 1 addition & 0 deletions backend/app/services/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Service layer for backend runtime orchestration."""
1 change: 1 addition & 0 deletions backend/app/services/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Core backend services package."""
1 change: 1 addition & 0 deletions backend/app/services/core/chat/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Core chat service modules."""
1 change: 1 addition & 0 deletions backend/app/services/core/chat/transcription/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Core speech transcription runtime components."""
113 changes: 113 additions & 0 deletions backend/app/services/core/chat/transcription/handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from __future__ import annotations

import time
from typing import Any, Callable

from starlette.concurrency import run_in_threadpool

from ....dependency.transcription.speech_cache import SpeechTranscriptionCache, speech_cache
from .runtime import invoke_local_transcription_lambda, runtime_service
from .transcriber import WhisperDependenciesMissingError, WhisperInferenceError

# Callable contract for a transcription backend:
# (audio_chunk, language, mime_type) -> transcript text.
RuntimeInvoker = Callable[[bytes, str | None, str | None], str]


class LiveTranscriptionHandler:
    """Connection-level orchestration for websocket speech transcription.

    Combines a runtime invoker (a callable that turns one audio chunk into
    text) with a cache backend that records received chunks and transcripts
    per session.
    """

    def __init__(
        self,
        runtime_invoker: RuntimeInvoker | None = None,
        cache_backend: SpeechTranscriptionCache | None = None,
    ) -> None:
        """Create a handler.

        Args:
            runtime_invoker: Callable invoked as
                ``(audio_chunk, language, mime_type) -> text``. When ``None``,
                the module-level local invoker and runtime service are used.
            cache_backend: Session cache. When ``None``, the shared
                ``speech_cache`` is used.
        """
        # Compare against None explicitly: an injected dependency that happens
        # to be falsy (e.g. a cache defining __len__ that is currently empty)
        # must not be silently swapped for the global default.
        if runtime_invoker is None:
            self._runtime_invoker = invoke_local_transcription_lambda
            self._runtime_service = runtime_service
        else:
            self._runtime_invoker = runtime_invoker
            # A custom invoker bypasses the shared runtime service, so there
            # is no service to ask for concurrency hints.
            self._runtime_service = None

        self._cache_backend = speech_cache if cache_backend is None else cache_backend

    def open_session(self) -> str:
        """Return a new session id obtained from the cache backend."""
        return self._cache_backend.create_session_id()

    def close_session(self, session_id: str) -> None:
        """Clear everything the cache backend holds for ``session_id``."""
        self._cache_backend.clear_session(session_id)

    def recommended_max_inflight_chunks(self) -> int:
        """Return how many chunks may be transcribed concurrently.

        Falls back to ``1`` when a custom invoker is in use, because no
        runtime service is available to provide a recommendation.
        """
        if self._runtime_service is None:
            return 1

        return self._runtime_service.recommended_max_inflight_chunks()

    @staticmethod
    def normalize_language(language: str | None) -> str | None:
        """Reduce a language tag to its lower-cased primary subtag.

        Args:
            language: Language tag such as ``"en-US"``, or ``None``.

        Returns:
            The text before the first ``-`` (``"en-US"`` -> ``"en"``), or
            ``None`` when the input is ``None``, blank, or has an empty
            primary subtag (e.g. ``"-US"``).
        """
        if language is None:
            return None

        normalized = language.strip().lower()
        primary_subtag = normalized.partition("-")[0]
        # An empty result means "no usable language"; report it as None rather
        # than handing an empty string to the transcriber.
        return primary_subtag or None

    async def transcribe_chunk(
        self,
        *,
        session_id: str,
        audio_chunk: bytes,
        chunk_index: int,
        language: str | None,
        mime_type: str | None = None,
    ) -> dict[str, Any]:
        """Transcribe one audio chunk and build the response payload.

        The chunk is recorded in the cache, transcribed in a worker thread,
        and a non-empty transcript is stored back in the cache.

        Args:
            session_id: Session the chunk belongs to.
            audio_chunk: Raw audio bytes for this chunk.
            chunk_index: Position of the chunk within the session.
            language: Language passed through to the invoker.
            mime_type: Optional MIME type passed through to the invoker.

        Returns:
            A payload dict whose ``"type"`` is ``"transcript"``, ``"empty"``
            or ``"error"``.
        """
        self._cache_backend.store_received_chunk(
            session_id=session_id,
            chunk_index=chunk_index,
            audio_chunk=audio_chunk,
            language=language,
        )

        started_at = time.perf_counter()

        try:
            # The invoker is synchronous; keep it off the event loop.
            text = await run_in_threadpool(
                self._runtime_invoker,
                audio_chunk,
                language,
                mime_type,
            )
        except WhisperDependenciesMissingError:
            return {
                "type": "error",
                "message": 'Whisper dependencies missing. Install with: pip install -e ".[backend]"',
            }
        except WhisperInferenceError:
            # Inference failures are reported to the client as an empty chunk
            # rather than as an error payload.
            return {
                "type": "empty",
                "chunk_index": chunk_index,
                "latency_ms": int((time.perf_counter() - started_at) * 1000),
            }

        latency_ms = int((time.perf_counter() - started_at) * 1000)

        normalized_text = text.strip()
        if not normalized_text:
            return {
                "type": "empty",
                "chunk_index": chunk_index,
                "latency_ms": latency_ms,
            }

        self._cache_backend.store_transcript(
            session_id=session_id,
            chunk_index=chunk_index,
            transcript=normalized_text,
            latency_ms=latency_ms,
        )

        return {
            "type": "transcript",
            "text": normalized_text,
            "chunk_index": chunk_index,
            "is_final": True,
            "latency_ms": latency_ms,
        }
75 changes: 75 additions & 0 deletions backend/app/services/core/chat/transcription/runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import annotations

from dataclasses import dataclass
from threading import Lock

from .transcriber import WhisperChunkTranscriber


@dataclass(frozen=True)
class TranscriptionInvokeEvent:
    """Immutable payload describing a single transcription request."""

    # Audio bytes for the chunk to transcribe.
    audio_chunk: bytes
    # Language passed to the transcriber; None when not specified.
    language: str | None = None
    # MIME type passed to the transcriber; None when not specified.
    mime_type: str | None = None


class TranscriptionRuntimeService:
    """Runtime service that behaves like a local lambda invocation target."""

    # Device-name prefixes that are treated as accelerated backends.
    _ACCELERATED_DEVICE_PREFIXES = ("cuda", "mps", "xpu")

    def __init__(self, transcriber: WhisperChunkTranscriber | None = None) -> None:
        """Create the service.

        Args:
            transcriber: Transcriber to delegate to. A new
                ``WhisperChunkTranscriber`` is created when ``None``.
        """
        # Explicit None check so an injected transcriber is never replaced
        # just because it evaluates as falsy.
        self._transcriber = WhisperChunkTranscriber() if transcriber is None else transcriber
        self._accelerated_invoke_lock = Lock()

    def preload(self) -> None:
        """Ask the transcriber to load itself ahead of the first request."""
        self._transcriber.ensure_loaded()

    @property
    def runtime_device(self) -> str | None:
        """Device name reported by the transcriber, or ``None``.

        ``None`` is returned when the transcriber has no ``runtime_device``
        attribute or the attribute is not a string.
        """
        runtime_device = getattr(self._transcriber, "runtime_device", None)
        return runtime_device if isinstance(runtime_device, str) else None

    @classmethod
    def _is_accelerated_device(cls, device: str) -> bool:
        """Return whether ``device`` names a cuda/mps/xpu backend (case-insensitive)."""
        return device.lower().startswith(cls._ACCELERATED_DEVICE_PREFIXES)

    def _transcribe_event(self, event: TranscriptionInvokeEvent) -> str:
        """Forward the event's fields to the transcriber and return its text."""
        return self._transcriber.transcribe_chunk(
            audio_chunk=event.audio_chunk,
            language=event.language,
            mime_type=event.mime_type,
        )

    def recommended_max_inflight_chunks(self) -> int:
        """Return the suggested number of concurrently processed chunks.

        Returns:
            ``1`` when the device is unknown or accelerated, otherwise ``2``.
        """
        device = self.runtime_device
        if device is None or self._is_accelerated_device(device):
            return 1

        return 2

    def invoke(self, event: TranscriptionInvokeEvent) -> str:
        """Transcribe the audio described by ``event``.

        Args:
            event: Audio chunk plus optional language and MIME type.

        Returns:
            The text returned by the transcriber.
        """
        # Accelerated backends can be unstable when sharing one pipeline instance across threads,
        # so those invocations are serialized behind a lock.
        device = self.runtime_device
        if device is not None and self._is_accelerated_device(device):
            with self._accelerated_invoke_lock:
                return self._transcribe_event(event)

        return self._transcribe_event(event)


# Shared module-level service instance used by invoke_local_transcription_lambda.
runtime_service = TranscriptionRuntimeService()


def invoke_local_transcription_lambda(
    audio_chunk: bytes,
    language: str | None = None,
    mime_type: str | None = None,
) -> str:
    """Transcribe one audio chunk through the shared runtime service.

    Args:
        audio_chunk: Audio bytes to transcribe.
        language: Optional language forwarded to the service.
        mime_type: Optional MIME type forwarded to the service.

    Returns:
        The text produced by ``runtime_service.invoke``.
    """
    return runtime_service.invoke(
        TranscriptionInvokeEvent(
            audio_chunk=audio_chunk,
            language=language,
            mime_type=mime_type,
        )
    )
Loading
Loading