diff --git a/plugins/voicemode/skills/voicemode/SKILL.md b/plugins/voicemode/skills/voicemode/SKILL.md index 7b364ec2..dd5af5a5 100644 --- a/plugins/voicemode/skills/voicemode/SKILL.md +++ b/plugins/voicemode/skills/voicemode/SKILL.md @@ -370,6 +370,68 @@ voicemode diag info voicemode diag devices ``` +### Conversation History Search + +VoiceMode logs all exchanges and provides powerful search capabilities to find and replay past conversations. + +**Load conversation history into SQLite:** + +```bash +# Load all new exchanges since last sync +voicemode history load + +# Load all exchanges (ignore last sync) +voicemode history load --all + +# Load from specific date +voicemode history load --since 2025-12-01 + +# Load last 7 days +voicemode history load --days 7 +``` + +**Search conversations:** + +```bash +# Full-text search +voicemode history search "minion indirectly" + +# Search only agent speech (TTS) +voicemode history search --type tts "hello" + +# Search only user speech (STT) +voicemode history search --type stt "hello" + +# Search specific date +voicemode history search --date 2025-12-27 "keyword" + +# Search and play first result automatically +voicemode history search --play "memorable quote" + +# Limit results +voicemode history search --limit 50 "conversation" +``` + +**Play audio clips:** + +```bash +# Play by exchange ID (from search results) +voicemode history play ex_abc123def456 +``` + +**Search Features:** +- Full-text search using SQLite FTS5 (fast, supports complex queries) +- Filter by type (stt/tts), date, or conversation +- Audio files automatically resolved from timestamp +- Incremental loading - won't duplicate already-loaded exchanges +- All conversations stored in `~/.voicemode/cache/conversations.db` + +**Use Cases:** +- Find memorable moments or important discussions +- Review what was said in past conversations +- Create clips of agent responses for testing +- Debug conversation issues by reviewing exact exchanges + ### Token 
# Root of the `voicemode history ...` command tree; the subcommands below
# attach themselves with `@history.command()`.
@click.group()
def history():
    """Manage and search conversation history."""
@history.command()
@click.option(
    "--since",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    help="Only load exchanges after this date",
)
@click.option(
    "--days",
    type=int,
    help="Only load exchanges from the last N days",
)
@click.option(
    "--all",
    "load_all",
    is_flag=True,
    help="Load all exchanges (ignore last sync timestamp)",
)
def load(since, days, load_all):
    """Load conversation exchanges from JSONL into SQLite database.

    By default, only loads exchanges since the last sync. Use --all to reload everything.

    Examples:
        voicemode history load                     # Load new exchanges since last sync
        voicemode history load --all               # Reload all exchanges
        voicemode history load --since 2025-12-01  # Load from specific date
        voicemode history load --days 7            # Load last 7 days
    """
    # The context manager closes the connection even when a load step raises.
    with HistoryDatabase() as db:
        loader = HistoryLoader(db)

        click.echo("Loading conversation history into SQLite...")

        # Precedence when several options are given: --days, --since, --all.
        if days:
            stats = loader.load_recent(days=days)
            summary = f"Loaded exchanges from last {days} days"
        elif since:
            # An explicit cutoff is used instead of the stored last-sync time.
            stats = loader.load_all(since=since)
            summary = f"Loaded exchanges since {since.date()}"
        elif load_all:
            # Blank the stored last-sync marker so the loader applies no cutoff.
            db.set_sync_metadata("last_sync_timestamp", "")
            stats = loader.load_all()
            summary = "Loaded all exchanges"
        else:
            # Incremental load: the loader resumes from the stored marker.
            stats = loader.load_all()
            summary = "Loaded new exchanges since last sync"
        click.echo(summary)

        total_count = db.get_exchange_count()

    # Report what this run did and the resulting database size.
    click.echo(
        f"\nResults: {stats['inserted']} inserted, {stats['skipped']} skipped, "
        f"{stats['errors']} errors"
    )
    click.echo(f"Total exchanges in database: {total_count}")
@history.command()
@click.argument("query")
@click.option(
    "--type",
    "exchange_type",
    type=click.Choice(["stt", "tts"]),
    help="Filter by exchange type (stt=user speech, tts=agent speech)",
)
@click.option(
    "--date",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    help="Filter by specific date",
)
@click.option(
    "--limit",
    type=int,
    default=20,
    help="Maximum number of results (default: 20)",
)
@click.option(
    "--play",
    is_flag=True,
    help="Play audio from first result automatically",
)
def search(query, exchange_type, date, limit, play):
    """Search conversation history using full-text search.

    Searches through all conversation text. Results are ordered by timestamp (newest first).

    Examples:
        voicemode history search "minion indirectly"
        voicemode history search --type tts "hello"        # Only agent speech
        voicemode history search --type stt "hello"        # Only user speech
        voicemode history search --date 2025-12-27 "keyword"
        voicemode history search --play "memorable quote"  # Search and play first result
    """
    # Imported locally: only needed to turn query errors into a readable message.
    import sqlite3

    # click hands --date over as a datetime; the searcher filters on a date.
    target_date = date.date() if date else None

    # The context manager closes the connection on every exit path.
    with HistoryDatabase() as db:
        try:
            results = HistorySearcher(db).search(
                query=query,
                exchange_type=exchange_type,
                target_date=target_date,
                limit=limit,
            )
        except sqlite3.OperationalError as exc:
            # The query text is handed to FTS5 as-is, so malformed FTS5 syntax
            # (for example an unbalanced quote) is reported by SQLite here.
            click.echo(f"Search failed: {exc}", err=True)
            raise SystemExit(1) from exc

    if not results:
        click.echo("No results found.")
        return

    click.echo(f"Found {len(results)} result(s):\n")

    for i, result in enumerate(results, 1):
        ts = result.timestamp.strftime("%Y-%m-%d %H:%M:%S")
        type_label = "USER" if result.type == "stt" else "AGENT"

        click.echo(f"{i}. [{ts}] {type_label}: {result.text}")
        click.echo(f"   ID: {result.id}")

        # Show the resolved path, or the recorded name when the file is missing.
        audio_path = result.get_audio_path()
        if audio_path:
            click.echo(f"   Audio: {audio_path}")
        else:
            click.echo(f"   Audio: {result.audio_file} (not found)")

        click.echo()

    # Auto-play the first (newest) hit; results is non-empty at this point.
    if play:
        first_result = results[0]
        audio_path = first_result.get_audio_path()
        if audio_path:
            click.echo(f"Playing: {first_result.text}\n")
            _play_audio(audio_path)
        else:
            click.echo("Audio file not found, cannot play.")
@history.command()
@click.argument("exchange_id")
def play(exchange_id):
    """Play audio from a specific exchange by ID.

    Use the exchange ID from search results.

    Examples:
        voicemode history play ex_abc123def456
    """
    # Only the lookup needs the database. Closing it before playback keeps the
    # connection from staying open while the audio player blocks, and the
    # context manager also closes it if the lookup raises.
    with HistoryDatabase() as db:
        result = HistorySearcher(db).get_by_id(exchange_id)

    if not result:
        click.echo(f"Exchange not found: {exchange_id}")
        return

    audio_path = result.get_audio_path()
    if not audio_path:
        click.echo(f"Audio file not found: {result.audio_file}")
        return

    # Show what is about to be played.
    ts = result.timestamp.strftime("%Y-%m-%d %H:%M:%S")
    type_label = "USER" if result.type == "stt" else "AGENT"
    click.echo(f"[{ts}] {type_label}: {result.text}\n")

    _play_audio(audio_path)
def _play_audio(audio_path: Path):
    """Play an audio file with the first player found on PATH.

    Players are tried in priority order: mpv, then afplay, then ffplay.
    If none is installed a hint is printed instead.

    Args:
        audio_path: Path to audio file
    """
    # (executable, flags placed before the file name), highest priority first.
    players = (
        ("mpv", ["--no-video"]),
        ("afplay", []),
        ("ffplay", ["-nodisp", "-autoexit", "-hide_banner"]),
    )

    for executable, flags in players:
        if shutil.which(executable) is None:
            continue
        # Blocks until the player exits; its exit status is not inspected.
        subprocess.run([executable, *flags, str(audio_path)])
        return

    click.echo("No audio player found. Install mpv, or use afplay/ffplay.")
class HistoryDatabase:
    """Manages the SQLite database that backs conversation history.

    The database holds an ``exchanges`` table, an FTS5 index over its ``text``
    column (``exchanges_fts``, kept in sync by triggers) and a small
    ``sync_metadata`` key/value table. Instances can be used as context
    managers; the connection is closed on exit.
    """

    def __init__(self, db_path: Optional[Path] = None):
        """Open (creating if needed) the database and ensure the schema exists.

        Args:
            db_path: Path to SQLite database. Defaults to
                ~/.voicemode/cache/conversations.db
        """
        if db_path is None:
            db_path = Path.home() / ".voicemode" / "cache" / "conversations.db"

        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(self.db_path))
        # Rows are addressed by column name throughout (row["value"], ...).
        self.conn.row_factory = sqlite3.Row
        self._init_schema()

    def _init_schema(self):
        """Create tables, FTS5 index, triggers and indexes if they are missing."""
        cursor = self.conn.cursor()

        # Main exchanges table.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS exchanges (
                id TEXT PRIMARY KEY,
                timestamp TEXT NOT NULL,
                conversation_id TEXT,
                type TEXT NOT NULL,
                text TEXT NOT NULL,
                audio_file TEXT,
                project_path TEXT,
                metadata TEXT
            )
        """)

        # External-content FTS5 index: it stores only the index and reads the
        # text itself from `exchanges` via rowid.
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS exchanges_fts
            USING fts5(text, content=exchanges, content_rowid=rowid)
        """)

        # Triggers keep the index in sync with the content table.
        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS exchanges_ai
            AFTER INSERT ON exchanges
            BEGIN
                INSERT INTO exchanges_fts(rowid, text)
                VALUES (new.rowid, new.text);
            END
        """)

        # For an external-content table, index entries must be removed with the
        # special 'delete' command and the OLD column values. A plain DELETE
        # makes FTS5 look the text up in `exchanges`, which in an AFTER trigger
        # is already gone (delete) or already holds the new text (update), so
        # the old tokens would stay in the index.
        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS exchanges_ad
            AFTER DELETE ON exchanges
            BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, text)
                VALUES ('delete', old.rowid, old.text);
            END
        """)

        cursor.execute("""
            CREATE TRIGGER IF NOT EXISTS exchanges_au
            AFTER UPDATE ON exchanges
            BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, text)
                VALUES ('delete', old.rowid, old.text);
                INSERT INTO exchanges_fts(rowid, text)
                VALUES (new.rowid, new.text);
            END
        """)

        # Key/value table for tracking sync state.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS sync_metadata (
                key TEXT PRIMARY KEY,
                value TEXT
            )
        """)

        # Indexes for date filtering, STT/TTS filtering and grouping.
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_exchanges_timestamp
            ON exchanges(timestamp)
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_exchanges_type
            ON exchanges(type)
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_exchanges_conversation
            ON exchanges(conversation_id)
        """)

        self.conn.commit()

    def insert_exchange(
        self,
        id: str,
        timestamp: str,
        conversation_id: Optional[str],
        type: str,
        text: str,
        audio_file: Optional[str],
        project_path: Optional[str],
        metadata: Optional[dict],
    ) -> bool:
        """Insert a single exchange into the database.

        Args:
            id: Unique exchange identifier
            timestamp: ISO timestamp
            conversation_id: Conversation group identifier
            type: Exchange type ('stt' or 'tts')
            text: Transcribed or spoken text
            audio_file: Path to audio file
            project_path: Working directory context
            metadata: Full metadata dictionary (stored as JSON; an empty or
                missing dictionary is stored as NULL)

        Returns:
            True if inserted, False if SQLite rejected the row with an
            integrity error (in practice: the ID already exists).
        """
        metadata_json = json.dumps(metadata) if metadata else None

        try:
            # `with conn` commits on success and rolls back on error, so a
            # rejected row does not leave a transaction open.
            with self.conn:
                self.conn.execute(
                    """
                    INSERT INTO exchanges (
                        id, timestamp, conversation_id, type, text,
                        audio_file, project_path, metadata
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        id,
                        timestamp,
                        conversation_id,
                        type,
                        text,
                        audio_file,
                        project_path,
                        metadata_json,
                    ),
                )
        except sqlite3.IntegrityError:
            return False
        return True

    def get_sync_metadata(self, key: str) -> Optional[str]:
        """Get sync metadata value by key.

        Args:
            key: Metadata key

        Returns:
            Value or None if key doesn't exist
        """
        cursor = self.conn.cursor()
        cursor.execute("SELECT value FROM sync_metadata WHERE key = ?", (key,))
        row = cursor.fetchone()
        return row["value"] if row else None

    def set_sync_metadata(self, key: str, value: str):
        """Set sync metadata value, replacing any existing value for the key.

        Args:
            key: Metadata key
            value: Metadata value
        """
        cursor = self.conn.cursor()
        cursor.execute(
            """
            INSERT OR REPLACE INTO sync_metadata (key, value)
            VALUES (?, ?)
            """,
            (key, value),
        )
        self.conn.commit()

    def get_exchange_count(self) -> int:
        """Get total number of exchanges in database.

        Returns:
            Total exchange count
        """
        cursor = self.conn.cursor()
        cursor.execute("SELECT COUNT(*) as count FROM exchanges")
        return cursor.fetchone()["count"]

    def close(self):
        """Close database connection."""
        self.conn.close()

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always closes the connection."""
        self.close()
class HistoryLoader:
    """Loads conversation history from JSONL files into SQLite."""

    def __init__(self, db: "HistoryDatabase", base_dir: Optional[Path] = None):
        """Initialize loader.

        Args:
            db: HistoryDatabase instance
            base_dir: Base directory for logs, passed through to ExchangeReader
        """
        self.db = db
        self.reader = ExchangeReader(base_dir=base_dir)

    def _generate_exchange_id(self, exchange: "Exchange") -> str:
        """Generate a unique ID for an exchange.

        Uses a hash of timestamp + conversation_id + text to create a
        deterministic ID that won't change across reloads. Because the ID is
        deterministic, re-loading the same exchange is rejected by the
        database as a duplicate rather than stored twice.

        Args:
            exchange: Exchange to generate ID for

        Returns:
            ``ex_`` followed by the first 16 hex digits of the SHA-256 digest
        """
        id_string = (
            f"{exchange.timestamp.isoformat()}|{exchange.conversation_id}|{exchange.text}"
        )
        hash_digest = hashlib.sha256(id_string.encode()).hexdigest()
        return f"ex_{hash_digest[:16]}"

    def load_exchange(self, exchange: "Exchange") -> bool:
        """Load a single exchange into the database.

        Args:
            exchange: Exchange to load

        Returns:
            True if inserted, False if already exists
        """
        exchange_id = self._generate_exchange_id(exchange)

        # Metadata is stored as JSON, so convert it to a plain dict first.
        metadata_dict = exchange.metadata.to_dict() if exchange.metadata else None

        return self.db.insert_exchange(
            id=exchange_id,
            timestamp=exchange.timestamp.isoformat(),
            conversation_id=exchange.conversation_id,
            type=exchange.type,
            text=exchange.text,
            audio_file=exchange.audio_file,
            project_path=exchange.project_path,
            metadata=metadata_dict,
        )

    @staticmethod
    def _is_before(moment: datetime, cutoff: datetime) -> bool:
        """Return True if *moment* is earlier than *cutoff*.

        Python refuses to order a timezone-aware datetime against a naive one.
        When exactly one side is naive, both are converted with
        ``astimezone()``, which interprets a naive value as local time.
        """
        if (moment.utcoffset() is None) != (cutoff.utcoffset() is None):
            moment = moment.astimezone()
            cutoff = cutoff.astimezone()
        return moment < cutoff

    def _load_many(self, exchanges, since: Optional[datetime] = None) -> dict:
        """Insert each exchange and tally the outcome.

        Args:
            exchanges: Iterable of exchanges to load
            since: When given, exchanges earlier than this are skipped

        Returns:
            Dictionary with stats: {total, inserted, skipped, errors}
        """
        stats = {"total": 0, "inserted": 0, "skipped": 0, "errors": 0}

        for exchange in exchanges:
            stats["total"] += 1

            if since and self._is_before(exchange.timestamp, since):
                stats["skipped"] += 1
                continue

            try:
                inserted = self.load_exchange(exchange)
            except Exception as exc:
                # Best effort: one bad record must not abort the whole load.
                logger.exception("Error loading exchange: %s", exc)
                stats["errors"] += 1
            else:
                # A False result means the row already existed.
                stats["inserted" if inserted else "skipped"] += 1

        return stats

    def _mark_synced(self) -> None:
        """Record the current time as the last sync timestamp."""
        self.db.set_sync_metadata("last_sync_timestamp", datetime.now().isoformat())

    def load_all(self, since: Optional[datetime] = None) -> dict:
        """Load all exchanges from JSONL files into database.

        When ``since`` is omitted, the stored last-sync timestamp (if any) is
        used as the cutoff. The last-sync timestamp is set to the current time
        afterwards.

        Args:
            since: Only load exchanges after this datetime

        Returns:
            Dictionary with stats: {total, inserted, skipped, errors}
        """
        if since is None:
            last_sync = self.db.get_sync_metadata("last_sync_timestamp")
            if last_sync:
                since = datetime.fromisoformat(last_sync)
                logger.info(f"Resuming from last sync: {since}")

        stats = self._load_many(self.reader._read_all(), since=since)

        self._mark_synced()

        logger.info(
            f"Load complete: {stats['inserted']} inserted, "
            f"{stats['skipped']} skipped, {stats['errors']} errors"
        )

        return stats

    def load_date(self, target_date: date) -> dict:
        """Load exchanges for a specific date.

        Does not change the last-sync timestamp.

        Args:
            target_date: Date to load

        Returns:
            Dictionary with stats: {total, inserted, skipped, errors}
        """
        return self._load_many(self.reader.read_date(target_date))

    def load_recent(self, days: int = 7) -> dict:
        """Load exchanges from recent days.

        The last-sync timestamp is set to the current time afterwards.

        Args:
            days: Number of days to look back

        Returns:
            Dictionary with stats: {total, inserted, skipped, errors}
        """
        stats = self._load_many(self.reader.read_recent(days=days))
        self._mark_synced()
        return stats
class SearchResult:
    """Search result with exchange data and audio file path."""

    def __init__(self, row: sqlite3.Row):
        """Initialize from database row.

        All values are copied out of the row, so the result stays usable after
        the database connection is closed.

        Args:
            row: SQLite row from search query
        """
        self.id = row["id"]
        self.timestamp = datetime.fromisoformat(row["timestamp"])
        self.conversation_id = row["conversation_id"]
        self.type = row["type"]
        self.text = row["text"]
        self.audio_file = row["audio_file"]
        self.project_path = row["project_path"]

        # Metadata is stored as a JSON string (or NULL).
        self.metadata = json.loads(row["metadata"]) if row["metadata"] else None

    def get_audio_path(self, base_dir: Optional[Path] = None) -> Optional[Path]:
        """Resolve audio file to full path.

        Looks for ``audio/YYYY/MM/<audio_file>`` first (year and month taken
        from the exchange timestamp) and then for ``audio/<audio_file>``.

        Args:
            base_dir: Base directory for audio files. Defaults to ~/.voicemode

        Returns:
            Full path to an existing audio file, or None if the exchange has
            no audio file or it cannot be found on disk
        """
        if not self.audio_file:
            return None

        if base_dir is None:
            base_dir = Path.home() / ".voicemode"

        audio_root = base_dir / "audio"
        candidates = (
            # Dated layout first, then the flat layout as a fallback.
            audio_root / f"{self.timestamp:%Y}" / f"{self.timestamp:%m}" / self.audio_file,
            audio_root / self.audio_file,
        )
        for candidate in candidates:
            if candidate.exists():
                return candidate

        return None

    def __repr__(self):
        """String representation."""
        return (
            f"SearchResult(id={self.id}, timestamp={self.timestamp}, "
            f"type={self.type}, text={self.text[:50]}...)"
        )


class HistorySearcher:
    """Search conversation history using SQLite FTS5."""

    def __init__(self, db: "HistoryDatabase"):
        """Initialize searcher.

        Args:
            db: HistoryDatabase instance
        """
        self.db = db

    def search(
        self,
        query: str,
        exchange_type: Optional[Literal["stt", "tts"]] = None,
        target_date: Optional[date] = None,
        limit: int = 20,
    ) -> List["SearchResult"]:
        """Search exchanges using FTS5 full-text search.

        Args:
            query: Search query (supports FTS5 syntax)
            exchange_type: Filter by 'stt' or 'tts'
            target_date: Filter by specific date
            limit: Maximum number of results

        Returns:
            List of SearchResult objects, newest first

        Raises:
            sqlite3.OperationalError: If SQLite rejects the query, e.g. because
                ``query`` is not valid FTS5 syntax.
        """
        # Imported locally so this block does not depend on a file-level import.
        from datetime import timedelta

        cursor = self.db.conn.cursor()

        # All values are bound as parameters; only fixed SQL text is joined.
        conditions = ["exchanges_fts MATCH ?"]
        params: list = [query]

        if exchange_type:
            conditions.append("e.type = ?")
            params.append(exchange_type)

        if target_date:
            # Timestamps are stored as ISO strings and compared as text. Use a
            # half-open range on the bare date prefix: [day, next day). This
            # works for both the "T" and the space separator and includes
            # fractional seconds. Bounds like "YYYY-MM-DD 23:59:59" would
            # exclude every "YYYY-MM-DDTHH:MM:SS" value because "T" sorts
            # after " ".
            day_start = target_date.isoformat()
            day_end = (target_date + timedelta(days=1)).isoformat()
            conditions.append("e.timestamp >= ? AND e.timestamp < ?")
            params.extend([day_start, day_end])

        where_clause = " AND ".join(conditions)
        params.append(limit)

        sql = f"""
            SELECT
                e.id, e.timestamp, e.conversation_id, e.type,
                e.text, e.audio_file, e.project_path, e.metadata
            FROM exchanges e
            JOIN exchanges_fts fts ON e.rowid = fts.rowid
            WHERE {where_clause}
            ORDER BY e.timestamp DESC
            LIMIT ?
        """

        cursor.execute(sql, params)
        return [SearchResult(row) for row in cursor.fetchall()]

    def get_by_id(self, exchange_id: str) -> Optional["SearchResult"]:
        """Get exchange by ID.

        Args:
            exchange_id: Exchange ID to retrieve

        Returns:
            SearchResult or None if not found
        """
        cursor = self.db.conn.cursor()

        cursor.execute(
            """
            SELECT id, timestamp, conversation_id, type, text,
                   audio_file, project_path, metadata
            FROM exchanges
            WHERE id = ?
            """,
            (exchange_id,),
        )

        row = cursor.fetchone()
        return SearchResult(row) if row else None

    def get_recent(
        self,
        limit: int = 20,
        exchange_type: Optional[Literal["stt", "tts"]] = None,
    ) -> List["SearchResult"]:
        """Get recent exchanges without search.

        Args:
            limit: Maximum number of results
            exchange_type: Filter by 'stt' or 'tts'

        Returns:
            List of SearchResult objects, newest first
        """
        cursor = self.db.conn.cursor()

        conditions = []
        params: list = []

        if exchange_type:
            conditions.append("type = ?")
            params.append(exchange_type)

        where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else ""
        params.append(limit)

        sql = f"""
            SELECT id, timestamp, conversation_id, type, text,
                   audio_file, project_path, metadata
            FROM exchanges
            {where_clause}
            ORDER BY timestamp DESC
            LIMIT ?
        """

        cursor.execute(sql, params)
        return [SearchResult(row) for row in cursor.fetchall()]