Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ $(WHISPER_BUILD)/src/libwhisper.a:
e2e-test: build
./tacit process testdata/test_voice_recording.m4a
go test -tags integration -v -count=1 ./pkg/process/ -run TestClassifier
ifeq ($(UNAME_S),Darwin)
go test -tags "integration darwin" -v -count=1 -timeout 30s ./pkg/capture/ -run TestSpeaker_Stream_E2E
endif

INSTALL_DIR := $(HOME)/.local/bin

Expand All @@ -92,6 +95,10 @@ install: build
ifeq ($(UNAME_S),Darwin)
rm -rf $(INSTALL_DIR)/ten_vad.framework
cp -R ten_vad.framework $(INSTALL_DIR)/ten_vad.framework
xattr -dr com.apple.quarantine $(INSTALL_DIR)/tacit-dev 2>/dev/null || true
xattr -dr com.apple.quarantine $(INSTALL_DIR)/ten_vad.framework 2>/dev/null || true
codesign --force --deep --sign - $(INSTALL_DIR)/tacit-dev 2>/dev/null || true
codesign --force --deep --sign - $(INSTALL_DIR)/ten_vad.framework 2>/dev/null || true
endif
@echo "Installed to $(INSTALL_DIR)/tacit-dev"

Expand Down
167 changes: 157 additions & 10 deletions cmd/tacit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"syscall"
"time"

"github.com/sangmin7648/tacit/pkg/capture"
"github.com/sangmin7648/tacit/pkg/config"
"github.com/sangmin7648/tacit/pkg/daemon"
"github.com/sangmin7648/tacit/pkg/pipeline"
Expand Down Expand Up @@ -108,7 +109,7 @@ func cmdSetup() {
var llmProvider, llmModel string

// Step 1: LLM provider
fmt.Println("Step 1/3: Select LLM provider for summarization")
fmt.Println("Step 1/4: Select LLM provider for summarization")
providerIdx := selectOption([]string{"ollama", "claude"}, 0)
fmt.Println()

Expand All @@ -117,7 +118,7 @@ func cmdSetup() {
llmProvider = "claude"

// Step 2: Claude model
fmt.Println("Step 2/3: Select Claude model")
fmt.Println("Step 2/4: Select Claude model")
modelIdx := selectOption([]string{"haiku", "sonnet", "opus"}, 0)
fmt.Println()
switch modelIdx {
Expand All @@ -134,7 +135,7 @@ func cmdSetup() {

// Step 2: Ollama model (text input)
reader := bufio.NewReader(os.Stdin)
fmt.Println("Step 2/3: Enter Ollama model name")
fmt.Println("Step 2/4: Enter Ollama model name")
fmt.Print(" Model name [qwen3.5]: ")
input := strings.TrimSpace(readLine(reader))
fmt.Println()
Expand All @@ -146,24 +147,40 @@ func cmdSetup() {
}

// Step 3: AI agent for skill installation (only claude supported)
fmt.Println("Step 3/3: Select AI agent for skill installation")
fmt.Println("Step 3/4: Select AI agent for skill installation")
agentNames := []string{"claude"}
agentIdx := selectOption(agentNames, 0)
skillAgent := agentNames[agentIdx]
fmt.Println()

// Step 4: Audio sources (multi-select) — at least one must be selected.
var captureMic, captureSpeaker bool
for {
fmt.Println("Step 4/4: Select audio sources to listen (Space to toggle, Enter to confirm)")
sourceSelected := selectMultiple([]string{"mic", "speaker"}, []bool{true, true})
fmt.Println()
captureMic, captureSpeaker = sourceSelected[0], sourceSelected[1]
if captureMic || captureSpeaker {
break
}
fmt.Println(" At least one source must be selected. Please try again.")
fmt.Println()
}

fmt.Println()
fmt.Printf(" LLM provider : %s\n", llmProvider)
fmt.Printf(" LLM model : %s\n", llmModel)
fmt.Printf(" Skill agent : %s\n", skillAgent)
fmt.Printf(" LLM provider : %s\n", llmProvider)
fmt.Printf(" LLM model : %s\n", llmModel)
fmt.Printf(" Skill agent : %s\n", skillAgent)
fmt.Printf(" Capture mic : %v\n", captureMic)
fmt.Printf(" Capture speaker: %v\n", captureSpeaker)
fmt.Println()

// Write LLM settings to config-override.yaml
// Write settings to config-override.yaml
overridePath := config.OverridePath()
if err := os.MkdirAll(filepath.Dir(overridePath), 0755); err != nil {
log.Fatalf("Failed to create config directory: %v", err)
}
if err := config.WriteSetupOverride(overridePath, llmProvider, llmModel, skillAgent); err != nil {
if err := config.WriteSetupOverride(overridePath, llmProvider, llmModel, skillAgent, captureMic, captureSpeaker); err != nil {
log.Fatalf("Failed to write config override: %v", err)
}
fmt.Printf("Saved settings: %s\n", overridePath)
Expand Down Expand Up @@ -285,6 +302,100 @@ func selectOption(options []string, defaultIdx int) int {
return cur
}

// selectMultiple presents an interactive checkbox menu on stdout and returns
// the final checked state of every option.
//
// Keys: Up/Down arrows move the cursor, Space toggles the highlighted item and
// Enter confirms. When stdin cannot be switched to raw mode (term.MakeRaw
// returns an error) it falls back to a numbered list: the user types the
// numbers of the items to toggle, separated by whitespace, and presses Enter.
//
// options are the labels to display. defaultSelected gives the initial checked
// state per option; missing trailing entries start unchecked and surplus
// entries are ignored. The caller's defaultSelected slice is never modified.
//
// The returned slice always has len(options) elements. An empty options list
// returns an empty slice immediately: there is nothing to draw, and a Space
// key press would otherwise index past the end of the selection slice.
//
// If reading stdin fails or reports zero bytes, the selection made so far is
// returned unchanged.
func selectMultiple(options []string, defaultSelected []bool) []bool {
	selected := make([]bool, len(options))
	copy(selected, defaultSelected)
	if len(options) == 0 {
		return selected
	}

	// mark returns the text shown inside the brackets for option i.
	mark := func(i int) string {
		if selected[i] {
			return "x"
		}
		return " "
	}

	fd := int(os.Stdin.Fd())
	oldState, err := term.MakeRaw(fd)
	if err != nil {
		// Fallback: numbered list
		byNumber := make(map[string]int, len(options))
		for i, o := range options {
			fmt.Printf(" [%s] %d) %s\n", mark(i), i+1, o)
			byNumber[fmt.Sprint(i+1)] = i
		}
		fmt.Print("Toggle items by number (space-separated), then press Enter: ")
		reader := bufio.NewReader(os.Stdin)
		// strings.Fields already drops leading/trailing whitespace. Tokens
		// that are not a listed number are ignored; a number given twice
		// toggles twice.
		for _, tok := range strings.Fields(readLine(reader)) {
			if i, ok := byNumber[tok]; ok {
				selected[i] = !selected[i]
			}
		}
		return selected
	}
	// Best effort: nothing useful can be done if restoring the terminal fails.
	defer term.Restore(fd, oldState)

	cur := 0

	// render prints one line per option. Every line starts with "\r" plus an
	// erase-line sequence so it overwrites whatever was there before. With
	// highlight set, the row under the cursor is drawn in cyan with a ">".
	render := func(highlight bool) {
		for i, o := range options {
			fmt.Print("\r\033[2K")
			if highlight && i == cur {
				fmt.Printf(" \033[36m> [%s] %s\033[0m\n", mark(i), o)
			} else {
				fmt.Printf(" [%s] %s\n", mark(i), o)
			}
		}
	}
	// redraw moves the cursor back to the first menu line and repaints.
	redraw := func(highlight bool) {
		fmt.Printf("\033[%dA", len(options))
		render(highlight)
	}

	render(true)

	// NOTE(review): raw mode typically disables terminal signal generation,
	// so Ctrl+C would arrive here as byte 0x03 and be ignored like any other
	// unbound key — confirm whether the menu should offer a way to abort.
	buf := make([]byte, 4)
	for {
		n, readErr := os.Stdin.Read(buf)
		if readErr != nil || n == 0 {
			return selected
		}
		key := string(buf[:n])
		switch {
		case key == "\r" || key == "\n": // Enter — confirm
			// Repaint once without the cursor highlight so the final
			// state stays on screen.
			redraw(false)
			return selected
		case key == " ": // Space — toggle
			selected[cur] = !selected[cur]
			redraw(true)
		case strings.HasPrefix(key, "\x1b[A"): // Up
			if cur > 0 {
				cur--
				redraw(true)
			}
		case strings.HasPrefix(key, "\x1b[B"): // Down
			if cur < len(options)-1 {
				cur++
				redraw(true)
			}
		}
	}
}

// cmdProcess handles the "process" subcommand: audio file → knowledge entry.
func cmdProcess(cfg *config.Config) {
if len(os.Args) < 3 {
Expand Down Expand Up @@ -342,6 +453,40 @@ func cmdListen(cfg *config.Config) {
}
defer p.Close()

// Build audio sources.
var sources []capture.AudioSource
var sourceLabels []string

if cfg.CaptureMic {
mic, err := capture.New()
if err != nil {
log.Fatalf("Failed to init microphone: %v", err)
}
defer mic.Close()
sources = append(sources, mic)
sourceLabels = append(sourceLabels, "mic")
}

if cfg.CaptureSpeaker {
spk, err := capture.NewSpeaker()
if err != nil {
log.Printf("Warning: system audio capture unavailable: %v", err)
if len(sources) == 0 {
log.Fatalf("No audio sources available.")
}
log.Printf("Continuing with microphone only.")
} else {
defer spk.Close()
sources = append(sources, spk)
sourceLabels = append(sourceLabels, "speaker")
log.Printf("System audio capture enabled (requires Screen Recording permission)")
}
}

if len(sources) == 0 {
log.Fatalf("No audio sources configured. Enable capture_mic or capture_speaker in config.")
}

// Setup signal handling for graceful shutdown
ctx, cancel := context.WithCancel(context.Background())
sigCh := make(chan os.Signal, 1)
Expand All @@ -357,7 +502,7 @@ func cmdListen(cfg *config.Config) {
log.Printf("Knowledge base: %s", config.BaseDir())
log.Printf("Press Ctrl+C to stop")

if err := p.Run(ctx); err != nil {
if err := p.Run(ctx, sources, sourceLabels); err != nil {
log.Printf("Pipeline error: %v", err)
}
log.Printf("tacit daemon stopped")
Expand Down Expand Up @@ -425,6 +570,8 @@ func cmdConfigView(cfg *config.Config) {
fmt.Printf("%-22s %-20s %s\n", "llm_provider:", cfg.LLMProvider, tag("llm_provider"))
fmt.Printf("%-22s %-20s %s\n", "llm_model:", cfg.LLMModel, tag("llm_model"))
fmt.Printf("%-22s %-20s %s\n", "skill_agent:", cfg.SkillAgent, tag("skill_agent"))
fmt.Printf("%-22s %-20v %s\n", "capture_mic:", cfg.CaptureMic, tag("capture_mic"))
fmt.Printf("%-22s %-20v %s\n", "capture_speaker:", cfg.CaptureSpeaker, tag("capture_speaker"))
}

// cmdConfigEdit opens the user override config file in a text editor.
Expand Down
23 changes: 20 additions & 3 deletions pkg/audio/segment.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,39 @@ type SegmentBuffer struct {
startTime time.Time
sampleRate int // typically 16000
minDuration time.Duration // minimum speech duration to keep
maxDuration time.Duration // pre-allocation cap; 0 = no pre-alloc
isActive bool
}

// NewSegmentBuffer creates a buffer with the given sample rate and minimum duration.
func NewSegmentBuffer(sampleRate int, minDuration time.Duration) *SegmentBuffer {
// NewSegmentBuffer returns a new, inactive SegmentBuffer holding no samples,
// configured with the given sample rate, the minimum speech duration to keep,
// and an optional maximum duration.
//
// A positive maxDuration makes Start reserve capacity for that much audio and
// reuse it for later segments, so samples appended up to that length need no
// reallocation. Zero disables the pre-allocation.
func NewSegmentBuffer(sampleRate int, minDuration, maxDuration time.Duration) *SegmentBuffer {
	buf := new(SegmentBuffer)
	buf.sampleRate = sampleRate
	buf.minDuration = minDuration
	buf.maxDuration = maxDuration
	return buf
}

// Start opens a new speech segment: it marks the buffer active, stamps the
// start time with the current wall clock and discards any samples left from
// the previous segment while keeping their backing array.
//
// When maxDuration is positive the backing array must be able to hold
// maxDuration worth of samples at sampleRate. If the current capacity is
// already large enough it is reused; otherwise a new array of exactly that
// capacity is allocated.
func (b *SegmentBuffer) Start() {
	b.startTime = time.Now()
	b.isActive = true

	// Truncate in place; this is all that is needed when no cap is set.
	b.samples = b.samples[:0]
	if b.maxDuration <= 0 {
		return
	}

	want := int(b.maxDuration.Seconds() * float64(b.sampleRate))
	if cap(b.samples) >= want {
		return
	}
	b.samples = make([]float32, 0, want)
}

// Append adds samples to the buffer.
Expand Down
12 changes: 6 additions & 6 deletions pkg/audio/segment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func makeSamples(n int, v float32) []float32 {

func TestNewSegmentBuffer(t *testing.T) {
minDur := 2 * time.Second
buf := NewSegmentBuffer(testSampleRate, minDur)
buf := NewSegmentBuffer(testSampleRate, minDur, 0)

if buf.sampleRate != testSampleRate {
t.Errorf("expected sampleRate %d, got %d", testSampleRate, buf.sampleRate)
Expand All @@ -36,7 +36,7 @@ func TestNewSegmentBuffer(t *testing.T) {

func TestSegmentBuffer_ShortSpeech(t *testing.T) {
minDur := 2 * time.Second
buf := NewSegmentBuffer(testSampleRate, minDur)
buf := NewSegmentBuffer(testSampleRate, minDur, 0)

buf.Start()

Expand All @@ -58,7 +58,7 @@ func TestSegmentBuffer_ShortSpeech(t *testing.T) {

func TestSegmentBuffer_ValidSpeech(t *testing.T) {
minDur := 2 * time.Second
buf := NewSegmentBuffer(testSampleRate, minDur)
buf := NewSegmentBuffer(testSampleRate, minDur, 0)

buf.Start()

Expand Down Expand Up @@ -95,7 +95,7 @@ func TestSegmentBuffer_ValidSpeech(t *testing.T) {
}

func TestSegmentBuffer_Duration(t *testing.T) {
buf := NewSegmentBuffer(testSampleRate, time.Second)
buf := NewSegmentBuffer(testSampleRate, time.Second, 0)

buf.Start()

Expand Down Expand Up @@ -125,7 +125,7 @@ func TestSegmentBuffer_Duration(t *testing.T) {
}

func TestSegmentBuffer_Reset(t *testing.T) {
buf := NewSegmentBuffer(testSampleRate, time.Second)
buf := NewSegmentBuffer(testSampleRate, time.Second, 0)

buf.Start()
buf.Append(makeSamples(testSampleRate*2, 0.5))
Expand Down Expand Up @@ -155,7 +155,7 @@ func TestSegmentBuffer_Reset(t *testing.T) {

func TestSegmentBuffer_MultipleSegments(t *testing.T) {
minDur := time.Second
buf := NewSegmentBuffer(testSampleRate, minDur)
buf := NewSegmentBuffer(testSampleRate, minDur, 0)

// First segment: 2 seconds.
buf.Start()
Expand Down
13 changes: 13 additions & 0 deletions pkg/capture/source.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package capture

import "context"

// AudioSource provides a stream of 16kHz mono int16 PCM audio samples.
// Both Mic and Speaker implement this interface, which lets callers treat
// every capture device the same way (for example by collecting several
// sources in one []AudioSource).
type AudioSource interface {
// Stream starts capturing and returns a receive-only channel of sample
// chunks; each chunk is a slice of int16 samples.
// The channel is closed when ctx is cancelled.
// NOTE(review): presumably a non-nil error means capture could not be
// started and the returned channel must not be used — confirm against
// the implementations.
Stream(ctx context.Context) (<-chan []int16, error)
// Close releases all resources. It reports no error.
Close()
}
Loading