Skip to content

Commit aad0a98

Browse files
HyeonsangKim and claude committed
feat(session): Add audio interrupt, STT auto-capture, and recognition badge timer
- Add interrupt button (stop AI speech mid-sentence) with isInterrupted flag
- Add "이건 뭐야?" STT keyword detection for voice-triggered photo capture
- Add 3-second auto-dismiss timer for recognition badge
- Add session.stop i18n translation key
- Reset isInterrupted on disconnect to prevent stale state on reconnect

Quality Score: 86/100

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 26ee6a3 commit aad0a98

4 files changed

Lines changed: 82 additions & 7 deletions

File tree

src/app/session/page.tsx

Lines changed: 32 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
import { useState, useCallback, useEffect, useRef } from 'react';
99
import { useRouter, useSearchParams } from 'next/navigation';
10-
import { Mic, MicOff, BookOpen, Camera, CameraOff, LogOut } from 'lucide-react';
10+
import { Mic, MicOff, BookOpen, Camera, CameraOff, LogOut, Square } from 'lucide-react';
1111
import { cn } from '@web/lib/utils';
1212
import { useT } from '@web/lib/i18n';
1313
import type { Locale } from '@shared/i18n';
@@ -34,6 +34,7 @@ export default function MainPage() {
3434
disconnect,
3535
toggleMic,
3636
toggleCamera,
37+
interrupt,
3738
requestTopicDetail,
3839
sendTextMessage,
3940
sendPhoto,
@@ -75,6 +76,7 @@ export default function MainPage() {
7576
const [agentTransition, setAgentTransition] = useState<AgentSwitchData | null>(null);
7677
const [isAgentTransitioning, setIsAgentTransitioning] = useState(false);
7778
const [showSaved, setShowSaved] = useState<string | null>(null);
79+
const [showRecognizedBadge, setShowRecognizedBadge] = useState(false);
7880

7981
const cameraViewRef = useRef<CameraViewRef>(null);
8082
const prevAgentRef = useRef(activeAgent);
@@ -88,6 +90,17 @@ export default function MainPage() {
8890
// eslint-disable-next-line react-hooks/exhaustive-deps
8991
}, []);
9092

93+
// 인식 배지 3초 후 자동 해제
94+
useEffect(() => {
95+
if (currentArtifact) {
96+
setShowRecognizedBadge(true);
97+
const timer = setTimeout(() => setShowRecognizedBadge(false), 3000);
98+
return () => clearTimeout(timer);
99+
} else {
100+
setShowRecognizedBadge(false);
101+
}
102+
}, [currentArtifact]);
103+
91104
// ── Onboarding handlers ─────────────────────────────────
92105

93106
const handleLanguageSelect = useCallback((selected: Locale) => {
@@ -187,13 +200,12 @@ export default function MainPage() {
187200
}
188201
}, [diaryResult, router]);
189202

190-
// 복원 완료 시점에만 카메라 한 번 닫기 (이후 사용자가 다시 열 수 있음)
203+
// 복원 완료 시에만 카메라 닫기
191204
useEffect(() => {
192205
if (restorationState.status === 'ready' && isCameraOpen) {
193206
setIsCameraOpen(false);
194207
toggleCamera(false);
195208
}
196-
// isCameraOpen / toggleCamera 는 의도적으로 dep 제외 — status 전환 시 1회만 실행
197209
// eslint-disable-next-line react-hooks/exhaustive-deps
198210
}, [restorationState.status]);
199211

@@ -311,7 +323,22 @@ export default function MainPage() {
311323
/>
312324
</div>
313325
</div>
314-
<AudioVisualizer state={audioState} />
326+
<div className="flex items-center gap-2">
327+
<div className="flex-1">
328+
<AudioVisualizer state={audioState} />
329+
</div>
330+
{audioState === 'speaking' && (
331+
<button
332+
onClick={interrupt}
333+
className="w-7 h-7 rounded-full bg-red-500/20 border border-red-500/30
334+
flex items-center justify-center hover:bg-red-500/30 transition-colors
335+
animate-fade-in shrink-0"
336+
aria-label={t('session.stop')}
337+
>
338+
<Square className="w-3 h-3 text-red-400 fill-red-400" />
339+
</button>
340+
)}
341+
</div>
315342
</div>
316343
</div>
317344

@@ -368,7 +395,7 @@ export default function MainPage() {
368395
<CameraView
369396
ref={cameraViewRef}
370397
isScanning={false}
371-
isRecognized={!!currentArtifact}
398+
isRecognized={showRecognizedBadge}
372399
isBlurred={false}
373400
onCapturePhoto={() => cameraViewRef.current?.capturePhoto() ?? ''}
374401
/>

src/shared/i18n/translations.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,10 @@ const translations = {
264264
ko: '종료',
265265
en: 'Exit',
266266
},
267+
'session.stop': {
268+
ko: '멈춰',
269+
en: 'Stop',
270+
},
267271
'session.exitConfirm': {
268272
ko: '세션을 종료하시겠습니까?',
269273
en: 'End this session?',

src/web/hooks/use-live-session.ts

Lines changed: 41 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,24 @@ function toCivilization(value: string): Civilization {
5555
return VALID_CIVILIZATIONS.has(value) ? (value as Civilization) : 'Other';
5656
}
5757

58+
/**
59+
* "이건 뭐야?" 계열 키워드 감지.
60+
* 카메라 열린 상태에서 이 패턴이 감지되면 자동 캡처 트리거.
61+
*/
62+
const WHAT_IS_THIS_PATTERNS = [
63+
// Korean: "이거/이건/이게" + "뭐야/뭐지/뭔지/뭘까"
64+
/(?:||)\s*/,
65+
/(?:|)\s*(?:||)/,
66+
/(?:||)\s*(?:|)/,
67+
// English
68+
/what(?:'s| is) this/i,
69+
/what(?:'s| is) that/i,
70+
];
71+
72+
function isWhatIsThisQuery(text: string): boolean {
73+
return WHAT_IS_THIS_PATTERNS.some((pattern) => pattern.test(text));
74+
}
75+
5876
// ── 이벤트 핸들러 팩토리 ────────────────────────────────────
5977

6078
interface SessionRefs {
@@ -65,6 +83,9 @@ interface SessionRefs {
6583
currentArtifact: React.RefObject<ArtifactSummary | null>;
6684
reconnect: React.RefObject<ReconnectManager | null>;
6785
geoCoords: React.RefObject<{ lat: number; lng: number }>;
86+
cameraCapture: React.RefObject<CameraCapture | null>;
87+
isCameraOpen: React.RefObject<boolean>;
88+
lastAutoCaptureTime: React.RefObject<number>;
6889
}
6990

7091
interface SessionSetters {
@@ -168,6 +189,19 @@ function createSessionEvents(refs: SessionRefs, setters: SessionSetters): LiveSe
168189
const cleaned = cleanSttText(data.text);
169190
if (!cleaned) return;
170191

192+
// "이건 뭐야?" 감지 → 카메라 열려있으면 자동 캡처 (5초 쿨다운)
193+
if (
194+
refs.isCameraOpen.current &&
195+
isWhatIsThisQuery(cleaned) &&
196+
Date.now() - refs.lastAutoCaptureTime.current > 5000
197+
) {
198+
const photo = refs.cameraCapture.current?.capturePhoto();
199+
if (photo) {
200+
refs.lastAutoCaptureTime.current = Date.now();
201+
refs.liveSession.current?.sendPhoto(photo, cleaned);
202+
}
203+
}
204+
171205
setters.setTranscript(prev => {
172206
const last = prev[prev.length - 1];
173207
// 같은 유저 턴이면 이어붙이기 (3초 내)
@@ -338,6 +372,8 @@ export function useLiveSession(): UseLiveSessionReturn {
338372
const currentArtifactRef = useRef<ArtifactSummary | null>(null);
339373
const userIdRef = useRef<string>('');
340374
const geoCoordsRef = useRef<{ lat: number; lng: number }>({ lat: 0, lng: 0 });
375+
const isCameraOpenRef = useRef(false);
376+
const lastAutoCaptureTimeRef = useRef(0);
341377

342378
// 브라우저 Geolocation으로 좌표 추적
343379
useEffect(() => {
@@ -415,6 +451,9 @@ export function useLiveSession(): UseLiveSessionReturn {
415451
currentArtifact: currentArtifactRef,
416452
reconnect: reconnectRef,
417453
geoCoords: geoCoordsRef,
454+
cameraCapture: cameraCaptureRef,
455+
isCameraOpen: isCameraOpenRef,
456+
lastAutoCaptureTime: lastAutoCaptureTimeRef,
418457
};
419458
const setters: SessionSetters = {
420459
setSessionState, setTranscript, setCurrentArtifact,
@@ -516,8 +555,8 @@ export function useLiveSession(): UseLiveSessionReturn {
516555
}, []);
517556

518557
const toggleCamera = useCallback((enabled: boolean) => {
519-
// 카메라 ON/OFF는 프리뷰만 제어 — 프레임 스트리밍 없음
520-
// 인식은 캡처 버튼(sendPhoto)으로만 트리거
558+
isCameraOpenRef.current = enabled;
559+
// 카메라 ON/OFF는 프리뷰만 제어 — 인식은 캡처 버튼(sendPhoto)으로만 트리거
521560
if (!enabled) {
522561
cameraCaptureRef.current?.stopFrameLoop();
523562
}

src/web/lib/gemini/live-api.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ export class LiveSession {
3838
private userId = '';
3939
private visits: DiaryVisitInput[] = [];
4040
private lastCameraFrame: string | null = null;
41+
private isInterrupted = false;
4142

4243
constructor(events: LiveSessionEvents) {
4344
this.events = events;
@@ -101,6 +102,7 @@ export class LiveSession {
101102
this.ai = null;
102103
this.visits = [];
103104
this.lastCameraFrame = null;
105+
this.isInterrupted = false;
104106
this.updateStatus('disconnected');
105107
}
106108

@@ -162,6 +164,7 @@ export class LiveSession {
162164
}
163165

164166
interrupt(): void {
167+
this.isInterrupted = true;
165168
if (this.state.audioState === 'speaking') {
166169
this.updateAudioState('idle');
167170
}
@@ -221,6 +224,7 @@ export class LiveSession {
221224

222225
// 3. 턴 완료 — 현재 트랜스크립트를 확정(isFinal)
223226
if (message.serverContent?.turnComplete) {
227+
this.isInterrupted = false; // 인터럽트 플래그 리셋
224228
this.events.onTranscript({ text: '', delta: '', isFinal: true });
225229
this.updateAudioState('idle');
226230
}
@@ -520,6 +524,7 @@ export class LiveSession {
520524
// --- Helpers ---
521525

522526
private handleAudioOutput(base64: string): void {
527+
if (this.isInterrupted) return; // 인터럽트 중 — 오디오 드롭
523528
if (this.onAudioData) {
524529
this.onAudioData(base64);
525530
}

0 commit comments

Comments
 (0)